274 files changed, 17384 insertions, 3470 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net-phydev b/Documentation/ABI/testing/sysfs-class-net-phydev
index 2a5723343aba..206cbf538b59 100644
--- a/Documentation/ABI/testing/sysfs-class-net-phydev
+++ b/Documentation/ABI/testing/sysfs-class-net-phydev
@@ -41,3 +41,11 @@ Description:
 		xgmii, moca, qsgmii, trgmii, 1000base-x, 2500base-x, rxaui,
 		xaui, 10gbase-kr, unknown
 
+What:		/sys/class/mdio_bus/<bus>/<device>/phy_standalone
+Date:		May 2019
+KernelVersion:	5.3
+Contact:	netdev@vger.kernel.org
+Description:
+		Boolean value indicating whether the PHY device is used in
+		standalone mode, without a net_device associated, by PHYLINK.
+		Attribute created only when this is the case.
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index e88c3641d613..5475682bf06e 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -43,6 +43,7 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt.
   * "rxaui"
   * "xaui"
   * "10gbase-kr" (10GBASE-KR, XFI, SFI)
+  * "usxgmii"
 - phy-connection-type: the same as "phy-mode" property but described in the
   Devicetree Specification;
 - phy-handle: phandle, specifies a reference to a node representing a PHY
diff --git a/Documentation/devicetree/bindings/net/qca,ar71xx.txt b/Documentation/devicetree/bindings/net/qca,ar71xx.txt
new file mode 100644
index 000000000000..2a33e71ba72b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qca,ar71xx.txt
@@ -0,0 +1,45 @@
+Required properties:
+- compatible:	Should be "qca,<soc>-eth". Currently support compatibles are:
+		qca,ar7100-eth - Atheros AR7100
+		qca,ar7240-eth - Atheros AR7240
+		qca,ar7241-eth - Atheros AR7241
+		qca,ar7242-eth - Atheros AR7242
+		qca,ar9130-eth - Atheros AR9130
+		qca,ar9330-eth - Atheros AR9330
+		qca,ar9340-eth - Atheros AR9340
+		qca,qca9530-eth - Qualcomm Atheros QCA9530
+		qca,qca9550-eth - Qualcomm Atheros QCA9550
+		qca,qca9560-eth - Qualcomm Atheros QCA9560
+
+- reg : Address and length of the register set for the device
+- interrupts : Should contain eth interrupt
+- phy-mode : See ethernet.txt file in the same directory
+- clocks: the clock used by the core
+- clock-names: the names of the clock listed in the clocks property. These are
+	"eth" and "mdio".
+- resets: Should contain phandles to the reset signals
+- reset-names: Should contain the names of reset signal listed in the resets
+		property. These are "mac" and "mdio"
+
+Optional properties:
+- phy-handle : phandle to the PHY device connected to this device.
+- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
+  Use instead of phy-handle.
+
+Optional subnodes:
+- mdio : specifies the mdio bus, used as a container for phy nodes
+  according to phy.txt in the same directory
+
+Example:
+
+ethernet@1a000000 {
+	compatible = "qca,ar9330-eth";
+	reg = <0x1a000000 0x200>;
+	interrupts = <5>;
+	resets = <&rst 13>, <&rst 23>;
+	reset-names = "mac", "mdio";
+	clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_MDIO>;
+	clock-names = "eth", "mdio";
+
+	phy-mode = "gmii";
+};
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 9ef9338aaee1..db6aa3f2215b 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -11,6 +11,14 @@ Required properties:
 	- ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
 		for applicable values
 
+Note: If the interface type is PHY_INTERFACE_MODE_RGMII the TX/RX clock delays
+      will be left at their default values, as set by the PHY's pin strapping.
+      The default strapping will use a delay of 2.00 ns.  Thus
+      PHY_INTERFACE_MODE_RGMII, by default, does not behave as RGMII with no
+      internal delay, but as PHY_INTERFACE_MODE_RGMII_ID.  The device tree
+      should use "rgmii-id" if internal delays are desired as this may be
+      changed in future to cause "rgmii" mode to disable delays.
+
 Optional property:
 	- ti,min-output-impedance - MAC Interface Impedance control to set
 				    the programmable output impedance to
@@ -25,8 +33,10 @@ Optional property:
 				    software needs to take when this pin is
 				    strapped in these modes. See data manual
 				    for details.
-	- ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
-				    for applicable values.
+	- ti,clk-output-sel - Muxing option for CLK_OUT pin.  See dt-bindings/net/ti-dp83867.h
+			      for applicable values.  The CLK_OUT pin can also
+			      be disabled by this property.  When omitted, the
+			      PHY's default will be left as is.
 
 Note: ti,min-output-impedance and ti,max-output-impedance are mutually
       exclusive. When both properties are present ti,max-output-impedance
diff --git a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
index 454c937076a2..6ec053449278 100644
--- a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
+++ b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@@ -4,6 +4,7 @@ General Properties:
 
   - compatible   Should be "fsl,etsec-ptp" for eTSEC
                  Should be "fsl,fman-ptp-timer" for DPAA FMan
+		 Should be "fsl,enetc-ptp" for ENETC
   - reg          Offset and length of the register set for the device
   - interrupts   There should be at least two interrupts. Some devices
                  have as many as four PTP related interrupts.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 14fe93049d28..a73b3a02e49a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -648,6 +648,26 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER
 	0 to disable the blackhole detection.
 	By default, it is set to 1hr.
 
+tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
+	The list consists of a primary key and an optional backup key. The
+	primary key is used for both creating and validating cookies, while the
+	optional backup key is only used for validating cookies. The purpose of
+	the backup key is to maximize TFO validation when keys are rotated.
+
+	A randomly chosen primary key may be configured by the kernel if
+	the tcp_fastopen sysctl is set to 0x400 (see above), or if the
+	TCP_FASTOPEN setsockopt() optname is set and a key has not been
+	previously configured via sysctl. If keys are configured via
+	setsockopt() by using the TCP_FASTOPEN_KEY optname, then those
+	per-socket keys will be used instead of any keys that are specified via
+	sysctl.
+
+	A key is specified as 4 8-digit hexadecimal integers which are separated
+	by a '-' as: xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx. Leading zeros may be
+	omitted. A primary and a backup key may be specified by separating them
+	by a comma. If only one key is specified, it becomes the primary key and
+	any previously configured backup keys are removed.
+
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 127. Default value
diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst
index 5bd26cb07244..91446b431b70 100644
--- a/Documentation/networking/sfp-phylink.rst
+++ b/Documentation/networking/sfp-phylink.rst
@@ -98,6 +98,7 @@ this documentation.
 4. Add::
 
 	struct phylink *phylink;
+	struct phylink_config phylink_config;
 
    to the driver's private data structure.  We shall refer to the
    driver's private data pointer as ``priv`` below, and the driver's
@@ -223,8 +224,10 @@ this documentation.
    .. code-block:: c
 
 	struct phylink *phylink;
+	priv->phylink_config.dev = &dev.dev;
+	priv->phylink_config.type = PHYLINK_NETDEV;
 
-	phylink = phylink_create(dev, node, phy_mode, &phylink_ops);
+	phylink = phylink_create(&priv->phylink_config, node, phy_mode, &phylink_ops);
 	if (IS_ERR(phylink)) {
 		err = PTR_ERR(phylink);
 		fail probe;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index b04581249f0b..4cdf84c63320 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -431,6 +431,12 @@
 				compatible = "fsl,enetc";
 				reg = <0x000100 0 0 0 0>;
 			};
+			ethernet@0,4 {
+				compatible = "fsl,enetc-ptp";
+				reg = <0x000400 0 0 0 0>;
+				clocks = <&clockgen 4 0>;
+				little-endian;
+			};
 		};
 	};
 };
diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi
index 2bae201aa365..63a9f33aa43e 100644
--- a/arch/mips/boot/dts/qca/ar9331.dtsi
+++ b/arch/mips/boot/dts/qca/ar9331.dtsi
@@ -116,6 +116,32 @@
 			};
 		};
 
+		eth0: ethernet@19000000 {
+			compatible = "qca,ar9330-eth";
+			reg = <0x19000000 0x200>;
+			interrupts = <4>;
+
+			resets = <&rst 9>, <&rst 22>;
+			reset-names = "mac", "mdio";
+			clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+			clock-names = "eth", "mdio";
+
+			status = "disabled";
+		};
+
+		eth1: ethernet@1a000000 {
+			compatible = "qca,ar9330-eth";
+			reg = <0x1a000000 0x200>;
+			interrupts = <5>;
+
+			resets = <&rst 13>, <&rst 23>;
+			reset-names = "mac", "mdio";
+			clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+			clock-names = "eth", "mdio";
+
+			status = "disabled";
+		};
+
 		usb: usb@1b000100 {
 			compatible = "chipidea,usb2";
 			reg = <0x1b000000 0x200>;
diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
index e7af2cf5f4c1..77bab823eb3b 100644
--- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
+++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
@@ -76,3 +76,11 @@
 		reg = <0>;
 	};
 };
+
+&eth0 {
+	status = "okay";
+};
+
+&eth1 {
+	status = "okay";
+};
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 272800692088..964687534754 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -337,7 +337,7 @@ static int i2c_acpi_find_match_device(struct device *dev, void *data)
 	return ACPI_COMPANION(dev) == data;
 }
 
-static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
+struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
 {
 	struct device *dev;
 
@@ -345,6 +345,7 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
 			      i2c_acpi_find_match_adapter);
 	return dev ? i2c_verify_adapter(dev) : NULL;
 }
+EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle);
 
 static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
 {
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 083c2c00a8e9..5ebf3c53b3fb 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -312,7 +312,8 @@ static void qedr_free_mem_sb(struct qedr_dev *dev,
 			     struct qed_sb_info *sb_info, int sb_id)
 {
 	if (sb_info->sb_virt) {
-		dev->ops->common->sb_release(dev->cdev, sb_info, sb_id);
+		dev->ops->common->sb_release(dev->cdev, sb_info, sb_id,
+					     QED_SB_TYPE_CNQ);
 		dma_free_coherent(&dev->pdev->dev, sizeof(*sb_info->sb_virt),
 				  (void *)sb_info->sb_virt, sb_info->sb_phys);
 	}
@@ -504,11 +505,13 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
 static void qedr_sync_free_irqs(struct qedr_dev *dev)
 {
 	u32 vector;
+	u16 idx;
 	int i;
 
 	for (i = 0; i < dev->int_info.used_cnt; i++) {
 		if (dev->int_info.msix_cnt) {
-			vector = dev->int_info.msix[i * dev->num_hwfns].vector;
+			idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
+			vector = dev->int_info.msix[idx].vector;
 			synchronize_irq(vector);
 			free_irq(vector, &dev->cnq_array[i]);
 		}
@@ -520,6 +523,7 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev)
 static int qedr_req_msix_irqs(struct qedr_dev *dev)
 {
 	int i, rc = 0;
+	u16 idx;
 
 	if (dev->num_cnq > dev->int_info.msix_cnt) {
 		DP_ERR(dev,
@@ -529,7 +533,8 @@ static int qedr_req_msix_irqs(struct qedr_dev *dev)
 	}
 
 	for (i = 0; i < dev->num_cnq; i++) {
-		rc = request_irq(dev->int_info.msix[i * dev->num_hwfns].vector,
+		idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
+		rc = request_irq(dev->int_info.msix[idx].vector,
 				 qedr_irq_handler, 0, dev->cnq_array[i].name,
 				 &dev->cnq_array[i]);
 		if (rc) {
@@ -866,6 +871,16 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
 	dev->user_dpm_enabled = dev_info.user_dpm_enabled;
 	dev->rdma_type = dev_info.rdma_type;
 	dev->num_hwfns = dev_info.common.num_hwfns;
+
+	if (IS_IWARP(dev) && QEDR_IS_CMT(dev)) {
+		rc = dev->ops->iwarp_set_engine_affin(cdev, false);
+		if (rc) {
+			DP_ERR(dev, "iWARP is disabled over a 100g device Enabling it may impact L2 performance. To enable it run devlink dev param set <dev> name iwarp_cmt value true cmode runtime\n");
+			goto init_err;
+		}
+	}
+	dev->affin_hwfn_idx = dev->ops->common->get_affin_hwfn_idx(cdev);
+
 	dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev);
 
 	dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev);
@@ -926,6 +941,10 @@ static void qedr_remove(struct qedr_dev *dev)
 	qedr_stop_hw(dev);
 	qedr_sync_free_irqs(dev);
 	qedr_free_resources(dev);
+
+	if (IS_IWARP(dev) && QEDR_IS_CMT(dev))
+		dev->ops->iwarp_set_engine_affin(dev->cdev, true);
+
 	ib_dealloc_device(&dev->ibdev);
 }
 
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 6175d1e98717..a92ca22e5de1 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -157,6 +157,8 @@ struct qedr_dev {
 	u32			dp_module;
 	u8			dp_level;
 	u8			num_hwfns;
+#define QEDR_IS_CMT(dev)        ((dev)->num_hwfns > 1)
+	u8			affin_hwfn_idx;
 	u8			gsi_ll2_handle;
 
 	uint			wq_multiplier;
diff --git a/drivers/media/dvb-frontends/tua6100.c b/drivers/media/dvb-frontends/tua6100.c
index b233b7be0b84..e6aaf4973aef 100644
--- a/drivers/media/dvb-frontends/tua6100.c
+++ b/drivers/media/dvb-frontends/tua6100.c
@@ -75,8 +75,8 @@ static int tua6100_set_params(struct dvb_frontend *fe)
 	struct i2c_msg msg1 = { .addr = priv->i2c_address, .flags = 0, .buf = reg1, .len = 4 };
 	struct i2c_msg msg2 = { .addr = priv->i2c_address, .flags = 0, .buf = reg2, .len = 3 };
 
-#define _R 4
-#define _P 32
+#define _R_VAL 4
+#define _P_VAL 32
 #define _ri 4000000
 
 	// setup register 0
@@ -91,14 +91,14 @@ static int tua6100_set_params(struct dvb_frontend *fe)
 	else
 		reg1[1] = 0x0c;
 
-	if (_P == 64)
+	if (_P_VAL == 64)
 		reg1[1] |= 0x40;
 	if (c->frequency >= 1525000)
 		reg1[1] |= 0x80;
 
 	// register 2
-	reg2[1] = (_R >> 8) & 0x03;
-	reg2[2] = _R;
+	reg2[1] = (_R_VAL >> 8) & 0x03;
+	reg2[2] = _R_VAL;
 	if (c->frequency < 1455000)
 		reg2[1] |= 0x1c;
 	else if (c->frequency < 1630000)
@@ -110,18 +110,18 @@ static int tua6100_set_params(struct dvb_frontend *fe)
 	 * The N divisor ratio (note: c->frequency is in kHz, but we
 	 * need it in Hz)
 	 */
-	prediv = (c->frequency * _R) / (_ri / 1000);
-	div = prediv / _P;
+	prediv = (c->frequency * _R_VAL) / (_ri / 1000);
+	div = prediv / _P_VAL;
 	reg1[1] |= (div >> 9) & 0x03;
 	reg1[2] = div >> 1;
 	reg1[3] = (div << 7);
-	priv->frequency = ((div * _P) * (_ri / 1000)) / _R;
+	priv->frequency = ((div * _P_VAL) * (_ri / 1000)) / _R_VAL;
 
 	// Finally, calculate and store the value for A
-	reg1[3] |= (prediv - (div*_P)) & 0x7f;
+	reg1[3] |= (prediv - (div*_P_VAL)) & 0x7f;
 
-#undef _R
-#undef _P
+#undef _R_VAL
+#undef _P_VAL
 #undef _ri
 
 	if (fe->ops.i2c_gate_ctrl)
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 0663b78a2f6c..cfdefd9f1905 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -734,15 +734,16 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 	return sja1105_clocking_setup_port(priv, port);
 }
 
-static void sja1105_adjust_link(struct dsa_switch *ds, int port,
-				struct phy_device *phydev)
+static void sja1105_mac_config(struct dsa_switch *ds, int port,
+			       unsigned int link_an_mode,
+			       const struct phylink_link_state *state)
 {
 	struct sja1105_private *priv = ds->priv;
 
-	if (!phydev->link)
+	if (!state->link)
 		sja1105_adjust_port_config(priv, port, 0, false);
 	else
-		sja1105_adjust_port_config(priv, port, phydev->speed, true);
+		sja1105_adjust_port_config(priv, port, state->speed, true);
 }
 
 static void sja1105_phylink_validate(struct dsa_switch *ds, int port,
@@ -1515,9 +1516,9 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds,
 static const struct dsa_switch_ops sja1105_switch_ops = {
 	.get_tag_protocol	= sja1105_get_tag_protocol,
 	.setup			= sja1105_setup,
-	.adjust_link		= sja1105_adjust_link,
 	.set_ageing_time	= sja1105_set_ageing_time,
 	.phylink_validate	= sja1105_phylink_validate,
+	.phylink_mac_config	= sja1105_mac_config,
 	.get_strings		= sja1105_get_strings,
 	.get_ethtool_stats	= sja1105_get_ethtool_stats,
 	.get_sset_count		= sja1105_get_sset_count,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index b3c992b0abb0..7e90e62da389 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -442,7 +442,7 @@ const char *sja1105_static_config_error_msg[] = {
 		"vl-forwarding-parameters-table.partspc.",
 };
 
-sja1105_config_valid_t
+static sja1105_config_valid_t
 static_config_check_memory_size(const struct sja1105_table *tables)
 {
 	const struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig
index 953ff1f9ac70..0058051ba925 100644
--- a/drivers/net/ethernet/atheros/Kconfig
+++ b/drivers/net/ethernet/atheros/Kconfig
@@ -6,7 +6,7 @@
 config NET_VENDOR_ATHEROS
 	bool "Atheros devices"
 	default y
-	depends on PCI
+	depends on (PCI || ATH79)
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -17,6 +17,14 @@ config NET_VENDOR_ATHEROS
 
 if NET_VENDOR_ATHEROS
 
+config AG71XX
+	tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support"
+	depends on ATH79
+	select PHYLIB
+	help
+	  If you wish to compile a kernel for AR7XXX/91XXX and enable
+	  ethernet support, then you should always answer Y to this.
+
 config ATL2
 	tristate "Atheros L2 Fast Ethernet support"
 	depends on PCI
diff --git a/drivers/net/ethernet/atheros/Makefile b/drivers/net/ethernet/atheros/Makefile
index aa3d394b87e6..aca696cb6425 100644
--- a/drivers/net/ethernet/atheros/Makefile
+++ b/drivers/net/ethernet/atheros/Makefile
@@ -3,6 +3,7 @@
 # Makefile for the Atheros network device drivers.
 #
 
+obj-$(CONFIG_AG71XX) += ag71xx.o
 obj-$(CONFIG_ATL1) += atlx/
 obj-$(CONFIG_ATL2) += atlx/
 obj-$(CONFIG_ATL1E) += atl1e/
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
new file mode 100644
index 000000000000..72a57c6cd254
--- /dev/null
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -0,0 +1,1898 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Atheros AR71xx built-in ethernet mac driver
+ *
+ *  Copyright (C) 2019 Oleksij Rempel <o.rempel@pengutronix.de>
+ *
+ *  List of authors contributed to this driver before mainlining:
+ *  Alexander Couzens <lynxis@fe80.eu>
+ *  Christian Lamparter <chunkeey@gmail.com>
+ *  Chuanhong Guo <gch981213@gmail.com>
+ *  Daniel F. Dickinson <cshored@thecshore.com>
+ *  David Bauer <mail@david-bauer.net>
+ *  Felix Fietkau <nbd@nbd.name>
+ *  Gabor Juhos <juhosg@freemail.hu>
+ *  Hauke Mehrtens <hauke@hauke-m.de>
+ *  Johann Neuhauser <johann@it-neuhauser.de>
+ *  John Crispin <john@phrozen.org>
+ *  Jo-Philipp Wich <jo@mein.io>
+ *  Koen Vandeputte <koen.vandeputte@ncentric.com>
+ *  Lucian Cristian <lucian.cristian@gmail.com>
+ *  Matt Merhar <mattmerhar@protonmail.com>
+ *  Milan Krstic <milan.krstic@gmail.com>
+ *  Petr Štetiar <ynezz@true.cz>
+ *  Rosen Penev <rosenp@gmail.com>
+ *  Stephen Walker <stephendwalker+github@gmail.com>
+ *  Vittorio Gambaletta <openwrt@vittgam.net>
+ *  Weijie Gao <hackpascal@gmail.com>
+ *  Imre Kaloz <kaloz@openwrt.org>
+ */
+
+#include <linux/if_vlan.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/clk.h>
+
+/* For our NAPI weight bigger does *NOT* mean better - it means more
+ * D-cache misses and lots more wasted cycles than we'll ever
+ * possibly gain from saving instructions.
+ */
+#define AG71XX_NAPI_WEIGHT	32
+#define AG71XX_OOM_REFILL	(1 + HZ / 10)
+
+#define AG71XX_INT_ERR	(AG71XX_INT_RX_BE | AG71XX_INT_TX_BE)
+#define AG71XX_INT_TX	(AG71XX_INT_TX_PS)
+#define AG71XX_INT_RX	(AG71XX_INT_RX_PR | AG71XX_INT_RX_OF)
+
+#define AG71XX_INT_POLL	(AG71XX_INT_RX | AG71XX_INT_TX)
+#define AG71XX_INT_INIT	(AG71XX_INT_ERR | AG71XX_INT_POLL)
+
+#define AG71XX_TX_MTU_LEN	1540
+
+#define AG71XX_TX_RING_SPLIT		512
+#define AG71XX_TX_RING_DS_PER_PKT	DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \
+						     AG71XX_TX_RING_SPLIT)
+#define AG71XX_TX_RING_SIZE_DEFAULT	128
+#define AG71XX_RX_RING_SIZE_DEFAULT	256
+
+#define AG71XX_MDIO_RETRY	1000
+#define AG71XX_MDIO_DELAY	5
+#define AG71XX_MDIO_MAX_CLK	5000000
+
+/* Register offsets */
+#define AG71XX_REG_MAC_CFG1	0x0000
+#define MAC_CFG1_TXE		BIT(0)	/* Tx Enable */
+#define MAC_CFG1_STX		BIT(1)	/* Synchronize Tx Enable */
+#define MAC_CFG1_RXE		BIT(2)	/* Rx Enable */
+#define MAC_CFG1_SRX		BIT(3)	/* Synchronize Rx Enable */
+#define MAC_CFG1_TFC		BIT(4)	/* Tx Flow Control Enable */
+#define MAC_CFG1_RFC		BIT(5)	/* Rx Flow Control Enable */
+#define MAC_CFG1_SR		BIT(31)	/* Soft Reset */
+#define MAC_CFG1_INIT	(MAC_CFG1_RXE | MAC_CFG1_TXE | \
+			 MAC_CFG1_SRX | MAC_CFG1_STX)
+
+#define AG71XX_REG_MAC_CFG2	0x0004
+#define MAC_CFG2_FDX		BIT(0)
+#define MAC_CFG2_PAD_CRC_EN	BIT(2)
+#define MAC_CFG2_LEN_CHECK	BIT(4)
+#define MAC_CFG2_IF_1000	BIT(9)
+#define MAC_CFG2_IF_10_100	BIT(8)
+
+#define AG71XX_REG_MAC_MFL	0x0010
+
+#define AG71XX_REG_MII_CFG	0x0020
+#define MII_CFG_CLK_DIV_4	0
+#define MII_CFG_CLK_DIV_6	2
+#define MII_CFG_CLK_DIV_8	3
+#define MII_CFG_CLK_DIV_10	4
+#define MII_CFG_CLK_DIV_14	5
+#define MII_CFG_CLK_DIV_20	6
+#define MII_CFG_CLK_DIV_28	7
+#define MII_CFG_CLK_DIV_34	8
+#define MII_CFG_CLK_DIV_42	9
+#define MII_CFG_CLK_DIV_50	10
+#define MII_CFG_CLK_DIV_58	11
+#define MII_CFG_CLK_DIV_66	12
+#define MII_CFG_CLK_DIV_74	13
+#define MII_CFG_CLK_DIV_82	14
+#define MII_CFG_CLK_DIV_98	15
+#define MII_CFG_RESET		BIT(31)
+
+#define AG71XX_REG_MII_CMD	0x0024
+#define MII_CMD_READ		BIT(0)
+
+#define AG71XX_REG_MII_ADDR	0x0028
+#define MII_ADDR_SHIFT		8
+
+#define AG71XX_REG_MII_CTRL	0x002c
+#define AG71XX_REG_MII_STATUS	0x0030
+#define AG71XX_REG_MII_IND	0x0034
+#define MII_IND_BUSY		BIT(0)
+#define MII_IND_INVALID		BIT(2)
+
+#define AG71XX_REG_MAC_IFCTL	0x0038
+#define MAC_IFCTL_SPEED		BIT(16)
+
+#define AG71XX_REG_MAC_ADDR1	0x0040
+#define AG71XX_REG_MAC_ADDR2	0x0044
+#define AG71XX_REG_FIFO_CFG0	0x0048
+#define FIFO_CFG0_WTM		BIT(0)	/* Watermark Module */
+#define FIFO_CFG0_RXS		BIT(1)	/* Rx System Module */
+#define FIFO_CFG0_RXF		BIT(2)	/* Rx Fabric Module */
+#define FIFO_CFG0_TXS		BIT(3)	/* Tx System Module */
+#define FIFO_CFG0_TXF		BIT(4)	/* Tx Fabric Module */
+#define FIFO_CFG0_ALL	(FIFO_CFG0_WTM | FIFO_CFG0_RXS | FIFO_CFG0_RXF \
+			| FIFO_CFG0_TXS | FIFO_CFG0_TXF)
+#define FIFO_CFG0_INIT	(FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT)
+
+#define FIFO_CFG0_ENABLE_SHIFT	8
+
+#define AG71XX_REG_FIFO_CFG1	0x004c
+#define AG71XX_REG_FIFO_CFG2	0x0050
+#define AG71XX_REG_FIFO_CFG3	0x0054
+#define AG71XX_REG_FIFO_CFG4	0x0058
+#define FIFO_CFG4_DE		BIT(0)	/* Drop Event */
+#define FIFO_CFG4_DV		BIT(1)	/* RX_DV Event */
+#define FIFO_CFG4_FC		BIT(2)	/* False Carrier */
+#define FIFO_CFG4_CE		BIT(3)	/* Code Error */
+#define FIFO_CFG4_CR		BIT(4)	/* CRC error */
+#define FIFO_CFG4_LM		BIT(5)	/* Length Mismatch */
+#define FIFO_CFG4_LO		BIT(6)	/* Length out of range */
+#define FIFO_CFG4_OK		BIT(7)	/* Packet is OK */
+#define FIFO_CFG4_MC		BIT(8)	/* Multicast Packet */
+#define FIFO_CFG4_BC		BIT(9)	/* Broadcast Packet */
+#define FIFO_CFG4_DR		BIT(10)	/* Dribble */
+#define FIFO_CFG4_LE		BIT(11)	/* Long Event */
+#define FIFO_CFG4_CF		BIT(12)	/* Control Frame */
+#define FIFO_CFG4_PF		BIT(13)	/* Pause Frame */
+#define FIFO_CFG4_UO		BIT(14)	/* Unsupported Opcode */
+#define FIFO_CFG4_VT		BIT(15)	/* VLAN tag detected */
+#define FIFO_CFG4_FT		BIT(16)	/* Frame Truncated */
+#define FIFO_CFG4_UC		BIT(17)	/* Unicast Packet */
+#define FIFO_CFG4_INIT	(FIFO_CFG4_DE | FIFO_CFG4_DV | FIFO_CFG4_FC | \
+			 FIFO_CFG4_CE | FIFO_CFG4_CR | FIFO_CFG4_LM | \
+			 FIFO_CFG4_LO | FIFO_CFG4_OK | FIFO_CFG4_MC | \
+			 FIFO_CFG4_BC | FIFO_CFG4_DR | FIFO_CFG4_LE | \
+			 FIFO_CFG4_CF | FIFO_CFG4_PF | FIFO_CFG4_UO | \
+			 FIFO_CFG4_VT)
+
+#define AG71XX_REG_FIFO_CFG5	0x005c
+#define FIFO_CFG5_DE		BIT(0)	/* Drop Event */
+#define FIFO_CFG5_DV		BIT(1)	/* RX_DV Event */
+#define FIFO_CFG5_FC		BIT(2)	/* False Carrier */
+#define FIFO_CFG5_CE		BIT(3)	/* Code Error */
+#define FIFO_CFG5_LM		BIT(4)	/* Length Mismatch */
+#define FIFO_CFG5_LO		BIT(5)	/* Length Out of Range */
+#define FIFO_CFG5_OK		BIT(6)	/* Packet is OK */
+#define FIFO_CFG5_MC		BIT(7)	/* Multicast Packet */
+#define FIFO_CFG5_BC		BIT(8)	/* Broadcast Packet */
+#define FIFO_CFG5_DR		BIT(9)	/* Dribble */
+#define FIFO_CFG5_CF		BIT(10)	/* Control Frame */
+#define FIFO_CFG5_PF		BIT(11)	/* Pause Frame */
+#define FIFO_CFG5_UO		BIT(12)	/* Unsupported Opcode */
+#define FIFO_CFG5_VT		BIT(13)	/* VLAN tag detected */
+#define FIFO_CFG5_LE		BIT(14)	/* Long Event */
+#define FIFO_CFG5_FT		BIT(15)	/* Frame Truncated */
+#define FIFO_CFG5_16		BIT(16)	/* unknown */
+#define FIFO_CFG5_17		BIT(17)	/* unknown */
+#define FIFO_CFG5_SF		BIT(18)	/* Short Frame */
+#define FIFO_CFG5_BM		BIT(19)	/* Byte Mode */
+#define FIFO_CFG5_INIT	(FIFO_CFG5_DE | FIFO_CFG5_DV | FIFO_CFG5_FC | \
+			 FIFO_CFG5_CE | FIFO_CFG5_LO | FIFO_CFG5_OK | \
+			 FIFO_CFG5_MC | FIFO_CFG5_BC | FIFO_CFG5_DR | \
+			 FIFO_CFG5_CF | FIFO_CFG5_PF | FIFO_CFG5_VT | \
+			 FIFO_CFG5_LE | FIFO_CFG5_FT | FIFO_CFG5_16 | \
+			 FIFO_CFG5_17 | FIFO_CFG5_SF)
+
+#define AG71XX_REG_TX_CTRL	0x0180
+#define TX_CTRL_TXE		BIT(0)	/* Tx Enable */
+
+#define AG71XX_REG_TX_DESC	0x0184
+#define AG71XX_REG_TX_STATUS	0x0188
+#define TX_STATUS_PS		BIT(0)	/* Packet Sent */
+#define TX_STATUS_UR		BIT(1)	/* Tx Underrun */
+#define TX_STATUS_BE		BIT(3)	/* Bus Error */
+
+#define AG71XX_REG_RX_CTRL	0x018c
+#define RX_CTRL_RXE		BIT(0)	/* Rx Enable */
+
+#define AG71XX_DMA_RETRY	10
+#define AG71XX_DMA_DELAY	1
+
+#define AG71XX_REG_RX_DESC	0x0190
+#define AG71XX_REG_RX_STATUS	0x0194
+#define RX_STATUS_PR		BIT(0)	/* Packet Received */
+#define RX_STATUS_OF		BIT(2)	/* Rx Overflow */
+#define RX_STATUS_BE		BIT(3)	/* Bus Error */
+
+#define AG71XX_REG_INT_ENABLE	0x0198
+#define AG71XX_REG_INT_STATUS	0x019c
+#define AG71XX_INT_TX_PS	BIT(0)
+#define AG71XX_INT_TX_UR	BIT(1)
+#define AG71XX_INT_TX_BE	BIT(3)
+#define AG71XX_INT_RX_PR	BIT(4)
+#define AG71XX_INT_RX_OF	BIT(6)
+#define AG71XX_INT_RX_BE	BIT(7)
+
+#define AG71XX_REG_FIFO_DEPTH	0x01a8
+#define AG71XX_REG_RX_SM	0x01b0
+#define AG71XX_REG_TX_SM	0x01b4
+
+#define ETH_SWITCH_HEADER_LEN	2
+
+#define AG71XX_DEFAULT_MSG_ENABLE	\
+	(NETIF_MSG_DRV			\
+	| NETIF_MSG_PROBE		\
+	| NETIF_MSG_LINK		\
+	| NETIF_MSG_TIMER		\
+	| NETIF_MSG_IFDOWN		\
+	| NETIF_MSG_IFUP		\
+	| NETIF_MSG_RX_ERR		\
+	| NETIF_MSG_TX_ERR)
+
+#define DESC_EMPTY		BIT(31)
+#define DESC_MORE		BIT(24)
+#define DESC_PKTLEN_M		0xfff
+struct ag71xx_desc {
+	u32 data;
+	u32 ctrl;
+	u32 next;
+	u32 pad;
+} __aligned(4);
+
+#define AG71XX_DESC_SIZE	roundup(sizeof(struct ag71xx_desc), \
+					L1_CACHE_BYTES)
+
+struct ag71xx_buf {
+	union {
+		struct {
+			struct sk_buff *skb;
+			unsigned int len;
+		} tx;
+		struct {
+			dma_addr_t dma_addr;
+			void *rx_buf;
+		} rx;
+	};
+};
+
+struct ag71xx_ring {
+	/* "Hot" fields in the data path. */
+	unsigned int curr;
+	unsigned int dirty;
+
+	/* "Cold" fields - not used in the data path. */
+	struct ag71xx_buf *buf;
+	u16 order;
+	u16 desc_split;
+	dma_addr_t descs_dma;
+	u8 *descs_cpu;
+};
+
+enum ag71xx_type {
+	AR7100,
+	AR7240,
+	AR9130,
+	AR9330,
+	AR9340,
+	QCA9530,
+	QCA9550,
+};
+
+struct ag71xx_dcfg {
+	u32 max_frame_len;
+	const u32 *fifodata;
+	u16 desc_pktlen_mask;
+	bool tx_hang_workaround;
+	enum ag71xx_type type;
+};
+
+struct ag71xx {
+	/* Critical data related to the per-packet data path are clustered
+	 * early in this structure to help improve the D-cache footprint.
+	 */
+	struct ag71xx_ring rx_ring ____cacheline_aligned;
+	struct ag71xx_ring tx_ring ____cacheline_aligned;
+
+	u16 rx_buf_size;
+	u8 rx_buf_offset;
+
+	struct net_device *ndev;
+	struct platform_device *pdev;
+	struct napi_struct napi;
+	u32 msg_enable;
+	const struct ag71xx_dcfg *dcfg;
+
+	/* From this point onwards we're not looking at per-packet fields. */
+	void __iomem *mac_base;
+
+	struct ag71xx_desc *stop_desc;
+	dma_addr_t stop_desc_dma;
+
+	int phy_if_mode;
+
+	struct delayed_work restart_work;
+	struct timer_list oom_timer;
+
+	struct reset_control *mac_reset;
+
+	u32 fifodata[3];
+	int mac_idx;
+
+	struct reset_control *mdio_reset;
+	struct mii_bus *mii_bus;
+	struct clk *clk_mdio;
+	struct clk *clk_eth;
+};
+
+static int ag71xx_desc_empty(struct ag71xx_desc *desc)
+{
+	return (desc->ctrl & DESC_EMPTY) != 0;
+}
+
+static struct ag71xx_desc *ag71xx_ring_desc(struct ag71xx_ring *ring, int idx)
+{
+	return (struct ag71xx_desc *)&ring->descs_cpu[idx * AG71XX_DESC_SIZE];
+}
+
+static int ag71xx_ring_size_order(int size)
+{
+	return fls(size - 1);
+}
+
+static bool ag71xx_is(struct ag71xx *ag, enum ag71xx_type type)
+{
+	return ag->dcfg->type == type;
+}
+
+static void ag71xx_wr(struct ag71xx *ag, unsigned int reg, u32 value)
+{
+	iowrite32(value, ag->mac_base + reg);
+	/* flush write */
+	(void)ioread32(ag->mac_base + reg);
+}
+
+static u32 ag71xx_rr(struct ag71xx *ag, unsigned int reg)
+{
+	return ioread32(ag->mac_base + reg);
+}
+
+static void ag71xx_sb(struct ag71xx *ag, unsigned int reg, u32 mask)
+{
+	void __iomem *r;
+
+	r = ag->mac_base + reg;
+	iowrite32(ioread32(r) | mask, r);
+	/* flush write */
+	(void)ioread32(r);
+}
+
+static void ag71xx_cb(struct ag71xx *ag, unsigned int reg, u32 mask)
+{
+	void __iomem *r;
+
+	r = ag->mac_base + reg;
+	iowrite32(ioread32(r) & ~mask, r);
+	/* flush write */
+	(void)ioread32(r);
+}
+
+static void ag71xx_int_enable(struct ag71xx *ag, u32 ints)
+{
+	ag71xx_sb(ag, AG71XX_REG_INT_ENABLE, ints);
+}
+
+static void ag71xx_int_disable(struct ag71xx *ag, u32 ints)
+{
+	ag71xx_cb(ag, AG71XX_REG_INT_ENABLE, ints);
+}
+
+static int ag71xx_mdio_wait_busy(struct ag71xx *ag)
+{
+	struct net_device *ndev = ag->ndev;
+	int i;
+
+	for (i = 0; i < AG71XX_MDIO_RETRY; i++) {
+		u32 busy;
+
+		udelay(AG71XX_MDIO_DELAY);
+
+		busy = ag71xx_rr(ag, AG71XX_REG_MII_IND);
+		if (!busy)
+			return 0;
+
+		udelay(AG71XX_MDIO_DELAY);
+	}
+
+	netif_err(ag, link, ndev, "MDIO operation timed out\n");
+
+	return -ETIMEDOUT;
+}
+
+static int ag71xx_mdio_mii_read(struct mii_bus *bus, int addr, int reg)
+{
+	struct ag71xx *ag = bus->priv;
+	int err, val;
+
+	err = ag71xx_mdio_wait_busy(ag);
+	if (err)
+		return err;
+
+	ag71xx_wr(ag, AG71XX_REG_MII_ADDR,
+		  ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff));
+	/* enable read mode */
+	ag71xx_wr(ag, AG71XX_REG_MII_CMD, MII_CMD_READ);
+
+	err = ag71xx_mdio_wait_busy(ag);
+	if (err)
+		return err;
+
+	val = ag71xx_rr(ag, AG71XX_REG_MII_STATUS);
+	/* disable read mode */
+	ag71xx_wr(ag, AG71XX_REG_MII_CMD, 0);
+
+	netif_dbg(ag, link, ag->ndev, "mii_read: addr=%04x, reg=%04x, value=%04x\n",
+		  addr, reg, val);
+
+	return val;
+}
+
+static int ag71xx_mdio_mii_write(struct mii_bus *bus, int addr, int reg,
+				 u16 val)
+{
+	struct ag71xx *ag = bus->priv;
+
+	netif_dbg(ag, link, ag->ndev, "mii_write: addr=%04x, reg=%04x, value=%04x\n",
+		  addr, reg, val);
+
+	ag71xx_wr(ag, AG71XX_REG_MII_ADDR,
+		  ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff));
+	ag71xx_wr(ag, AG71XX_REG_MII_CTRL, val);
+
+	return ag71xx_mdio_wait_busy(ag);
+}
+
+static const u32 ar71xx_mdio_div_table[] = {
+	4, 4, 6, 8, 10, 14, 20, 28,
+};
+
+static const u32 ar7240_mdio_div_table[] = {
+	2, 2, 4, 6, 8, 12, 18, 26, 32, 40, 48, 56, 62, 70, 78, 96,
+};
+
+static const u32 ar933x_mdio_div_table[] = {
+	4, 4, 6, 8, 10, 14, 20, 28, 34, 42, 50, 58, 66, 74, 82, 98,
+};
+
+static int ag71xx_mdio_get_divider(struct ag71xx *ag, u32 *div)
+{
+	unsigned long ref_clock;
+	const u32 *table;
+	int ndivs, i;
+
+	ref_clock = clk_get_rate(ag->clk_mdio);
+	if (!ref_clock)
+		return -EINVAL;
+
+	if (ag71xx_is(ag, AR9330) || ag71xx_is(ag, AR9340)) {
+		table = ar933x_mdio_div_table;
+		ndivs = ARRAY_SIZE(ar933x_mdio_div_table);
+	} else if (ag71xx_is(ag, AR7240)) {
+		table = ar7240_mdio_div_table;
+		ndivs = ARRAY_SIZE(ar7240_mdio_div_table);
+	} else {
+		table = ar71xx_mdio_div_table;
+		ndivs = ARRAY_SIZE(ar71xx_mdio_div_table);
+	}
+
+	for (i = 0; i < ndivs; i++) {
+		unsigned long t;
+
+		t = ref_clock / table[i];
+		if (t <= AG71XX_MDIO_MAX_CLK) {
+			*div = i;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static int ag71xx_mdio_reset(struct mii_bus *bus)
+{
+	struct ag71xx *ag = bus->priv;
+	int err;
+	u32 t;
+
+	err = ag71xx_mdio_get_divider(ag, &t);
+	if (err)
+		return err;
+
+	ag71xx_wr(ag, AG71XX_REG_MII_CFG, t | MII_CFG_RESET);
+	usleep_range(100, 200);
+
+	ag71xx_wr(ag, AG71XX_REG_MII_CFG, t);
+	usleep_range(100, 200);
+
+	return 0;
+}
+
+static int ag71xx_mdio_probe(struct ag71xx *ag)
+{
+	struct device *dev = &ag->pdev->dev;
+	struct net_device *ndev = ag->ndev;
+	static struct mii_bus *mii_bus;
+	struct device_node *np;
+	int err;
+
+	np = dev->of_node;
+	ag->mii_bus = NULL;
+
+	ag->clk_mdio = devm_clk_get(dev, "mdio");
+	if (IS_ERR(ag->clk_mdio)) {
+		netif_err(ag, probe, ndev, "Failed to get mdio clk.\n");
+		return PTR_ERR(ag->clk_mdio);
+	}
+
+	err = clk_prepare_enable(ag->clk_mdio);
+	if (err) {
+		netif_err(ag, probe, ndev, "Failed to enable mdio clk.\n");
+		return err;
+	}
+
+	mii_bus = devm_mdiobus_alloc(dev);
+	if (!mii_bus) {
+		err = -ENOMEM;
+		goto mdio_err_put_clk;
+	}
+
+	ag->mdio_reset = of_reset_control_get_exclusive(np, "mdio");
+	if (IS_ERR(ag->mdio_reset)) {
+		netif_err(ag, probe, ndev, "Failed to get reset mdio.\n");
+		return PTR_ERR(ag->mdio_reset);
+	}
+
+	mii_bus->name = "ag71xx_mdio";
+	mii_bus->read = ag71xx_mdio_mii_read;
+	mii_bus->write = ag71xx_mdio_mii_write;
+	mii_bus->reset = ag71xx_mdio_reset;
+	mii_bus->priv = ag;
+	mii_bus->parent = dev;
+	snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s.%d", np->name, ag->mac_idx);
+
+	if (!IS_ERR(ag->mdio_reset)) {
+		reset_control_assert(ag->mdio_reset);
+		msleep(100);
+		reset_control_deassert(ag->mdio_reset);
+		msleep(200);
+	}
+
+	err = of_mdiobus_register(mii_bus, np);
+	if (err)
+		goto mdio_err_put_clk;
+
+	ag->mii_bus = mii_bus;
+
+	return 0;
+
+mdio_err_put_clk:
+	clk_disable_unprepare(ag->clk_mdio);
+	return err;
+}
+
+static void ag71xx_mdio_remove(struct ag71xx *ag)
+{
+	if (ag->mii_bus)
+		mdiobus_unregister(ag->mii_bus);
+	clk_disable_unprepare(ag->clk_mdio);
+}
+
+static void ag71xx_hw_stop(struct ag71xx *ag)
+{
+	/* disable all interrupts and stop the rx/tx engine */
+	ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, 0);
+	ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0);
+	ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0);
+}
+
+static bool ag71xx_check_dma_stuck(struct ag71xx *ag)
+{
+	unsigned long timestamp;
+	u32 rx_sm, tx_sm, rx_fd;
+
+	timestamp = netdev_get_tx_queue(ag->ndev, 0)->trans_start;
+	if (likely(time_before(jiffies, timestamp + HZ / 10)))
+		return false;
+
+	if (!netif_carrier_ok(ag->ndev))
+		return false;
+
+	rx_sm = ag71xx_rr(ag, AG71XX_REG_RX_SM);
+	if ((rx_sm & 0x7) == 0x3 && ((rx_sm >> 4) & 0x7) == 0x6)
+		return true;
+
+	tx_sm = ag71xx_rr(ag, AG71XX_REG_TX_SM);
+	rx_fd = ag71xx_rr(ag, AG71XX_REG_FIFO_DEPTH);
+	if (((tx_sm >> 4) & 0x7) == 0 && ((rx_sm & 0x7) == 0) &&
+	    ((rx_sm >> 4) & 0x7) == 0 && rx_fd == 0)
+		return true;
+
+	return false;
+}
+
+static int ag71xx_tx_packets(struct ag71xx *ag, bool flush)
+{
+	struct ag71xx_ring *ring = &ag->tx_ring;
+	int sent = 0, bytes_compl = 0, n = 0;
+	struct net_device *ndev = ag->ndev;
+	int ring_mask, ring_size;
+	bool dma_stuck = false;
+
+	ring_mask = BIT(ring->order) - 1;
+	ring_size = BIT(ring->order);
+
+	netif_dbg(ag, tx_queued, ndev, "processing TX ring\n");
+
+	while (ring->dirty + n != ring->curr) {
+		struct ag71xx_desc *desc;
+		struct sk_buff *skb;
+		unsigned int i;
+
+		i = (ring->dirty + n) & ring_mask;
+		desc = ag71xx_ring_desc(ring, i);
+		skb = ring->buf[i].tx.skb;
+
+		if (!flush && !ag71xx_desc_empty(desc)) {
+			if (ag->dcfg->tx_hang_workaround &&
+			    ag71xx_check_dma_stuck(ag)) {
+				schedule_delayed_work(&ag->restart_work,
+						      HZ / 2);
+				dma_stuck = true;
+			}
+			break;
+		}
+
+		if (flush)
+			desc->ctrl |= DESC_EMPTY;
+
+		n++;
+		if (!skb)
+			continue;
+
+		dev_kfree_skb_any(skb);
+		ring->buf[i].tx.skb = NULL;
+
+		bytes_compl += ring->buf[i].tx.len;
+
+		sent++;
+		ring->dirty += n;
+
+		while (n > 0) {
+			ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
+			n--;
+		}
+	}
+
+	netif_dbg(ag, tx_done, ndev, "%d packets sent out\n", sent);
+
+	if (!sent)
+		return 0;
+
+	ag->ndev->stats.tx_bytes += bytes_compl;
+	ag->ndev->stats.tx_packets += sent;
+
+	netdev_completed_queue(ag->ndev, sent, bytes_compl);
+	if ((ring->curr - ring->dirty) < (ring_size * 3) / 4)
+		netif_wake_queue(ag->ndev);
+
+	if (!dma_stuck)
+		cancel_delayed_work(&ag->restart_work);
+
+	return sent;
+}
+
+static void ag71xx_dma_wait_stop(struct ag71xx *ag)
+{
+	struct net_device *ndev = ag->ndev;
+	int i;
+
+	for (i = 0; i < AG71XX_DMA_RETRY; i++) {
+		u32 rx, tx;
+
+		mdelay(AG71XX_DMA_DELAY);
+
+		rx = ag71xx_rr(ag, AG71XX_REG_RX_CTRL) & RX_CTRL_RXE;
+		tx = ag71xx_rr(ag, AG71XX_REG_TX_CTRL) & TX_CTRL_TXE;
+		if (!rx && !tx)
+			return;
+	}
+
+	netif_err(ag, hw, ndev, "DMA stop operation timed out\n");
+}
+
+static void ag71xx_dma_reset(struct ag71xx *ag)
+{
+	struct net_device *ndev = ag->ndev;
+	u32 val;
+	int i;
+
+	/* stop RX and TX */
+	ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0);
+	ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0);
+
+	/* give the hardware some time to really stop all rx/tx activity
+	 * clearing the descriptors too early causes random memory corruption
+	 */
+	ag71xx_dma_wait_stop(ag);
+
+	/* clear descriptor addresses */
+	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->stop_desc_dma);
+	ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->stop_desc_dma);
+
+	/* clear pending RX/TX interrupts */
+	for (i = 0; i < 256; i++) {
+		ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
+		ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
+	}
+
+	/* clear pending errors */
+	ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE | RX_STATUS_OF);
+	ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE | TX_STATUS_UR);
+
+	val = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);
+	if (val)
+		netif_err(ag, hw, ndev, "unable to clear DMA Rx status: %08x\n",
+			  val);
+
+	val = ag71xx_rr(ag, AG71XX_REG_TX_STATUS);
+
+	/* mask out reserved bits */
+	val &= ~0xff000000;
+
+	if (val)
+		netif_err(ag, hw, ndev, "unable to clear DMA Tx status: %08x\n",
+			  val);
+}
+
+static void ag71xx_hw_setup(struct ag71xx *ag)
+{
+	u32 init = MAC_CFG1_INIT;
+
+	/* setup MAC configuration registers */
+	ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, init);
+
+	ag71xx_sb(ag, AG71XX_REG_MAC_CFG2,
+		  MAC_CFG2_PAD_CRC_EN | MAC_CFG2_LEN_CHECK);
+
+	/* setup max frame length to zero */
+	ag71xx_wr(ag, AG71XX_REG_MAC_MFL, 0);
+
+	/* setup FIFO configuration registers */
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG0, FIFO_CFG0_INIT);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG1, ag->fifodata[0]);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG2, ag->fifodata[1]);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG4, FIFO_CFG4_INIT);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, FIFO_CFG5_INIT);
+}
+
+static unsigned int ag71xx_max_frame_len(unsigned int mtu)
+{
+	return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN;
+}
+
+static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac)
+{
+	u32 t;
+
+	t = (((u32)mac[5]) << 24) | (((u32)mac[4]) << 16)
+	  | (((u32)mac[3]) << 8) | ((u32)mac[2]);
+
+	ag71xx_wr(ag, AG71XX_REG_MAC_ADDR1, t);
+
+	t = (((u32)mac[1]) << 24) | (((u32)mac[0]) << 16);
+	ag71xx_wr(ag, AG71XX_REG_MAC_ADDR2, t);
+}
+
+static void ag71xx_fast_reset(struct ag71xx *ag)
+{
+	struct net_device *dev = ag->ndev;
+	u32 rx_ds;
+	u32 mii_reg;
+
+	ag71xx_hw_stop(ag);
+
+	mii_reg = ag71xx_rr(ag, AG71XX_REG_MII_CFG);
+	rx_ds = ag71xx_rr(ag, AG71XX_REG_RX_DESC);
+
+	ag71xx_tx_packets(ag, true);
+
+	reset_control_assert(ag->mac_reset);
+	usleep_range(10, 20);
+	reset_control_deassert(ag->mac_reset);
+	usleep_range(10, 20);
+
+	ag71xx_dma_reset(ag);
+	ag71xx_hw_setup(ag);
+	ag->tx_ring.curr = 0;
+	ag->tx_ring.dirty = 0;
+	netdev_reset_queue(ag->ndev);
+
+	/* setup max frame length */
+	ag71xx_wr(ag, AG71XX_REG_MAC_MFL,
+		  ag71xx_max_frame_len(ag->ndev->mtu));
+
+	ag71xx_wr(ag, AG71XX_REG_RX_DESC, rx_ds);
+	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+	ag71xx_wr(ag, AG71XX_REG_MII_CFG, mii_reg);
+
+	ag71xx_hw_set_macaddr(ag, dev->dev_addr);
+}
+
+static void ag71xx_hw_start(struct ag71xx *ag)
+{
+	/* start RX engine */
+	ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE);
+
+	/* enable interrupts */
+	ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, AG71XX_INT_INIT);
+
+	netif_wake_queue(ag->ndev);
+}
+
+static void ag71xx_link_adjust(struct ag71xx *ag, bool update)
+{
+	struct phy_device *phydev = ag->ndev->phydev;
+	u32 cfg2;
+	u32 ifctl;
+	u32 fifo5;
+
+	if (!phydev->link && update) {
+		ag71xx_hw_stop(ag);
+		return;
+	}
+
+	if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130))
+		ag71xx_fast_reset(ag);
+
+	cfg2 = ag71xx_rr(ag, AG71XX_REG_MAC_CFG2);
+	cfg2 &= ~(MAC_CFG2_IF_1000 | MAC_CFG2_IF_10_100 | MAC_CFG2_FDX);
+	cfg2 |= (phydev->duplex) ? MAC_CFG2_FDX : 0;
+
+	ifctl = ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL);
+	ifctl &= ~(MAC_IFCTL_SPEED);
+
+	fifo5 = ag71xx_rr(ag, AG71XX_REG_FIFO_CFG5);
+	fifo5 &= ~FIFO_CFG5_BM;
+
+	switch (phydev->speed) {
+	case SPEED_1000:
+		cfg2 |= MAC_CFG2_IF_1000;
+		fifo5 |= FIFO_CFG5_BM;
+		break;
+	case SPEED_100:
+		cfg2 |= MAC_CFG2_IF_10_100;
+		ifctl |= MAC_IFCTL_SPEED;
+		break;
+	case SPEED_10:
+		cfg2 |= MAC_CFG2_IF_10_100;
+		break;
+	default:
+		WARN(1, "not supported speed %i\n", phydev->speed);
+		return;
+	}
+
+	if (ag->tx_ring.desc_split) {
+		ag->fifodata[2] &= 0xffff;
+		ag->fifodata[2] |= ((2048 - ag->tx_ring.desc_split) / 4) << 16;
+	}
+
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, ag->fifodata[2]);
+
+	ag71xx_wr(ag, AG71XX_REG_MAC_CFG2, cfg2);
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, fifo5);
+	ag71xx_wr(ag, AG71XX_REG_MAC_IFCTL, ifctl);
+
+	ag71xx_hw_start(ag);
+
+	if (update)
+		phy_print_status(phydev);
+}
+
+static void ag71xx_phy_link_adjust(struct net_device *ndev)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+
+	ag71xx_link_adjust(ag, true);
+}
+
+static int ag71xx_phy_connect(struct ag71xx *ag)
+{
+	struct device_node *np = ag->pdev->dev.of_node;
+	struct net_device *ndev = ag->ndev;
+	struct device_node *phy_node;
+	struct phy_device *phydev;
+	int ret;
+
+	if (of_phy_is_fixed_link(np)) {
+		ret = of_phy_register_fixed_link(np);
+		if (ret < 0) {
+			netif_err(ag, probe, ndev, "Failed to register fixed PHY link: %d\n",
+				  ret);
+			return ret;
+		}
+
+		phy_node = of_node_get(np);
+	} else {
+		phy_node = of_parse_phandle(np, "phy-handle", 0);
+	}
+
+	if (!phy_node) {
+		netif_err(ag, probe, ndev, "Could not find valid phy node\n");
+		return -ENODEV;
+	}
+
+	phydev = of_phy_connect(ag->ndev, phy_node, ag71xx_phy_link_adjust,
+				0, ag->phy_if_mode);
+
+	of_node_put(phy_node);
+
+	if (!phydev) {
+		netif_err(ag, probe, ndev, "Could not connect to PHY device\n");
+		return -ENODEV;
+	}
+
+	phy_attached_info(phydev);
+
+	return 0;
+}
+
+static void ag71xx_ring_tx_clean(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->tx_ring;
+	int ring_mask = BIT(ring->order) - 1;
+	u32 bytes_compl = 0, pkts_compl = 0;
+	struct net_device *ndev = ag->ndev;
+
+	while (ring->curr != ring->dirty) {
+		struct ag71xx_desc *desc;
+		u32 i = ring->dirty & ring_mask;
+
+		desc = ag71xx_ring_desc(ring, i);
+		if (!ag71xx_desc_empty(desc)) {
+			desc->ctrl = 0;
+			ndev->stats.tx_errors++;
+		}
+
+		if (ring->buf[i].tx.skb) {
+			bytes_compl += ring->buf[i].tx.len;
+			pkts_compl++;
+			dev_kfree_skb_any(ring->buf[i].tx.skb);
+		}
+		ring->buf[i].tx.skb = NULL;
+		ring->dirty++;
+	}
+
+	/* flush descriptors */
+	wmb();
+
+	netdev_completed_queue(ndev, pkts_compl, bytes_compl);
+}
+
+static void ag71xx_ring_tx_init(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->tx_ring;
+	int ring_size = BIT(ring->order);
+	int ring_mask = ring_size - 1;
+	int i;
+
+	for (i = 0; i < ring_size; i++) {
+		struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+		desc->next = (u32)(ring->descs_dma +
+			AG71XX_DESC_SIZE * ((i + 1) & ring_mask));
+
+		desc->ctrl = DESC_EMPTY;
+		ring->buf[i].tx.skb = NULL;
+	}
+
+	/* flush descriptors */
+	wmb();
+
+	ring->curr = 0;
+	ring->dirty = 0;
+	netdev_reset_queue(ag->ndev);
+}
+
+static void ag71xx_ring_rx_clean(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->rx_ring;
+	int ring_size = BIT(ring->order);
+	int i;
+
+	if (!ring->buf)
+		return;
+
+	for (i = 0; i < ring_size; i++)
+		if (ring->buf[i].rx.rx_buf) {
+			dma_unmap_single(&ag->pdev->dev,
+					 ring->buf[i].rx.dma_addr,
+					 ag->rx_buf_size, DMA_FROM_DEVICE);
+			skb_free_frag(ring->buf[i].rx.rx_buf);
+		}
+}
+
+static int ag71xx_buffer_size(struct ag71xx *ag)
+{
+	return ag->rx_buf_size +
+	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf,
+			       int offset,
+			       void *(*alloc)(unsigned int size))
+{
+	struct ag71xx_ring *ring = &ag->rx_ring;
+	struct ag71xx_desc *desc;
+	void *data;
+
+	desc = ag71xx_ring_desc(ring, buf - &ring->buf[0]);
+
+	data = alloc(ag71xx_buffer_size(ag));
+	if (!data)
+		return false;
+
+	buf->rx.rx_buf = data;
+	buf->rx.dma_addr = dma_map_single(&ag->pdev->dev, data, ag->rx_buf_size,
+					  DMA_FROM_DEVICE);
+	desc->data = (u32)buf->rx.dma_addr + offset;
+	return true;
+}
+
+static int ag71xx_ring_rx_init(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->rx_ring;
+	struct net_device *ndev = ag->ndev;
+	int ring_mask = BIT(ring->order) - 1;
+	int ring_size = BIT(ring->order);
+	unsigned int i;
+	int ret;
+
+	ret = 0;
+	for (i = 0; i < ring_size; i++) {
+		struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+		desc->next = (u32)(ring->descs_dma +
+			AG71XX_DESC_SIZE * ((i + 1) & ring_mask));
+
+		netif_dbg(ag, rx_status, ndev, "RX desc at %p, next is %08x\n",
+			  desc, desc->next);
+	}
+
+	for (i = 0; i < ring_size; i++) {
+		struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+		if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], ag->rx_buf_offset,
+					netdev_alloc_frag)) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		desc->ctrl = DESC_EMPTY;
+	}
+
+	/* flush descriptors */
+	wmb();
+
+	ring->curr = 0;
+	ring->dirty = 0;
+
+	return ret;
+}
+
+static int ag71xx_ring_rx_refill(struct ag71xx *ag)
+{
+	struct ag71xx_ring *ring = &ag->rx_ring;
+	int ring_mask = BIT(ring->order) - 1;
+	int offset = ag->rx_buf_offset;
+	unsigned int count;
+
+	count = 0;
+	for (; ring->curr - ring->dirty > 0; ring->dirty++) {
+		struct ag71xx_desc *desc;
+		unsigned int i;
+
+		i = ring->dirty & ring_mask;
+		desc = ag71xx_ring_desc(ring, i);
+
+		if (!ring->buf[i].rx.rx_buf &&
+		    !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset,
+					napi_alloc_frag))
+			break;
+
+		desc->ctrl = DESC_EMPTY;
+		count++;
+	}
+
+	/* flush descriptors */
+	wmb();
+
+	netif_dbg(ag, rx_status, ag->ndev, "%u rx descriptors refilled\n",
+		  count);
+
+	return count;
+}
+
+static int ag71xx_rings_init(struct ag71xx *ag)
+{
+	struct ag71xx_ring *tx = &ag->tx_ring;
+	struct ag71xx_ring *rx = &ag->rx_ring;
+	int ring_size, tx_size;
+
+	ring_size = BIT(tx->order) + BIT(rx->order);
+	tx_size = BIT(tx->order);
+
+	tx->buf = kcalloc(ring_size, sizeof(*tx->buf), GFP_KERNEL);
+	if (!tx->buf)
+		return -ENOMEM;
+
+	tx->descs_cpu = dma_alloc_coherent(&ag->pdev->dev,
+					   ring_size * AG71XX_DESC_SIZE,
+					   &tx->descs_dma, GFP_ATOMIC);
+	if (!tx->descs_cpu) {
+		kfree(tx->buf);
+		tx->buf = NULL;
+		return -ENOMEM;
+	}
+
+	rx->buf = &tx->buf[BIT(tx->order)];
+	rx->descs_cpu = ((void *)tx->descs_cpu) + tx_size * AG71XX_DESC_SIZE;
+	rx->descs_dma = tx->descs_dma + tx_size * AG71XX_DESC_SIZE;
+
+	ag71xx_ring_tx_init(ag);
+	return ag71xx_ring_rx_init(ag);
+}
+
+static void ag71xx_rings_free(struct ag71xx *ag)
+{
+	struct ag71xx_ring *tx = &ag->tx_ring;
+	struct ag71xx_ring *rx = &ag->rx_ring;
+	int ring_size;
+
+	ring_size = BIT(tx->order) + BIT(rx->order);
+
+	if (tx->descs_cpu)
+		dma_free_coherent(&ag->pdev->dev, ring_size * AG71XX_DESC_SIZE,
+				  tx->descs_cpu, tx->descs_dma);
+
+	kfree(tx->buf);
+
+	tx->descs_cpu = NULL;
+	rx->descs_cpu = NULL;
+	tx->buf = NULL;
+	rx->buf = NULL;
+}
+
+static void ag71xx_rings_cleanup(struct ag71xx *ag)
+{
+	ag71xx_ring_rx_clean(ag);
+	ag71xx_ring_tx_clean(ag);
+	ag71xx_rings_free(ag);
+
+	netdev_reset_queue(ag->ndev);
+}
+
+static void ag71xx_hw_init(struct ag71xx *ag)
+{
+	ag71xx_hw_stop(ag);
+
+	ag71xx_sb(ag, AG71XX_REG_MAC_CFG1, MAC_CFG1_SR);
+	usleep_range(20, 30);
+
+	reset_control_assert(ag->mac_reset);
+	msleep(100);
+	reset_control_deassert(ag->mac_reset);
+	msleep(200);
+
+	ag71xx_hw_setup(ag);
+
+	ag71xx_dma_reset(ag);
+}
+
+static int ag71xx_hw_enable(struct ag71xx *ag)
+{
+	int ret;
+
+	ret = ag71xx_rings_init(ag);
+	if (ret)
+		return ret;
+
+	napi_enable(&ag->napi);
+	ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+	ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
+	netif_start_queue(ag->ndev);
+
+	return 0;
+}
+
+static void ag71xx_hw_disable(struct ag71xx *ag)
+{
+	netif_stop_queue(ag->ndev);
+
+	ag71xx_hw_stop(ag);
+	ag71xx_dma_reset(ag);
+
+	napi_disable(&ag->napi);
+	del_timer_sync(&ag->oom_timer);
+
+	ag71xx_rings_cleanup(ag);
+}
+
+static int ag71xx_open(struct net_device *ndev)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+	unsigned int max_frame_len;
+	int ret;
+
+	max_frame_len = ag71xx_max_frame_len(ndev->mtu);
+	ag->rx_buf_size =
+		SKB_DATA_ALIGN(max_frame_len + NET_SKB_PAD + NET_IP_ALIGN);
+
+	/* setup max frame length */
+	ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len);
+	ag71xx_hw_set_macaddr(ag, ndev->dev_addr);
+
+	ret = ag71xx_hw_enable(ag);
+	if (ret)
+		goto err;
+
+	ret = ag71xx_phy_connect(ag);
+	if (ret)
+		goto err;
+
+	phy_start(ndev->phydev);
+
+	return 0;
+
+err:
+	ag71xx_rings_cleanup(ag);
+	return ret;
+}
+
+static int ag71xx_stop(struct net_device *ndev)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+
+	phy_stop(ndev->phydev);
+	phy_disconnect(ndev->phydev);
+	ag71xx_hw_disable(ag);
+
+	return 0;
+}
+
+static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len)
+{
+	int i, ring_mask, ndesc, split;
+	struct ag71xx_desc *desc;
+
+	ring_mask = BIT(ring->order) - 1;
+	ndesc = 0;
+	split = ring->desc_split;
+
+	if (!split)
+		split = len;
+
+	while (len > 0) {
+		unsigned int cur_len = len;
+
+		i = (ring->curr + ndesc) & ring_mask;
+		desc = ag71xx_ring_desc(ring, i);
+
+		if (!ag71xx_desc_empty(desc))
+			return -1;
+
+		if (cur_len > split) {
+			cur_len = split;
+
+			/*  TX will hang if DMA transfers <= 4 bytes,
+			 * make sure next segment is more than 4 bytes long.
+			 */
+			if (len <= split + 4)
+				cur_len -= 4;
+		}
+
+		desc->data = addr;
+		addr += cur_len;
+		len -= cur_len;
+
+		if (len > 0)
+			cur_len |= DESC_MORE;
+
+		/* prevent early tx attempt of this descriptor */
+		if (!ndesc)
+			cur_len |= DESC_EMPTY;
+
+		desc->ctrl = cur_len;
+		ndesc++;
+	}
+
+	return ndesc;
+}
+
+static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
+					  struct net_device *ndev)
+{
+	int i, n, ring_min, ring_mask, ring_size;
+	struct ag71xx *ag = netdev_priv(ndev);
+	struct ag71xx_ring *ring;
+	struct ag71xx_desc *desc;
+	dma_addr_t dma_addr;
+
+	ring = &ag->tx_ring;
+	ring_mask = BIT(ring->order) - 1;
+	ring_size = BIT(ring->order);
+
+	if (skb->len <= 4) {
+		netif_dbg(ag, tx_err, ndev, "packet len is too small\n");
+		goto err_drop;
+	}
+
+	dma_addr = dma_map_single(&ag->pdev->dev, skb->data, skb->len,
+				  DMA_TO_DEVICE);
+
+	i = ring->curr & ring_mask;
+	desc = ag71xx_ring_desc(ring, i);
+
+	/* setup descriptor fields */
+	n = ag71xx_fill_dma_desc(ring, (u32)dma_addr,
+				 skb->len & ag->dcfg->desc_pktlen_mask);
+	if (n < 0)
+		goto err_drop_unmap;
+
+	i = (ring->curr + n - 1) & ring_mask;
+	ring->buf[i].tx.len = skb->len;
+	ring->buf[i].tx.skb = skb;
+
+	netdev_sent_queue(ndev, skb->len);
+
+	skb_tx_timestamp(skb);
+
+	desc->ctrl &= ~DESC_EMPTY;
+	ring->curr += n;
+
+	/* flush descriptor */
+	wmb();
+
+	ring_min = 2;
+	if (ring->desc_split)
+		ring_min *= AG71XX_TX_RING_DS_PER_PKT;
+
+	if (ring->curr - ring->dirty >= ring_size - ring_min) {
+		netif_dbg(ag, tx_err, ndev, "tx queue full\n");
+		netif_stop_queue(ndev);
+	}
+
+	netif_dbg(ag, tx_queued, ndev, "packet injected into TX queue\n");
+
+	/* enable TX engine */
+	ag71xx_wr(ag, AG71XX_REG_TX_CTRL, TX_CTRL_TXE);
+
+	return NETDEV_TX_OK;
+
+err_drop_unmap:
+	dma_unmap_single(&ag->pdev->dev, dma_addr, skb->len, DMA_TO_DEVICE);
+
+err_drop:
+	ndev->stats.tx_dropped++;
+
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int ag71xx_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd)
+{
+	if (!ndev->phydev)
+		return -EINVAL;
+
+	return phy_mii_ioctl(ndev->phydev, ifr, cmd);
+}
+
+static void ag71xx_oom_timer_handler(struct timer_list *t)
+{
+	struct ag71xx *ag = from_timer(ag, t, oom_timer);
+
+	napi_schedule(&ag->napi);
+}
+
+static void ag71xx_tx_timeout(struct net_device *ndev)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+
+	netif_err(ag, tx_err, ndev, "tx timeout\n");
+
+	schedule_delayed_work(&ag->restart_work, 1);
+}
+
+static void ag71xx_restart_work_func(struct work_struct *work)
+{
+	struct ag71xx *ag = container_of(work, struct ag71xx,
+					 restart_work.work);
+	struct net_device *ndev = ag->ndev;
+
+	rtnl_lock();
+	ag71xx_hw_disable(ag);
+	ag71xx_hw_enable(ag);
+	if (ndev->phydev->link)
+		ag71xx_link_adjust(ag, false);
+	rtnl_unlock();
+}
+
+static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
+{
+	struct net_device *ndev = ag->ndev;
+	int ring_mask, ring_size, done = 0;
+	unsigned int pktlen_mask, offset;
+	struct sk_buff *next, *skb;
+	struct ag71xx_ring *ring;
+	struct list_head rx_list;
+
+	ring = &ag->rx_ring;
+	pktlen_mask = ag->dcfg->desc_pktlen_mask;
+	offset = ag->rx_buf_offset;
+	ring_mask = BIT(ring->order) - 1;
+	ring_size = BIT(ring->order);
+
+	netif_dbg(ag, rx_status, ndev, "rx packets, limit=%d, curr=%u, dirty=%u\n",
+		  limit, ring->curr, ring->dirty);
+
+	INIT_LIST_HEAD(&rx_list);
+
+	while (done < limit) {
+		unsigned int i = ring->curr & ring_mask;
+		struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+		int pktlen;
+		int err = 0;
+
+		if (ag71xx_desc_empty(desc))
+			break;
+
+		if ((ring->dirty + ring_size) == ring->curr) {
+			WARN_ONCE(1, "RX out of ring");
+			break;
+		}
+
+		ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
+
+		pktlen = desc->ctrl & pktlen_mask;
+		pktlen -= ETH_FCS_LEN;
+
+		dma_unmap_single(&ag->pdev->dev, ring->buf[i].rx.dma_addr,
+				 ag->rx_buf_size, DMA_FROM_DEVICE);
+
+		ndev->stats.rx_packets++;
+		ndev->stats.rx_bytes += pktlen;
+
+		skb = build_skb(ring->buf[i].rx.rx_buf, ag71xx_buffer_size(ag));
+		if (!skb) {
+			skb_free_frag(ring->buf[i].rx.rx_buf);
+			goto next;
+		}
+
+		skb_reserve(skb, offset);
+		skb_put(skb, pktlen);
+
+		if (err) {
+			ndev->stats.rx_dropped++;
+			kfree_skb(skb);
+		} else {
+			skb->dev = ndev;
+			skb->ip_summed = CHECKSUM_NONE;
+			list_add_tail(&skb->list, &rx_list);
+		}
+
+next:
+		ring->buf[i].rx.rx_buf = NULL;
+		done++;
+
+		ring->curr++;
+	}
+
+	ag71xx_ring_rx_refill(ag);
+
+	list_for_each_entry_safe(skb, next, &rx_list, list)
+		skb->protocol = eth_type_trans(skb, ndev);
+	netif_receive_skb_list(&rx_list);
+
+	netif_dbg(ag, rx_status, ndev, "rx finish, curr=%u, dirty=%u, done=%d\n",
+		  ring->curr, ring->dirty, done);
+
+	return done;
+}
+
+static int ag71xx_poll(struct napi_struct *napi, int limit)
+{
+	struct ag71xx *ag = container_of(napi, struct ag71xx, napi);
+	struct ag71xx_ring *rx_ring = &ag->rx_ring;
+	int rx_ring_size = BIT(rx_ring->order);
+	struct net_device *ndev = ag->ndev;
+	int tx_done, rx_done;
+	u32 status;
+
+	tx_done = ag71xx_tx_packets(ag, false);
+
+	netif_dbg(ag, rx_status, ndev, "processing RX ring\n");
+	rx_done = ag71xx_rx_packets(ag, limit);
+
+	if (!rx_ring->buf[rx_ring->dirty % rx_ring_size].rx.rx_buf)
+		goto oom;
+
+	status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);
+	if (unlikely(status & RX_STATUS_OF)) {
+		ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_OF);
+		ndev->stats.rx_fifo_errors++;
+
+		/* restart RX */
+		ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE);
+	}
+
+	if (rx_done < limit) {
+		if (status & RX_STATUS_PR)
+			goto more;
+
+		status = ag71xx_rr(ag, AG71XX_REG_TX_STATUS);
+		if (status & TX_STATUS_PS)
+			goto more;
+
+		netif_dbg(ag, rx_status, ndev, "disable polling mode, rx=%d, tx=%d,limit=%d\n",
+			  rx_done, tx_done, limit);
+
+		napi_complete(napi);
+
+		/* enable interrupts */
+		ag71xx_int_enable(ag, AG71XX_INT_POLL);
+		return rx_done;
+	}
+
+more:
+	netif_dbg(ag, rx_status, ndev, "stay in polling mode, rx=%d, tx=%d, limit=%d\n",
+		  rx_done, tx_done, limit);
+	return limit;
+
+oom:
+	netif_err(ag, rx_err, ndev, "out of memory\n");
+
+	mod_timer(&ag->oom_timer, jiffies + AG71XX_OOM_REFILL);
+	napi_complete(napi);
+	return 0;
+}
+
+static irqreturn_t ag71xx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = dev_id;
+	struct ag71xx *ag;
+	u32 status;
+
+	ag = netdev_priv(ndev);
+	status = ag71xx_rr(ag, AG71XX_REG_INT_STATUS);
+
+	if (unlikely(!status))
+		return IRQ_NONE;
+
+	if (unlikely(status & AG71XX_INT_ERR)) {
+		if (status & AG71XX_INT_TX_BE) {
+			ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE);
+			netif_err(ag, intr, ndev, "TX BUS error\n");
+		}
+		if (status & AG71XX_INT_RX_BE) {
+			ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE);
+			netif_err(ag, intr, ndev, "RX BUS error\n");
+		}
+	}
+
+	if (likely(status & AG71XX_INT_POLL)) {
+		ag71xx_int_disable(ag, AG71XX_INT_POLL);
+		netif_dbg(ag, intr, ndev, "enable polling mode\n");
+		napi_schedule(&ag->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int ag71xx_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	struct ag71xx *ag = netdev_priv(ndev);
+
+	ndev->mtu = new_mtu;
+	ag71xx_wr(ag, AG71XX_REG_MAC_MFL,
+		  ag71xx_max_frame_len(ndev->mtu));
+
+	return 0;
+}
+
+static const struct net_device_ops ag71xx_netdev_ops = {
+	.ndo_open		= ag71xx_open,
+	.ndo_stop		= ag71xx_stop,
+	.ndo_start_xmit		= ag71xx_hard_start_xmit,
+	.ndo_do_ioctl		= ag71xx_do_ioctl,
+	.ndo_tx_timeout		= ag71xx_tx_timeout,
+	.ndo_change_mtu		= ag71xx_change_mtu,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+static const u32 ar71xx_addr_ar7100[] = {
+	0x19000000, 0x1a000000,
+};
+
+static int ag71xx_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	const struct ag71xx_dcfg *dcfg;
+	struct net_device *ndev;
+	struct resource *res;
+	const void *mac_addr;
+	int tx_size, err, i;
+	struct ag71xx *ag;
+
+	if (!np)
+		return -ENODEV;
+
+	ndev = devm_alloc_etherdev(&pdev->dev, sizeof(*ag));
+	if (!ndev)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	dcfg = of_device_get_match_data(&pdev->dev);
+	if (!dcfg)
+		return -EINVAL;
+
+	ag = netdev_priv(ndev);
+	ag->mac_idx = -1;
+	for (i = 0; i < ARRAY_SIZE(ar71xx_addr_ar7100); i++) {
+		if (ar71xx_addr_ar7100[i] == res->start)
+			ag->mac_idx = i;
+	}
+
+	if (ag->mac_idx < 0) {
+		netif_err(ag, probe, ndev, "unknown mac idx\n");
+		return -EINVAL;
+	}
+
+	ag->clk_eth = devm_clk_get(&pdev->dev, "eth");
+	if (IS_ERR(ag->clk_eth)) {
+		netif_err(ag, probe, ndev, "Failed to get eth clk.\n");
+		return PTR_ERR(ag->clk_eth);
+	}
+
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	ag->pdev = pdev;
+	ag->ndev = ndev;
+	ag->dcfg = dcfg;
+	ag->msg_enable = netif_msg_init(-1, AG71XX_DEFAULT_MSG_ENABLE);
+	memcpy(ag->fifodata, dcfg->fifodata, sizeof(ag->fifodata));
+
+	ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac");
+	if (IS_ERR(ag->mac_reset)) {
+		netif_err(ag, probe, ndev, "missing mac reset\n");
+		err = PTR_ERR(ag->mac_reset);
+		goto err_free;
+	}
+
+	ag->mac_base = devm_ioremap_nocache(&pdev->dev, res->start,
+					    res->end - res->start + 1);
+	if (!ag->mac_base) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	ndev->irq = platform_get_irq(pdev, 0);
+	err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt,
+			       0x0, dev_name(&pdev->dev), ndev);
+	if (err) {
+		netif_err(ag, probe, ndev, "unable to request IRQ %d\n",
+			  ndev->irq);
+		goto err_free;
+	}
+
+	ndev->netdev_ops = &ag71xx_netdev_ops;
+
+	INIT_DELAYED_WORK(&ag->restart_work, ag71xx_restart_work_func);
+	timer_setup(&ag->oom_timer, ag71xx_oom_timer_handler, 0);
+
+	tx_size = AG71XX_TX_RING_SIZE_DEFAULT;
+	ag->rx_ring.order = ag71xx_ring_size_order(AG71XX_RX_RING_SIZE_DEFAULT);
+
+	ndev->min_mtu = 68;
+	ndev->max_mtu = dcfg->max_frame_len - ag71xx_max_frame_len(0);
+
+	ag->rx_buf_offset = NET_SKB_PAD;
+	if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130))
+		ag->rx_buf_offset += NET_IP_ALIGN;
+
+	if (ag71xx_is(ag, AR7100)) {
+		ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT;
+		tx_size *= AG71XX_TX_RING_DS_PER_PKT;
+	}
+	ag->tx_ring.order = ag71xx_ring_size_order(tx_size);
+
+	ag->stop_desc = dmam_alloc_coherent(&pdev->dev,
+					    sizeof(struct ag71xx_desc),
+					    &ag->stop_desc_dma, GFP_KERNEL);
+	if (!ag->stop_desc)
+		goto err_free;
+
+	ag->stop_desc->data = 0;
+	ag->stop_desc->ctrl = 0;
+	ag->stop_desc->next = (u32)ag->stop_desc_dma;
+
+	mac_addr = of_get_mac_address(np);
+	if (mac_addr)
+		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+	if (!mac_addr || !is_valid_ether_addr(ndev->dev_addr)) {
+		netif_err(ag, probe, ndev, "invalid MAC address, using random address\n");
+		eth_random_addr(ndev->dev_addr);
+	}
+
+	ag->phy_if_mode = of_get_phy_mode(np);
+	if (ag->phy_if_mode < 0) {
+		netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
+		err = ag->phy_if_mode;
+		goto err_free;
+	}
+
+	netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT);
+
+	err = clk_prepare_enable(ag->clk_eth);
+	if (err) {
+		netif_err(ag, probe, ndev, "Failed to enable eth clk.\n");
+		goto err_free;
+	}
+
+	ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0);
+
+	ag71xx_hw_init(ag);
+
+	err = ag71xx_mdio_probe(ag);
+	if (err)
+		goto err_put_clk;
+
+	platform_set_drvdata(pdev, ndev);
+
+	err = register_netdev(ndev);
+	if (err) {
+		netif_err(ag, probe, ndev, "unable to register net device\n");
+		platform_set_drvdata(pdev, NULL);
+		goto err_mdio_remove;
+	}
+
+	netif_info(ag, probe, ndev, "Atheros AG71xx at 0x%08lx, irq %d, mode:%s\n",
+		   (unsigned long)ag->mac_base, ndev->irq,
+		   phy_modes(ag->phy_if_mode));
+
+	return 0;
+
+err_mdio_remove:
+	ag71xx_mdio_remove(ag);
+err_put_clk:
+	clk_disable_unprepare(ag->clk_eth);
+err_free:
+	free_netdev(ndev);
+	return err;
+}
+
+static int ag71xx_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct ag71xx *ag;
+
+	if (!ndev)
+		return 0;
+
+	ag = netdev_priv(ndev);
+	unregister_netdev(ndev);
+	ag71xx_mdio_remove(ag);
+	clk_disable_unprepare(ag->clk_eth);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static const u32 ar71xx_fifo_ar7100[] = {
+	0x0fff0000, 0x00001fff, 0x00780fff,
+};
+
+static const u32 ar71xx_fifo_ar9130[] = {
+	0x0fff0000, 0x00001fff, 0x008001ff,
+};
+
+static const u32 ar71xx_fifo_ar9330[] = {
+	0x0010ffff, 0x015500aa, 0x01f00140,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar7100 = {
+	.type = AR7100,
+	.fifodata = ar71xx_fifo_ar7100,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_4K - 1,
+	.tx_hang_workaround = false,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar7240 = {
+	.type = AR7240,
+	.fifodata = ar71xx_fifo_ar7100,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_4K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9130 = {
+	.type = AR9130,
+	.fifodata = ar71xx_fifo_ar9130,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_4K - 1,
+	.tx_hang_workaround = false,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9330 = {
+	.type = AR9330,
+	.fifodata = ar71xx_fifo_ar9330,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_4K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9340 = {
+	.type = AR9340,
+	.fifodata = ar71xx_fifo_ar9330,
+	.max_frame_len = SZ_16K - 1,
+	.desc_pktlen_mask = SZ_16K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_qca9530 = {
+	.type = QCA9530,
+	.fifodata = ar71xx_fifo_ar9330,
+	.max_frame_len = SZ_16K - 1,
+	.desc_pktlen_mask = SZ_16K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_qca9550 = {
+	.type = QCA9550,
+	.fifodata = ar71xx_fifo_ar9330,
+	.max_frame_len = 1540,
+	.desc_pktlen_mask = SZ_16K - 1,
+	.tx_hang_workaround = true,
+};
+
+static const struct of_device_id ag71xx_match[] = {
+	{ .compatible = "qca,ar7100-eth", .data = &ag71xx_dcfg_ar7100 },
+	{ .compatible = "qca,ar7240-eth", .data = &ag71xx_dcfg_ar7240 },
+	{ .compatible = "qca,ar7241-eth", .data = &ag71xx_dcfg_ar7240 },
+	{ .compatible = "qca,ar7242-eth", .data = &ag71xx_dcfg_ar7240 },
+	{ .compatible = "qca,ar9130-eth", .data = &ag71xx_dcfg_ar9130 },
+	{ .compatible = "qca,ar9330-eth", .data = &ag71xx_dcfg_ar9330 },
+	{ .compatible = "qca,ar9340-eth", .data = &ag71xx_dcfg_ar9340 },
+	{ .compatible = "qca,qca9530-eth", .data = &ag71xx_dcfg_qca9530 },
+	{ .compatible = "qca,qca9550-eth", .data = &ag71xx_dcfg_qca9550 },
+	{ .compatible = "qca,qca9560-eth", .data = &ag71xx_dcfg_qca9550 },
+	{}
+};
+
+static struct platform_driver ag71xx_driver = {
+	.probe		= ag71xx_probe,
+	.remove		= ag71xx_remove,
+	.driver = {
+		.name	= "ag71xx",
+		.of_match_table = ag71xx_match,
+	}
+};
+
+module_platform_driver(ag71xx_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index a8fe0808823d..7c06e2aebc9e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -280,6 +280,7 @@ struct tp_params {
 	unsigned short tx_modq[NCHAN];	/* channel to modulation queue map */
 
 	u32 vlan_pri_map;               /* cached TP_VLAN_PRI_MAP */
+	u32 filter_mask;
 	u32 ingress_config;             /* cached TP_INGRESS_CONFIG */
 
 	/* cached TP_OUT_CONFIG compressed error vector
@@ -600,6 +601,7 @@ struct port_info {
 	u8 vin;
 	u8 vivld;
 	u8 smt_idx;
+	u8 rx_cchan;
 };
 
 struct dentry;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 4107007b6ec4..6232236d7abc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -248,8 +248,9 @@ static int validate_filter(struct net_device *dev,
 	u32 fconf, iconf;
 
 	/* Check for unconfigured fields being used. */
-	fconf = adapter->params.tp.vlan_pri_map;
 	iconf = adapter->params.tp.ingress_config;
+	fconf = fs->hash ? adapter->params.tp.filter_mask :
+			   adapter->params.tp.vlan_pri_map;
 
 	if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) ||
 	    unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) ||
@@ -1041,7 +1042,7 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb,
 			    RSS_QUEUE_V(f->fs.iq) |
 			    TX_QUEUE_V(f->fs.nat_mode) |
 			    T5_OPT_2_VALID_F |
-			    RX_CHANNEL_F |
+			    RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
 			    CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
 					 (f->fs.dirsteer << 1)) |
 			    PACE_V((f->fs.maskhash) |
@@ -1081,7 +1082,7 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
 			    RSS_QUEUE_V(f->fs.iq) |
 			    TX_QUEUE_V(f->fs.nat_mode) |
 			    T5_OPT_2_VALID_F |
-			    RX_CHANNEL_F |
+			    RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
 			    CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
 					 (f->fs.dirsteer << 1)) |
 			    PACE_V((f->fs.maskhash) |
@@ -1833,24 +1834,38 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
 	}
 }
 
-int init_hash_filter(struct adapter *adap)
+void init_hash_filter(struct adapter *adap)
 {
+	u32 reg;
+
 	/* On T6, verify the necessary register configs and warn the user in
 	 * case of improper config
 	 */
 	if (is_t6(adap->params.chip)) {
-		if (TCAM_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_0_A)) != 4)
-			goto err;
+		if (is_offload(adap)) {
+			if (!(t4_read_reg(adap, TP_GLOBAL_CONFIG_A)
+			   & ACTIVEFILTERCOUNTS_F)) {
+				dev_err(adap->pdev_dev, "Invalid hash filter + ofld config\n");
+				return;
+			}
+		} else {
+			reg = t4_read_reg(adap, LE_DB_RSP_CODE_0_A);
+			if (TCAM_ACTV_HIT_G(reg) != 4) {
+				dev_err(adap->pdev_dev, "Invalid hash filter config\n");
+				return;
+			}
+
+			reg = t4_read_reg(adap, LE_DB_RSP_CODE_1_A);
+			if (HASH_ACTV_HIT_G(reg) != 4) {
+				dev_err(adap->pdev_dev, "Invalid hash filter config\n");
+				return;
+			}
+		}
 
-		if (HASH_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_1_A)) != 4)
-			goto err;
 	} else {
 		dev_err(adap->pdev_dev, "Hash filter supported only on T6\n");
-		return -EINVAL;
+		return;
 	}
+
 	adap->params.hash_filter = 1;
-	return 0;
-err:
-	dev_warn(adap->pdev_dev, "Invalid hash filter config!\n");
-	return -EINVAL;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
index 8db5fca6dcc9..b0751c0611ec 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
@@ -50,7 +50,7 @@ int delete_filter(struct adapter *adapter, unsigned int fidx);
 
 int writable_filter(struct filter_entry *f);
 void clear_all_filters(struct adapter *adapter);
-int init_hash_filter(struct adapter *adap);
+void init_hash_filter(struct adapter *adap);
 bool is_filter_exact_match(struct adapter *adap,
 			   struct ch_filter_specification *fs);
 #endif /* __CXGB4_FILTER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 715e4edcf4a2..7d7df59f9a70 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1646,6 +1646,18 @@ unsigned int cxgb4_port_chan(const struct net_device *dev)
 }
 EXPORT_SYMBOL(cxgb4_port_chan);
 
+/**
+ *      cxgb4_port_e2cchan - get the HW c-channel of a port
+ *      @dev: the net device for the port
+ *
+ *      Return the HW RX c-channel of the given port.
+ */
+unsigned int cxgb4_port_e2cchan(const struct net_device *dev)
+{
+	return netdev2pinfo(dev)->rx_cchan;
+}
+EXPORT_SYMBOL(cxgb4_port_e2cchan);
+
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo)
 {
 	struct adapter *adap = netdev2adap(dev);
@@ -3905,14 +3917,14 @@ static int adap_init0_phy(struct adapter *adap)
  */
 static int adap_init0_config(struct adapter *adapter, int reset)
 {
+	char *fw_config_file, fw_config_file_path[256];
+	u32 finiver, finicsum, cfcsum, param, val;
 	struct fw_caps_config_cmd caps_cmd;
-	const struct firmware *cf;
 	unsigned long mtype = 0, maddr = 0;
-	u32 finiver, finicsum, cfcsum;
-	int ret;
-	int config_issued = 0;
-	char *fw_config_file, fw_config_file_path[256];
+	const struct firmware *cf;
 	char *config_name = NULL;
+	int config_issued = 0;
+	int ret;
 
 	/*
 	 * Reset device if necessary.
@@ -4020,6 +4032,24 @@ static int adap_init0_config(struct adapter *adapter, int reset)
 			goto bye;
 	}
 
+	val = 0;
+
+	/* Ofld + Hash filter is supported. Older fw will fail this request and
+	 * it is fine.
+	 */
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD));
+	ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0,
+			    1, &param, &val);
+
+	/* FW doesn't know about Hash filter + ofld support,
+	 * it's not a problem, don't return an error.
+	 */
+	if (ret < 0) {
+		dev_warn(adapter->pdev_dev,
+			 "Hash filter with ofld is not supported by FW\n");
+	}
+
 	/*
 	 * Issue a Capability Configuration command to the firmware to get it
 	 * to parse the Configuration File.  We don't use t4_fw_config_file()
@@ -4580,6 +4610,13 @@ static int adap_init0(struct adapter *adap)
 	if (ret < 0)
 		goto bye;
 
+	/* hash filter has some mandatory register settings to be tested and for
+	 * that it needs to test whether offload is enabled or not, hence
+	 * checking and setting it here.
+	 */
+	if (caps_cmd.ofldcaps)
+		adap->params.offload = 1;
+
 	if (caps_cmd.ofldcaps ||
 	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) {
 		/* query offload-related parameters */
@@ -4619,11 +4656,8 @@ static int adap_init0(struct adapter *adap)
 		adap->params.ofldq_wr_cred = val[5];
 
 		if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
-			ret = init_hash_filter(adap);
-			if (ret < 0)
-				goto bye;
+			init_hash_filter(adap);
 		} else {
-			adap->params.offload = 1;
 			adap->num_ofld_uld += 1;
 		}
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 21da34a4ca24..42ae28d651e7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -393,6 +393,7 @@ int cxgb4_immdata_send(struct net_device *dev, unsigned int idx,
 int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
+unsigned int cxgb4_port_e2cchan(const struct net_device *dev);
 unsigned int cxgb4_port_viid(const struct net_device *dev);
 unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid);
 unsigned int cxgb4_port_idx(const struct net_device *dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 93feb258067b..9dd5ed9a2965 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -6209,6 +6209,37 @@ unsigned int t4_get_mps_bg_map(struct adapter *adapter, int pidx)
 }
 
 /**
+ *      t4_get_tp_e2c_map - return the E2C channel map associated with a port
+ *      @adapter: the adapter
+ *      @pidx: the port index
+ */
+static unsigned int t4_get_tp_e2c_map(struct adapter *adapter, int pidx)
+{
+	unsigned int nports;
+	u32 param, val = 0;
+	int ret;
+
+	nports = 1 << NUMPORTS_G(t4_read_reg(adapter, MPS_CMN_CTL_A));
+	if (pidx >= nports) {
+		CH_WARN(adapter, "TP E2C Channel Port Index %d >= Nports %d\n",
+			pidx, nports);
+		return 0;
+	}
+
+	/* FW version >= 1.16.44.0 can determine E2C channel map using
+	 * FW_PARAMS_PARAM_DEV_TPCHMAP API.
+	 */
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_TPCHMAP));
+	ret = t4_query_params_ns(adapter, adapter->mbox, adapter->pf,
+				 0, 1, &param, &val);
+	if (!ret)
+		return (val >> (8 * pidx)) & 0xff;
+
+	return 0;
+}
+
+/**
  *	t4_get_tp_ch_map - return TP ingress channels associated with a port
  *	@adapter: the adapter
  *	@pidx: the port index
@@ -9368,8 +9399,9 @@ int t4_init_sge_params(struct adapter *adapter)
  */
 int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
 {
-	int chan;
-	u32 v;
+	u32 param, val, v;
+	int chan, ret;
+
 
 	v = t4_read_reg(adap, TP_TIMER_RESOLUTION_A);
 	adap->params.tp.tre = TIMERRESOLUTION_G(v);
@@ -9379,11 +9411,47 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
 	for (chan = 0; chan < NCHAN; chan++)
 		adap->params.tp.tx_modq[chan] = chan;
 
-	/* Cache the adapter's Compressed Filter Mode and global Incress
+	/* Cache the adapter's Compressed Filter Mode/Mask and global Ingress
 	 * Configuration.
 	 */
-	t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1,
-		       TP_VLAN_PRI_MAP_A, sleep_ok);
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FILTER) |
+		 FW_PARAMS_PARAM_Y_V(FW_PARAM_DEV_FILTER_MODE_MASK));
+
+	/* Read current value */
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1,
+			      &param, &val);
+	if (ret == 0) {
+		dev_info(adap->pdev_dev,
+			 "Current filter mode/mask 0x%x:0x%x\n",
+			 FW_PARAMS_PARAM_FILTER_MODE_G(val),
+			 FW_PARAMS_PARAM_FILTER_MASK_G(val));
+		adap->params.tp.vlan_pri_map =
+			FW_PARAMS_PARAM_FILTER_MODE_G(val);
+		adap->params.tp.filter_mask =
+			FW_PARAMS_PARAM_FILTER_MASK_G(val);
+	} else {
+		dev_info(adap->pdev_dev,
+			 "Failed to read filter mode/mask via fw api, using indirect-reg-read\n");
+
+		/* Incase of older-fw (which doesn't expose the api
+		 * FW_PARAM_DEV_FILTER_MODE_MASK) and newer-driver (which uses
+		 * the fw api) combination, fall-back to older method of reading
+		 * the filter mode from indirect-register
+		 */
+		t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1,
+			       TP_VLAN_PRI_MAP_A, sleep_ok);
+
+		/* With the older-fw and newer-driver combination we might run
+		 * into an issue when user wants to use hash filter region but
+		 * the filter_mask is zero, in this case filter_mask validation
+		 * is tough. To avoid that we set the filter_mask same as filter
+		 * mode, which will behave exactly as the older way of ignoring
+		 * the filter mask validation.
+		 */
+		adap->params.tp.filter_mask = adap->params.tp.vlan_pri_map;
+	}
+
 	t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1,
 		       TP_INGRESS_CONFIG_A, sleep_ok);
 
@@ -9594,6 +9662,7 @@ int t4_init_portinfo(struct port_info *pi, int mbox,
 	pi->tx_chan = port;
 	pi->lport = port;
 	pi->rss_size = rss_size;
+	pi->rx_cchan = t4_get_tp_e2c_map(pi->adapter, port);
 
 	/* If fw supports returning the VIN as part of FW_VI_CMD,
 	 * save the returned values.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index eb222d40ddbf..a957a6e4d4c4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -1334,6 +1334,10 @@
 #define TP_OUT_CONFIG_A		0x7d04
 #define TP_GLOBAL_CONFIG_A	0x7d08
 
+#define ACTIVEFILTERCOUNTS_S    22
+#define ACTIVEFILTERCOUNTS_V(x) ((x) << ACTIVEFILTERCOUNTS_S)
+#define ACTIVEFILTERCOUNTS_F    ACTIVEFILTERCOUNTS_V(1U)
+
 #define TP_CMM_TCB_BASE_A 0x7d10
 #define TP_CMM_MM_BASE_A 0x7d14
 #define TP_CMM_TIMER_BASE_A 0x7d18
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index b2a618e72fcf..0be4ce520352 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1221,6 +1221,23 @@ enum fw_params_mnem {
 /*
  * device parameters
  */
+
+#define FW_PARAMS_PARAM_FILTER_MODE_S 16
+#define FW_PARAMS_PARAM_FILTER_MODE_M 0xffff
+#define FW_PARAMS_PARAM_FILTER_MODE_V(x)          \
+	((x) << FW_PARAMS_PARAM_FILTER_MODE_S)
+#define FW_PARAMS_PARAM_FILTER_MODE_G(x)          \
+	(((x) >> FW_PARAMS_PARAM_FILTER_MODE_S) & \
+	FW_PARAMS_PARAM_FILTER_MODE_M)
+
+#define FW_PARAMS_PARAM_FILTER_MASK_S 0
+#define FW_PARAMS_PARAM_FILTER_MASK_M 0xffff
+#define FW_PARAMS_PARAM_FILTER_MASK_V(x)          \
+	((x) << FW_PARAMS_PARAM_FILTER_MASK_S)
+#define FW_PARAMS_PARAM_FILTER_MASK_G(x)          \
+	(((x) >> FW_PARAMS_PARAM_FILTER_MASK_S) & \
+	FW_PARAMS_PARAM_FILTER_MASK_M)
+
 enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_CCLK	= 0x00, /* chip core clock in khz */
 	FW_PARAMS_PARAM_DEV_PORTVEC	= 0x01, /* the port vector */
@@ -1250,12 +1267,15 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR	= 0x1C,
 	FW_PARAMS_PARAM_DEV_FILTER2_WR  = 0x1D,
 	FW_PARAMS_PARAM_DEV_MPSBGMAP	= 0x1E,
+	FW_PARAMS_PARAM_DEV_TPCHMAP     = 0x1F,
 	FW_PARAMS_PARAM_DEV_HMA_SIZE	= 0x20,
 	FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
 	FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR    = 0x24,
 	FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27,
+	FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28,
 	FW_PARAMS_PARAM_DEV_DBQ_TIMER	= 0x29,
 	FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
+	FW_PARAMS_PARAM_DEV_FILTER = 0x2E,
 };
 
 /*
@@ -1347,6 +1367,11 @@ enum fw_params_param_dev_diag {
 	FW_PARAM_DEV_DIAG_MAXTMPTHRESH	= 0x02,
 };
 
+enum fw_params_param_dev_filter {
+	FW_PARAM_DEV_FILTER_VNIC_MODE   = 0x00,
+	FW_PARAM_DEV_FILTER_MODE_MASK   = 0x01,
+};
+
 enum fw_params_param_dev_fwcache {
 	FW_PARAM_DEV_FWCACHE_FLUSH      = 0x00,
 	FW_PARAM_DEV_FWCACHE_FLUSHINV   = 0x01,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 7d2390e3df77..753957ec72be 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2479,14 +2479,9 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv,
 	queue.destination.type = DPNI_DEST_DPCON;
 	queue.destination.priority = 1;
 	queue.user_context = (u64)(uintptr_t)fq;
-	queue.flc.stash_control = 1;
-	queue.flc.value &= 0xFFFFFFFFFFFFFFC0;
-	/* 01 01 00 - data, annotation, flow context */
-	queue.flc.value |= 0x14;
 	err = dpni_set_queue(priv->mc_io, 0, priv->mc_token,
 			     DPNI_QUEUE_RX, 0, fq->flowid,
-			     DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST |
-			     DPNI_QUEUE_OPT_FLC,
+			     DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST,
 			     &queue);
 	if (err) {
 		dev_err(dev, "dpni_set_queue(RX) failed\n");
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index 8429f5c1d810..ed0d010c7cf2 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -29,3 +29,13 @@ config FSL_ENETC_PTP_CLOCK
 	  packets using the SO_TIMESTAMPING API.
 
 	  If compiled as module (M), the module name is fsl-enetc-ptp.
+
+config FSL_ENETC_HW_TIMESTAMPING
+	bool "ENETC hardware timestamping support"
+	depends on FSL_ENETC || FSL_ENETC_VF
+	help
+	  Enable hardware timestamping support on the Ethernet packets
+	  using the SO_TIMESTAMPING API. Because the RX BD ring dynamic
+	  allocation has not been supported and it is too expensive to use
+	  extended RX BDs if timestamping is not used, this option enables
+	  extended RX BDs in order to support hardware timestamping.
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 491475d87736..223709443ea4 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -13,7 +13,8 @@
 #define ENETC_MAX_SKB_FRAGS	13
 #define ENETC_TXBDS_MAX_NEEDED	ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
 
-static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb);
+static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+			      int active_offloads);
 
 netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
@@ -33,7 +34,7 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
 		return NETDEV_TX_BUSY;
 	}
 
-	count = enetc_map_tx_buffs(tx_ring, skb);
+	count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
 	if (unlikely(!count))
 		goto drop_packet_err;
 
@@ -105,7 +106,8 @@ static void enetc_free_tx_skb(struct enetc_bdr *tx_ring,
 	}
 }
 
-static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
+static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+			      int active_offloads)
 {
 	struct enetc_tx_swbd *tx_swbd;
 	struct skb_frag_struct *frag;
@@ -137,7 +139,10 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
 	count++;
 
 	do_vlan = skb_vlan_tag_present(skb);
-	do_tstamp = skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP;
+	do_tstamp = (active_offloads & ENETC_F_TX_TSTAMP) &&
+		    (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP);
+	tx_swbd->do_tstamp = do_tstamp;
+	tx_swbd->check_wb = tx_swbd->do_tstamp;
 
 	if (do_vlan || do_tstamp)
 		flags |= ENETC_TXBD_FLAGS_EX;
@@ -299,24 +304,70 @@ static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
 	return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi;
 }
 
+static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd,
+				u64 *tstamp)
+{
+	u32 lo, hi, tstamp_lo;
+
+	lo = enetc_rd(hw, ENETC_SICTR0);
+	hi = enetc_rd(hw, ENETC_SICTR1);
+	tstamp_lo = le32_to_cpu(txbd->wb.tstamp);
+	if (lo <= tstamp_lo)
+		hi -= 1;
+	*tstamp = (u64)hi << 32 | tstamp_lo;
+}
+
+static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+
+	if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
+		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+		shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+		skb_tstamp_tx(skb, &shhwtstamps);
+	}
+}
+
 static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 {
 	struct net_device *ndev = tx_ring->ndev;
 	int tx_frm_cnt = 0, tx_byte_cnt = 0;
 	struct enetc_tx_swbd *tx_swbd;
 	int i, bds_to_clean;
+	bool do_tstamp;
+	u64 tstamp = 0;
 
 	i = tx_ring->next_to_clean;
 	tx_swbd = &tx_ring->tx_swbd[i];
 	bds_to_clean = enetc_bd_ready_count(tx_ring, i);
 
+	do_tstamp = false;
+
 	while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
 		bool is_eof = !!tx_swbd->skb;
 
+		if (unlikely(tx_swbd->check_wb)) {
+			struct enetc_ndev_priv *priv = netdev_priv(ndev);
+			union enetc_tx_bd *txbd;
+
+			txbd = ENETC_TXBD(*tx_ring, i);
+
+			if (txbd->flags & ENETC_TXBD_FLAGS_W &&
+			    tx_swbd->do_tstamp) {
+				enetc_get_tx_tstamp(&priv->si->hw, txbd,
+						    &tstamp);
+				do_tstamp = true;
+			}
+		}
+
 		if (likely(tx_swbd->dma))
 			enetc_unmap_tx_buff(tx_ring, tx_swbd);
 
 		if (is_eof) {
+			if (unlikely(do_tstamp)) {
+				enetc_tstamp_tx(tx_swbd->skb, tstamp);
+				do_tstamp = false;
+			}
 			napi_consume_skb(tx_swbd->skb, napi_budget);
 			tx_swbd->skb = NULL;
 		}
@@ -425,10 +476,38 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 	return j;
 }
 
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+static void enetc_get_rx_tstamp(struct net_device *ndev,
+				union enetc_rx_bd *rxbd,
+				struct sk_buff *skb)
+{
+	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_hw *hw = &priv->si->hw;
+	u32 lo, hi, tstamp_lo;
+	u64 tstamp;
+
+	if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TSTMP) {
+		lo = enetc_rd(hw, ENETC_SICTR0);
+		hi = enetc_rd(hw, ENETC_SICTR1);
+		tstamp_lo = le32_to_cpu(rxbd->r.tstamp);
+		if (lo <= tstamp_lo)
+			hi -= 1;
+
+		tstamp = (u64)hi << 32 | tstamp_lo;
+		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+		shhwtstamps->hwtstamp = ns_to_ktime(tstamp);
+	}
+}
+#endif
+
 static void enetc_get_offloads(struct enetc_bdr *rx_ring,
 			       union enetc_rx_bd *rxbd, struct sk_buff *skb)
 {
-	/* TODO: add tstamp, hashing */
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
+#endif
+	/* TODO: hashing */
 	if (rx_ring->ndev->features & NETIF_F_RXCSUM) {
 		u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum);
 
@@ -442,6 +521,10 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
 	if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 				       le16_to_cpu(rxbd->r.vlan_opt));
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	if (priv->active_offloads & ENETC_F_RX_TSTAMP)
+		enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb);
+#endif
 }
 
 static void enetc_process_skb(struct enetc_bdr *rx_ring,
@@ -1074,6 +1157,9 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 	enetc_rxbdr_wr(hw, idx, ENETC_RBICIR0, ENETC_RBICIR0_ICEN | 0x1);
 
 	rbmr = ENETC_RBMR_EN;
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	rbmr |= ENETC_RBMR_BDS;
+#endif
 	if (rx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_RX)
 		rbmr |= ENETC_RBMR_VTE;
 
@@ -1341,6 +1427,62 @@ int enetc_close(struct net_device *ndev)
 	return 0;
 }
 
+int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+		   void *type_data)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct tc_mqprio_qopt *mqprio = type_data;
+	struct enetc_bdr *tx_ring;
+	u8 num_tc;
+	int i;
+
+	if (type != TC_SETUP_QDISC_MQPRIO)
+		return -EOPNOTSUPP;
+
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	num_tc = mqprio->num_tc;
+
+	if (!num_tc) {
+		netdev_reset_tc(ndev);
+		netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
+
+		/* Reset all ring priorities to 0 */
+		for (i = 0; i < priv->num_tx_rings; i++) {
+			tx_ring = priv->tx_ring[i];
+			enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, 0);
+		}
+
+		return 0;
+	}
+
+	/* Check if we have enough BD rings available to accommodate all TCs */
+	if (num_tc > priv->num_tx_rings) {
+		netdev_err(ndev, "Max %d traffic classes supported\n",
+			   priv->num_tx_rings);
+		return -EINVAL;
+	}
+
+	/* For the moment, we use only one BD ring per TC.
+	 *
+	 * Configure num_tc BD rings with increasing priorities.
+	 */
+	for (i = 0; i < num_tc; i++) {
+		tx_ring = priv->tx_ring[i];
+		enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, i);
+	}
+
+	/* Reset the number of netdev queues based on the TC count */
+	netif_set_real_num_tx_queues(ndev, num_tc);
+
+	netdev_set_num_tc(ndev, num_tc);
+
+	/* Each TC is associated with one netdev queue */
+	for (i = 0; i < num_tc; i++)
+		netdev_set_tc_queue(ndev, i, 1, i);
+
+	return 0;
+}
+
 struct net_device_stats *enetc_get_stats(struct net_device *ndev)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
@@ -1396,6 +1538,70 @@ int enetc_set_features(struct net_device *ndev,
 	return 0;
 }
 
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct hwtstamp_config config;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		priv->active_offloads &= ~ENETC_F_TX_TSTAMP;
+		break;
+	case HWTSTAMP_TX_ON:
+		priv->active_offloads |= ENETC_F_TX_TSTAMP;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		priv->active_offloads &= ~ENETC_F_RX_TSTAMP;
+		break;
+	default:
+		priv->active_offloads |= ENETC_F_RX_TSTAMP;
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+	}
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+	       -EFAULT : 0;
+}
+
+static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct hwtstamp_config config;
+
+	config.flags = 0;
+
+	if (priv->active_offloads & ENETC_F_TX_TSTAMP)
+		config.tx_type = HWTSTAMP_TX_ON;
+	else
+		config.tx_type = HWTSTAMP_TX_OFF;
+
+	config.rx_filter = (priv->active_offloads & ENETC_F_RX_TSTAMP) ?
+			    HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE;
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+	       -EFAULT : 0;
+}
+#endif
+
+int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
+{
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	if (cmd == SIOCSHWTSTAMP)
+		return enetc_hwtstamp_set(ndev, rq);
+	if (cmd == SIOCGHWTSTAMP)
+		return enetc_hwtstamp_get(ndev, rq);
+#endif
+	return -EINVAL;
+}
+
 int enetc_alloc_msix(struct enetc_ndev_priv *priv)
 {
 	struct pci_dev *pdev = priv->si->pdev;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index b274135c5103..541b4e2073fe 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -21,7 +21,9 @@ struct enetc_tx_swbd {
 	struct sk_buff *skb;
 	dma_addr_t dma;
 	u16 len;
-	u16 is_dma_page;
+	u8 is_dma_page:1;
+	u8 check_wb:1;
+	u8 do_tstamp:1;
 };
 
 #define ENETC_RX_MAXFRM_SIZE	ENETC_MAC_MAXFRM_SIZE
@@ -167,6 +169,12 @@ struct enetc_cls_rule {
 
 #define ENETC_MAX_BDR_INT	2 /* fixed to max # of available cpus */
 
+/* TODO: more hardware offloads */
+enum enetc_active_offloads {
+	ENETC_F_RX_TSTAMP	= BIT(0),
+	ENETC_F_TX_TSTAMP	= BIT(1),
+};
+
 struct enetc_ndev_priv {
 	struct net_device *ndev;
 	struct device *dev; /* dma-mapping device */
@@ -178,6 +186,7 @@ struct enetc_ndev_priv {
 	u16 rx_bd_count, tx_bd_count;
 
 	u16 msg_enable;
+	int active_offloads;
 
 	struct enetc_bdr *tx_ring[16];
 	struct enetc_bdr *rx_ring[16];
@@ -200,6 +209,9 @@ struct enetc_msg_cmd_set_primary_mac {
 
 #define ENETC_CBDR_TIMEOUT	1000 /* usecs */
 
+/* PTP driver exports */
+extern int enetc_phc_index;
+
 /* SI common */
 int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv);
 void enetc_pci_remove(struct pci_dev *pdev);
@@ -216,6 +228,10 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev);
 struct net_device_stats *enetc_get_stats(struct net_device *ndev);
 int enetc_set_features(struct net_device *ndev,
 		       netdev_features_t features);
+int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
+int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+		   void *type_data);
+
 /* ethtool */
 void enetc_set_ethtool_ops(struct net_device *ndev);
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index b9519b6ad727..fcb52efec075 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -555,6 +555,35 @@ static void enetc_get_ringparam(struct net_device *ndev,
 	}
 }
 
+static int enetc_get_ts_info(struct net_device *ndev,
+			     struct ethtool_ts_info *info)
+{
+	int *phc_idx;
+
+	phc_idx = symbol_get(enetc_phc_index);
+	if (phc_idx) {
+		info->phc_index = *phc_idx;
+		symbol_put(enetc_phc_index);
+	} else {
+		info->phc_index = -1;
+	}
+
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	info->tx_types = (1 << HWTSTAMP_TX_OFF) |
+			 (1 << HWTSTAMP_TX_ON);
+	info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+			   (1 << HWTSTAMP_FILTER_ALL);
+#else
+	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE;
+#endif
+	return 0;
+}
+
 static const struct ethtool_ops enetc_pf_ethtool_ops = {
 	.get_regs_len = enetc_get_reglen,
 	.get_regs = enetc_get_regs,
@@ -571,6 +600,7 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = {
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 	.get_link = ethtool_op_get_link,
+	.get_ts_info = enetc_get_ts_info,
 };
 
 static const struct ethtool_ops enetc_vf_ethtool_ops = {
@@ -586,6 +616,7 @@ static const struct ethtool_ops enetc_vf_ethtool_ops = {
 	.set_rxfh = enetc_set_rxfh,
 	.get_ringparam = enetc_get_ringparam,
 	.get_link = ethtool_op_get_link,
+	.get_ts_info = enetc_get_ts_info,
 };
 
 void enetc_set_ethtool_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index df8eb8882d92..88276299f447 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -127,7 +127,7 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_TBSR_BUSY	BIT(0)
 #define ENETC_TBMR_VIH	BIT(9)
 #define ENETC_TBMR_PRIO_MASK		GENMASK(2, 0)
-#define ENETC_TBMR_PRIO_SET(val)	val
+#define ENETC_TBMR_SET_PRIO(val)	((val) & ENETC_TBMR_PRIO_MASK)
 #define ENETC_TBMR_EN	BIT(31)
 #define ENETC_TBSR	0x4
 #define ENETC_TBBAR0	0x10
@@ -361,6 +361,12 @@ union enetc_tx_bd {
 		u8 e_flags;
 		u8 flags;
 	} ext; /* Tx BD extension */
+	struct {
+		__le32 tstamp;
+		u8 reserved[10];
+		u8 status;
+		u8 flags;
+	} wb; /* writeback descriptor */
 };
 
 #define ENETC_TXBD_FLAGS_L4CS	BIT(0)
@@ -399,6 +405,9 @@ union enetc_rx_bd {
 	struct {
 		__le64 addr;
 		u8 reserved[8];
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+		u8 reserved1[16];
+#endif
 	} w;
 	struct {
 		__le16 inet_csum;
@@ -413,6 +422,10 @@ union enetc_rx_bd {
 			};
 			__le32 lstatus;
 		};
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+		__le32 tstamp;
+		u8 reserved[12];
+#endif
 	} r;
 };
 
@@ -531,3 +544,13 @@ static inline void enetc_enable_txvlan(struct enetc_hw *hw, int si_idx,
 	val = (val & ~ENETC_TBMR_VIH) | (en ? ENETC_TBMR_VIH : 0);
 	enetc_txbdr_wr(hw, si_idx, ENETC_TBMR, val);
 }
+
+static inline void enetc_set_bdr_prio(struct enetc_hw *hw, int bdr_idx,
+				      int prio)
+{
+	u32 val = enetc_txbdr_rd(hw, bdr_idx, ENETC_TBMR);
+
+	val &= ~ENETC_TBMR_PRIO_MASK;
+	val |= ENETC_TBMR_SET_PRIO(prio);
+	enetc_txbdr_wr(hw, bdr_idx, ENETC_TBMR, val);
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 78287c517095..258b3cb38a6f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -702,6 +702,8 @@ static const struct net_device_ops enetc_ndev_ops = {
 	.ndo_set_vf_vlan	= enetc_pf_set_vf_vlan,
 	.ndo_set_vf_spoofchk	= enetc_pf_set_vf_spoofchk,
 	.ndo_set_features	= enetc_pf_set_features,
+	.ndo_do_ioctl		= enetc_ioctl,
+	.ndo_setup_tc		= enetc_setup_tc,
 };
 
 static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
index 8c1497e7d9c5..2fd2586e42bf 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
@@ -7,6 +7,9 @@
 
 #include "enetc.h"
 
+int enetc_phc_index = -1;
+EXPORT_SYMBOL(enetc_phc_index);
+
 static struct ptp_clock_info enetc_ptp_caps = {
 	.owner		= THIS_MODULE,
 	.name		= "ENETC PTP clock",
@@ -96,6 +99,7 @@ static int enetc_ptp_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_no_clock;
 
+	enetc_phc_index = ptp_qoriq->phc_index;
 	pci_set_drvdata(pdev, ptp_qoriq);
 
 	return 0;
@@ -119,6 +123,7 @@ static void enetc_ptp_remove(struct pci_dev *pdev)
 {
 	struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev);
 
+	enetc_phc_index = -1;
 	ptp_qoriq_free(ptp_qoriq);
 	kfree(ptp_qoriq);
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 72c3ea887bcf..ebd21bf4cfa1 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -111,6 +111,8 @@ static const struct net_device_ops enetc_ndev_ops = {
 	.ndo_get_stats		= enetc_get_stats,
 	.ndo_set_mac_address	= enetc_vf_set_mac_addr,
 	.ndo_set_features	= enetc_vf_set_features,
+	.ndo_do_ioctl		= enetc_ioctl,
+	.ndo_setup_tc		= enetc_setup_tc,
 };
 
 static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 38f10f7dcbc3..848defa33d3a 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3473,7 +3473,6 @@ fec_probe(struct platform_device *pdev)
 		if (ret) {
 			dev_err(&pdev->dev,
 				"Failed to enable phy regulator: %d\n", ret);
-			clk_disable_unprepare(fep->clk_ipg);
 			goto failed_regulator;
 		}
 	} else {
diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.c b/drivers/net/ethernet/freescale/fman/fman_keygen.c
index f54da3c684d0..e1bdfed16134 100644
--- a/drivers/net/ethernet/freescale/fman/fman_keygen.c
+++ b/drivers/net/ethernet/freescale/fman/fman_keygen.c
@@ -144,7 +144,8 @@
 /* Hash Key extraction fields: */
 #define DEFAULT_HASH_KEY_EXTRACT_FIELDS		\
 	(KG_SCH_KN_IPSRC1 | KG_SCH_KN_IPDST1 | \
-	    KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST)
+	 KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST | \
+	 KG_SCH_KN_IPSEC_SPI)
 
 /* Default values to be used as hash key in case IPv4 or L4 (TCP, UDP)
  * don't exist in the frame
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index ad21b0ef1946..a18645e24434 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -343,6 +343,8 @@ struct hnae3_ae_dev {
  *   Enable/disable hardware strip vlan tag of packets received
  * set_gro_en
  *   Enable/disable HW GRO
+ * add_arfs_entry
+ *   Check the 5-tuples of flow, and create flow director rule
  */
 struct hnae3_ae_ops {
 	int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
@@ -492,6 +494,8 @@ struct hnae3_ae_ops {
 				struct ethtool_rxnfc *cmd, u32 *rule_locs);
 	int (*restore_fd_rules)(struct hnae3_handle *handle);
 	void (*enable_fd)(struct hnae3_handle *handle, bool enable);
+	int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id,
+			      u16 flow_id, struct flow_keys *fkeys);
 	int (*dbg_run_cmd)(struct hnae3_handle *handle, char *cmd_buf);
 	pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev);
 	bool (*get_hw_reset_stat)(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index fc4917ac44be..30354fa33a36 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -252,6 +252,7 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
 	dev_info(&h->pdev->dev, "dump mng tbl\n");
 	dev_info(&h->pdev->dev, "dump reset info\n");
+	dev_info(&h->pdev->dev, "dump m7 info\n");
 	dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 196a3d780dcf..f6dc3054bcd1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4,6 +4,9 @@
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
 #include <linux/interrupt.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
@@ -79,23 +82,6 @@ static irqreturn_t hns3_irq_handle(int irq, void *vector)
 	return IRQ_HANDLED;
 }
 
-/* This callback function is used to set affinity changes to the irq affinity
- * masks when the irq_set_affinity_notifier function is used.
- */
-static void hns3_nic_irq_affinity_notify(struct irq_affinity_notify *notify,
-					 const cpumask_t *mask)
-{
-	struct hns3_enet_tqp_vector *tqp_vectors =
-		container_of(notify, struct hns3_enet_tqp_vector,
-			     affinity_notify);
-
-	tqp_vectors->affinity_mask = *mask;
-}
-
-static void hns3_nic_irq_affinity_release(struct kref *ref)
-{
-}
-
 static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv)
 {
 	struct hns3_enet_tqp_vector *tqp_vectors;
@@ -107,8 +93,7 @@ static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv)
 		if (tqp_vectors->irq_init_flag != HNS3_VECTOR_INITED)
 			continue;
 
-		/* clear the affinity notifier and affinity mask */
-		irq_set_affinity_notifier(tqp_vectors->vector_irq, NULL);
+		/* clear the affinity mask */
 		irq_set_affinity_hint(tqp_vectors->vector_irq, NULL);
 
 		/* release the irq resource */
@@ -161,12 +146,6 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv)
 			return ret;
 		}
 
-		tqp_vectors->affinity_notify.notify =
-					hns3_nic_irq_affinity_notify;
-		tqp_vectors->affinity_notify.release =
-					hns3_nic_irq_affinity_release;
-		irq_set_affinity_notifier(tqp_vectors->vector_irq,
-					  &tqp_vectors->affinity_notify);
 		irq_set_affinity_hint(tqp_vectors->vector_irq,
 				      &tqp_vectors->affinity_mask);
 
@@ -340,6 +319,40 @@ static void hns3_tqp_disable(struct hnae3_queue *tqp)
 	hns3_write_dev(tqp, HNS3_RING_EN_REG, rcb_reg);
 }
 
+static void hns3_free_rx_cpu_rmap(struct net_device *netdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+	netdev->rx_cpu_rmap = NULL;
+#endif
+}
+
+static int hns3_set_rx_cpu_rmap(struct net_device *netdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_enet_tqp_vector *tqp_vector;
+	int i, ret;
+
+	if (!netdev->rx_cpu_rmap) {
+		netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->vector_num);
+		if (!netdev->rx_cpu_rmap)
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < priv->vector_num; i++) {
+		tqp_vector = &priv->tqp_vector[i];
+		ret = irq_cpu_rmap_add(netdev->rx_cpu_rmap,
+				       tqp_vector->vector_irq);
+		if (ret) {
+			hns3_free_rx_cpu_rmap(netdev);
+			return ret;
+		}
+	}
+#endif
+	return 0;
+}
+
 static int hns3_nic_net_up(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -351,11 +364,16 @@ static int hns3_nic_net_up(struct net_device *netdev)
 	if (ret)
 		return ret;
 
+	/* the device can work without cpu rmap, only aRFS needs it */
+	ret = hns3_set_rx_cpu_rmap(netdev);
+	if (ret)
+		netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret);
+
 	/* get irq resource for all vectors */
 	ret = hns3_nic_init_irq(priv);
 	if (ret) {
 		netdev_err(netdev, "hns init irq failed! ret=%d\n", ret);
-		return ret;
+		goto free_rmap;
 	}
 
 	clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
@@ -384,7 +402,8 @@ out_start_err:
 		hns3_vector_disable(&priv->tqp_vector[j]);
 
 	hns3_nic_uninit_irq(priv);
-
+free_rmap:
+	hns3_free_rx_cpu_rmap(netdev);
 	return ret;
 }
 
@@ -467,6 +486,8 @@ static void hns3_nic_net_down(struct net_device *netdev)
 	if (ops->stop)
 		ops->stop(priv->ae_handle);
 
+	hns3_free_rx_cpu_rmap(netdev);
+
 	/* free irq resources */
 	hns3_nic_uninit_irq(priv);
 
@@ -1722,6 +1743,32 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
 		h->ae_algo->ops->reset_event(h->pdev, h);
 }
 
+#ifdef CONFIG_RFS_ACCEL
+static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+			      u16 rxq_index, u32 flow_id)
+{
+	struct hnae3_handle *h = hns3_get_handle(dev);
+	struct flow_keys fkeys;
+
+	if (!h->ae_algo->ops->add_arfs_entry)
+		return -EOPNOTSUPP;
+
+	if (skb->encapsulation)
+		return -EPROTONOSUPPORT;
+
+	if (!skb_flow_dissect_flow_keys(skb, &fkeys, 0))
+		return -EPROTONOSUPPORT;
+
+	if ((fkeys.basic.n_proto != htons(ETH_P_IP) &&
+	     fkeys.basic.n_proto != htons(ETH_P_IPV6)) ||
+	    (fkeys.basic.ip_proto != IPPROTO_TCP &&
+	     fkeys.basic.ip_proto != IPPROTO_UDP))
+		return -EPROTONOSUPPORT;
+
+	return h->ae_algo->ops->add_arfs_entry(h, rxq_index, flow_id, &fkeys);
+}
+#endif
+
 static const struct net_device_ops hns3_nic_netdev_ops = {
 	.ndo_open		= hns3_nic_net_open,
 	.ndo_stop		= hns3_nic_net_stop,
@@ -1737,6 +1784,10 @@ static const struct net_device_ops hns3_nic_netdev_ops = {
 	.ndo_vlan_rx_add_vid	= hns3_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= hns3_vlan_rx_kill_vid,
 	.ndo_set_vf_vlan	= hns3_ndo_set_vf_vlan,
+#ifdef CONFIG_RFS_ACCEL
+	.ndo_rx_flow_steer	= hns3_rx_flow_steer,
+#endif
+
 };
 
 bool hns3_is_phys_func(struct pci_dev *pdev)
@@ -2828,6 +2879,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
 		return ret;
 	}
 
+	skb_record_rx_queue(skb, ring->tqp->tqp_index);
 	*out_skb = skb;
 
 	return 0;
@@ -3331,8 +3383,6 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
 		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
 		if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) {
-			irq_set_affinity_notifier(tqp_vector->vector_irq,
-						  NULL);
 			irq_set_affinity_hint(tqp_vector->vector_irq, NULL);
 			free_irq(tqp_vector->vector_irq, tqp_vector);
 			tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED;
@@ -3851,6 +3901,8 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
 
 	hns3_client_stop(handle);
 
+	hns3_uninit_phy(netdev);
+
 	if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
 		netdev_warn(netdev, "already uninitialized\n");
 		goto out_netdev_free;
@@ -3860,8 +3912,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
 
 	hns3_force_clear_all_rx_ring(handle);
 
-	hns3_uninit_phy(netdev);
-
 	hns3_nic_uninit_vector_data(priv);
 
 	ret = hns3_nic_dealloc_vector_data(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index c14480f9b625..408efd55ba48 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -145,7 +145,7 @@ enum hns3_nic_state {
 #define HNS3_RXD_TSIND_M			(0x7 << HNS3_RXD_TSIND_S)
 #define HNS3_RXD_LKBK_B				15
 #define HNS3_RXD_GRO_SIZE_S			16
-#define HNS3_RXD_GRO_SIZE_M			(0x3ff << HNS3_RXD_GRO_SIZE_S)
+#define HNS3_RXD_GRO_SIZE_M			(0x3fff << HNS3_RXD_GRO_SIZE_S)
 
 #define HNS3_TXD_L3T_S				0
 #define HNS3_TXD_L3T_M				(0x3 << HNS3_TXD_L3T_S)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index fbd904e3077c..e5329053842b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -110,8 +110,7 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring)
 		hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG,
 				upper_32_bits(dma));
 		hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG,
-				(ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
-				HCLGE_NIC_CMQ_ENABLE);
+				ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S);
 		hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0);
 		hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0);
 	} else {
@@ -120,8 +119,7 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring)
 		hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG,
 				upper_32_bits(dma));
 		hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG,
-				(ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
-				HCLGE_NIC_CMQ_ENABLE);
+				ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S);
 		hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0);
 		hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index d79a209b80f6..61cb10dc2b9b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -243,6 +243,9 @@ enum hclge_opcode_type {
 
 	/* NCL config command */
 	HCLGE_OPC_QUERY_NCL_CONFIG	= 0x7011,
+	/* M7 stats command */
+	HCLGE_OPC_M7_STATS_BD		= 0x7012,
+	HCLGE_OPC_M7_STATS_INFO		= 0x7013,
 
 	/* SFP command */
 	HCLGE_OPC_GET_SFP_INFO		= 0x7104,
@@ -970,6 +973,11 @@ struct hclge_fd_ad_config_cmd {
 	u8 rsv2[8];
 };
 
+struct hclge_get_m7_bd_cmd {
+	__le32 bd_num;
+	u8 rsv[20];
+};
+
 int hclge_cmd_init(struct hclge_dev *hdev);
 static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
 {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index a9ffb57c4607..ed1f533d4082 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -921,6 +921,61 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
 		 hdev->rst_stats.reset_cnt);
 }
 
+void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
+{
+	struct hclge_desc *desc_src, *desc_tmp;
+	struct hclge_get_m7_bd_cmd *req;
+	struct hclge_desc desc;
+	u32 bd_num, buf_len;
+	int ret, i;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_STATS_BD, true);
+
+	req = (struct hclge_get_m7_bd_cmd *)desc.data;
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"get firmware statistics bd number failed, ret=%d\n",
+			ret);
+		return;
+	}
+
+	bd_num = le32_to_cpu(req->bd_num);
+
+	buf_len	 = sizeof(struct hclge_desc) * bd_num;
+	desc_src = kzalloc(buf_len, GFP_KERNEL);
+	if (!desc_src) {
+		dev_err(&hdev->pdev->dev,
+			"allocate desc for get_m7_stats failed\n");
+		return;
+	}
+
+	desc_tmp = desc_src;
+	ret  = hclge_dbg_cmd_send(hdev, desc_tmp, 0, bd_num,
+				  HCLGE_OPC_M7_STATS_INFO);
+	if (ret) {
+		kfree(desc_src);
+		dev_err(&hdev->pdev->dev,
+			"get firmware statistics failed, ret=%d\n", ret);
+		return;
+	}
+
+	for (i = 0; i < bd_num; i++) {
+		dev_info(&hdev->pdev->dev, "0x%08x  0x%08x  0x%08x\n",
+			 le32_to_cpu(desc_tmp->data[0]),
+			 le32_to_cpu(desc_tmp->data[1]),
+			 le32_to_cpu(desc_tmp->data[2]));
+		dev_info(&hdev->pdev->dev, "0x%08x  0x%08x  0x%08x\n",
+			 le32_to_cpu(desc_tmp->data[3]),
+			 le32_to_cpu(desc_tmp->data[4]),
+			 le32_to_cpu(desc_tmp->data[5]));
+
+		desc_tmp++;
+	}
+
+	kfree(desc_src);
+}
+
 /* hclge_dbg_dump_ncl_config: print specified range of NCL_CONFIG file
  * @hdev: pointer to struct hclge_dev
  * @cmd_buf: string that contains offset and length
@@ -1029,6 +1084,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf)
 		hclge_dbg_dump_reg_cmd(hdev, cmd_buf);
 	} else if (strncmp(cmd_buf, "dump reset info", 15) == 0) {
 		hclge_dbg_dump_rst_info(hdev);
+	} else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) {
+		hclge_dbg_get_m7_stats_info(hdev);
 	} else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) {
 		hclge_dbg_dump_ncl_config(hdev,
 					  &cmd_buf[sizeof("dump ncl_config")]);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d3b1f8cb1155..0545f3890d1c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -35,6 +35,8 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
 static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
 static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
 			       u16 *allocated_size, bool is_alloc);
+static void hclge_rfs_filter_expire(struct hclge_dev *hdev);
+static void hclge_clear_arfs_rules(struct hnae3_handle *handle);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -1226,8 +1228,10 @@ static int hclge_configure(struct hclge_dev *hdev)
 	hdev->tm_info.hw_pfc_map = 0;
 	hdev->wanted_umv_size = cfg.umv_space;
 
-	if (hnae3_dev_fd_supported(hdev))
+	if (hnae3_dev_fd_supported(hdev)) {
 		hdev->fd_en = true;
+		hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+	}
 
 	ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
 	if (ret) {
@@ -2423,7 +2427,8 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
 
 static void hclge_reset_task_schedule(struct hclge_dev *hdev)
 {
-	if (!test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
+	if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
+	    !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
 		schedule_work(&hdev->rst_service_task);
 }
 
@@ -2508,6 +2513,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
 
 static void hclge_update_port_capability(struct hclge_mac *mac)
 {
+	/* update fec ability by speed */
+	hclge_convert_setting_fec(mac);
+
 	/* firmware can not identify back plane type, the media type
 	 * read from configuration can help deal it
 	 */
@@ -2580,6 +2588,10 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac)
 		mac->speed_ability = le32_to_cpu(resp->speed_ability);
 		mac->autoneg = resp->autoneg;
 		mac->support_autoneg = resp->autoneg_ability;
+		if (!resp->active_fec)
+			mac->fec_mode = 0;
+		else
+			mac->fec_mode = BIT(resp->active_fec);
 	} else {
 		mac->speed_type = QUERY_SFP_SPEED;
 	}
@@ -2645,6 +2657,7 @@ static void hclge_service_timer(struct timer_list *t)
 
 	mod_timer(&hdev->service_timer, jiffies + HZ);
 	hdev->hw_stats.stats_timer++;
+	hdev->fd_arfs_expire_timer++;
 	hclge_task_schedule(hdev);
 }
 
@@ -2861,6 +2874,10 @@ int hclge_notify_client(struct hclge_dev *hdev,
 	struct hnae3_client *client = hdev->nic_client;
 	u16 i;
 
+	if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) ||
+	    !client)
+		return 0;
+
 	if (!client->ops->reset_notify)
 		return -EOPNOTSUPP;
 
@@ -2886,7 +2903,8 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev,
 	int ret = 0;
 	u16 i;
 
-	if (!client)
+	if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) ||
+	    !client)
 		return 0;
 
 	if (!client->ops->reset_notify)
@@ -3180,6 +3198,8 @@ static int hclge_reset_prepare_down(struct hclge_dev *hdev)
 
 static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
 {
+#define HCLGE_RESET_SYNC_TIME 100
+
 	u32 reg_val;
 	int ret = 0;
 
@@ -3188,7 +3208,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
 		/* There is no mechanism for PF to know if VF has stopped IO
 		 * for now, just wait 100 ms for VF to stop IO
 		 */
-		msleep(100);
+		msleep(HCLGE_RESET_SYNC_TIME);
 		ret = hclge_func_reset_cmd(hdev, 0);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
@@ -3208,7 +3228,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
 		/* There is no mechanism for PF to know if VF has stopped IO
 		 * for now, just wait 100 ms for VF to stop IO
 		 */
-		msleep(100);
+		msleep(HCLGE_RESET_SYNC_TIME);
 		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 		set_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
 		hdev->rst_stats.flr_rst_cnt++;
@@ -3222,6 +3242,10 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
 		break;
 	}
 
+	/* inform hardware that preparatory work is done */
+	msleep(HCLGE_RESET_SYNC_TIME);
+	hclge_write_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG,
+			HCLGE_NIC_CMQ_ENABLE);
 	dev_info(&hdev->pdev->dev, "prepare wait ok\n");
 
 	return ret;
@@ -3521,6 +3545,10 @@ static void hclge_service_task(struct work_struct *work)
 	hclge_update_port_info(hdev);
 	hclge_update_link_status(hdev);
 	hclge_update_vport_alive(hdev);
+	if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) {
+		hclge_rfs_filter_expire(hdev);
+		hdev->fd_arfs_expire_timer = 0;
+	}
 	hclge_service_complete(hdev);
 }
 
@@ -4906,14 +4934,18 @@ static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location)
 	struct hclge_fd_rule *rule = NULL;
 	struct hlist_node *node2;
 
+	spin_lock_bh(&hdev->fd_rule_lock);
 	hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
 		if (rule->location >= location)
 			break;
 	}
 
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
 	return  rule && rule->location == location;
 }
 
+/* make sure being called after lock up with fd_rule_lock */
 static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 				     struct hclge_fd_rule *new_rule,
 				     u16 location,
@@ -4937,9 +4969,13 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 		kfree(rule);
 		hdev->hclge_fd_rule_num--;
 
-		if (!is_add)
-			return 0;
+		if (!is_add) {
+			if (!hdev->hclge_fd_rule_num)
+				hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+			clear_bit(location, hdev->fd_bmap);
 
+			return 0;
+		}
 	} else if (!is_add) {
 		dev_err(&hdev->pdev->dev,
 			"delete fail, rule %d is inexistent\n",
@@ -4954,7 +4990,9 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 	else
 		hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list);
 
+	set_bit(location, hdev->fd_bmap);
 	hdev->hclge_fd_rule_num++;
+	hdev->fd_active_type = new_rule->rule_type;
 
 	return 0;
 }
@@ -5112,6 +5150,36 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 	return 0;
 }
 
+/* make sure being called after lock up with fd_rule_lock */
+static int hclge_fd_config_rule(struct hclge_dev *hdev,
+				struct hclge_fd_rule *rule)
+{
+	int ret;
+
+	if (!rule) {
+		dev_err(&hdev->pdev->dev,
+			"The flow director rule is NULL\n");
+		return -EINVAL;
+	}
+
+	/* it will never fail here, so needn't to check return value */
+	hclge_fd_update_rule_list(hdev, rule, rule->location, true);
+
+	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+	if (ret)
+		goto clear_rule;
+
+	ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
+	if (ret)
+		goto clear_rule;
+
+	return 0;
+
+clear_rule:
+	hclge_fd_update_rule_list(hdev, rule, rule->location, false);
+	return ret;
+}
+
 static int hclge_add_fd_entry(struct hnae3_handle *handle,
 			      struct ethtool_rxnfc *cmd)
 {
@@ -5174,8 +5242,10 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 		return -ENOMEM;
 
 	ret = hclge_fd_get_tuple(hdev, fs, rule);
-	if (ret)
-		goto free_rule;
+	if (ret) {
+		kfree(rule);
+		return ret;
+	}
 
 	rule->flow_type = fs->flow_type;
 
@@ -5184,24 +5254,19 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 	rule->vf_id = dst_vport_id;
 	rule->queue_id = q_index;
 	rule->action = action;
+	rule->rule_type = HCLGE_FD_EP_ACTIVE;
 
-	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
-	if (ret)
-		goto free_rule;
+	/* to avoid rule conflict, when user configure rule by ethtool,
+	 * we need to clear all arfs rules
+	 */
+	hclge_clear_arfs_rules(handle);
 
-	ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
-	if (ret)
-		goto free_rule;
+	spin_lock_bh(&hdev->fd_rule_lock);
+	ret = hclge_fd_config_rule(hdev, rule);
 
-	ret = hclge_fd_update_rule_list(hdev, rule, fs->location, true);
-	if (ret)
-		goto free_rule;
+	spin_unlock_bh(&hdev->fd_rule_lock);
 
 	return ret;
-
-free_rule:
-	kfree(rule);
-	return ret;
 }
 
 static int hclge_del_fd_entry(struct hnae3_handle *handle,
@@ -5232,8 +5297,12 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle,
 	if (ret)
 		return ret;
 
-	return hclge_fd_update_rule_list(hdev, NULL, fs->location,
-					 false);
+	spin_lock_bh(&hdev->fd_rule_lock);
+	ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false);
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	return ret;
 }
 
 static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
@@ -5243,25 +5312,30 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_fd_rule *rule;
 	struct hlist_node *node;
+	u16 location;
 
 	if (!hnae3_dev_fd_supported(hdev))
 		return;
 
+	spin_lock_bh(&hdev->fd_rule_lock);
+	for_each_set_bit(location, hdev->fd_bmap,
+			 hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location,
+				     NULL, false);
+
 	if (clear_list) {
 		hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list,
 					  rule_node) {
-			hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
-					     rule->location, NULL, false);
 			hlist_del(&rule->rule_node);
 			kfree(rule);
-			hdev->hclge_fd_rule_num--;
 		}
-	} else {
-		hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list,
-					  rule_node)
-			hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
-					     rule->location, NULL, false);
+		hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+		hdev->hclge_fd_rule_num = 0;
+		bitmap_zero(hdev->fd_bmap,
+			    hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]);
 	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
 }
 
 static int hclge_restore_fd_entries(struct hnae3_handle *handle)
@@ -5283,6 +5357,7 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
 	if (!hdev->fd_en)
 		return 0;
 
+	spin_lock_bh(&hdev->fd_rule_lock);
 	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
 		ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
 		if (!ret)
@@ -5292,11 +5367,18 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
 			dev_warn(&hdev->pdev->dev,
 				 "Restore rule %d failed, remove it\n",
 				 rule->location);
+			clear_bit(rule->location, hdev->fd_bmap);
 			hlist_del(&rule->rule_node);
 			kfree(rule);
 			hdev->hclge_fd_rule_num--;
 		}
 	}
+
+	if (hdev->hclge_fd_rule_num)
+		hdev->fd_active_type = HCLGE_FD_EP_ACTIVE;
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
 	return 0;
 }
 
@@ -5329,13 +5411,18 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
 
 	fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
 
+	spin_lock_bh(&hdev->fd_rule_lock);
+
 	hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
 		if (rule->location >= fs->location)
 			break;
 	}
 
-	if (!rule || fs->location != rule->location)
+	if (!rule || fs->location != rule->location) {
+		spin_unlock_bh(&hdev->fd_rule_lock);
+
 		return -ENOENT;
+	}
 
 	fs->flow_type = rule->flow_type;
 	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
@@ -5474,6 +5561,7 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
 
 		break;
 	default:
+		spin_unlock_bh(&hdev->fd_rule_lock);
 		return -EOPNOTSUPP;
 	}
 
@@ -5505,6 +5593,8 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
 		fs->ring_cookie |= vf_id;
 	}
 
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
 	return 0;
 }
 
@@ -5522,20 +5612,208 @@ static int hclge_get_all_rules(struct hnae3_handle *handle,
 
 	cmd->data = hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1];
 
+	spin_lock_bh(&hdev->fd_rule_lock);
 	hlist_for_each_entry_safe(rule, node2,
 				  &hdev->fd_rule_list, rule_node) {
-		if (cnt == cmd->rule_cnt)
+		if (cnt == cmd->rule_cnt) {
+			spin_unlock_bh(&hdev->fd_rule_lock);
 			return -EMSGSIZE;
+		}
 
 		rule_locs[cnt] = rule->location;
 		cnt++;
 	}
 
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
 	cmd->rule_cnt = cnt;
 
 	return 0;
 }
 
+static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys,
+				     struct hclge_fd_rule_tuples *tuples)
+{
+	tuples->ether_proto = be16_to_cpu(fkeys->basic.n_proto);
+	tuples->ip_proto = fkeys->basic.ip_proto;
+	tuples->dst_port = be16_to_cpu(fkeys->ports.dst);
+
+	if (fkeys->basic.n_proto == htons(ETH_P_IP)) {
+		tuples->src_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.src);
+		tuples->dst_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.dst);
+	} else {
+		memcpy(tuples->src_ip,
+		       fkeys->addrs.v6addrs.src.in6_u.u6_addr32,
+		       sizeof(tuples->src_ip));
+		memcpy(tuples->dst_ip,
+		       fkeys->addrs.v6addrs.dst.in6_u.u6_addr32,
+		       sizeof(tuples->dst_ip));
+	}
+}
+
+/* traverse all rules, check whether an existed rule has the same tuples */
+static struct hclge_fd_rule *
+hclge_fd_search_flow_keys(struct hclge_dev *hdev,
+			  const struct hclge_fd_rule_tuples *tuples)
+{
+	struct hclge_fd_rule *rule = NULL;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		if (!memcmp(tuples, &rule->tuples, sizeof(*tuples)))
+			return rule;
+	}
+
+	return NULL;
+}
+
+static void hclge_fd_build_arfs_rule(const struct hclge_fd_rule_tuples *tuples,
+				     struct hclge_fd_rule *rule)
+{
+	rule->unused_tuple = BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+			     BIT(INNER_VLAN_TAG_FST) | BIT(INNER_IP_TOS) |
+			     BIT(INNER_SRC_PORT);
+	rule->action = 0;
+	rule->vf_id = 0;
+	rule->rule_type = HCLGE_FD_ARFS_ACTIVE;
+	if (tuples->ether_proto == ETH_P_IP) {
+		if (tuples->ip_proto == IPPROTO_TCP)
+			rule->flow_type = TCP_V4_FLOW;
+		else
+			rule->flow_type = UDP_V4_FLOW;
+	} else {
+		if (tuples->ip_proto == IPPROTO_TCP)
+			rule->flow_type = TCP_V6_FLOW;
+		else
+			rule->flow_type = UDP_V6_FLOW;
+	}
+	memcpy(&rule->tuples, tuples, sizeof(rule->tuples));
+	memset(&rule->tuples_mask, 0xFF, sizeof(rule->tuples_mask));
+}
+
+static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
+				      u16 flow_id, struct flow_keys *fkeys)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_fd_rule_tuples new_tuples;
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_rule *rule;
+	u16 tmp_queue_id;
+	u16 bit_id;
+	int ret;
+
+	if (!hnae3_dev_fd_supported(hdev))
+		return -EOPNOTSUPP;
+
+	memset(&new_tuples, 0, sizeof(new_tuples));
+	hclge_fd_get_flow_tuples(fkeys, &new_tuples);
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+
+	/* when there is already fd rule existed add by user,
+	 * arfs should not work
+	 */
+	if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) {
+		spin_unlock_bh(&hdev->fd_rule_lock);
+
+		return -EOPNOTSUPP;
+	}
+
+	/* check is there flow director filter existed for this flow,
+	 * if not, create a new filter for it;
+	 * if filter exist with different queue id, modify the filter;
+	 * if filter exist with same queue id, do nothing
+	 */
+	rule = hclge_fd_search_flow_keys(hdev, &new_tuples);
+	if (!rule) {
+		bit_id = find_first_zero_bit(hdev->fd_bmap, MAX_FD_FILTER_NUM);
+		if (bit_id >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) {
+			spin_unlock_bh(&hdev->fd_rule_lock);
+
+			return -ENOSPC;
+		}
+
+		rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+		if (!rule) {
+			spin_unlock_bh(&hdev->fd_rule_lock);
+
+			return -ENOMEM;
+		}
+
+		set_bit(bit_id, hdev->fd_bmap);
+		rule->location = bit_id;
+		rule->flow_id = flow_id;
+		rule->queue_id = queue_id;
+		hclge_fd_build_arfs_rule(&new_tuples, rule);
+		ret = hclge_fd_config_rule(hdev, rule);
+
+		spin_unlock_bh(&hdev->fd_rule_lock);
+
+		if (ret)
+			return ret;
+
+		return rule->location;
+	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	if (rule->queue_id == queue_id)
+		return rule->location;
+
+	tmp_queue_id = rule->queue_id;
+	rule->queue_id = queue_id;
+	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+	if (ret) {
+		rule->queue_id = tmp_queue_id;
+		return ret;
+	}
+
+	return rule->location;
+}
+
+static void hclge_rfs_filter_expire(struct hclge_dev *hdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct hnae3_handle *handle = &hdev->vport[0].nic;
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+	HLIST_HEAD(del_list);
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) {
+		spin_unlock_bh(&hdev->fd_rule_lock);
+		return;
+	}
+	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		if (rps_may_expire_flow(handle->netdev, rule->queue_id,
+					rule->flow_id, rule->location)) {
+			hlist_del_init(&rule->rule_node);
+			hlist_add_head(&rule->rule_node, &del_list);
+			hdev->hclge_fd_rule_num--;
+			clear_bit(rule->location, hdev->fd_bmap);
+		}
+	}
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	hlist_for_each_entry_safe(rule, node, &del_list, rule_node) {
+		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
+				     rule->location, NULL, false);
+		kfree(rule);
+	}
+#endif
+}
+
+static void hclge_clear_arfs_rules(struct hnae3_handle *handle)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE)
+		hclge_del_all_fd_entries(handle, true);
+#endif
+}
+
 static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -5565,10 +5843,12 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
+	bool clear;
 
 	hdev->fd_en = enable;
+	clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE ? true : false;
 	if (!enable)
-		hclge_del_all_fd_entries(handle, false);
+		hclge_del_all_fd_entries(handle, clear);
 	else
 		hclge_restore_fd_entries(handle);
 }
@@ -5838,6 +6118,8 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
 
+	hclge_clear_arfs_rules(handle);
+
 	/* If it is not PF reset, the firmware will disable the MAC,
 	 * so it only need to stop phy here.
 	 */
@@ -7894,6 +8176,52 @@ static void hclge_info_show(struct hclge_dev *hdev)
 	dev_info(dev, "PF info end.\n");
 }
 
+static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
+					  struct hclge_vport *vport)
+{
+	struct hnae3_client *client = vport->nic.client;
+	struct hclge_dev *hdev = ae_dev->priv;
+	int ret;
+
+	ret = client->ops->init_instance(&vport->nic);
+	if (ret)
+		return ret;
+
+	set_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
+	hnae3_set_client_init_flag(client, ae_dev, 1);
+
+	if (netif_msg_drv(&hdev->vport->nic))
+		hclge_info_show(hdev);
+
+	return 0;
+}
+
+static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev,
+					   struct hclge_vport *vport)
+{
+	struct hnae3_client *client = vport->roce.client;
+	struct hclge_dev *hdev = ae_dev->priv;
+	int ret;
+
+	if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client ||
+	    !hdev->nic_client)
+		return 0;
+
+	client = hdev->roce_client;
+	ret = hclge_init_roce_base_info(vport);
+	if (ret)
+		return ret;
+
+	ret = client->ops->init_instance(&vport->roce);
+	if (ret)
+		return ret;
+
+	set_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
+	hnae3_set_client_init_flag(client, ae_dev, 1);
+
+	return 0;
+}
+
 static int hclge_init_client_instance(struct hnae3_client *client,
 				      struct hnae3_ae_dev *ae_dev)
 {
@@ -7909,30 +8237,13 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 
 			hdev->nic_client = client;
 			vport->nic.client = client;
-			ret = client->ops->init_instance(&vport->nic);
+			ret = hclge_init_nic_client_instance(ae_dev, vport);
 			if (ret)
 				goto clear_nic;
 
-			hnae3_set_client_init_flag(client, ae_dev, 1);
-
-			if (netif_msg_drv(&hdev->vport->nic))
-				hclge_info_show(hdev);
-
-			if (hdev->roce_client &&
-			    hnae3_dev_roce_supported(hdev)) {
-				struct hnae3_client *rc = hdev->roce_client;
-
-				ret = hclge_init_roce_base_info(vport);
-				if (ret)
-					goto clear_roce;
-
-				ret = rc->ops->init_instance(&vport->roce);
-				if (ret)
-					goto clear_roce;
-
-				hnae3_set_client_init_flag(hdev->roce_client,
-							   ae_dev, 1);
-			}
+			ret = hclge_init_roce_client_instance(ae_dev, vport);
+			if (ret)
+				goto clear_roce;
 
 			break;
 		case HNAE3_CLIENT_UNIC:
@@ -7952,17 +8263,9 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 				vport->roce.client = client;
 			}
 
-			if (hdev->roce_client && hdev->nic_client) {
-				ret = hclge_init_roce_base_info(vport);
-				if (ret)
-					goto clear_roce;
-
-				ret = client->ops->init_instance(&vport->roce);
-				if (ret)
-					goto clear_roce;
-
-				hnae3_set_client_init_flag(client, ae_dev, 1);
-			}
+			ret = hclge_init_roce_client_instance(ae_dev, vport);
+			if (ret)
+				goto clear_roce;
 
 			break;
 		default:
@@ -7992,6 +8295,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client,
 	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 		vport = &hdev->vport[i];
 		if (hdev->roce_client) {
+			clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
 			hdev->roce_client->ops->uninit_instance(&vport->roce,
 								0);
 			hdev->roce_client = NULL;
@@ -8000,6 +8304,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client,
 		if (client->type == HNAE3_CLIENT_ROCE)
 			return;
 		if (hdev->nic_client && client->ops->uninit_instance) {
+			clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
 			client->ops->uninit_instance(&vport->nic, 0);
 			hdev->nic_client = NULL;
 			vport->nic.client = NULL;
@@ -8081,6 +8386,7 @@ static void hclge_state_init(struct hclge_dev *hdev)
 static void hclge_state_uninit(struct hclge_dev *hdev)
 {
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
+	set_bit(HCLGE_STATE_REMOVING, &hdev->state);
 
 	if (hdev->service_timer.function)
 		del_timer_sync(&hdev->service_timer);
@@ -8143,6 +8449,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 
 	mutex_init(&hdev->vport_lock);
 	mutex_init(&hdev->vport_cfg_mutex);
+	spin_lock_init(&hdev->fd_rule_lock);
 
 	ret = hclge_pci_init(hdev);
 	if (ret) {
@@ -8908,6 +9215,7 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_fd_all_rules = hclge_get_all_rules,
 	.restore_fd_rules = hclge_restore_fd_entries,
 	.enable_fd = hclge_enable_fd,
+	.add_arfs_entry = hclge_add_fd_entry_by_arfs,
 	.dbg_run_cmd = hclge_dbg_run_cmd,
 	.handle_hw_ras_error = hclge_handle_hw_ras_error,
 	.get_hw_reset_stat = hclge_get_hw_reset_stat,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index dd06b11187b0..2b3bc95ccbdf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -201,6 +201,8 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_DOWN,
 	HCLGE_STATE_DISABLED,
 	HCLGE_STATE_REMOVING,
+	HCLGE_STATE_NIC_REGISTERED,
+	HCLGE_STATE_ROCE_REGISTERED,
 	HCLGE_STATE_SERVICE_INITED,
 	HCLGE_STATE_SERVICE_SCHED,
 	HCLGE_STATE_RST_SERVICE_SCHED,
@@ -578,6 +580,16 @@ static const struct key_info tuple_key_info[] = {
 #define MAX_KEY_BYTES	(MAX_KEY_DWORDS * 4)
 #define MAX_META_DATA_LENGTH	32
 
+/* assigned by firmware, the real filter number for each pf may be less */
+#define MAX_FD_FILTER_NUM	4096
+#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL	5
+
+enum HCLGE_FD_ACTIVE_RULE_TYPE {
+	HCLGE_FD_RULE_NONE,
+	HCLGE_FD_ARFS_ACTIVE,
+	HCLGE_FD_EP_ACTIVE,
+};
+
 enum HCLGE_FD_PACKET_TYPE {
 	NIC_PACKET,
 	ROCE_PACKET,
@@ -630,6 +642,8 @@ struct hclge_fd_rule {
 	u16 vf_id;
 	u16 queue_id;
 	u16 location;
+	u16 flow_id;	/* only used for arfs */
+	enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type;
 };
 
 struct hclge_fd_ad_data {
@@ -809,7 +823,11 @@ struct hclge_dev {
 
 	struct hclge_fd_cfg fd_cfg;
 	struct hlist_head fd_rule_list;
+	spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */
 	u16 hclge_fd_rule_num;
+	u16 fd_arfs_expire_timer;
+	unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)];
+	enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type;
 	u8 fd_en;
 
 	u16 wanted_umv_size;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 0e04e63f2a94..d20f01720719 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -192,12 +192,10 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en,
 		return ret;
 
 	ret = hclge_bind_ring_with_vector(vport, vector_id, en, &ring_chain);
-	if (ret)
-		return ret;
 
 	hclge_free_vector_ring_chain(&ring_chain);
 
-	return 0;
+	return ret;
 }
 
 static int hclge_set_vf_promisc_mode(struct hclge_vport *vport,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 71f356fc2446..e1588c0e8bb9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -98,7 +98,6 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val);
 
 		reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
-		reg_val |= HCLGEVF_NIC_CMQ_ENABLE;
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, reg_val);
 
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0);
@@ -110,7 +109,6 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val);
 
 		reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
-		reg_val |= HCLGEVF_NIC_CMQ_ENABLE;
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, reg_val);
 
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 5d53467ee2d2..87a619db2780 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1306,6 +1306,10 @@ static int hclgevf_notify_client(struct hclgevf_dev *hdev,
 	struct hnae3_handle *handle = &hdev->nic;
 	int ret;
 
+	if (!test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state) ||
+	    !client)
+		return 0;
+
 	if (!client->ops->reset_notify)
 		return -EOPNOTSUPP;
 
@@ -1410,6 +1414,8 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
 
 static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
 {
+#define HCLGEVF_RESET_SYNC_TIME 100
+
 	int ret = 0;
 
 	switch (hdev->reset_type) {
@@ -1427,7 +1433,10 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
 	}
 
 	set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
-
+	/* inform hardware that preparatory work is done */
+	msleep(HCLGEVF_RESET_SYNC_TIME);
+	hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG,
+			  HCLGEVF_NIC_CMQ_ENABLE);
 	dev_info(&hdev->pdev->dev, "prepare reset(%d) wait done, ret:%d\n",
 		 hdev->reset_type, ret);
 
@@ -1612,7 +1621,8 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
 
 void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
 {
-	if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) {
+	if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
+	    !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) {
 		set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
 		schedule_work(&hdev->rst_service_task);
 	}
@@ -2123,6 +2133,7 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
 static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
 {
 	set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+	set_bit(HCLGEVF_STATE_REMOVING, &hdev->state);
 
 	if (hdev->keep_alive_timer.function)
 		del_timer_sync(&hdev->keep_alive_timer);
@@ -2249,6 +2260,48 @@ static void hclgevf_info_show(struct hclgevf_dev *hdev)
 	dev_info(dev, "VF info end.\n");
 }
 
+static int hclgevf_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
+					    struct hnae3_client *client)
+{
+	struct hclgevf_dev *hdev = ae_dev->priv;
+	int ret;
+
+	ret = client->ops->init_instance(&hdev->nic);
+	if (ret)
+		return ret;
+
+	set_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+	hnae3_set_client_init_flag(client, ae_dev, 1);
+
+	if (netif_msg_drv(&hdev->nic))
+		hclgevf_info_show(hdev);
+
+	return 0;
+}
+
+static int hclgevf_init_roce_client_instance(struct hnae3_ae_dev *ae_dev,
+					     struct hnae3_client *client)
+{
+	struct hclgevf_dev *hdev = ae_dev->priv;
+	int ret;
+
+	if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client ||
+	    !hdev->nic_client)
+		return 0;
+
+	ret = hclgevf_init_roce_base_info(hdev);
+	if (ret)
+		return ret;
+
+	ret = client->ops->init_instance(&hdev->roce);
+	if (ret)
+		return ret;
+
+	hnae3_set_client_init_flag(client, ae_dev, 1);
+
+	return 0;
+}
+
 static int hclgevf_init_client_instance(struct hnae3_client *client,
 					struct hnae3_ae_dev *ae_dev)
 {
@@ -2260,28 +2313,15 @@ static int hclgevf_init_client_instance(struct hnae3_client *client,
 		hdev->nic_client = client;
 		hdev->nic.client = client;
 
-		ret = client->ops->init_instance(&hdev->nic);
+		ret = hclgevf_init_nic_client_instance(ae_dev, client);
 		if (ret)
 			goto clear_nic;
 
-		hnae3_set_client_init_flag(client, ae_dev, 1);
-
-		if (netif_msg_drv(&hdev->nic))
-			hclgevf_info_show(hdev);
-
-		if (hdev->roce_client && hnae3_dev_roce_supported(hdev)) {
-			struct hnae3_client *rc = hdev->roce_client;
-
-			ret = hclgevf_init_roce_base_info(hdev);
-			if (ret)
-				goto clear_roce;
-			ret = rc->ops->init_instance(&hdev->roce);
-			if (ret)
-				goto clear_roce;
+		ret = hclgevf_init_roce_client_instance(ae_dev,
+							hdev->roce_client);
+		if (ret)
+			goto clear_roce;
 
-			hnae3_set_client_init_flag(hdev->roce_client, ae_dev,
-						   1);
-		}
 		break;
 	case HNAE3_CLIENT_UNIC:
 		hdev->nic_client = client;
@@ -2299,17 +2339,10 @@ static int hclgevf_init_client_instance(struct hnae3_client *client,
 			hdev->roce.client = client;
 		}
 
-		if (hdev->roce_client && hdev->nic_client) {
-			ret = hclgevf_init_roce_base_info(hdev);
-			if (ret)
-				goto clear_roce;
-
-			ret = client->ops->init_instance(&hdev->roce);
-			if (ret)
-				goto clear_roce;
-		}
+		ret = hclgevf_init_roce_client_instance(ae_dev, client);
+		if (ret)
+			goto clear_roce;
 
-		hnae3_set_client_init_flag(client, ae_dev, 1);
 		break;
 	default:
 		return -EINVAL;
@@ -2342,6 +2375,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client,
 	/* un-init nic/unic, if this was not called by roce client */
 	if (client->ops->uninit_instance && hdev->nic_client &&
 	    client->type != HNAE3_CLIENT_ROCE) {
+		clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+
 		client->ops->uninit_instance(&hdev->nic, 0);
 		hdev->nic_client = NULL;
 		hdev->nic.client = NULL;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index cc52f54f8c08..a7fbd38c1492 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -130,6 +130,8 @@ enum hclgevf_states {
 	HCLGEVF_STATE_DOWN,
 	HCLGEVF_STATE_DISABLED,
 	HCLGEVF_STATE_IRQ_INITED,
+	HCLGEVF_STATE_REMOVING,
+	HCLGEVF_STATE_NIC_REGISTERED,
 	/* task states */
 	HCLGEVF_STATE_SERVICE_SCHED,
 	HCLGEVF_STATE_RST_SERVICE_SCHED,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index e64bc664f687..cfd3f4232cac 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -724,7 +724,6 @@ static void set_rx_mode(struct work_struct *work)
 {
 	struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work);
 	struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work);
-	struct netdev_hw_addr *ha;
 
 	netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n");
 
@@ -732,9 +731,6 @@ static void set_rx_mode(struct work_struct *work)
 
 	__dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
 	__dev_mc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
-
-	netdev_for_each_mc_addr(ha, nic_dev->netdev)
-		add_mac_addr(nic_dev->netdev, ha->addr);
 }
 
 static void hinic_set_rx_mode(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0e09bede42a2..b081a1ef6859 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4208,7 +4208,7 @@ void e1000e_up(struct e1000_adapter *adapter)
 		e1000_configure_msix(adapter);
 	e1000_irq_enable(adapter);
 
-	netif_start_queue(adapter->netdev);
+	/* Tx queue started by watchdog timer when link is up */
 
 	e1000e_trigger_lsc(adapter);
 }
@@ -4606,6 +4606,7 @@ int e1000e_open(struct net_device *netdev)
 	pm_runtime_get_sync(&pdev->dev);
 
 	netif_carrier_off(netdev);
+	netif_stop_queue(netdev);
 
 	/* allocate transmit descriptors */
 	err = e1000e_setup_tx_resources(adapter->tx_ring);
@@ -4666,7 +4667,6 @@ int e1000e_open(struct net_device *netdev)
 	e1000_irq_enable(adapter);
 
 	adapter->tx_hang_recheck = false;
-	netif_start_queue(netdev);
 
 	hw->mac.get_link_status = true;
 	pm_runtime_put(&pdev->dev);
@@ -5288,6 +5288,7 @@ static void e1000_watchdog_task(struct work_struct *work)
 			if (phy->ops.cfg_on_link_up)
 				phy->ops.cfg_on_link_up(hw);
 
+			netif_wake_queue(netdev);
 			netif_carrier_on(netdev);
 
 			if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -5301,6 +5302,7 @@ static void e1000_watchdog_task(struct work_struct *work)
 			/* Link status message must follow this format */
 			pr_info("%s NIC Link is Down\n", adapter->netdev->name);
 			netif_carrier_off(netdev);
+			netif_stop_queue(netdev);
 			if (!test_bit(__E1000_DOWN, &adapter->state))
 				mod_timer(&adapter->phy_info_timer,
 					  round_jiffies(jiffies + 2 * HZ));
@@ -5308,13 +5310,8 @@ static void e1000_watchdog_task(struct work_struct *work)
 			/* 8000ES2LAN requires a Rx packet buffer work-around
 			 * on link down event; reset the controller to flush
 			 * the Rx packet buffer.
-			 *
-			 * If the link is lost the controller stops DMA, but
-			 * if there is queued Tx work it cannot be done.  So
-			 * reset the controller to flush the Tx packet buffers.
 			 */
-			if ((adapter->flags & FLAG_RX_NEEDS_RESTART) ||
-			    e1000_desc_unused(tx_ring) + 1 < tx_ring->count)
+			if (adapter->flags & FLAG_RX_NEEDS_RESTART)
 				adapter->flags |= FLAG_RESTART_NOW;
 			else
 				pm_schedule_suspend(netdev->dev.parent,
@@ -5337,6 +5334,14 @@ link_up:
 	adapter->gotc_old = adapter->stats.gotc;
 	spin_unlock(&adapter->stats64_lock);
 
+	/* If the link is lost the controller stops DMA, but
+	 * if there is queued Tx work it cannot be done.  So
+	 * reset the controller to flush the Tx packet buffers.
+	 */
+	if (!netif_carrier_ok(netdev) &&
+	    (e1000_desc_unused(tx_ring) + 1 < tx_ring->count))
+		adapter->flags |= FLAG_RESTART_NOW;
+
 	/* If reset is necessary, do it outside of interrupt context. */
 	if (adapter->flags & FLAG_RESTART_NOW) {
 		schedule_work(&adapter->reset_task);
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 792e6e42030e..9ee6b55553c0 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -44,15 +44,22 @@
 extern const char ice_drv_ver[];
 #define ICE_BAR0		0
 #define ICE_REQ_DESC_MULTIPLE	32
-#define ICE_MIN_NUM_DESC	ICE_REQ_DESC_MULTIPLE
+#define ICE_MIN_NUM_DESC	64
 #define ICE_MAX_NUM_DESC	8160
-/* set default number of Rx/Tx descriptors to the minimum between
- * ICE_MAX_NUM_DESC and the number of descriptors to fill up an entire page
+#define ICE_DFLT_MIN_RX_DESC	512
+/* if the default number of Rx descriptors between ICE_MAX_NUM_DESC and the
+ * number of descriptors to fill up an entire page is greater than or equal to
+ * ICE_DFLT_MIN_RX_DESC set it based on page size, otherwise set it to
+ * ICE_DFLT_MIN_RX_DESC
+ */
+#define ICE_DFLT_NUM_RX_DESC \
+	min_t(u16, ICE_MAX_NUM_DESC, \
+	      max_t(u16, ALIGN(PAGE_SIZE / sizeof(union ice_32byte_rx_desc), \
+			       ICE_REQ_DESC_MULTIPLE), \
+		    ICE_DFLT_MIN_RX_DESC))
+/* set default number of Tx descriptors to the minimum between ICE_MAX_NUM_DESC
+ * and the number of descriptors to fill up an entire page
  */
-#define ICE_DFLT_NUM_RX_DESC	min_t(u16, ICE_MAX_NUM_DESC, \
-				      ALIGN(PAGE_SIZE / \
-					    sizeof(union ice_32byte_rx_desc), \
-					    ICE_REQ_DESC_MULTIPLE))
 #define ICE_DFLT_NUM_TX_DESC	min_t(u16, ICE_MAX_NUM_DESC, \
 				      ALIGN(PAGE_SIZE / \
 					    sizeof(struct ice_tx_desc), \
@@ -160,7 +167,7 @@ struct ice_tc_cfg {
 
 struct ice_res_tracker {
 	u16 num_entries;
-	u16 search_hint;
+	u16 end;
 	u16 list[1];
 };
 
@@ -182,6 +189,7 @@ struct ice_sw {
 };
 
 enum ice_state {
+	__ICE_TESTING,
 	__ICE_DOWN,
 	__ICE_NEEDS_RESTART,
 	__ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
@@ -244,8 +252,7 @@ struct ice_vsi {
 	u32 rx_buf_failed;
 	u32 rx_page_failed;
 	int num_q_vectors;
-	int sw_base_vector;		/* Irq base for OS reserved vectors */
-	int hw_base_vector;		/* HW (absolute) index of a vector */
+	int base_vector;		/* IRQ base for OS reserved vectors */
 	enum ice_vsi_type type;
 	u16 vsi_num;			/* HW (absolute) index of this VSI */
 	u16 idx;			/* software index in pf->vsi[] */
@@ -277,10 +284,10 @@ struct ice_vsi {
 	struct list_head tmp_sync_list;		/* MAC filters to be synced */
 	struct list_head tmp_unsync_list;	/* MAC filters to be unsynced */
 
-	u8 irqs_ready;
-	u8 current_isup;		 /* Sync 'link up' logging */
-	u8 stat_offsets_loaded;
-	u8 vlan_ena;
+	u8 irqs_ready:1;
+	u8 current_isup:1;		 /* Sync 'link up' logging */
+	u8 stat_offsets_loaded:1;
+	u8 vlan_ena:1;
 
 	/* queue information */
 	u8 tx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
@@ -330,7 +337,7 @@ enum ice_pf_flags {
 	ICE_FLAG_DCB_CAPABLE,
 	ICE_FLAG_DCB_ENA,
 	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
-	ICE_FLAG_DISABLE_FW_LLDP,
+	ICE_FLAG_ENABLE_FW_LLDP,
 	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
 	ICE_PF_FLAGS_NBITS		/* must be last */
 };
@@ -340,10 +347,12 @@ struct ice_pf {
 
 	/* OS reserved IRQ details */
 	struct msix_entry *msix_entries;
-	struct ice_res_tracker *sw_irq_tracker;
-
-	/* HW reserved Interrupts for this PF */
-	struct ice_res_tracker *hw_irq_tracker;
+	struct ice_res_tracker *irq_tracker;
+	/* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
+	 * number of MSIX vectors needed for all SR-IOV VFs from the number of
+	 * MSIX vectors allowed on this PF.
+	 */
+	u16 sriov_base_vector;
 
 	struct ice_vsi **vsi;		/* VSIs created by the driver */
 	struct ice_sw *first_sw;	/* first switch created by firmware */
@@ -365,10 +374,8 @@ struct ice_pf {
 	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
 	u32 msg_enable;
 	u32 hw_csum_rx_error;
-	u32 sw_oicr_idx;	/* Other interrupt cause SW vector index */
+	u32 oicr_idx;		/* Other interrupt cause MSIX vector index */
 	u32 num_avail_sw_msix;	/* remaining MSIX SW vectors left unclaimed */
-	u32 hw_oicr_idx;	/* Other interrupt cause vector HW index */
-	u32 num_avail_hw_msix;	/* remaining HW MSIX vectors left unclaimed */
 	u32 num_lan_msix;	/* Total MSIX vectors for base driver */
 	u16 num_lan_tx;		/* num LAN Tx queues setup */
 	u16 num_lan_rx;		/* num LAN Rx queues setup */
@@ -384,7 +391,7 @@ struct ice_pf {
 	struct ice_hw_port_stats stats;
 	struct ice_hw_port_stats stats_prev;
 	struct ice_hw hw;
-	u8 stat_prev_loaded;	/* has previous stats been loaded */
+	u8 stat_prev_loaded:1; /* has previous stats been loaded */
 #ifdef CONFIG_DCB
 	u16 dcbx_cap;
 #endif /* CONFIG_DCB */
@@ -392,6 +399,7 @@ struct ice_pf {
 	unsigned long tx_timeout_last_recovery;
 	u32 tx_timeout_recovery_level;
 	char int_name[ICE_INT_NAME_STR_LEN];
+	u32 sw_int_count;
 };
 
 struct ice_netdev_priv {
@@ -409,7 +417,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
 		    struct ice_q_vector *q_vector)
 {
 	u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
-				((struct ice_pf *)hw->back)->hw_oicr_idx;
+				((struct ice_pf *)hw->back)->oicr_idx;
 	int itr = ICE_ITR_NONE;
 	u32 val;
 
@@ -444,17 +452,22 @@ ice_find_vsi_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 	return NULL;
 }
 
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
 void ice_set_ethtool_ops(struct net_device *netdev);
 int ice_up(struct ice_vsi *vsi);
 int ice_down(struct ice_vsi *vsi);
+int ice_vsi_cfg(struct ice_vsi *vsi);
+struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
 int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
-void ice_napi_del(struct ice_vsi *vsi);
 #ifdef CONFIG_DCB
 int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked);
 void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked);
 #endif /* CONFIG_DCB */
+int ice_open(struct net_device *netdev);
+int ice_stop(struct net_device *netdev);
 
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 6ef083002f5b..765e3c2ed045 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -35,8 +35,8 @@ struct ice_aqc_get_ver {
 
 /* Queue Shutdown (direct 0x0003) */
 struct ice_aqc_q_shutdown {
-#define ICE_AQC_DRIVER_UNLOADING	BIT(0)
 	__le32 driver_unloading;
+#define ICE_AQC_DRIVER_UNLOADING	BIT(0)
 	u8 reserved[12];
 };
 
@@ -120,11 +120,9 @@ struct ice_aqc_manage_mac_read {
 #define ICE_AQC_MAN_MAC_WOL_ADDR_VALID		BIT(7)
 #define ICE_AQC_MAN_MAC_READ_S			4
 #define ICE_AQC_MAN_MAC_READ_M			(0xF << ICE_AQC_MAN_MAC_READ_S)
-	u8 lport_num;
-	u8 lport_num_valid;
-#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID	BIT(0)
+	u8 rsvd[2];
 	u8 num_addr; /* Used in response */
-	u8 reserved[3];
+	u8 rsvd1[3];
 	__le32 addr_high;
 	__le32 addr_low;
 };
@@ -140,7 +138,7 @@ struct ice_aqc_manage_mac_read_resp {
 
 /* Manage MAC address, write command - direct (0x0108) */
 struct ice_aqc_manage_mac_write {
-	u8 port_num;
+	u8 rsvd;
 	u8 flags;
 #define ICE_AQC_MAN_MAC_WR_MC_MAG_EN		BIT(0)
 #define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP	BIT(1)
@@ -920,6 +918,8 @@ struct ice_aqc_get_phy_caps_data {
 #define ICE_AQC_PHY_EN_LINK				BIT(3)
 #define ICE_AQC_PHY_AN_MODE				BIT(4)
 #define ICE_AQC_GET_PHY_EN_MOD_QUAL			BIT(5)
+#define ICE_AQC_PHY_EN_AUTO_FEC				BIT(7)
+#define ICE_AQC_PHY_CAPS_MASK				ICE_M(0xff, 0)
 	u8 low_power_ctrl;
 #define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG		BIT(0)
 	__le16 eee_cap;
@@ -932,6 +932,7 @@ struct ice_aqc_get_phy_caps_data {
 #define ICE_AQC_PHY_EEE_EN_40GBASE_KR4			BIT(6)
 	__le16 eeer_value;
 	u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */
+	u8 phy_fw_ver[8];
 	u8 link_fec_options;
 #define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN		BIT(0)
 #define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ		BIT(1)
@@ -940,6 +941,8 @@ struct ice_aqc_get_phy_caps_data {
 #define ICE_AQC_PHY_FEC_25G_RS_544_REQ			BIT(4)
 #define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN		BIT(6)
 #define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN		BIT(7)
+#define ICE_AQC_PHY_FEC_MASK				ICE_M(0xdf, 0)
+	u8 rsvd1;	/* Byte 35 reserved */
 	u8 extended_compliance_code;
 #define ICE_MODULE_TYPE_TOTAL_BYTE			3
 	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
@@ -954,13 +957,14 @@ struct ice_aqc_get_phy_caps_data {
 #define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS			0xA0
 #define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS		0x86
 	u8 qualified_module_count;
+	u8 rsvd2[7];	/* Bytes 47:41 reserved */
 #define ICE_AQC_QUAL_MOD_COUNT_MAX			16
 	struct {
 		u8 v_oui[3];
-		u8 rsvd1;
+		u8 rsvd3;
 		u8 v_part[16];
 		__le32 v_rev;
-		__le64 rsvd8;
+		__le64 rsvd4;
 	} qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
 };
 
@@ -1062,6 +1066,7 @@ struct ice_aqc_get_link_status_data {
 #define ICE_AQ_LINK_25G_KR_FEC_EN	BIT(0)
 #define ICE_AQ_LINK_25G_RS_528_FEC_EN	BIT(1)
 #define ICE_AQ_LINK_25G_RS_544_FEC_EN	BIT(2)
+#define ICE_AQ_FEC_MASK			ICE_M(0x7, 0)
 	/* Pacing Config */
 #define ICE_AQ_CFG_PACING_S		3
 #define ICE_AQ_CFG_PACING_M		(0xF << ICE_AQ_CFG_PACING_S)
@@ -1112,6 +1117,14 @@ struct ice_aqc_set_event_mask {
 	u8	reserved1[6];
 };
 
+/* Set MAC Loopback command (direct 0x0620) */
+struct ice_aqc_set_mac_lb {
+	u8 lb_mode;
+#define ICE_AQ_MAC_LB_EN		BIT(0)
+#define ICE_AQ_MAC_LB_OSC_CLK		BIT(1)
+	u8 reserved[15];
+};
+
 /* Set Port Identification LED (direct, 0x06E9) */
 struct ice_aqc_set_port_id_led {
 	u8 lport_num;
@@ -1145,6 +1158,17 @@ struct ice_aqc_nvm {
 	__le32 addr_low;
 };
 
+/* NVM Checksum Command (direct, 0x0706) */
+struct ice_aqc_nvm_checksum {
+	u8 flags;
+#define ICE_AQC_NVM_CHECKSUM_VERIFY	BIT(0)
+#define ICE_AQC_NVM_CHECKSUM_RECALC	BIT(1)
+	u8 rsvd;
+	__le16 checksum; /* Used only by response */
+#define ICE_AQC_NVM_CHECKSUM_CORRECT	0xBABA
+	u8 rsvd2[12];
+};
+
 /**
  * Send to PF command (indirect 0x0801) ID is only used by PF
  *
@@ -1249,7 +1273,7 @@ struct ice_aqc_get_cee_dcb_cfg_resp {
 };
 
 /* Set Local LLDP MIB (indirect 0x0A08)
- * Used to replace the local MIB of a given LLDP agent. e.g. DCBx
+ * Used to replace the local MIB of a given LLDP agent. e.g. DCBX
  */
 struct ice_aqc_lldp_set_local_mib {
 	u8 type;
@@ -1266,7 +1290,7 @@ struct ice_aqc_lldp_set_local_mib {
 };
 
 /* Stop/Start LLDP Agent (direct 0x0A09)
- * Used for stopping/starting specific LLDP agent. e.g. DCBx.
+ * Used for stopping/starting specific LLDP agent. e.g. DCBX.
  * The same structure is used for the response, with the command field
  * being used as the status field.
  */
@@ -1539,6 +1563,7 @@ struct ice_aq_desc {
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_query_port_ets port_ets;
 		struct ice_aqc_nvm nvm;
+		struct ice_aqc_nvm_checksum nvm_checksum;
 		struct ice_aqc_pf_vf_msg virt;
 		struct ice_aqc_lldp_get_mib lldp_get_mib;
 		struct ice_aqc_lldp_set_mib_change lldp_set_event;
@@ -1554,6 +1579,7 @@ struct ice_aq_desc {
 		struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
 		struct ice_aqc_fw_logging fw_logging;
 		struct ice_aqc_get_clear_fw_log get_clear_fw_log;
+		struct ice_aqc_set_mac_lb set_mac_lb;
 		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
 		struct ice_aqc_set_event_mask set_event_mask;
 		struct ice_aqc_get_link_status get_link_status;
@@ -1642,10 +1668,12 @@ enum ice_adminq_opc {
 	ice_aqc_opc_restart_an				= 0x0605,
 	ice_aqc_opc_get_link_status			= 0x0607,
 	ice_aqc_opc_set_event_mask			= 0x0613,
+	ice_aqc_opc_set_mac_lb				= 0x0620,
 	ice_aqc_opc_set_port_id_led			= 0x06E9,
 
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
+	ice_aqc_opc_nvm_checksum			= 0x0706,
 
 	/* PF/VF mailbox commands */
 	ice_mbx_opc_send_msg_to_pf			= 0x0801,
@@ -1671,6 +1699,7 @@ enum ice_adminq_opc {
 
 	/* debug commands */
 	ice_aqc_opc_fw_logging				= 0xFF09,
+	ice_aqc_opc_fw_logging_info			= 0xFF10,
 };
 
 #endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index da7878529929..2e0731c1e1a3 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -51,9 +51,6 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw)
  */
 void ice_dev_onetime_setup(struct ice_hw *hw)
 {
-	/* configure Rx - set non pxe mode */
-	wr32(hw, GLLAN_RCTL_0, 0x1);
-
 #define MBX_PF_VT_PFALLOC	0x00231E80
 	/* set VFs per PF */
 	wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF));
@@ -307,6 +304,8 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 	hw_link_info->an_info = link_data.an_info;
 	hw_link_info->ext_info = link_data.ext_info;
 	hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+	hw_link_info->fec_info = link_data.cfg & ICE_AQ_FEC_MASK;
+	hw_link_info->topo_media_conflict = link_data.topo_media_conflict;
 	hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M;
 
 	/* update fc info */
@@ -476,6 +475,49 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
 	ICE_FW_LOG_DESC_SIZE(ICE_AQC_FW_LOG_ID_MAX)
 
 /**
+ * ice_get_fw_log_cfg - get FW logging configuration
+ * @hw: pointer to the HW struct
+ */
+static enum ice_status ice_get_fw_log_cfg(struct ice_hw *hw)
+{
+	struct ice_aqc_fw_logging_data *config;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	u16 size;
+
+	size = ICE_FW_LOG_DESC_SIZE_MAX;
+	config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL);
+	if (!config)
+		return ICE_ERR_NO_MEMORY;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging_info);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	status = ice_aq_send_cmd(hw, &desc, config, size, NULL);
+	if (!status) {
+		u16 i;
+
+		/* Save FW logging information into the HW structure */
+		for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+			u16 v, m, flgs;
+
+			v = le16_to_cpu(config->entry[i]);
+			m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+			flgs = (v & ICE_AQC_FW_LOG_EN_M) >> ICE_AQC_FW_LOG_EN_S;
+
+			if (m < ICE_AQC_FW_LOG_ID_MAX)
+				hw->fw_log.evnts[m].cur = flgs;
+		}
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), config);
+
+	return status;
+}
+
+/**
  * ice_cfg_fw_log - configure FW logging
  * @hw: pointer to the HW struct
  * @enable: enable certain FW logging events if true, disable all if false
@@ -529,6 +571,11 @@ static enum ice_status ice_cfg_fw_log(struct ice_hw *hw, bool enable)
 	    (!hw->fw_log.actv_evnts || !ice_check_sq_alive(hw, &hw->adminq)))
 		return 0;
 
+	/* Get current FW log settings */
+	status = ice_get_fw_log_cfg(hw);
+	if (status)
+		return status;
+
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging);
 	cmd = &desc.params.fw_logging;
 
@@ -634,17 +681,17 @@ out:
  */
 void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf)
 {
-	ice_debug(hw, ICE_DBG_AQ_MSG, "[ FW Log Msg Start ]\n");
-	ice_debug_array(hw, ICE_DBG_AQ_MSG, 16, 1, (u8 *)buf,
+	ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg Start ]\n");
+	ice_debug_array(hw, ICE_DBG_FW_LOG, 16, 1, (u8 *)buf,
 			le16_to_cpu(desc->datalen));
-	ice_debug(hw, ICE_DBG_AQ_MSG, "[ FW Log Msg End ]\n");
+	ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg End ]\n");
 }
 
 /**
  * ice_get_itr_intrl_gran - determine int/intrl granularity
  * @hw: pointer to the HW struct
  *
- * Determines the itr/intrl granularities based on the maximum aggregate
+ * Determines the ITR/intrl granularities based on the maximum aggregate
  * bandwidth according to the device's configuration during power-on.
  */
 static void ice_get_itr_intrl_gran(struct ice_hw *hw)
@@ -815,6 +862,10 @@ err_unroll_cqinit:
 /**
  * ice_deinit_hw - unroll initialization operations done by ice_init_hw
  * @hw: pointer to the hardware structure
+ *
+ * This should be called only during nominal operation, not as a result of
+ * ice_init_hw() failing since ice_init_hw() will take care of unrolling
+ * applicable initializations if it fails for any reason.
  */
 void ice_deinit_hw(struct ice_hw *hw)
 {
@@ -1447,6 +1498,7 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
 	struct ice_hw_func_caps *func_p = NULL;
 	struct ice_hw_dev_caps *dev_p = NULL;
 	struct ice_hw_common_caps *caps;
+	char const *prefix;
 	u32 i;
 
 	if (!buf)
@@ -1457,9 +1509,11 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
 	if (opc == ice_aqc_opc_list_dev_caps) {
 		dev_p = &hw->dev_caps;
 		caps = &dev_p->common_cap;
+		prefix = "dev cap";
 	} else if (opc == ice_aqc_opc_list_func_caps) {
 		func_p = &hw->func_caps;
 		caps = &func_p->common_cap;
+		prefix = "func cap";
 	} else {
 		ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n");
 		return;
@@ -1475,28 +1529,29 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
 		case ICE_AQC_CAPS_VALID_FUNCTIONS:
 			caps->valid_functions = number;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Valid Functions = %d\n",
+				  "%s: valid functions = %d\n", prefix,
 				  caps->valid_functions);
 			break;
 		case ICE_AQC_CAPS_SRIOV:
 			caps->sr_iov_1_1 = (number == 1);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: SR-IOV = %d\n", caps->sr_iov_1_1);
+				  "%s: SR-IOV = %d\n", prefix,
+				  caps->sr_iov_1_1);
 			break;
 		case ICE_AQC_CAPS_VF:
 			if (dev_p) {
 				dev_p->num_vfs_exposed = number;
 				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: VFs exposed = %d\n",
+					  "%s: VFs exposed = %d\n", prefix,
 					  dev_p->num_vfs_exposed);
 			} else if (func_p) {
 				func_p->num_allocd_vfs = number;
 				func_p->vf_base_id = logical_id;
 				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: VFs allocated = %d\n",
+					  "%s: VFs allocated = %d\n", prefix,
 					  func_p->num_allocd_vfs);
 				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: VF base_id = %d\n",
+					  "%s: VF base_id = %d\n", prefix,
 					  func_p->vf_base_id);
 			}
 			break;
@@ -1504,69 +1559,69 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
 			if (dev_p) {
 				dev_p->num_vsi_allocd_to_host = number;
 				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: Dev.VSI cnt = %d\n",
+					  "%s: num VSI alloc to host = %d\n",
+					  prefix,
 					  dev_p->num_vsi_allocd_to_host);
 			} else if (func_p) {
 				func_p->guar_num_vsi =
 					ice_get_num_per_func(hw, ICE_MAX_VSI);
 				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: Func.VSI cnt = %d\n",
-					  number);
+					  "%s: num guaranteed VSI (fw) = %d\n",
+					  prefix, number);
+				ice_debug(hw, ICE_DBG_INIT,
+					  "%s: num guaranteed VSI = %d\n",
+					  prefix, func_p->guar_num_vsi);
 			}
 			break;
 		case ICE_AQC_CAPS_RSS:
 			caps->rss_table_size = number;
 			caps->rss_table_entry_width = logical_id;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: RSS table size = %d\n",
+				  "%s: RSS table size = %d\n", prefix,
 				  caps->rss_table_size);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: RSS table width = %d\n",
+				  "%s: RSS table width = %d\n", prefix,
 				  caps->rss_table_entry_width);
 			break;
 		case ICE_AQC_CAPS_RXQS:
 			caps->num_rxq = number;
 			caps->rxq_first_id = phys_id;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Num Rx Qs = %d\n", caps->num_rxq);
+				  "%s: num Rx queues = %d\n", prefix,
+				  caps->num_rxq);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Rx first queue ID = %d\n",
+				  "%s: Rx first queue ID = %d\n", prefix,
 				  caps->rxq_first_id);
 			break;
 		case ICE_AQC_CAPS_TXQS:
 			caps->num_txq = number;
 			caps->txq_first_id = phys_id;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Num Tx Qs = %d\n", caps->num_txq);
+				  "%s: num Tx queues = %d\n", prefix,
+				  caps->num_txq);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Tx first queue ID = %d\n",
+				  "%s: Tx first queue ID = %d\n", prefix,
 				  caps->txq_first_id);
 			break;
 		case ICE_AQC_CAPS_MSIX:
 			caps->num_msix_vectors = number;
 			caps->msix_vector_first_id = phys_id;
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: MSIX vector count = %d\n",
+				  "%s: MSIX vector count = %d\n", prefix,
 				  caps->num_msix_vectors);
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: MSIX first vector index = %d\n",
+				  "%s: MSIX first vector index = %d\n", prefix,
 				  caps->msix_vector_first_id);
 			break;
 		case ICE_AQC_CAPS_MAX_MTU:
 			caps->max_mtu = number;
-			if (dev_p)
-				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: Dev.MaxMTU = %d\n",
-					  caps->max_mtu);
-			else if (func_p)
-				ice_debug(hw, ICE_DBG_INIT,
-					  "HW caps: func.MaxMTU = %d\n",
-					  caps->max_mtu);
+			ice_debug(hw, ICE_DBG_INIT, "%s: max MTU = %d\n",
+				  prefix, caps->max_mtu);
 			break;
 		default:
 			ice_debug(hw, ICE_DBG_INIT,
-				  "HW caps: Unknown capability[%d]: 0x%x\n", i,
-				  cap);
+				  "%s: unknown capability[%d]: 0x%x\n", prefix,
+				  i, cap);
 			break;
 		}
 	}
@@ -1947,36 +2002,37 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
  */
 enum ice_status ice_update_link_info(struct ice_port_info *pi)
 {
-	struct ice_aqc_get_phy_caps_data *pcaps;
-	struct ice_phy_info *phy_info;
+	struct ice_link_status *li;
 	enum ice_status status;
-	struct ice_hw *hw;
 
 	if (!pi)
 		return ICE_ERR_PARAM;
 
-	hw = pi->hw;
-
-	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
-	if (!pcaps)
-		return ICE_ERR_NO_MEMORY;
+	li = &pi->phy.link_info;
 
-	phy_info = &pi->phy;
 	status = ice_aq_get_link_info(pi, true, NULL, NULL);
 	if (status)
-		goto out;
+		return status;
+
+	if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		struct ice_aqc_get_phy_caps_data *pcaps;
+		struct ice_hw *hw;
+
+		hw = pi->hw;
+		pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps),
+				     GFP_KERNEL);
+		if (!pcaps)
+			return ICE_ERR_NO_MEMORY;
 
-	if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
 		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
 					     pcaps, NULL);
-		if (status)
-			goto out;
+		if (!status)
+			memcpy(li->module_type, &pcaps->module_type,
+			       sizeof(li->module_type));
 
-		memcpy(phy_info->link_info.module_type, &pcaps->module_type,
-		       sizeof(phy_info->link_info.module_type));
+		devm_kfree(ice_hw_to_dev(hw), pcaps);
 	}
-out:
-	devm_kfree(ice_hw_to_dev(hw), pcaps);
+
 	return status;
 }
 
@@ -2081,6 +2137,74 @@ out:
 }
 
 /**
+ * ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data
+ * @caps: PHY ability structure to copy date from
+ * @cfg: PHY configuration structure to copy data to
+ *
+ * Helper function to copy AQC PHY get ability data to PHY set configuration
+ * data structure
+ */
+void
+ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps,
+			 struct ice_aqc_set_phy_cfg_data *cfg)
+{
+	if (!caps || !cfg)
+		return;
+
+	cfg->phy_type_low = caps->phy_type_low;
+	cfg->phy_type_high = caps->phy_type_high;
+	cfg->caps = caps->caps;
+	cfg->low_power_ctrl = caps->low_power_ctrl;
+	cfg->eee_cap = caps->eee_cap;
+	cfg->eeer_value = caps->eeer_value;
+	cfg->link_fec_opt = caps->link_fec_options;
+}
+
+/**
+ * ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode
+ * @cfg: PHY configuration data to set FEC mode
+ * @fec: FEC mode to configure
+ *
+ * Caller should copy ice_aqc_get_phy_caps_data.caps ICE_AQC_PHY_EN_AUTO_FEC
+ * (bit 7) and ice_aqc_get_phy_caps_data.link_fec_options to cfg.caps
+ * ICE_AQ_PHY_ENA_AUTO_FEC (bit 7) and cfg.link_fec_options before calling.
+ */
+void
+ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec)
+{
+	switch (fec) {
+	case ICE_FEC_BASER:
+		/* Clear auto FEC and RS bits, and AND BASE-R ability
+		 * bits and OR request bits.
+		 */
+		cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
+		cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+				     ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN;
+		cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+				     ICE_AQC_PHY_FEC_25G_KR_REQ;
+		break;
+	case ICE_FEC_RS:
+		/* Clear auto FEC and BASE-R bits, and AND RS ability
+		 * bits and OR request bits.
+		 */
+		cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
+		cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN;
+		cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+				     ICE_AQC_PHY_FEC_25G_RS_544_REQ;
+		break;
+	case ICE_FEC_NONE:
+		/* Clear auto FEC and all FEC option bits. */
+		cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
+		cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK;
+		break;
+	case ICE_FEC_AUTO:
+		/* AND auto FEC bit, and all caps bits. */
+		cfg->caps &= ICE_AQC_PHY_CAPS_MASK;
+		break;
+	}
+}
+
+/**
  * ice_get_link_status - get status of the HW network link
  * @pi: port information structure
  * @link_up: pointer to bool (true/false = linkup/linkdown)
@@ -2169,6 +2293,29 @@ ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
 }
 
 /**
+ * ice_aq_set_mac_loopback
+ * @hw: pointer to the HW struct
+ * @ena_lpbk: Enable or Disable loopback
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable/disable loopback on a given port
+ */
+enum ice_status
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_mac_lb *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_mac_lb;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_lb);
+	if (ena_lpbk)
+		cmd->lb_mode = ICE_AQ_MAC_LB_EN;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
  * ice_aq_set_port_id_led
  * @pi: pointer to the port information
  * @is_orig_mode: is this LED set to original mode (by the net-list)
@@ -2552,7 +2699,7 @@ do_aq:
 			ice_debug(hw, ICE_DBG_SCHED, "VM%d disable failed %d\n",
 				  vmvf_num, hw->adminq.sq_last_status);
 		else
-			ice_debug(hw, ICE_DBG_SCHED, "disable Q %d failed %d\n",
+			ice_debug(hw, ICE_DBG_SCHED, "disable queue %d failed %d\n",
 				  le16_to_cpu(qg_list[0].q_id[0]),
 				  hw->adminq.sq_last_status);
 	}
@@ -2924,7 +3071,6 @@ ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
 		return ICE_ERR_CFG;
 
-
 	if (!num_queues) {
 		/* if queue is disabled already yet the disable queue command
 		 * has to be sent to complete the VF reset, then call
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index f1ddebf45231..d1f8353fe6bb 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -9,6 +9,8 @@
 #include "ice_switch.h"
 #include <linux/avf/virtchnl.h>
 
+enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw);
+
 void
 ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len);
 enum ice_status ice_init_hw(struct ice_hw *hw);
@@ -84,7 +86,11 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
 enum ice_status
 ice_set_fc(struct ice_port_info *pi, u8 *aq_failures,
 	   bool ena_auto_link_update);
-
+void
+ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec);
+void
+ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps,
+			 struct ice_aqc_set_phy_cfg_data *cfg);
 enum ice_status
 ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
 			   struct ice_sq_cd *cd);
@@ -95,6 +101,9 @@ enum ice_status
 ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
 		      struct ice_sq_cd *cd);
 enum ice_status
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
+
+enum ice_status
 ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
 		       struct ice_sq_cd *cd);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index cc8cb5fdcdc1..e91ac4df0242 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -439,7 +439,7 @@ do {									\
 	/* free the buffer info list */					\
 	if ((qi)->ring.cmd_buf)						\
 		devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf);	\
-	/* free dma head */						\
+	/* free DMA head */						\
 	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head);		\
 } while (0)
 
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index e0585394d984..44945c2165d8 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -35,7 +35,7 @@ enum ice_ctl_q {
 #define ICE_CTL_Q_SQ_CMD_TIMEOUT	250  /* msecs */
 
 struct ice_ctl_q_ring {
-	void *dma_head;			/* Virtual address to dma head */
+	void *dma_head;			/* Virtual address to DMA head */
 	struct ice_dma_mem desc_buf;	/* descriptor ring memory */
 	void *cmd_buf;			/* command buffer memory */
 
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index 8bbf48e04a1c..c2002ded65f6 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -82,12 +82,14 @@ ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
  * @hw: pointer to the HW struct
  * @shutdown_lldp_agent: True if LLDP Agent needs to be Shutdown
  *			 False if LLDP Agent needs to be Stopped
+ * @persist: True if Stop/Shutdown of LLDP Agent needs to be persistent across
+ *	     reboots
  * @cd: pointer to command details structure or NULL
  *
  * Stop or Shutdown the embedded LLDP Agent (0x0A05)
  */
 enum ice_status
-ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent,
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
 		 struct ice_sq_cd *cd)
 {
 	struct ice_aqc_lldp_stop *cmd;
@@ -100,17 +102,22 @@ ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent,
 	if (shutdown_lldp_agent)
 		cmd->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN;
 
+	if (persist)
+		cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_DIS;
+
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
 }
 
 /**
  * ice_aq_start_lldp
  * @hw: pointer to the HW struct
+ * @persist: True if Start of LLDP Agent needs to be persistent across reboots
  * @cd: pointer to command details structure or NULL
  *
  * Start the embedded LLDP Agent on all ports. (0x0A06)
  */
-enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd)
+enum ice_status
+ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd)
 {
 	struct ice_aqc_lldp_start *cmd;
 	struct ice_aq_desc desc;
@@ -121,6 +128,9 @@ enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd)
 
 	cmd->command = ICE_AQ_LLDP_AGENT_START;
 
+	if (persist)
+		cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_ENA;
+
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
 }
 
@@ -163,7 +173,7 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
  *
  * Get the DCBX status from the Firmware
  */
-u8 ice_get_dcbx_status(struct ice_hw *hw)
+static u8 ice_get_dcbx_status(struct ice_hw *hw)
 {
 	u32 reg;
 
@@ -614,7 +624,8 @@ ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
  *
  * Parse DCB configuration from the LLDPDU
  */
-enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
+static enum ice_status
+ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
 {
 	struct ice_lldp_org_tlv *tlv;
 	enum ice_status ret = 0;
@@ -658,13 +669,13 @@ enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
 /**
  * ice_aq_get_dcb_cfg
  * @hw: pointer to the HW struct
- * @mib_type: mib type for the query
+ * @mib_type: MIB type for the query
  * @bridgetype: bridge type for the query (remote)
  * @dcbcfg: store for LLDPDU data
  *
  * Query DCB configuration from the firmware
  */
-static enum ice_status
+enum ice_status
 ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
 		   struct ice_dcbx_cfg *dcbcfg)
 {
@@ -689,13 +700,13 @@ ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
 }
 
 /**
- * ice_aq_start_stop_dcbx - Start/Stop DCBx service in FW
+ * ice_aq_start_stop_dcbx - Start/Stop DCBX service in FW
  * @hw: pointer to the HW struct
- * @start_dcbx_agent: True if DCBx Agent needs to be started
- *		      False if DCBx Agent needs to be stopped
- * @dcbx_agent_status: FW indicates back the DCBx agent status
- *		       True if DCBx Agent is active
- *		       False if DCBx Agent is stopped
+ * @start_dcbx_agent: True if DCBX Agent needs to be started
+ *		      False if DCBX Agent needs to be stopped
+ * @dcbx_agent_status: FW indicates back the DCBX agent status
+ *		       True if DCBX Agent is active
+ *		       False if DCBX Agent is stopped
  * @cd: pointer to command details structure or NULL
  *
  * Start/Stop the embedded dcbx Agent. In case that this wrapper function
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h
index e7d4416e3a66..522e1452abe2 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.h
@@ -120,8 +120,9 @@ struct ice_cee_app_prio {
 	u8 prio_map;
 } __packed;
 
-u8 ice_get_dcbx_status(struct ice_hw *hw);
-enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg);
+enum ice_status
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+		   struct ice_dcbx_cfg *dcbcfg);
 enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi);
 enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi);
 enum ice_status ice_init_dcb(struct ice_hw *hw);
@@ -131,9 +132,10 @@ ice_query_port_ets(struct ice_port_info *pi,
 		   struct ice_sq_cd *cmd_details);
 #ifdef CONFIG_DCB
 enum ice_status
-ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent,
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
 		 struct ice_sq_cd *cd);
-enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd);
 enum ice_status
 ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
 		       bool *dcbx_agent_status, struct ice_sq_cd *cd);
@@ -144,6 +146,7 @@ ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
 static inline enum ice_status
 ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
 		 bool __always_unused shutdown_lldp_agent,
+		 bool __always_unused persist,
 		 struct ice_sq_cd __always_unused *cd)
 {
 	return 0;
@@ -151,6 +154,7 @@ ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
 
 static inline enum ice_status
 ice_aq_start_lldp(struct ice_hw __always_unused *hw,
+		  bool __always_unused persist,
 		  struct ice_sq_cd __always_unused *cd)
 {
 	return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 3e81af1884fc..fe88b127ca42 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -120,12 +120,14 @@ static void ice_pf_dcb_recfg(struct ice_pf *pf)
 			tc_map = ICE_DFLT_TRAFFIC_CLASS;
 
 		ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map);
-		if (ret)
+		if (ret) {
 			dev_err(&pf->pdev->dev,
 				"Failed to config TC for VSI index: %d\n",
 				pf->vsi[v]->idx);
-		else
-			ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+			continue;
+		}
+
+		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
 	}
 }
 
@@ -133,8 +135,10 @@ static void ice_pf_dcb_recfg(struct ice_pf *pf)
  * ice_pf_dcb_cfg - Apply new DCB configuration
  * @pf: pointer to the PF struct
  * @new_cfg: DCBX config to apply
+ * @locked: is the RTNL held
  */
-static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg)
+static
+int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 {
 	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
 	struct ice_aqc_port_ets_elem buf = { 0 };
@@ -163,7 +167,8 @@ static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg)
 	/* avoid race conditions by holding the lock while disabling and
 	 * re-enabling the VSI
 	 */
-	rtnl_lock();
+	if (!locked)
+		rtnl_lock();
 	ice_pf_dis_all_vsi(pf, true);
 
 	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
@@ -192,7 +197,8 @@ static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg)
 
 out:
 	ice_pf_ena_all_vsi(pf, true);
-	rtnl_unlock();
+	if (!locked)
+		rtnl_unlock();
 	devm_kfree(&pf->pdev->dev, old_cfg);
 	return ret;
 }
@@ -271,15 +277,16 @@ dcb_error:
 	prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
 	prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
 	memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec));
-	ice_pf_dcb_cfg(pf, prev_cfg);
+	ice_pf_dcb_cfg(pf, prev_cfg, false);
 	devm_kfree(&pf->pdev->dev, prev_cfg);
 }
 
 /**
  * ice_dcb_init_cfg - set the initial DCB config in SW
- * @pf: pf to apply config to
+ * @pf: PF to apply config to
+ * @locked: Is the RTNL held
  */
-static int ice_dcb_init_cfg(struct ice_pf *pf)
+static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
 {
 	struct ice_dcbx_cfg *newcfg;
 	struct ice_port_info *pi;
@@ -294,7 +301,7 @@ static int ice_dcb_init_cfg(struct ice_pf *pf)
 	memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg));
 
 	dev_info(&pf->pdev->dev, "Configuring initial DCB values\n");
-	if (ice_pf_dcb_cfg(pf, newcfg))
+	if (ice_pf_dcb_cfg(pf, newcfg, locked))
 		ret = -EINVAL;
 
 	devm_kfree(&pf->pdev->dev, newcfg);
@@ -304,9 +311,10 @@ static int ice_dcb_init_cfg(struct ice_pf *pf)
 
 /**
  * ice_dcb_sw_default_config - Apply a default DCB config
- * @pf: pf to apply config to
+ * @pf: PF to apply config to
+ * @locked: was this function called with RTNL held
  */
-static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf)
+static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
 {
 	struct ice_aqc_port_ets_elem buf = { 0 };
 	struct ice_dcbx_cfg *dcbcfg;
@@ -338,7 +346,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf)
 	dcbcfg->app[0].priority = 3;
 	dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE;
 
-	ret = ice_pf_dcb_cfg(pf, dcbcfg);
+	ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
 	devm_kfree(&pf->pdev->dev, dcbcfg);
 	if (ret)
 		return ret;
@@ -348,9 +356,10 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf)
 
 /**
  * ice_init_pf_dcb - initialize DCB for a PF
- * @pf: pf to initiialize DCB for
+ * @pf: PF to initialize DCB for
+ * @locked: Was function called with RTNL held
  */
-int ice_init_pf_dcb(struct ice_pf *pf)
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
 {
 	struct device *dev = &pf->pdev->dev;
 	struct ice_port_info *port_info;
@@ -360,33 +369,10 @@ int ice_init_pf_dcb(struct ice_pf *pf)
 
 	port_info = hw->port_info;
 
-	/* check if device is DCB capable */
-	if (!hw->func_caps.common_cap.dcb) {
-		dev_dbg(dev, "DCB not supported\n");
-		return -EOPNOTSUPP;
-	}
-
-	/* Best effort to put DCBx and LLDP into a good state */
-	port_info->dcbx_status = ice_get_dcbx_status(hw);
-	if (port_info->dcbx_status != ICE_DCBX_STATUS_DONE &&
-	    port_info->dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
-		bool dcbx_status;
-
-		/* Attempt to start LLDP engine. Ignore errors
-		 * as this will error if it is already started
-		 */
-		ice_aq_start_lldp(hw, NULL);
-
-		/* Attempt to start DCBX. Ignore errors as this
-		 * will error if it is already started
-		 */
-		ice_aq_start_stop_dcbx(hw, true, &dcbx_status, NULL);
-	}
-
 	err = ice_init_dcb(hw);
 	if (err) {
-		/* FW LLDP not in usable state, default to SW DCBx/LLDP */
-		dev_info(&pf->pdev->dev, "FW LLDP not in usable state\n");
+		/* FW LLDP is not active, default to SW DCBX/LLDP */
+		dev_info(&pf->pdev->dev, "FW LLDP is not active\n");
 		hw->port_info->dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
 		hw->port_info->is_sw_lldp = true;
 	}
@@ -398,15 +384,16 @@ int ice_init_pf_dcb(struct ice_pf *pf)
 	if (port_info->is_sw_lldp) {
 		sw_default = 1;
 		dev_info(&pf->pdev->dev, "DCBx/LLDP in SW mode.\n");
+		clear_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags);
+	} else {
+		set_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags);
 	}
 
-	if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
-		sw_default = 1;
+	if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED)
 		dev_info(&pf->pdev->dev, "DCBX not started\n");
-	}
 
 	if (sw_default) {
-		err = ice_dcb_sw_dflt_cfg(pf);
+		err = ice_dcb_sw_dflt_cfg(pf, locked);
 		if (err) {
 			dev_err(&pf->pdev->dev,
 				"Failed to set local DCB config %d\n", err);
@@ -425,7 +412,7 @@ int ice_init_pf_dcb(struct ice_pf *pf)
 
 	set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
 
-	err = ice_dcb_init_cfg(pf);
+	err = ice_dcb_init_cfg(pf, locked);
 	if (err)
 		goto dcb_init_err;
 
@@ -515,6 +502,55 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
 }
 
 /**
+ * ice_dcb_need_recfg - Check if DCB needs reconfig
+ * @pf: board private structure
+ * @old_cfg: current DCB config
+ * @new_cfg: new DCB config
+ */
+static bool ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+			       struct ice_dcbx_cfg *new_cfg)
+{
+	bool need_reconfig = false;
+
+	/* Check if ETS configuration has changed */
+	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
+		   sizeof(new_cfg->etscfg))) {
+		/* If Priority Table has changed reconfig is needed */
+		if (memcmp(&new_cfg->etscfg.prio_table,
+			   &old_cfg->etscfg.prio_table,
+			   sizeof(new_cfg->etscfg.prio_table))) {
+			need_reconfig = true;
+			dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n");
+		}
+
+		if (memcmp(&new_cfg->etscfg.tcbwtable,
+			   &old_cfg->etscfg.tcbwtable,
+			   sizeof(new_cfg->etscfg.tcbwtable)))
+			dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n");
+
+		if (memcmp(&new_cfg->etscfg.tsatable,
+			   &old_cfg->etscfg.tsatable,
+			   sizeof(new_cfg->etscfg.tsatable)))
+			dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n");
+	}
+
+	/* Check if PFC configuration has changed */
+	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
+		need_reconfig = true;
+		dev_dbg(&pf->pdev->dev, "PFC config change detected.\n");
+	}
+
+	/* Check if APP Table has changed */
+	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
+		need_reconfig = true;
+		dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
+	}
+
+	dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
+	return need_reconfig;
+}
+
+/**
  * ice_dcb_process_lldp_set_mib_change - Process MIB change
  * @pf: ptr to ice_pf
  * @event: pointer to the admin queue receive event
@@ -523,29 +559,95 @@ void
 ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 				    struct ice_rq_event_info *event)
 {
-	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) {
-		struct ice_dcbx_cfg *dcbcfg, *prev_cfg;
-		int err;
-
-		prev_cfg = &pf->hw.port_info->local_dcbx_cfg;
-		dcbcfg = devm_kmemdup(&pf->pdev->dev, prev_cfg,
-				      sizeof(*dcbcfg), GFP_KERNEL);
-		if (!dcbcfg)
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_aqc_lldp_get_mib *mib;
+	struct ice_dcbx_cfg tmp_dcbx_cfg;
+	bool need_reconfig = false;
+	struct ice_port_info *pi;
+	u8 type;
+	int ret;
+
+	/* Not DCB capable or capability disabled */
+	if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+		return;
+
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
+		dev_dbg(&pf->pdev->dev,
+			"MIB Change Event in HOST mode\n");
+		return;
+	}
+
+	pi = pf->hw.port_info;
+	mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw;
+	/* Ignore if event is not for Nearest Bridge */
+	type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) &
+		ICE_AQ_LLDP_BRID_TYPE_M);
+	dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type);
+	if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
+		return;
+
+	/* Check MIB Type and return if event for Remote MIB update */
+	type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M;
+	dev_dbg(&pf->pdev->dev,
+		"LLDP event mib type %s\n", type ? "remote" : "local");
+	if (type == ICE_AQ_LLDP_MIB_REMOTE) {
+		/* Update the remote cached instance and return */
+		ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
+					 &pi->remote_dcbx_cfg);
+		if (ret) {
+			dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n");
 			return;
+		}
+	}
 
-		err = ice_lldp_to_dcb_cfg(event->msg_buf, dcbcfg);
-		if (!err)
-			ice_pf_dcb_cfg(pf, dcbcfg);
+	/* store the old configuration */
+	tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg;
 
-		devm_kfree(&pf->pdev->dev, dcbcfg);
+	/* Reset the old DCBX configuration data */
+	memset(&pi->local_dcbx_cfg, 0, sizeof(pi->local_dcbx_cfg));
 
-		/* Get updated DCBx data from firmware */
-		err = ice_get_dcb_cfg(pf->hw.port_info);
-		if (err)
-			dev_err(&pf->pdev->dev,
-				"Failed to get DCB config\n");
-	} else {
+	/* Get updated DCBX data from firmware */
+	ret = ice_get_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Failed to get DCB config\n");
+		return;
+	}
+
+	/* No change detected in DCBX configs */
+	if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
 		dev_dbg(&pf->pdev->dev,
-			"MIB Change Event in HOST mode\n");
+			"No change detected in DCBX configuration.\n");
+		return;
+	}
+
+	need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
+					   &pi->local_dcbx_cfg);
+	if (!need_reconfig)
+		return;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) {
+		dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
 	}
+
+	rtnl_lock();
+	ice_pf_dis_all_vsi(pf, true);
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		rtnl_unlock();
+		return;
+	}
+
+	/* changes in configuration update VSI */
+	ice_pf_dcb_recfg(pf);
+
+	ice_pf_ena_all_vsi(pf, true);
+	rtnl_unlock();
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index ca7b76faa03c..819081053ff5 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -14,7 +14,7 @@ void ice_dcb_rebuild(struct ice_pf *pf);
 u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
 u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
 void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
-int ice_init_pf_dcb(struct ice_pf *pf);
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
 void ice_update_dcb_stats(struct ice_pf *pf);
 int
 ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
@@ -40,7 +40,8 @@ static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
 	return 1;
 }
 
-static inline int ice_init_pf_dcb(struct ice_pf *pf)
+static inline int
+ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
 {
 	dev_dbg(&pf->pdev->dev, "DCB not supported\n");
 	return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 1341fde8d53f..52083a63dee6 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -45,22 +45,40 @@ static int ice_q_stats_len(struct net_device *netdev)
 				 ICE_VSI_STATS_LEN + ice_q_stats_len(n))
 
 static const struct ice_stats ice_gstrings_vsi_stats[] = {
-	ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
 	ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
-	ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+	ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
 	ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
-	ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+	ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
 	ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
-	ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+	ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
 	ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
-	ICE_VSI_STAT("rx_discards", eth_stats.rx_discards),
-	ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
-	ICE_VSI_STAT("tx_linearize", tx_linearize),
+	ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+	ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards),
 	ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
 	ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
 	ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+	ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
+	ICE_VSI_STAT("tx_linearize", tx_linearize),
+};
+
+enum ice_ethtool_test_id {
+	ICE_ETH_TEST_REG = 0,
+	ICE_ETH_TEST_EEPROM,
+	ICE_ETH_TEST_INTR,
+	ICE_ETH_TEST_LOOP,
+	ICE_ETH_TEST_LINK,
 };
 
+static const char ice_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)",
+	"EEPROM test    (offline)",
+	"Interrupt test (offline)",
+	"Loopback test  (offline)",
+	"Link test   (on/offline)",
+};
+
+#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN)
+
 /* These PF_STATs might look like duplicates of some NETDEV_STATs,
  * but they aren't. This device is capable of supporting multiple
  * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
@@ -71,45 +89,45 @@ static const struct ice_stats ice_gstrings_vsi_stats[] = {
  * is queried on the base PF netdev.
  */
 static const struct ice_stats ice_gstrings_pf_stats[] = {
-	ICE_PF_STAT("port.tx_bytes", stats.eth.tx_bytes),
-	ICE_PF_STAT("port.rx_bytes", stats.eth.rx_bytes),
-	ICE_PF_STAT("port.tx_unicast", stats.eth.tx_unicast),
-	ICE_PF_STAT("port.rx_unicast", stats.eth.rx_unicast),
-	ICE_PF_STAT("port.tx_multicast", stats.eth.tx_multicast),
-	ICE_PF_STAT("port.rx_multicast", stats.eth.rx_multicast),
-	ICE_PF_STAT("port.tx_broadcast", stats.eth.tx_broadcast),
-	ICE_PF_STAT("port.rx_broadcast", stats.eth.rx_broadcast),
-	ICE_PF_STAT("port.tx_errors", stats.eth.tx_errors),
-	ICE_PF_STAT("port.tx_size_64", stats.tx_size_64),
-	ICE_PF_STAT("port.rx_size_64", stats.rx_size_64),
-	ICE_PF_STAT("port.tx_size_127", stats.tx_size_127),
-	ICE_PF_STAT("port.rx_size_127", stats.rx_size_127),
-	ICE_PF_STAT("port.tx_size_255", stats.tx_size_255),
-	ICE_PF_STAT("port.rx_size_255", stats.rx_size_255),
-	ICE_PF_STAT("port.tx_size_511", stats.tx_size_511),
-	ICE_PF_STAT("port.rx_size_511", stats.rx_size_511),
-	ICE_PF_STAT("port.tx_size_1023", stats.tx_size_1023),
-	ICE_PF_STAT("port.rx_size_1023", stats.rx_size_1023),
-	ICE_PF_STAT("port.tx_size_1522", stats.tx_size_1522),
-	ICE_PF_STAT("port.rx_size_1522", stats.rx_size_1522),
-	ICE_PF_STAT("port.tx_size_big", stats.tx_size_big),
-	ICE_PF_STAT("port.rx_size_big", stats.rx_size_big),
-	ICE_PF_STAT("port.link_xon_tx", stats.link_xon_tx),
-	ICE_PF_STAT("port.link_xon_rx", stats.link_xon_rx),
-	ICE_PF_STAT("port.link_xoff_tx", stats.link_xoff_tx),
-	ICE_PF_STAT("port.link_xoff_rx", stats.link_xoff_rx),
-	ICE_PF_STAT("port.tx_dropped_link_down", stats.tx_dropped_link_down),
-	ICE_PF_STAT("port.rx_undersize", stats.rx_undersize),
-	ICE_PF_STAT("port.rx_fragments", stats.rx_fragments),
-	ICE_PF_STAT("port.rx_oversize", stats.rx_oversize),
-	ICE_PF_STAT("port.rx_jabber", stats.rx_jabber),
-	ICE_PF_STAT("port.rx_csum_bad", hw_csum_rx_error),
-	ICE_PF_STAT("port.rx_length_errors", stats.rx_len_errors),
-	ICE_PF_STAT("port.rx_dropped", stats.eth.rx_discards),
-	ICE_PF_STAT("port.rx_crc_errors", stats.crc_errors),
-	ICE_PF_STAT("port.illegal_bytes", stats.illegal_bytes),
-	ICE_PF_STAT("port.mac_local_faults", stats.mac_local_faults),
-	ICE_PF_STAT("port.mac_remote_faults", stats.mac_remote_faults),
+	ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes),
+	ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes),
+	ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast),
+	ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast),
+	ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast),
+	ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast),
+	ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast),
+	ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast),
+	ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors),
+	ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64),
+	ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64),
+	ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127),
+	ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127),
+	ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255),
+	ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255),
+	ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511),
+	ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511),
+	ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023),
+	ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023),
+	ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522),
+	ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522),
+	ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big),
+	ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big),
+	ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx),
+	ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx),
+	ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx),
+	ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx),
+	ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down),
+	ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize),
+	ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments),
+	ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize),
+	ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber),
+	ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error),
+	ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors),
+	ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards),
+	ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors),
+	ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
+	ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults),
+	ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
 };
 
 static const u32 ice_regs_dump_list[] = {
@@ -120,6 +138,9 @@ static const u32 ice_regs_dump_list[] = {
 	QINT_RQCTL(0),
 	PFINT_OICR_ENA,
 	QRX_ITR(0),
+	PF0INT_ITR_0(0),
+	PF0INT_ITR_1(0),
+	PF0INT_ITR_2(0),
 };
 
 struct ice_priv_flag {
@@ -134,7 +155,7 @@ struct ice_priv_flag {
 
 static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
 	ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
-	ICE_PRIV_FLAG("disable-fw-lldp", ICE_FLAG_DISABLE_FW_LLDP),
+	ICE_PRIV_FLAG("enable-fw-lldp", ICE_FLAG_ENABLE_FW_LLDP),
 };
 
 #define ICE_PRIV_FLAG_ARRAY_SIZE	ARRAY_SIZE(ice_gstrings_priv_flags)
@@ -278,6 +299,571 @@ out:
 	return ret;
 }
 
+/**
+ * ice_active_vfs - check if there are any active VFs
+ * @pf: board private structure
+ *
+ * Returns true if an active VF is found, otherwise returns false
+ */
+static bool ice_active_vfs(struct ice_pf *pf)
+{
+	struct ice_vf *vf = pf->vf;
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++, vf++)
+		if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			return true;
+	return false;
+}
+
+/**
+ * ice_link_test - perform a link test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_link_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	enum ice_status status;
+	bool link_up = false;
+
+	netdev_info(netdev, "link test\n");
+	status = ice_get_link_status(np->vsi->port_info, &link_up);
+	if (status) {
+		netdev_err(netdev, "link query error, status = %d\n", status);
+		return 1;
+	}
+
+	if (!link_up)
+		return 2;
+
+	return 0;
+}
+
+/**
+ * ice_eeprom_test - perform an EEPROM test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_eeprom_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	netdev_info(netdev, "EEPROM test\n");
+	return !!(ice_nvm_validate_checksum(&pf->hw));
+}
+
+/**
+ * ice_reg_pattern_test
+ * @hw: pointer to the HW struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ */
+static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
+{
+	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	static const u32 patterns[] = {
+		0x5A5A5A5A, 0xA5A5A5A5,
+		0x00000000, 0xFFFFFFFF
+	};
+	u32 val, orig_val;
+	int i;
+
+	orig_val = rd32(hw, reg);
+	for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
+		u32 pattern = patterns[i] & mask;
+
+		wr32(hw, reg, pattern);
+		val = rd32(hw, reg);
+		if (val == pattern)
+			continue;
+		dev_err(&pf->pdev->dev,
+			"%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
+			, __func__, reg, pattern, val);
+		return 1;
+	}
+
+	wr32(hw, reg, orig_val);
+	val = rd32(hw, reg);
+	if (val != orig_val) {
+		dev_err(&pf->pdev->dev,
+			"%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
+			, __func__, reg, orig_val, val);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reg_test - perform a register test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_reg_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_hw *hw = np->vsi->port_info->hw;
+	u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
+		hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
+	struct ice_diag_reg_test_info {
+		u32 address;
+		u32 mask;
+		u32 elem_num;
+		u32 elem_size;
+	} ice_reg_list[] = {
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+			GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
+		{GLINT_ITR(1, 0), 0x00000fff, int_elements,
+			GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+			GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
+		{GLINT_CTL, 0xffff0001, 1, 0}
+	};
+	int i;
+
+	netdev_dbg(netdev, "Register test\n");
+	for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
+		u32 j;
+
+		for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
+			u32 mask = ice_reg_list[i].mask;
+			u32 reg = ice_reg_list[i].address +
+				(j * ice_reg_list[i].elem_size);
+
+			/* bail on failure (non-zero return) */
+			if (ice_reg_pattern_test(hw, reg, mask))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_prepare_rings - configure Tx/Rx test rings
+ * @vsi: pointer to the VSI structure
+ *
+ * Function configures rings of a VSI for loopback test without
+ * enabling interrupts or informing the kernel about new queues.
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_prepare_rings(struct ice_vsi *vsi)
+{
+	int status;
+
+	status = ice_vsi_setup_tx_rings(vsi);
+	if (status)
+		goto err_setup_tx_ring;
+
+	status = ice_vsi_setup_rx_rings(vsi);
+	if (status)
+		goto err_setup_rx_ring;
+
+	status = ice_vsi_cfg(vsi);
+	if (status)
+		goto err_setup_rx_ring;
+
+	status = ice_vsi_start_rx_rings(vsi);
+	if (status)
+		goto err_start_rx_ring;
+
+	return status;
+
+err_start_rx_ring:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_rx_ring:
+	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+err_setup_tx_ring:
+	ice_vsi_free_tx_rings(vsi);
+
+	return status;
+}
+
+/**
+ * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test
+ * @vsi: pointer to the VSI structure
+ *
+ * Function stops and frees VSI rings after a loopback test.
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_disable_rings(struct ice_vsi *vsi)
+{
+	int status;
+
+	status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+	if (status)
+		netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n",
+			   vsi->vsi_num, status);
+
+	status = ice_vsi_stop_rx_rings(vsi);
+	if (status)
+		netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n",
+			   vsi->vsi_num, status);
+
+	ice_vsi_free_tx_rings(vsi);
+	ice_vsi_free_rx_rings(vsi);
+
+	return status;
+}
+
+/**
+ * ice_lbtest_create_frame - create test packet
+ * @pf: pointer to the PF structure
+ * @ret_data: allocated frame buffer
+ * @size: size of the packet data
+ *
+ * Function allocates a frame with a test pattern on specific offsets.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
+{
+	u8 *data;
+
+	if (!pf)
+		return -EINVAL;
+
+	data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* Since the ethernet test frame should always be at least
+	 * 64 bytes long, fill some octets in the payload with test data.
+	 */
+	memset(data, 0xFF, size);
+	data[32] = 0xDE;
+	data[42] = 0xAD;
+	data[44] = 0xBE;
+	data[46] = 0xEF;
+
+	*ret_data = data;
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_check_frame - verify received loopback frame
+ * @frame: pointer to the raw packet data
+ *
+ * Function verifies received test frame with a pattern.
+ * Returns true if frame matches the pattern, false otherwise.
+ */
+static bool ice_lbtest_check_frame(u8 *frame)
+{
+	/* Validate bytes of a frame under offsets chosen earlier */
+	if (frame[32] == 0xDE &&
+	    frame[42] == 0xAD &&
+	    frame[44] == 0xBE &&
+	    frame[46] == 0xEF &&
+	    frame[48] == 0xFF)
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_diag_send - send test frames to the test ring
+ * @tx_ring: pointer to the transmit ring
+ * @data: pointer to the raw packet data
+ * @size: size of the packet to send
+ *
+ * Function sends loopback packets on a test Tx ring.
+ */
+static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size)
+{
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+	u64 td_cmd;
+
+	tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
+	tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(tx_ring->dev, dma))
+		return -EINVAL;
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+
+	/* These flags are required for a descriptor to be pushed out */
+	td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+	tx_desc->cmd_type_offset_bsz =
+		cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			    (td_cmd << ICE_TXD_QW1_CMD_S) |
+			    ((u64)0 << ICE_TXD_QW1_OFFSET_S) |
+			    ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			    ((u64)0 << ICE_TXD_QW1_L2TAG1_S));
+
+	tx_buf->next_to_watch = tx_desc;
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	tx_ring->next_to_use++;
+	if (tx_ring->next_to_use >= tx_ring->count)
+		tx_ring->next_to_use = 0;
+
+	writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+	/* Wait until the packets get transmitted to the receive queue. */
+	usleep_range(1000, 2000);
+	dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+#define ICE_LB_FRAME_SIZE 64
+/**
+ * ice_lbtest_receive_frames - receive and verify test frames
+ * @rx_ring: pointer to the receive ring
+ *
+ * Function receives loopback packets and verify their correctness.
+ * Returns number of received valid frames.
+ */
+static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+{
+	struct ice_rx_buf *rx_buf;
+	int valid_frames, i;
+	u8 *received_buf;
+
+	valid_frames = 0;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		union ice_32b_rx_flex_desc *rx_desc;
+
+		rx_desc = ICE_RX_DESC(rx_ring, i);
+
+		if (!(rx_desc->wb.status_error0 &
+		    cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
+			continue;
+
+		rx_buf = &rx_ring->rx_buf[i];
+		received_buf = page_address(rx_buf->page);
+
+		if (ice_lbtest_check_frame(received_buf))
+			valid_frames++;
+	}
+
+	return valid_frames;
+}
+
+/**
+ * ice_loopback_test - perform a loopback test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_loopback_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
+	struct ice_pf *pf = orig_vsi->back;
+	struct ice_ring *tx_ring, *rx_ring;
+	u8 broadcast[ETH_ALEN], ret = 0;
+	int num_frames, valid_frames;
+	LIST_HEAD(tmp_list);
+	u8 *tx_frame;
+	int i;
+
+	netdev_info(netdev, "loopback test\n");
+
+	test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
+	if (!test_vsi) {
+		netdev_err(netdev, "Failed to create a VSI for the loopback test");
+		return 1;
+	}
+
+	test_vsi->netdev = netdev;
+	tx_ring = test_vsi->tx_rings[0];
+	rx_ring = test_vsi->rx_rings[0];
+
+	if (ice_lbtest_prepare_rings(test_vsi)) {
+		ret = 2;
+		goto lbtest_vsi_close;
+	}
+
+	if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
+		ret = 3;
+		goto lbtest_rings_dis;
+	}
+
+	/* Enable MAC loopback in firmware */
+	if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
+		ret = 4;
+		goto lbtest_mac_dis;
+	}
+
+	/* Test VSI needs to receive broadcast packets */
+	eth_broadcast_addr(broadcast);
+	if (ice_add_mac_to_list(test_vsi, &tmp_list, broadcast)) {
+		ret = 5;
+		goto lbtest_mac_dis;
+	}
+
+	if (ice_add_mac(&pf->hw, &tmp_list)) {
+		ret = 6;
+		goto free_mac_list;
+	}
+
+	if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
+		ret = 7;
+		goto remove_mac_filters;
+	}
+
+	num_frames = min_t(int, tx_ring->count, 32);
+	for (i = 0; i < num_frames; i++) {
+		if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
+			ret = 8;
+			goto lbtest_free_frame;
+		}
+	}
+
+	valid_frames = ice_lbtest_receive_frames(rx_ring);
+	if (!valid_frames)
+		ret = 9;
+	else if (valid_frames != num_frames)
+		ret = 10;
+
+lbtest_free_frame:
+	devm_kfree(&pf->pdev->dev, tx_frame);
+remove_mac_filters:
+	if (ice_remove_mac(&pf->hw, &tmp_list))
+		netdev_err(netdev, "Could not remove MAC filter for the test VSI");
+free_mac_list:
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_list);
+lbtest_mac_dis:
+	/* Disable MAC loopback after the test is completed. */
+	if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
+		netdev_err(netdev, "Could not disable MAC loopback\n");
+lbtest_rings_dis:
+	if (ice_lbtest_disable_rings(test_vsi))
+		netdev_err(netdev, "Could not disable test rings\n");
+lbtest_vsi_close:
+	test_vsi->netdev = NULL;
+	if (ice_vsi_release(test_vsi))
+		netdev_err(netdev, "Failed to remove the test VSI");
+
+	return ret;
+}
+
+/**
+ * ice_intr_test - perform an interrupt test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_intr_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	u16 swic_old = pf->sw_int_count;
+
+	netdev_info(netdev, "interrupt test\n");
+
+	wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M |
+	     GLINT_DYN_CTL_INTENA_MSK_M |
+	     GLINT_DYN_CTL_SWINT_TRIG_M);
+
+	usleep_range(1000, 2000);
+	return (swic_old == pf->sw_int_count);
+}
+
+/**
+ * ice_self_test - handler function for performing a self-test by ethtool
+ * @netdev: network interface device structure
+ * @eth_test: ethtool_test structure
+ * @data: required by ethtool.self_test
+ *
+ * This function is called after invoking 'ethtool -t devname' command where
+ * devname is the name of the network device on which ethtool should operate.
+ * It performs a set of self-tests to check if a device works properly.
+ */
+static void
+ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
+	      u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		netdev_info(netdev, "offline testing starting\n");
+
+		set_bit(__ICE_TESTING, pf->state);
+
+		if (ice_active_vfs(pf)) {
+			dev_warn(&pf->pdev->dev,
+				 "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+			data[ICE_ETH_TEST_REG] = 1;
+			data[ICE_ETH_TEST_EEPROM] = 1;
+			data[ICE_ETH_TEST_INTR] = 1;
+			data[ICE_ETH_TEST_LOOP] = 1;
+			data[ICE_ETH_TEST_LINK] = 1;
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+			clear_bit(__ICE_TESTING, pf->state);
+			goto skip_ol_tests;
+		}
+		/* If the device is online then take it offline */
+		if (if_running)
+			/* indicate we're in test mode */
+			ice_stop(netdev);
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
+		data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
+		data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
+		data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
+
+		if (data[ICE_ETH_TEST_LINK] ||
+		    data[ICE_ETH_TEST_EEPROM] ||
+		    data[ICE_ETH_TEST_LOOP] ||
+		    data[ICE_ETH_TEST_INTR] ||
+		    data[ICE_ETH_TEST_REG])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		clear_bit(__ICE_TESTING, pf->state);
+
+		if (if_running) {
+			int status = ice_open(netdev);
+
+			if (status) {
+				dev_err(&pf->pdev->dev,
+					"Could not open device %s, err %d",
+					pf->int_name, status);
+			}
+		}
+	} else {
+		/* Online tests */
+		netdev_info(netdev, "online testing starting\n");
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		if (data[ICE_ETH_TEST_LINK])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Offline only tests, not run in online; pass by default */
+		data[ICE_ETH_TEST_REG] = 0;
+		data[ICE_ETH_TEST_EEPROM] = 0;
+		data[ICE_ETH_TEST_INTR] = 0;
+		data[ICE_ETH_TEST_LOOP] = 0;
+	}
+
+skip_ol_tests:
+	netdev_info(netdev, "testing finished\n");
+}
+
 static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
@@ -295,17 +881,17 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
 		ice_for_each_alloc_txq(vsi, i) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "tx-queue-%u.tx_packets", i);
+				 "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 
 		ice_for_each_alloc_rxq(vsi, i) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "rx-queue-%u.rx_packets", i);
+				 "rx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 
@@ -320,21 +906,24 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx-priority-%u-xon", i);
+				 "tx_priority_%u_xon.nic", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx-priority-%u-xoff", i);
+				 "tx_priority_%u_xoff.nic", i);
 			p += ETH_GSTRING_LEN;
 		}
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx-priority-%u-xon", i);
+				 "rx_priority_%u_xon.nic", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx-priority-%u-xoff", i);
+				 "rx_priority_%u_xoff.nic", i);
 			p += ETH_GSTRING_LEN;
 		}
 		break;
+	case ETH_SS_TEST:
+		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
+		break;
 	case ETH_SS_PRIV_FLAGS:
 		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
 			snprintf(p, ETH_GSTRING_LEN, "%s",
@@ -371,6 +960,185 @@ ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
 }
 
 /**
+ * ice_set_fec_cfg - Set link FEC options
+ * @netdev: network interface device structure
+ * @req_fec: FEC mode to configure
+ */
+static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_set_phy_cfg_data config = { 0 };
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_vsi *vsi = np->vsi;
+	u8 sw_cfg_caps, sw_cfg_fec;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	int err = 0;
+
+	pi = vsi->port_info;
+	if (!pi)
+		return -EOPNOTSUPP;
+
+	/* Changing the FEC parameters is not supported if not the PF VSI */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Get last SW configuration */
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Copy SW configuration returned from PHY caps to PHY config */
+	ice_copy_phy_caps_to_cfg(caps, &config);
+	sw_cfg_caps = caps->caps;
+	sw_cfg_fec = caps->link_fec_options;
+
+	/* Get toloplogy caps, then copy PHY FEC topoloy caps to PHY config */
+	memset(caps, 0, sizeof(*caps));
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	config.caps |= (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC);
+	config.link_fec_opt = caps->link_fec_options;
+
+	ice_cfg_phy_fec(&config, req_fec);
+
+	/* If FEC mode has changed, then set PHY configuration and enable AN. */
+	if ((config.caps & ICE_AQ_PHY_ENA_AUTO_FEC) !=
+	    (sw_cfg_caps & ICE_AQC_PHY_EN_AUTO_FEC) ||
+	    config.link_fec_opt != sw_cfg_fec) {
+		if (caps->caps & ICE_AQC_PHY_AN_MODE)
+			config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+		status = ice_aq_set_phy_cfg(pi->hw, pi->lport, &config, NULL);
+
+		if (status)
+			err = -EAGAIN;
+	}
+
+done:
+	devm_kfree(&vsi->back->pdev->dev, caps);
+	return err;
+}
+
+/**
+ * ice_set_fecparam - Set FEC link options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	enum ice_fec_mode fec;
+
+	switch (fecparam->fec) {
+	case ETHTOOL_FEC_AUTO:
+		fec = ICE_FEC_AUTO;
+		break;
+	case ETHTOOL_FEC_RS:
+		fec = ICE_FEC_RS;
+		break;
+	case ETHTOOL_FEC_BASER:
+		fec = ICE_FEC_BASER;
+		break;
+	case ETHTOOL_FEC_OFF:
+	case ETHTOOL_FEC_NONE:
+		fec = ICE_FEC_NONE;
+		break;
+	default:
+		dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n",
+			 fecparam->fec);
+		return -EINVAL;
+	}
+
+	return ice_set_fec_cfg(netdev, fec);
+}
+
+/**
+ * ice_get_fecparam - Get link FEC options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_link_status *link_info;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	int err = 0;
+
+	pi = vsi->port_info;
+
+	if (!pi)
+		return -EOPNOTSUPP;
+	link_info = &pi->phy.link_info;
+
+	/* Set FEC mode based on negotiated link info */
+	switch (link_info->fec_info) {
+	case ICE_AQ_LINK_25G_KR_FEC_EN:
+		fecparam->active_fec = ETHTOOL_FEC_BASER;
+		break;
+	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+		/* fall through */
+	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+		fecparam->active_fec = ETHTOOL_FEC_RS;
+		break;
+	default:
+		fecparam->active_fec = ETHTOOL_FEC_OFF;
+		break;
+	}
+
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Set supported/configured FEC modes based on PHY capability */
+	if (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC)
+		fecparam->fec |= ETHTOOL_FEC_AUTO;
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		fecparam->fec |= ETHTOOL_FEC_BASER;
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+		fecparam->fec |= ETHTOOL_FEC_RS;
+	if (caps->link_fec_options == 0)
+		fecparam->fec |= ETHTOOL_FEC_OFF;
+
+done:
+	devm_kfree(&vsi->back->pdev->dev, caps);
+	return err;
+}
+
+/**
  * ice_get_priv_flags - report device private flags
  * @netdev: network interface device structure
  *
@@ -433,10 +1201,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 
 	bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);
 
-	if (test_bit(ICE_FLAG_DISABLE_FW_LLDP, change_flags)) {
-		if (test_bit(ICE_FLAG_DISABLE_FW_LLDP, pf->flags)) {
+	if (test_bit(ICE_FLAG_ENABLE_FW_LLDP, change_flags)) {
+		if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) {
 			enum ice_status status;
 
+			/* Disable FW LLDP engine */
 			status = ice_aq_cfg_lldp_mib_change(&pf->hw, false,
 							    NULL);
 			/* If unregistering for LLDP events fails, this is
@@ -450,7 +1219,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			/* The AQ call to stop the FW LLDP agent will generate
 			 * an error if the agent is already stopped.
 			 */
-			status = ice_aq_stop_lldp(&pf->hw, true, NULL);
+			status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
 			if (status)
 				dev_warn(&pf->pdev->dev,
 					 "Fail to stop LLDP agent\n");
@@ -458,9 +1227,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			 * will likely not need DCB, so failure to init is
 			 * not a concern of ethtool
 			 */
-			status = ice_init_pf_dcb(pf);
+			status = ice_init_pf_dcb(pf, true);
 			if (status)
 				dev_warn(&pf->pdev->dev, "Fail to init DCB\n");
+
+			/* Forward LLDP packets to default VSI so that they
+			 * are passed up the stack
+			 */
+			ice_cfg_sw_lldp(vsi, false, true);
 		} else {
 			enum ice_status status;
 			bool dcbx_agent_status;
@@ -468,12 +1242,12 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			/* AQ command to start FW LLDP agent will return an
 			 * error if the agent is already started
 			 */
-			status = ice_aq_start_lldp(&pf->hw, NULL);
+			status = ice_aq_start_lldp(&pf->hw, true, NULL);
 			if (status)
 				dev_warn(&pf->pdev->dev,
 					 "Fail to start LLDP Agent\n");
 
-			/* AQ command to start FW DCBx agent will fail if
+			/* AQ command to start FW DCBX agent will fail if
 			 * the agent is already started
 			 */
 			status = ice_aq_start_stop_dcbx(&pf->hw, true,
@@ -491,15 +1265,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			 * registration/init failed but do not return error
 			 * state to ethtool
 			 */
-			status = ice_aq_cfg_lldp_mib_change(&pf->hw, false,
-							    NULL);
-			if (status)
-				dev_dbg(&pf->pdev->dev,
-					"Fail to reg for MIB change\n");
-
-			status = ice_init_pf_dcb(pf);
+			status = ice_init_pf_dcb(pf, true);
 			if (status)
 				dev_dbg(&pf->pdev->dev, "Fail to init DCB\n");
+
+			/* Remove rule to direct LLDP packets to default VSI.
+			 * The FW LLDP engine will now be consuming them.
+			 */
+			ice_cfg_sw_lldp(vsi, false, false);
 		}
 	}
 	clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
@@ -529,6 +1302,8 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
 		 * not safe.
 		 */
 		return ICE_ALL_STATS_LEN(netdev);
+	case ETH_SS_TEST:
+		return ICE_TEST_LEN;
 	case ETH_SS_PRIV_FLAGS:
 		return ICE_PRIV_FLAG_ARRAY_SIZE;
 	default:
@@ -628,7 +1403,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_100M_SGMII) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     100baseT_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     100baseT_Full);
 	}
@@ -636,14 +1412,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_1G_SGMII) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     1000baseT_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     1000baseT_Full);
 	}
 	if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     1000baseKX_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     1000baseKX_Full);
 	}
@@ -651,14 +1429,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_LX) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     1000baseX_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     1000baseX_Full);
 	}
 	if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     2500baseT_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     2500baseT_Full);
 	}
@@ -666,7 +1446,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     2500baseX_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     2500baseX_Full);
 	}
@@ -674,7 +1455,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     5000baseT_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     5000baseT_Full);
 	}
@@ -684,28 +1466,32 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_C2C) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseT_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     10000baseT_Full);
 	}
 	if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseKR_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     10000baseKR_Full);
 	}
 	if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_SR) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseSR_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     10000baseSR_Full);
 	}
 	if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_LR) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseLR_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     10000baseLR_Full);
 	}
@@ -717,7 +1503,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_C2C) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     25000baseCR_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     25000baseCR_Full);
 	}
@@ -725,7 +1512,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_LR) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     25000baseSR_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     25000baseSR_Full);
 	}
@@ -734,14 +1522,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     25000baseKR_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     25000baseKR_Full);
 	}
 	if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseKR4_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     40000baseKR4_Full);
 	}
@@ -750,21 +1540,24 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseCR4_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     40000baseCR4_Full);
 	}
 	if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_SR4) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseSR4_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     40000baseSR4_Full);
 	}
 	if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_LR4) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     40000baseLR4_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     40000baseLR4_Full);
 	}
@@ -779,7 +1572,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     50000baseCR2_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     50000baseCR2_Full);
 	}
@@ -787,7 +1581,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     50000baseKR2_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     50000baseKR2_Full);
 	}
@@ -797,7 +1592,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     50000baseSR2_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
 			ethtool_link_ksettings_add_link_mode(ks, advertising,
 							     50000baseSR2_Full);
 	}
@@ -814,7 +1610,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     100000baseCR4_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
 			need_add_adv_mode = true;
 	}
 	if (need_add_adv_mode) {
@@ -826,7 +1623,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     100000baseSR4_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
 			need_add_adv_mode = true;
 	}
 	if (need_add_adv_mode) {
@@ -838,7 +1636,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     100000baseLR4_ER4_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
 			need_add_adv_mode = true;
 	}
 	if (need_add_adv_mode) {
@@ -851,7 +1650,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	    phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) {
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     100000baseKR4_Full);
-		if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+		if (!hw_link_info->req_speeds ||
+		    hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
 			need_add_adv_mode = true;
 	}
 	if (need_add_adv_mode)
@@ -1275,6 +2075,7 @@ ice_get_link_ksettings(struct net_device *netdev,
 		       struct ethtool_link_ksettings *ks)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *caps;
 	struct ice_link_status *hw_link_info;
 	struct ice_vsi *vsi = np->vsi;
 
@@ -1345,6 +2146,40 @@ ice_get_link_ksettings(struct net_device *netdev,
 		break;
 	}
 
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		goto done;
+
+	if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_TOPO_CAP,
+				caps, NULL))
+		netdev_info(netdev, "Get phy capability failed.\n");
+
+	/* Set supported FEC modes based on PHY capability */
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN)
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+
+	if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_SW_CFG,
+				caps, NULL))
+		netdev_info(netdev, "Get phy capability failed.\n");
+
+	/* Set advertised FEC modes based on PHY capability */
+	ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE);
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_BASER);
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+
+done:
+	devm_kfree(&vsi->back->pdev->dev, caps);
 	return 0;
 }
 
@@ -2371,8 +3206,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
 
 		if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) {
 			rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
-			wr32(&pf->hw, GLINT_RATE(vsi->hw_base_vector +
-						 rc->ring->q_vector->v_idx),
+			wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx),
 			     ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high,
 						   pf->hw.intrl_gran));
 		}
@@ -2533,6 +3367,7 @@ static const struct ethtool_ops ice_ethtool_ops = {
 	.get_regs               = ice_get_regs,
 	.get_msglevel           = ice_get_msglevel,
 	.set_msglevel           = ice_set_msglevel,
+	.self_test		= ice_self_test,
 	.get_link		= ethtool_op_get_link,
 	.get_eeprom_len		= ice_get_eeprom_len,
 	.get_eeprom		= ice_get_eeprom,
@@ -2557,6 +3392,8 @@ static const struct ethtool_ops ice_ethtool_ops = {
 	.get_ts_info		= ethtool_op_get_ts_info,
 	.get_per_queue_coalesce = ice_get_per_q_coalesce,
 	.set_per_queue_coalesce = ice_set_per_q_coalesce,
+	.get_fecparam		= ice_get_fecparam,
+	.set_fecparam		= ice_set_fecparam,
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index ec25f26069b0..6c5ce05742b1 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -6,6 +6,9 @@
 #ifndef _ICE_HW_AUTOGEN_H_
 #define _ICE_HW_AUTOGEN_H_
 
+#define PF0INT_ITR_0(_i)			(0x03000004 + ((_i) * 4096))
+#define PF0INT_ITR_1(_i)			(0x03000008 + ((_i) * 4096))
+#define PF0INT_ITR_2(_i)			(0x0300000C + ((_i) * 4096))
 #define QTX_COMM_DBELL(_DBQM)			(0x002C0000 + ((_DBQM) * 4))
 #define QTX_COMM_HEAD(_DBQM)			(0x000E0000 + ((_DBQM) * 4))
 #define QTX_COMM_HEAD_HEAD_S			0
@@ -155,6 +158,7 @@
 #define PFINT_OICR_HMC_ERR_M			BIT(26)
 #define PFINT_OICR_PE_CRITERR_M			BIT(28)
 #define PFINT_OICR_VFLR_M			BIT(29)
+#define PFINT_OICR_SWINT_M			BIT(31)
 #define PFINT_OICR_CTL				0x0016CA80
 #define PFINT_OICR_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
 #define PFINT_OICR_CTL_ITR_INDX_S		11
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index fbf1eba0cc2a..35c5ff910afa 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -137,6 +137,8 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
 	 * for PF or EMP this field should be set to zero
 	 */
 	switch (vsi->type) {
+	case ICE_VSI_LB:
+		/* fall through */
 	case ICE_VSI_PF:
 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
 		break;
@@ -251,6 +253,10 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 	if (!vsi->rx_rings)
 		goto err_rxrings;
 
+	/* There is no need to allocate q_vectors for a loopback VSI. */
+	if (vsi->type == ICE_VSI_LB)
+		return 0;
+
 	/* allocate memory for q_vector pointers */
 	vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors,
 				      sizeof(*vsi->q_vectors), GFP_KERNEL);
@@ -275,6 +281,8 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
 {
 	switch (vsi->type) {
 	case ICE_VSI_PF:
+		/* fall through */
+	case ICE_VSI_LB:
 		vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC;
 		vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
 		break;
@@ -313,10 +321,14 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
 		vsi->alloc_rxq = vf->num_vf_qs;
 		/* pf->num_vf_msix includes (VF miscellaneous vector +
 		 * data queue interrupts). Since vsi->num_q_vectors is number
-		 * of queues vectors, subtract 1 from the original vector
-		 * count
+		 * of queues vectors, subtract 1 (ICE_NONQ_VECS_VF) from the
+		 * original vector count
 		 */
-		vsi->num_q_vectors = pf->num_vf_msix - 1;
+		vsi->num_q_vectors = pf->num_vf_msix - ICE_NONQ_VECS_VF;
+		break;
+	case ICE_VSI_LB:
+		vsi->alloc_txq = 1;
+		vsi->alloc_rxq = 1;
 		break;
 	default:
 		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
@@ -516,6 +528,10 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
 		if (ice_vsi_alloc_arrays(vsi))
 			goto err_rings;
 		break;
+	case ICE_VSI_LB:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+		break;
 	default:
 		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
 		goto unlock_pf;
@@ -732,6 +748,8 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
 				      BIT(cap->rss_table_entry_width));
 		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
 		break;
+	case ICE_VSI_LB:
+		break;
 	default:
 		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n",
 			 vsi->type);
@@ -924,6 +942,9 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
 		break;
+	case ICE_VSI_LB:
+		dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type);
+		return;
 	default:
 		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
 		return;
@@ -955,6 +976,8 @@ static int ice_vsi_init(struct ice_vsi *vsi)
 
 	ctxt->info = vsi->info;
 	switch (vsi->type) {
+	case ICE_VSI_LB:
+		/* fall through */
 	case ICE_VSI_PF:
 		ctxt->flags = ICE_AQ_VSI_TYPE_PF;
 		break;
@@ -1145,61 +1168,32 @@ err_out:
 static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
-	int num_q_vectors = 0;
+	u16 num_q_vectors;
+
+	/* SRIOV doesn't grab irq_tracker entries for each VSI */
+	if (vsi->type == ICE_VSI_VF)
+		return 0;
 
-	if (vsi->sw_base_vector || vsi->hw_base_vector) {
-		dev_dbg(&pf->pdev->dev, "VSI %d has non-zero HW base vector %d or SW base vector %d\n",
-			vsi->vsi_num, vsi->hw_base_vector, vsi->sw_base_vector);
+	if (vsi->base_vector) {
+		dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
+			vsi->vsi_num, vsi->base_vector);
 		return -EEXIST;
 	}
 
 	if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
 		return -ENOENT;
 
-	switch (vsi->type) {
-	case ICE_VSI_PF:
-		num_q_vectors = vsi->num_q_vectors;
-		/* reserve slots from OS requested IRQs */
-		vsi->sw_base_vector = ice_get_res(pf, pf->sw_irq_tracker,
-						  num_q_vectors, vsi->idx);
-		if (vsi->sw_base_vector < 0) {
-			dev_err(&pf->pdev->dev,
-				"Failed to get tracking for %d SW vectors for VSI %d, err=%d\n",
-				num_q_vectors, vsi->vsi_num,
-				vsi->sw_base_vector);
-			return -ENOENT;
-		}
-		pf->num_avail_sw_msix -= num_q_vectors;
-
-		/* reserve slots from HW interrupts */
-		vsi->hw_base_vector = ice_get_res(pf, pf->hw_irq_tracker,
-						  num_q_vectors, vsi->idx);
-		break;
-	case ICE_VSI_VF:
-		/* take VF misc vector and data vectors into account */
-		num_q_vectors = pf->num_vf_msix;
-		/* For VF VSI, reserve slots only from HW interrupts */
-		vsi->hw_base_vector = ice_get_res(pf, pf->hw_irq_tracker,
-						  num_q_vectors, vsi->idx);
-		break;
-	default:
-		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
-		break;
-	}
-
-	if (vsi->hw_base_vector < 0) {
+	num_q_vectors = vsi->num_q_vectors;
+	/* reserve slots from OS requested IRQs */
+	vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
+				       vsi->idx);
+	if (vsi->base_vector < 0) {
 		dev_err(&pf->pdev->dev,
-			"Failed to get tracking for %d HW vectors for VSI %d, err=%d\n",
-			num_q_vectors, vsi->vsi_num, vsi->hw_base_vector);
-		if (vsi->type != ICE_VSI_VF) {
-			ice_free_res(pf->sw_irq_tracker,
-				     vsi->sw_base_vector, vsi->idx);
-			pf->num_avail_sw_msix += num_q_vectors;
-		}
+			"Failed to get tracking for %d vectors for VSI %d, err=%d\n",
+			num_q_vectors, vsi->vsi_num, vsi->base_vector);
 		return -ENOENT;
 	}
-
-	pf->num_avail_hw_msix -= num_q_vectors;
+	pf->num_avail_sw_msix -= num_q_vectors;
 
 	return 0;
 }
@@ -1842,8 +1836,73 @@ ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
 }
 
 /**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+#else
+static void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+#endif /* CONFIG_PCI_IOV */
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+	val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+#else
+static void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+#endif /* CONFIG_PCI_IOV */
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+	val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+	ice_flush(hw);
+}
+
+/**
  * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
  * @vsi: the VSI being configured
+ *
+ * This configures MSIX mode interrupts for the PF VSI, and should not be used
+ * for the VF VSI.
  */
 void ice_vsi_cfg_msix(struct ice_vsi *vsi)
 {
@@ -1873,43 +1932,17 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi)
 		 * tracked for this PF.
 		 */
 		for (q = 0; q < q_vector->num_ring_tx; q++) {
-			int itr_idx = (q_vector->tx.itr_idx <<
-				       QINT_TQCTL_ITR_INDX_S) &
-				QINT_TQCTL_ITR_INDX_M;
-			u32 val;
-
-			if (vsi->type == ICE_VSI_VF)
-				val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
-				      (((i + 1) << QINT_TQCTL_MSIX_INDX_S) &
-				       QINT_TQCTL_MSIX_INDX_M);
-			else
-				val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
-				      ((reg_idx << QINT_TQCTL_MSIX_INDX_S) &
-				       QINT_TQCTL_MSIX_INDX_M);
-			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+			ice_cfg_txq_interrupt(vsi, txq, reg_idx,
+					      q_vector->tx.itr_idx);
 			txq++;
 		}
 
 		for (q = 0; q < q_vector->num_ring_rx; q++) {
-			int itr_idx = (q_vector->rx.itr_idx <<
-				       QINT_RQCTL_ITR_INDX_S) &
-				QINT_RQCTL_ITR_INDX_M;
-			u32 val;
-
-			if (vsi->type == ICE_VSI_VF)
-				val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
-					(((i + 1) << QINT_RQCTL_MSIX_INDX_S) &
-					 QINT_RQCTL_MSIX_INDX_M);
-			else
-				val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
-					((reg_idx << QINT_RQCTL_MSIX_INDX_S) &
-					 QINT_RQCTL_MSIX_INDX_M);
-			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+			ice_cfg_rxq_interrupt(vsi, rxq, reg_idx,
+					      q_vector->rx.itr_idx);
 			rxq++;
 		}
 	}
-
-	ice_flush(hw);
 }
 
 /**
@@ -2024,6 +2057,19 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
 }
 
 /**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+	     (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+	     GLINT_DYN_CTL_SWINT_TRIG_M |
+	     GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
  * ice_vsi_stop_tx_rings - Disable Tx rings
  * @vsi: the VSI being configured
  * @rst_src: reset source
@@ -2070,8 +2116,9 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 			break;
 
 		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-			if (!rings || !rings[q_idx] ||
-			    !rings[q_idx]->q_vector) {
+			struct ice_q_vector *q_vector;
+
+			if (!rings || !rings[q_idx]) {
 				err = -EINVAL;
 				goto err_out;
 			}
@@ -2091,9 +2138,10 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 			/* trigger a software interrupt for the vector
 			 * associated to the queue to schedule NAPI handler
 			 */
-			wr32(hw, GLINT_DYN_CTL(rings[i]->q_vector->reg_idx),
-			     GLINT_DYN_CTL_SWINT_TRIG_M |
-			     GLINT_DYN_CTL_INTENA_MSK_M);
+			q_vector = rings[i]->q_vector;
+			if (q_vector)
+				ice_trigger_sw_intr(hw, q_vector);
+
 			q_idx++;
 		}
 		status = ice_dis_vsi_txq(vsi->port_info, vsi->idx, tc,
@@ -2234,7 +2282,14 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi)
 			goto clear_reg_idx;
 		}
 
-		q_vector->reg_idx = q_vector->v_idx + vsi->hw_base_vector;
+		if (vsi->type == ICE_VSI_VF) {
+			struct ice_vf *vf = &vsi->back->vf[vsi->vf_id];
+
+			q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector);
+		} else {
+			q_vector->reg_idx =
+				q_vector->v_idx + vsi->base_vector;
+		}
 	}
 
 	return 0;
@@ -2290,6 +2345,56 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
 	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
 }
 
+#define ICE_ETH_P_LLDP	0x88CC
+
+/**
+ * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
+ * @vsi: the VSI being configured
+ * @tx: bool to determine Tx or Rx rule
+ * @create: bool to determine create or remove Rule
+ */
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
+{
+	struct ice_fltr_list_entry *list;
+	struct ice_pf *pf = vsi->back;
+	LIST_HEAD(tmp_add_list);
+	enum ice_status status;
+
+	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+	if (!list)
+		return;
+
+	list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
+	list->fltr_info.vsi_handle = vsi->idx;
+	list->fltr_info.l_data.ethertype_mac.ethertype = ICE_ETH_P_LLDP;
+
+	if (tx) {
+		list->fltr_info.fltr_act = ICE_DROP_PACKET;
+		list->fltr_info.flag = ICE_FLTR_TX;
+		list->fltr_info.src_id = ICE_SRC_ID_VSI;
+	} else {
+		list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+		list->fltr_info.flag = ICE_FLTR_RX;
+		list->fltr_info.src_id = ICE_SRC_ID_LPORT;
+	}
+
+	INIT_LIST_HEAD(&list->list_entry);
+	list_add(&list->list_entry, &tmp_add_list);
+
+	if (create)
+		status = ice_add_eth_mac(&pf->hw, &tmp_add_list);
+	else
+		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
+
+	if (status)
+		dev_err(&pf->pdev->dev,
+			"Fail %s %s LLDP rule on VSI %i error: %d\n",
+			create ? "adding" : "removing", tx ? "TX" : "RX",
+			vsi->vsi_num, status);
+
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+}
+
 /**
  * ice_vsi_setup - Set up a VSI by a given type
  * @pf: board private structure
@@ -2310,6 +2415,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 {
 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
 	struct device *dev = &pf->pdev->dev;
+	enum ice_status status;
 	struct ice_vsi *vsi;
 	int ret, i;
 
@@ -2389,23 +2495,24 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 		if (ret)
 			goto unroll_alloc_q_vector;
 
-		/* Setup Vector base only during VF init phase or when VF asks
-		 * for more vectors than assigned number. In all other cases,
-		 * assign hw_base_vector to the value given earlier.
-		 */
-		if (test_bit(ICE_VF_STATE_CFG_INTR, pf->vf[vf_id].vf_states)) {
-			ret = ice_vsi_setup_vector_base(vsi);
-			if (ret)
-				goto unroll_vector_base;
-		} else {
-			vsi->hw_base_vector = pf->vf[vf_id].first_vector_idx;
-		}
 		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
 		if (ret)
 			goto unroll_vector_base;
 
 		pf->q_left_tx -= vsi->alloc_txq;
 		pf->q_left_rx -= vsi->alloc_rxq;
+
+		/* Do not exit if configuring RSS had an issue, at least
+		 * receive traffic on first queue. Hence no need to capture
+		 * return value
+		 */
+		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+			ice_vsi_cfg_rss_lut_key(vsi);
+		break;
+	case ICE_VSI_LB:
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_vsi_init;
 		break;
 	default:
 		/* clean up the resources and exit */
@@ -2416,12 +2523,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	for (i = 0; i < vsi->tc_cfg.numtc; i++)
 		max_txqs[i] = pf->num_lan_tx;
 
-	ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
-			      max_txqs);
-	if (ret) {
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
 		dev_err(&pf->pdev->dev,
 			"VSI %d failed lan queue config, error %d\n",
-			vsi->vsi_num, ret);
+			vsi->vsi_num, status);
 		goto unroll_vector_base;
 	}
 
@@ -2430,19 +2537,28 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	 * out PAUSE or PFC frames. If enabled, FW can still send FC frames.
 	 * The rule is added once for PF VSI in order to create appropriate
 	 * recipe, since VSI/VSI list is ignored with drop action...
+	 * Also add rules to handle LLDP Tx and Rx packets.  Tx LLDP packets
+	 * need to be dropped so that VFs cannot send LLDP packets to reconfig
+	 * DCB settings in the HW.  Also, if the FW DCBX engine is not running
+	 * then Rx LLDP packets need to be redirected up the stack.
 	 */
-	if (vsi->type == ICE_VSI_PF)
+	if (vsi->type == ICE_VSI_PF) {
 		ice_vsi_add_rem_eth_mac(vsi, true);
 
+		/* Tx LLDP packets */
+		ice_cfg_sw_lldp(vsi, true, true);
+
+		/* Rx LLDP packets */
+		if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags))
+			ice_cfg_sw_lldp(vsi, false, true);
+	}
+
 	return vsi;
 
 unroll_vector_base:
 	/* reclaim SW interrupts back to the common pool */
-	ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx);
+	ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
 	pf->num_avail_sw_msix += vsi->num_q_vectors;
-	/* reclaim HW interrupt back to the common pool */
-	ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, vsi->idx);
-	pf->num_avail_hw_msix += vsi->num_q_vectors;
 unroll_alloc_q_vector:
 	ice_vsi_free_q_vectors(vsi);
 unroll_vsi_init:
@@ -2463,17 +2579,17 @@ unroll_get_qs:
 static void ice_vsi_release_msix(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
-	u16 vector = vsi->hw_base_vector;
 	struct ice_hw *hw = &pf->hw;
 	u32 txq = 0;
 	u32 rxq = 0;
 	int i, q;
 
-	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+	for (i = 0; i < vsi->num_q_vectors; i++) {
 		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+		u16 reg_idx = q_vector->reg_idx;
 
-		wr32(hw, GLINT_ITR(ICE_IDX_ITR0, vector), 0);
-		wr32(hw, GLINT_ITR(ICE_IDX_ITR1, vector), 0);
+		wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0);
+		wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
 		for (q = 0; q < q_vector->num_ring_tx; q++) {
 			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
 			txq++;
@@ -2495,7 +2611,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
 void ice_vsi_free_irq(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
-	int base = vsi->sw_base_vector;
+	int base = vsi->base_vector;
 
 	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
 		int i;
@@ -2591,11 +2707,11 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
 	int count = 0;
 	int i;
 
-	if (!res || index >= res->num_entries)
+	if (!res || index >= res->end)
 		return -EINVAL;
 
 	id |= ICE_RES_VALID_BIT;
-	for (i = index; i < res->num_entries && res->list[i] == id; i++) {
+	for (i = index; i < res->end && res->list[i] == id; i++) {
 		res->list[i] = 0;
 		count++;
 	}
@@ -2613,10 +2729,9 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
  */
 static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
 {
-	int start = res->search_hint;
-	int end = start;
+	int start = 0, end = 0;
 
-	if ((start + needed) > res->num_entries)
+	if (needed > res->end)
 		return -ENOMEM;
 
 	id |= ICE_RES_VALID_BIT;
@@ -2625,7 +2740,7 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
 		/* skip already allocated entries */
 		if (res->list[end++] & ICE_RES_VALID_BIT) {
 			start = end;
-			if ((start + needed) > res->num_entries)
+			if ((start + needed) > res->end)
 				break;
 		}
 
@@ -2636,13 +2751,9 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
 			while (i != end)
 				res->list[i++] = id;
 
-			if (end == res->num_entries)
-				end = 0;
-
-			res->search_hint = end;
 			return start;
 		}
-	} while (1);
+	} while (end < res->end);
 
 	return -ENOMEM;
 }
@@ -2654,16 +2765,11 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
  * @needed: size of the block needed
  * @id: identifier to track owner
  *
- * Returns the base item index of the block, or -ENOMEM for error
- * The search_hint trick and lack of advanced fit-finding only works
- * because we're highly likely to have all the same sized requests.
- * Linear search time and any fragmentation should be minimal.
+ * Returns the base item index of the block, or negative for error
  */
 int
 ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
 {
-	int ret;
-
 	if (!res || !pf)
 		return -EINVAL;
 
@@ -2674,16 +2780,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
 		return -EINVAL;
 	}
 
-	/* search based on search_hint */
-	ret = ice_search_res(res, needed, id);
-
-	if (ret < 0) {
-		/* previous search failed. Reset search hint and try again */
-		res->search_hint = 0;
-		ret = ice_search_res(res, needed, id);
-	}
-
-	return ret;
+	return ice_search_res(res, needed, id);
 }
 
 /**
@@ -2692,7 +2789,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
  */
 void ice_vsi_dis_irq(struct ice_vsi *vsi)
 {
-	int base = vsi->sw_base_vector;
+	int base = vsi->base_vector;
 	struct ice_pf *pf = vsi->back;
 	struct ice_hw *hw = &pf->hw;
 	u32 val;
@@ -2738,6 +2835,21 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi)
 }
 
 /**
+ * ice_napi_del - Remove NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be removed
+ */
+void ice_napi_del(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		netif_napi_del(&vsi->q_vectors[v_idx]->napi);
+}
+
+/**
  * ice_vsi_release - Delete a VSI and free its resources
  * @vsi: the VSI being removed
  *
@@ -2745,60 +2857,61 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi)
  */
 int ice_vsi_release(struct ice_vsi *vsi)
 {
-	struct ice_vf *vf = NULL;
 	struct ice_pf *pf;
 
 	if (!vsi->back)
 		return -ENODEV;
 	pf = vsi->back;
 
-	if (vsi->type == ICE_VSI_VF)
-		vf = &pf->vf[vsi->vf_id];
-	/* do not unregister and free netdevs while driver is in the reset
-	 * recovery pending state. Since reset/rebuild happens through PF
-	 * service task workqueue, its not a good idea to unregister netdev
-	 * that is associated to the PF that is running the work queue items
-	 * currently. This is done to avoid check_flush_dependency() warning
-	 * on this wq
+	/* do not unregister while driver is in the reset recovery pending
+	 * state. Since reset/rebuild happens through PF service task workqueue,
+	 * it's not a good idea to unregister netdev that is associated to the
+	 * PF that is running the work queue items currently. This is done to
+	 * avoid check_flush_dependency() warning on this wq
 	 */
-	if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) {
-		ice_napi_del(vsi);
+	if (vsi->netdev && !ice_is_reset_in_progress(pf->state))
 		unregister_netdev(vsi->netdev);
-		free_netdev(vsi->netdev);
-		vsi->netdev = NULL;
-	}
 
 	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
 		ice_rss_clean(vsi);
 
 	/* Disable VSI and free resources */
-	ice_vsi_dis_irq(vsi);
+	if (vsi->type != ICE_VSI_LB)
+		ice_vsi_dis_irq(vsi);
 	ice_vsi_close(vsi);
 
-	/* reclaim interrupt vectors back to PF */
+	/* SR-IOV determines needed MSIX resources all at once instead of per
+	 * VSI since when VFs are spawned we know how many VFs there are and how
+	 * many interrupts each VF needs. SR-IOV MSIX resources are also
+	 * cleared in the same manner.
+	 */
 	if (vsi->type != ICE_VSI_VF) {
 		/* reclaim SW interrupts back to the common pool */
-		ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx);
+		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
 		pf->num_avail_sw_msix += vsi->num_q_vectors;
-		/* reclaim HW interrupts back to the common pool */
-		ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, vsi->idx);
-		pf->num_avail_hw_msix += vsi->num_q_vectors;
-	} else if (test_bit(ICE_VF_STATE_CFG_INTR, vf->vf_states)) {
-		/* Reclaim VF resources back only while freeing all VFs or
-		 * vector reassignment is requested
-		 */
-		ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx,
-			     vsi->idx);
-		pf->num_avail_hw_msix += pf->num_vf_msix;
 	}
 
-	if (vsi->type == ICE_VSI_PF)
+	if (vsi->type == ICE_VSI_PF) {
 		ice_vsi_add_rem_eth_mac(vsi, false);
+		ice_cfg_sw_lldp(vsi, true, false);
+		/* The Rx rule will only exist to remove if the LLDP FW
+		 * engine is currently stopped
+		 */
+		if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags))
+			ice_cfg_sw_lldp(vsi, false, false);
+	}
 
 	ice_remove_vsi_fltr(&pf->hw, vsi->idx);
 	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
 	ice_vsi_delete(vsi);
 	ice_vsi_free_q_vectors(vsi);
+
+	/* make sure unregister_netdev() was called by checking __ICE_DOWN */
+	if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) {
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+
 	ice_vsi_clear_rings(vsi);
 
 	ice_vsi_put_qs(vsi);
@@ -2825,6 +2938,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 {
 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
 	struct ice_vf *vf = NULL;
+	enum ice_status status;
 	struct ice_pf *pf;
 	int ret, i;
 
@@ -2838,24 +2952,17 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
 	ice_vsi_free_q_vectors(vsi);
 
+	/* SR-IOV determines needed MSIX resources all at once instead of per
+	 * VSI since when VFs are spawned we know how many VFs there are and how
+	 * many interrupts each VF needs. SR-IOV MSIX resources are also
+	 * cleared in the same manner.
+	 */
 	if (vsi->type != ICE_VSI_VF) {
 		/* reclaim SW interrupts back to the common pool */
-		ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx);
+		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
 		pf->num_avail_sw_msix += vsi->num_q_vectors;
-		vsi->sw_base_vector = 0;
-		/* reclaim HW interrupts back to the common pool */
-		ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector,
-			     vsi->idx);
-		pf->num_avail_hw_msix += vsi->num_q_vectors;
-	} else {
-		/* Reclaim VF resources back to the common pool for reset and
-		 * and rebuild, with vector reassignment
-		 */
-		ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx,
-			     vsi->idx);
-		pf->num_avail_hw_msix += pf->num_vf_msix;
+		vsi->base_vector = 0;
 	}
-	vsi->hw_base_vector = 0;
 
 	ice_vsi_clear_rings(vsi);
 	ice_vsi_free_arrays(vsi);
@@ -2881,10 +2988,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 		if (ret)
 			goto err_rings;
 
-		ret = ice_vsi_setup_vector_base(vsi);
-		if (ret)
-			goto err_vectors;
-
 		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
 		if (ret)
 			goto err_vectors;
@@ -2929,12 +3032,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 	for (i = 0; i < vsi->tc_cfg.numtc; i++)
 		max_txqs[i] = pf->num_lan_tx;
 
-	ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
-			      max_txqs);
-	if (ret) {
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
 		dev_err(&pf->pdev->dev,
 			"VSI %d failed lan queue config, error %d\n",
-			vsi->vsi_num, ret);
+			vsi->vsi_num, status);
 		goto err_vectors;
 	}
 	return 0;
@@ -2956,7 +3059,7 @@ err_vsi:
 
 /**
  * ice_is_reset_in_progress - check for a reset in progress
- * @state: pf state field
+ * @state: PF state field
  */
 bool ice_is_reset_in_progress(unsigned long *state)
 {
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index a91d3553cc89..6e43ef03bfc3 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -19,6 +19,14 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
 
 void ice_vsi_cfg_msix(struct ice_vsi *vsi);
 
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+#endif /* CONFIG_PCI_IOV */
+
 int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
 
 int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
@@ -37,6 +45,8 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 
 int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
 
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
+
 void ice_vsi_delete(struct ice_vsi *vsi);
 
 int ice_vsi_clear(struct ice_vsi *vsi);
@@ -49,6 +59,8 @@ struct ice_vsi *
 ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	      enum ice_vsi_type type, u16 vf_id);
 
+void ice_napi_del(struct ice_vsi *vsi);
+
 int ice_vsi_release(struct ice_vsi *vsi);
 
 void ice_vsi_close(struct ice_vsi *vsi);
@@ -64,6 +76,8 @@ bool ice_is_reset_in_progress(unsigned long *state);
 
 void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
 
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+
 void ice_vsi_put_qs(struct ice_vsi *vsi);
 
 #ifdef CONFIG_DCB
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 7843abf4d44d..28ec0d57941d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -61,9 +61,10 @@ static u32 ice_get_tx_pending(struct ice_ring *ring)
 static void ice_check_for_hang_subtask(struct ice_pf *pf)
 {
 	struct ice_vsi *vsi = NULL;
+	struct ice_hw *hw;
 	unsigned int i;
-	u32 v, v_idx;
 	int packets;
+	u32 v;
 
 	ice_for_each_vsi(pf, v)
 		if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
@@ -77,12 +78,12 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
 	if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
 		return;
 
+	hw = &vsi->back->hw;
+
 	for (i = 0; i < vsi->num_txq; i++) {
 		struct ice_ring *tx_ring = vsi->tx_rings[i];
 
 		if (tx_ring && tx_ring->desc) {
-			int itr = ICE_ITR_NONE;
-
 			/* If packet counter has not changed the queue is
 			 * likely stalled, so force an interrupt for this
 			 * queue.
@@ -93,12 +94,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
 			packets = tx_ring->stats.pkts & INT_MAX;
 			if (tx_ring->tx_stats.prev_pkt == packets) {
 				/* Trigger sw interrupt to revive the queue */
-				v_idx = tx_ring->q_vector->v_idx;
-				wr32(&vsi->back->hw,
-				     GLINT_DYN_CTL(vsi->hw_base_vector + v_idx),
-				     (itr << GLINT_DYN_CTL_ITR_INDX_S) |
-				     GLINT_DYN_CTL_SWINT_TRIG_M |
-				     GLINT_DYN_CTL_INTENA_MSK_M);
+				ice_trigger_sw_intr(hw, tx_ring->q_vector);
 				continue;
 			}
 
@@ -113,6 +109,67 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
 }
 
 /**
+ * ice_init_mac_fltr - Set initial MAC filters
+ * @pf: board private structure
+ *
+ * Set initial set of MAC filters for PF VSI; configure filters for permanent
+ * address and broadcast address. If an error is encountered, netdevice will be
+ * unregistered.
+ */
+static int ice_init_mac_fltr(struct ice_pf *pf)
+{
+	LIST_HEAD(tmp_add_list);
+	u8 broadcast[ETH_ALEN];
+	struct ice_vsi *vsi;
+	int status;
+
+	vsi = ice_find_vsi_by_type(pf, ICE_VSI_PF);
+	if (!vsi)
+		return -EINVAL;
+
+	/* To add a MAC filter, first add the MAC to a list and then
+	 * pass the list to ice_add_mac.
+	 */
+
+	 /* Add a unicast MAC filter so the VSI can get its packets */
+	status = ice_add_mac_to_list(vsi, &tmp_add_list,
+				     vsi->port_info->mac.perm_addr);
+	if (status)
+		goto unregister;
+
+	/* VSI needs to receive broadcast traffic, so add the broadcast
+	 * MAC address to the list as well.
+	 */
+	eth_broadcast_addr(broadcast);
+	status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
+	if (status)
+		goto free_mac_list;
+
+	/* Program MAC filters for entries in tmp_add_list */
+	status = ice_add_mac(&pf->hw, &tmp_add_list);
+	if (status)
+		status = -ENOMEM;
+
+free_mac_list:
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+
+unregister:
+	/* We aren't useful with no MAC filters, so unregister if we
+	 * had an error
+	 */
+	if (status && vsi->netdev->reg_state == NETREG_REGISTERED) {
+		dev_err(&pf->pdev->dev,
+			"Could not add MAC filters error %d. Unregistering device\n",
+			status);
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+
+	return status;
+}
+
+/**
  * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
  * @netdev: the net device on which the sync is happening
  * @addr: MAC address to sync
@@ -567,7 +624,11 @@ static void ice_reset_subtask(struct ice_pf *pf)
  */
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 {
+	struct ice_aqc_get_phy_caps_data *caps;
+	enum ice_status status;
+	const char *fec_req;
 	const char *speed;
+	const char *fec;
 	const char *fc;
 
 	if (!vsi)
@@ -584,6 +645,12 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 	}
 
 	switch (vsi->port_info->phy.link_info.link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		speed = "100 G";
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		speed = "50 G";
+		break;
 	case ICE_AQ_LINK_SPEED_40GB:
 		speed = "40 G";
 		break;
@@ -615,13 +682,13 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 
 	switch (vsi->port_info->fc.current_mode) {
 	case ICE_FC_FULL:
-		fc = "RX/TX";
+		fc = "Rx/Tx";
 		break;
 	case ICE_FC_TX_PAUSE:
-		fc = "TX";
+		fc = "Tx";
 		break;
 	case ICE_FC_RX_PAUSE:
-		fc = "RX";
+		fc = "Rx";
 		break;
 	case ICE_FC_NONE:
 		fc = "None";
@@ -631,8 +698,47 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 		break;
 	}
 
-	netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n",
-		    speed, fc);
+	/* Get FEC mode based on negotiated link info */
+	switch (vsi->port_info->phy.link_info.fec_info) {
+	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+		/* fall through */
+	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+		fec = "RS-FEC";
+		break;
+	case ICE_AQ_LINK_25G_KR_FEC_EN:
+		fec = "FC-FEC/BASE-R";
+		break;
+	default:
+		fec = "NONE";
+		break;
+	}
+
+	/* Get FEC mode requested based on PHY caps last SW configuration */
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps) {
+		fec_req = "Unknown";
+		goto done;
+	}
+
+	status = ice_aq_get_phy_caps(vsi->port_info, false,
+				     ICE_AQC_REPORT_SW_CFG, caps, NULL);
+	if (status)
+		netdev_info(vsi->netdev, "Get phy capability failed.\n");
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+		fec_req = "RS-FEC";
+	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+		 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		fec_req = "FC-FEC/BASE-R";
+	else
+		fec_req = "NONE";
+
+	devm_kfree(&vsi->back->pdev->dev, caps);
+
+done:
+	netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Flow Control: %s\n",
+		    speed, fec_req, fec, fc);
 }
 
 /**
@@ -664,7 +770,7 @@ static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
 
 /**
  * ice_link_event - process the link event
- * @pf: pf that the link event is associated with
+ * @pf: PF that the link event is associated with
  * @pi: port_info for the port that the link event is associated with
  * @link_up: true if the physical link is up and false if it is down
  * @link_speed: current link speed received from the link event
@@ -774,7 +880,7 @@ static int ice_init_link_events(struct ice_port_info *pi)
 
 /**
  * ice_handle_link_event - handle link event via ARQ
- * @pf: pf that the link event is associated with
+ * @pf: PF that the link event is associated with
  * @event: event structure containing link status info
  */
 static int
@@ -1161,16 +1267,16 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		}
 	}
 
-	/* see if one of the VFs needs to be reset */
-	for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
+	/* check to see if one of the VFs caused the MDD */
+	for (i = 0; i < pf->num_alloc_vfs; i++) {
 		struct ice_vf *vf = &pf->vf[i];
 
-		mdd_detected = false;
+		bool vf_mdd_detected = false;
 
 		reg = rd32(hw, VP_MDET_TX_PQM(i));
 		if (reg & VP_MDET_TX_PQM_VALID_M) {
 			wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
-			mdd_detected = true;
+			vf_mdd_detected = true;
 			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
 				 i);
 		}
@@ -1178,7 +1284,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		reg = rd32(hw, VP_MDET_TX_TCLAN(i));
 		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
 			wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
-			mdd_detected = true;
+			vf_mdd_detected = true;
 			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
 				 i);
 		}
@@ -1186,7 +1292,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		reg = rd32(hw, VP_MDET_TX_TDPU(i));
 		if (reg & VP_MDET_TX_TDPU_VALID_M) {
 			wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
-			mdd_detected = true;
+			vf_mdd_detected = true;
 			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
 				 i);
 		}
@@ -1194,19 +1300,18 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		reg = rd32(hw, VP_MDET_RX(i));
 		if (reg & VP_MDET_RX_VALID_M) {
 			wr32(hw, VP_MDET_RX(i), 0xFFFF);
-			mdd_detected = true;
+			vf_mdd_detected = true;
 			dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
 				 i);
 		}
 
-		if (mdd_detected) {
+		if (vf_mdd_detected) {
 			vf->num_mdd_events++;
-			dev_info(&pf->pdev->dev,
-				 "Use PF Control I/F to re-enable the VF\n");
-			set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+			if (vf->num_mdd_events > 1)
+				dev_info(&pf->pdev->dev, "VF %d has had %llu MDD events since last boot\n",
+					 i, vf->num_mdd_events);
 		}
 	}
-
 }
 
 /**
@@ -1327,7 +1432,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 {
 	int q_vectors = vsi->num_q_vectors;
 	struct ice_pf *pf = vsi->back;
-	int base = vsi->sw_base_vector;
+	int base = vsi->base_vector;
 	int rx_int_idx = 0;
 	int tx_int_idx = 0;
 	int vector, err;
@@ -1408,7 +1513,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf)
 	wr32(hw, PFINT_OICR_ENA, val);
 
 	/* SW_ITR_IDX = 0, but don't change INTENA */
-	wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx),
+	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
 	     GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
 }
 
@@ -1430,6 +1535,11 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 	oicr = rd32(hw, PFINT_OICR);
 	ena_mask = rd32(hw, PFINT_OICR_ENA);
 
+	if (oicr & PFINT_OICR_SWINT_M) {
+		ena_mask &= ~PFINT_OICR_SWINT_M;
+		pf->sw_int_count++;
+	}
+
 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
 		ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
 		set_bit(__ICE_MDD_EVENT_PENDING, pf->state);
@@ -1556,15 +1666,13 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf)
 	ice_flush(hw);
 
 	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) {
-		synchronize_irq(pf->msix_entries[pf->sw_oicr_idx].vector);
+		synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
 		devm_free_irq(&pf->pdev->dev,
-			      pf->msix_entries[pf->sw_oicr_idx].vector, pf);
+			      pf->msix_entries[pf->oicr_idx].vector, pf);
 	}
 
 	pf->num_avail_sw_msix += 1;
-	ice_free_res(pf->sw_irq_tracker, pf->sw_oicr_idx, ICE_RES_MISC_VEC_ID);
-	pf->num_avail_hw_msix += 1;
-	ice_free_res(pf->hw_irq_tracker, pf->hw_oicr_idx, ICE_RES_MISC_VEC_ID);
+	ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
 }
 
 /**
@@ -1618,43 +1726,31 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 	if (ice_is_reset_in_progress(pf->state))
 		goto skip_req_irq;
 
-	/* reserve one vector in sw_irq_tracker for misc interrupts */
-	oicr_idx = ice_get_res(pf, pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+	/* reserve one vector in irq_tracker for misc interrupts */
+	oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
 	if (oicr_idx < 0)
 		return oicr_idx;
 
 	pf->num_avail_sw_msix -= 1;
-	pf->sw_oicr_idx = oicr_idx;
-
-	/* reserve one vector in hw_irq_tracker for misc interrupts */
-	oicr_idx = ice_get_res(pf, pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
-	if (oicr_idx < 0) {
-		ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
-		pf->num_avail_sw_msix += 1;
-		return oicr_idx;
-	}
-	pf->num_avail_hw_msix -= 1;
-	pf->hw_oicr_idx = oicr_idx;
+	pf->oicr_idx = oicr_idx;
 
 	err = devm_request_irq(&pf->pdev->dev,
-			       pf->msix_entries[pf->sw_oicr_idx].vector,
+			       pf->msix_entries[pf->oicr_idx].vector,
 			       ice_misc_intr, 0, pf->int_name, pf);
 	if (err) {
 		dev_err(&pf->pdev->dev,
 			"devm_request_irq for %s failed: %d\n",
 			pf->int_name, err);
-		ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+		ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
 		pf->num_avail_sw_msix += 1;
-		ice_free_res(pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
-		pf->num_avail_hw_msix += 1;
 		return err;
 	}
 
 skip_req_irq:
 	ice_ena_misc_vector(pf);
 
-	ice_ena_ctrlq_interrupts(hw, pf->hw_oicr_idx);
-	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx),
+	ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
+	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
 	     ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
 
 	ice_flush(hw);
@@ -1664,21 +1760,6 @@ skip_req_irq:
 }
 
 /**
- * ice_napi_del - Remove NAPI handler for the VSI
- * @vsi: VSI for which NAPI handler is to be removed
- */
-void ice_napi_del(struct ice_vsi *vsi)
-{
-	int v_idx;
-
-	if (!vsi->netdev)
-		return;
-
-	ice_for_each_q_vector(vsi, v_idx)
-		netif_napi_del(&vsi->q_vectors[v_idx]->napi);
-}
-
-/**
  * ice_napi_add - register NAPI handler for the VSI
  * @vsi: VSI for which NAPI handler is to be registered
  *
@@ -1803,8 +1884,8 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
  * @pf: board private structure
  * @pi: pointer to the port_info instance
  *
- * Returns pointer to the successfully allocated VSI sw struct on success,
- * otherwise returns NULL on failure.
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
  */
 static struct ice_vsi *
 ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
@@ -1813,6 +1894,20 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
 }
 
 /**
+ * ice_lb_vsi_setup - Set up a loopback VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+struct ice_vsi *
+ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID);
+}
+
+/**
  * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
  * @netdev: network interface to be adjusted
  * @proto: unused protocol
@@ -1900,8 +1995,6 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
  */
 static int ice_setup_pf_sw(struct ice_pf *pf)
 {
-	LIST_HEAD(tmp_add_list);
-	u8 broadcast[ETH_ALEN];
 	struct ice_vsi *vsi;
 	int status = 0;
 
@@ -1926,38 +2019,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 	 */
 	ice_napi_add(vsi);
 
-	/* To add a MAC filter, first add the MAC to a list and then
-	 * pass the list to ice_add_mac.
-	 */
-
-	 /* Add a unicast MAC filter so the VSI can get its packets */
-	status = ice_add_mac_to_list(vsi, &tmp_add_list,
-				     vsi->port_info->mac.perm_addr);
+	status = ice_init_mac_fltr(pf);
 	if (status)
 		goto unroll_napi_add;
 
-	/* VSI needs to receive broadcast traffic, so add the broadcast
-	 * MAC address to the list as well.
-	 */
-	eth_broadcast_addr(broadcast);
-	status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
-	if (status)
-		goto free_mac_list;
-
-	/* program MAC filters for entries in tmp_add_list */
-	status = ice_add_mac(&pf->hw, &tmp_add_list);
-	if (status) {
-		dev_err(&pf->pdev->dev, "Could not add MAC filters\n");
-		status = -ENOMEM;
-		goto free_mac_list;
-	}
-
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
 	return status;
 
-free_mac_list:
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
-
 unroll_napi_add:
 	if (vsi) {
 		ice_napi_del(vsi);
@@ -2149,14 +2216,9 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf)
 	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
 		ice_dis_msix(pf);
 
-	if (pf->sw_irq_tracker) {
-		devm_kfree(&pf->pdev->dev, pf->sw_irq_tracker);
-		pf->sw_irq_tracker = NULL;
-	}
-
-	if (pf->hw_irq_tracker) {
-		devm_kfree(&pf->pdev->dev, pf->hw_irq_tracker);
-		pf->hw_irq_tracker = NULL;
+	if (pf->irq_tracker) {
+		devm_kfree(&pf->pdev->dev, pf->irq_tracker);
+		pf->irq_tracker = NULL;
 	}
 }
 
@@ -2166,7 +2228,7 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf)
  */
 static int ice_init_interrupt_scheme(struct ice_pf *pf)
 {
-	int vectors = 0, hw_vectors = 0;
+	int vectors;
 
 	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
 		vectors = ice_ena_msix_range(pf);
@@ -2177,31 +2239,18 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
 		return vectors;
 
 	/* set up vector assignment tracking */
-	pf->sw_irq_tracker =
-		devm_kzalloc(&pf->pdev->dev, sizeof(*pf->sw_irq_tracker) +
+	pf->irq_tracker =
+		devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) +
 			     (sizeof(u16) * vectors), GFP_KERNEL);
-	if (!pf->sw_irq_tracker) {
+	if (!pf->irq_tracker) {
 		ice_dis_msix(pf);
 		return -ENOMEM;
 	}
 
 	/* populate SW interrupts pool with number of OS granted IRQs. */
 	pf->num_avail_sw_msix = vectors;
-	pf->sw_irq_tracker->num_entries = vectors;
-
-	/* set up HW vector assignment tracking */
-	hw_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
-	pf->hw_irq_tracker =
-		devm_kzalloc(&pf->pdev->dev, sizeof(*pf->hw_irq_tracker) +
-			     (sizeof(u16) * hw_vectors), GFP_KERNEL);
-	if (!pf->hw_irq_tracker) {
-		ice_clear_interrupt_scheme(pf);
-		return -ENOMEM;
-	}
-
-	/* populate HW interrupts pool with number of HW supported irqs. */
-	pf->num_avail_hw_msix = hw_vectors;
-	pf->hw_irq_tracker->num_entries = hw_vectors;
+	pf->irq_tracker->num_entries = vectors;
+	pf->irq_tracker->end = pf->irq_tracker->num_entries;
 
 	return 0;
 }
@@ -2252,7 +2301,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	if (!pf)
 		return -ENOMEM;
 
-	/* set up for high or low dma */
+	/* set up for high or low DMA */
 	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
 	if (err)
 		err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
@@ -2302,7 +2351,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	ice_init_pf(pf);
 
-	err = ice_init_pf_dcb(pf);
+	err = ice_init_pf_dcb(pf, false);
 	if (err) {
 		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
 		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
@@ -2368,7 +2417,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	err = ice_setup_pf_sw(pf);
 	if (err) {
-		dev_err(dev, "probe failed due to setup pf switch:%d\n", err);
+		dev_err(dev, "probe failed due to setup PF switch:%d\n", err);
 		goto err_alloc_sw_unroll;
 	}
 
@@ -2625,7 +2674,7 @@ static int __init ice_module_init(void)
 
 	status = pci_register_driver(&ice_driver);
 	if (status) {
-		pr_err("failed to register pci driver, err %d\n", status);
+		pr_err("failed to register PCI driver, err %d\n", status);
 		destroy_workqueue(ice_wq);
 	}
 
@@ -2725,21 +2774,21 @@ free_lists:
 	ice_free_fltr_list(&pf->pdev->dev, &a_mac_list);
 
 	if (err) {
-		netdev_err(netdev, "can't set mac %pM. filter update failed\n",
+		netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
 			   mac);
 		return err;
 	}
 
 	/* change the netdev's MAC address */
 	memcpy(netdev->dev_addr, mac, netdev->addr_len);
-	netdev_dbg(vsi->netdev, "updated mac address to %pM\n",
+	netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
 		   netdev->dev_addr);
 
 	/* write new MAC address to the firmware */
 	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
 	status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
 	if (status) {
-		netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n",
+		netdev_err(netdev, "can't set MAC %pM. write to firmware failed.\n",
 			   mac);
 	}
 	return 0;
@@ -2876,6 +2925,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
 		 (netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
 		ret = ice_vsi_manage_vlan_insertion(vsi);
 
+	if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		ret = ice_cfg_vlan_pruning(vsi, true, false);
+	else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+		 (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		ret = ice_cfg_vlan_pruning(vsi, false, false);
+
 	return ret;
 }
 
@@ -2901,7 +2957,7 @@ static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
  *
  * Return 0 on success and negative value on error
  */
-static int ice_vsi_cfg(struct ice_vsi *vsi)
+int ice_vsi_cfg(struct ice_vsi *vsi)
 {
 	int err;
 
@@ -2933,7 +2989,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi)
 	if (!vsi->netdev)
 		return;
 
-	ice_for_each_q_vector(vsi, q_idx)  {
+	ice_for_each_q_vector(vsi, q_idx) {
 		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
 
 		if (q_vector->rx.ring || q_vector->tx.ring)
@@ -3456,7 +3512,7 @@ int ice_down(struct ice_vsi *vsi)
  *
  * Return 0 on success, negative on failure
  */
-static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
 {
 	int i, err = 0;
 
@@ -3482,7 +3538,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
  *
  * Return 0 on success, negative on failure
  */
-static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
 {
 	int i, err = 0;
 
@@ -3658,7 +3714,7 @@ static int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
 }
 
 /**
- * ice_vsi_rebuild_all - rebuild all VSIs in pf
+ * ice_vsi_rebuild_all - rebuild all VSIs in PF
  * @pf: the PF
  */
 static int ice_vsi_rebuild_all(struct ice_pf *pf)
@@ -3728,7 +3784,7 @@ static int ice_vsi_replay_all(struct ice_pf *pf)
 
 /**
  * ice_rebuild - rebuild after reset
- * @pf: pf to rebuild
+ * @pf: PF to rebuild
  */
 static void ice_rebuild(struct ice_pf *pf)
 {
@@ -3740,7 +3796,7 @@ static void ice_rebuild(struct ice_pf *pf)
 	if (test_bit(__ICE_DOWN, pf->state))
 		goto clear_recovery;
 
-	dev_dbg(dev, "rebuilding pf\n");
+	dev_dbg(dev, "rebuilding PF\n");
 
 	ret = ice_init_all_ctrlq(hw);
 	if (ret) {
@@ -3768,12 +3824,6 @@ static void ice_rebuild(struct ice_pf *pf)
 
 	ice_dcb_rebuild(pf);
 
-	/* reset search_hint of irq_trackers to 0 since interrupts are
-	 * reclaimed and could be allocated from beginning during VSI rebuild
-	 */
-	pf->sw_irq_tracker->search_hint = 0;
-	pf->hw_irq_tracker->search_hint = 0;
-
 	err = ice_vsi_rebuild_all(pf);
 	if (err) {
 		dev_err(dev, "ice_vsi_rebuild_all failed\n");
@@ -3857,16 +3907,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	u8 count = 0;
 
 	if (new_mtu == netdev->mtu) {
-		netdev_warn(netdev, "mtu is already %u\n", netdev->mtu);
+		netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
 		return 0;
 	}
 
 	if (new_mtu < netdev->min_mtu) {
-		netdev_err(netdev, "new mtu invalid. min_mtu is %d\n",
+		netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
 			   netdev->min_mtu);
 		return -EINVAL;
 	} else if (new_mtu > netdev->max_mtu) {
-		netdev_err(netdev, "new mtu invalid. max_mtu is %d\n",
+		netdev_err(netdev, "new MTU invalid. max_mtu is %d\n",
 			   netdev->min_mtu);
 		return -EINVAL;
 	}
@@ -3882,7 +3932,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	} while (count < 100);
 
 	if (count == 100) {
-		netdev_err(netdev, "can't change mtu. Device is busy\n");
+		netdev_err(netdev, "can't change MTU. Device is busy\n");
 		return -EBUSY;
 	}
 
@@ -3894,18 +3944,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 
 		err = ice_down(vsi);
 		if (err) {
-			netdev_err(netdev, "change mtu if_up err %d\n", err);
+			netdev_err(netdev, "change MTU if_up err %d\n", err);
 			return err;
 		}
 
 		err = ice_up(vsi);
 		if (err) {
-			netdev_err(netdev, "change mtu if_up err %d\n", err);
+			netdev_err(netdev, "change MTU if_up err %d\n", err);
 			return err;
 		}
 	}
 
-	netdev_dbg(netdev, "changed mtu to %d\n", new_mtu);
+	netdev_info(netdev, "changed MTU to %d\n", new_mtu);
 	return 0;
 }
 
@@ -4241,7 +4291,7 @@ static void ice_tx_timeout(struct net_device *netdev)
  *
  * Returns 0 on success, negative value on failure
  */
-static int ice_open(struct net_device *netdev)
+int ice_open(struct net_device *netdev)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
@@ -4278,7 +4328,7 @@ static int ice_open(struct net_device *netdev)
  *
  * Returns success only - not allowed to fail
  */
-static int ice_stop(struct net_device *netdev)
+int ice_stop(struct net_device *netdev)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 62571d33d0d6..bcb431f1bd92 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -119,7 +119,7 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
 
 	status = ice_read_sr_aq(hw, offset, 1, data, true);
 	if (!status)
-		*data = le16_to_cpu(*(__le16 *)data);
+		*data = le16_to_cpu(*(__force __le16 *)data);
 
 	return status;
 }
@@ -174,7 +174,7 @@ ice_read_sr_buf_aq(struct ice_hw *hw, u16 offset, u16 *words, u16 *data)
 	} while (words_read < *words);
 
 	for (i = 0; i < *words; i++)
-		data[i] = le16_to_cpu(((__le16 *)data)[i]);
+		data[i] = le16_to_cpu(((__force __le16 *)data)[i]);
 
 read_nvm_buf_aq_exit:
 	*words = words_read;
@@ -316,3 +316,34 @@ ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data)
 
 	return status;
 }
+
+/**
+ * ice_nvm_validate_checksum
+ * @hw: pointer to the HW struct
+ *
+ * Verify NVM PFA checksum validity (0x0706)
+ */
+enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw)
+{
+	struct ice_aqc_nvm_checksum *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status)
+		return status;
+
+	cmd = &desc.params.nvm_checksum;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_checksum);
+	cmd->flags = ICE_AQC_NVM_CHECKSUM_VERIFY;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	ice_release_nvm(hw);
+
+	if (!status)
+		if (le16_to_cpu(cmd->checksum) != ICE_AQC_NVM_CHECKSUM_CORRECT)
+			status = ICE_ERR_NVM_CHECKSUM;
+
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index 17afe6acb18a..c01597885629 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -26,6 +26,7 @@ enum ice_status {
 	ICE_ERR_IN_USE				= -16,
 	ICE_ERR_MAX_LIMIT			= -17,
 	ICE_ERR_RESET_ONGOING			= -18,
+	ICE_ERR_NVM_CHECKSUM			= -51,
 	ICE_ERR_BUF_TOO_SHORT			= -52,
 	ICE_ERR_NVM_BLANK_MODE			= -53,
 	ICE_ERR_AQ_ERROR			= -100,
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 9f1f595ae7e6..8271fd651725 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -799,7 +799,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
 		daddr = f_info->l_data.ethertype_mac.mac_addr;
 		/* fall-through */
 	case ICE_SW_LKUP_ETHERTYPE:
-		off = (__be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
 		*off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
 		break;
 	case ICE_SW_LKUP_MAC_VLAN:
@@ -829,7 +829,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
 		ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr);
 
 	if (!(vlan_id > ICE_MAX_VLAN_ID)) {
-		off = (__be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
 		*off = cpu_to_be16(vlan_id);
 	}
 
@@ -1973,6 +1973,10 @@ ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
  * ice_add_eth_mac - Add ethertype and MAC based filter rule
  * @hw: pointer to the hardware structure
  * @em_list: list of ether type MAC filter, MAC is optional
+ *
+ * This function requires the caller to populate the entries in
+ * the filter list with the necessary fields (including flags to
+ * indicate Tx or Rx rules).
  */
 enum ice_status
 ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list)
@@ -1990,7 +1994,6 @@ ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list)
 		    l_type != ICE_SW_LKUP_ETHERTYPE)
 			return ICE_ERR_PARAM;
 
-		em_list_itr->fltr_info.flag = ICE_FLTR_TX;
 		em_list_itr->status = ice_add_rule_internal(hw, l_type,
 							    em_list_itr);
 		if (em_list_itr->status)
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 732b0b9b2e15..cb123fbe30be 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -8,9 +8,11 @@
 
 #define ICE_SW_CFG_MAX_BUF_LEN 2048
 #define ICE_DFLT_VSI_INVAL 0xff
+#define ICE_FLTR_RX BIT(0)
+#define ICE_FLTR_TX BIT(1)
+#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX)
 #define ICE_VSI_INVAL_ID 0xffff
 #define ICE_INVAL_Q_HANDLE 0xFFFF
-#define ICE_INVAL_Q_HANDLE 0xFFFF
 
 /* VSI queue context structure */
 struct ice_q_ctx {
@@ -69,9 +71,6 @@ struct ice_fltr_info {
 	/* rule ID returned by firmware once filter rule is created */
 	u16 fltr_rule_id;
 	u16 flag;
-#define ICE_FLTR_RX		BIT(0)
-#define ICE_FLTR_TX		BIT(1)
-#define ICE_FLTR_TX_RX		(ICE_FLTR_RX | ICE_FLTR_TX)
 
 	/* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
 	u16 src;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 2364eaf33d23..3c83230434b6 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -55,7 +55,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
 	if (!tx_ring->tx_buf)
 		return;
 
-	/* Free all the Tx ring sk_bufss */
+	/* Free all the Tx ring sk_buffs */
 	for (i = 0; i < tx_ring->count; i++)
 		ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
 
@@ -1101,7 +1101,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
  * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic
  * @port_info: port_info structure containing the current link speed
  * @avg_pkt_size: average size of Tx or Rx packets based on clean routine
- * @itr: itr value to update
+ * @itr: ITR value to update
  *
  * Calculate how big of an increment should be applied to the ITR value passed
  * in based on wmem_default, SKB overhead, Ethernet overhead, and the current
@@ -1316,7 +1316,7 @@ clear_counts:
  */
 static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
 {
-	/* The itr value is reported in microseconds, and the register value is
+	/* The ITR value is reported in microseconds, and the register value is
 	 * recorded in 2 microsecond units. For this reason we only need to
 	 * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
 	 * granularity as a shift instead of division. The mask makes sure the
@@ -1645,7 +1645,7 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
 	return;
 
 dma_error:
-	/* clear dma mappings for failed tx_buf map */
+	/* clear DMA mappings for failed tx_buf map */
 	for (;;) {
 		tx_buf = &tx_ring->tx_buf[i];
 		ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
@@ -1874,10 +1874,10 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 	cd_mss = skb_shinfo(skb)->gso_size;
 
 	/* record cdesc_qw1 with TSO parameters */
-	off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX |
-			 (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
-			 (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
-			 (cd_mss << ICE_TXD_CTX_QW1_MSS_S);
+	off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+			     (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+			     (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+			     (cd_mss << ICE_TXD_CTX_QW1_MSS_S));
 	first->tx_flags |= ICE_TX_FLAGS_TSO;
 	return 1;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 66e05032ee56..ec76aba347b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -58,19 +58,19 @@ struct ice_tx_buf {
 	unsigned int bytecount;
 	unsigned short gso_segs;
 	u32 tx_flags;
-	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
+	DEFINE_DMA_UNMAP_ADDR(dma);
 };
 
 struct ice_tx_offload_params {
-	u8 header_len;
+	u64 cd_qw1;
+	struct ice_ring *tx_ring;
 	u32 td_cmd;
 	u32 td_offset;
 	u32 td_l2tag1;
-	u16 cd_l2tag2;
 	u32 cd_tunnel_params;
-	u64 cd_qw1;
-	struct ice_ring *tx_ring;
+	u16 cd_l2tag2;
+	u8 header_len;
 };
 
 struct ice_rx_buf {
@@ -150,6 +150,7 @@ enum ice_rx_dtype {
 
 /* descriptor ring, associated with a VSI */
 struct ice_ring {
+	/* CL1 - 1st cacheline starts here */
 	struct ice_ring *next;		/* pointer to next ring in q_vector */
 	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
@@ -161,11 +162,11 @@ struct ice_ring {
 		struct ice_tx_buf *tx_buf;
 		struct ice_rx_buf *rx_buf;
 	};
+	/* CL2 - 2nd cacheline starts here */
 	u16 q_index;			/* Queue number of ring */
-	u32 txq_teid;			/* Added Tx queue TEID */
-#ifdef CONFIG_DCB
-	u8 dcb_tc;		/* Traffic class of ring */
-#endif /* CONFIG_DCB */
+	u16 q_handle;			/* Queue handle per TC */
+
+	u8 ring_active:1;		/* is ring online or not */
 
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
@@ -173,8 +174,7 @@ struct ice_ring {
 	/* used in interrupt processing */
 	u16 next_to_use;
 	u16 next_to_clean;
-
-	u8 ring_active;			/* is ring online or not */
+	u16 next_to_alloc;
 
 	/* stats structs */
 	struct ice_q_stats	stats;
@@ -184,10 +184,17 @@ struct ice_ring {
 		struct ice_rxq_stats rx_stats;
 	};
 
-	unsigned int size;		/* length of descriptor ring in bytes */
-	dma_addr_t dma;			/* physical address of ring */
 	struct rcu_head rcu;		/* to avoid race on free */
-	u16 next_to_alloc;
+	/* CLX - the below items are only accessed infrequently and should be
+	 * in their own cache line if possible
+	 */
+	dma_addr_t dma;			/* physical address of ring */
+	unsigned int size;		/* length of descriptor ring in bytes */
+	u32 txq_teid;			/* Added Tx queue TEID */
+	u16 rx_buf_len;
+#ifdef CONFIG_DCB
+	u8 dcb_tc;			/* Traffic class of ring */
+#endif /* CONFIG_DCB */
 } ____cacheline_internodealigned_in_smp;
 
 struct ice_ring_container {
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index a862af4cbf78..24bbef8bbe69 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -23,6 +23,7 @@ static inline bool ice_is_tc_ena(u8 bitmap, u8 tc)
 
 /* debug masks - set these bits in hw->debug_mask to control output */
 #define ICE_DBG_INIT		BIT_ULL(1)
+#define ICE_DBG_FW_LOG		BIT_ULL(3)
 #define ICE_DBG_LINK		BIT_ULL(4)
 #define ICE_DBG_PHY		BIT_ULL(5)
 #define ICE_DBG_QCTX		BIT_ULL(6)
@@ -61,6 +62,13 @@ enum ice_fc_mode {
 	ICE_FC_DFLT
 };
 
+enum ice_fec_mode {
+	ICE_FEC_NONE = 0,
+	ICE_FEC_RS,
+	ICE_FEC_BASER,
+	ICE_FEC_AUTO
+};
+
 enum ice_set_fc_aq_failures {
 	ICE_SET_FC_AQ_FAIL_NONE = 0,
 	ICE_SET_FC_AQ_FAIL_GET,
@@ -86,12 +94,14 @@ enum ice_media_type {
 enum ice_vsi_type {
 	ICE_VSI_PF = 0,
 	ICE_VSI_VF,
+	ICE_VSI_LB = 6,
 };
 
 struct ice_link_status {
 	/* Refer to ice_aq_phy_type for bits definition */
 	u64 phy_type_low;
 	u64 phy_type_high;
+	u8 topo_media_conflict;
 	u16 max_frame_size;
 	u16 link_speed;
 	u16 req_speeds;
@@ -99,6 +109,7 @@ struct ice_link_status {
 	u8 link_info;
 	u8 an_info;
 	u8 ext_info;
+	u8 fec_info;
 	u8 pacing;
 	/* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of
 	 * ice_aqc_get_phy_caps structure
@@ -423,7 +434,7 @@ struct ice_hw {
 	struct ice_fw_log_cfg fw_log;
 
 /* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL
- * register. Used for determining the itr/intrl granularity during
+ * register. Used for determining the ITR/intrl granularity during
  * initialization.
  */
 #define ICE_MAX_AGG_BW_200G	0x0
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index a805cbdd69be..5d24b539648f 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -103,7 +103,7 @@ ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
 	u16 link_speed;
 
 	if (link_up)
-		link_speed = ICE_AQ_LINK_SPEED_40GB;
+		link_speed = ICE_AQ_LINK_SPEED_100GB;
 	else
 		link_speed = ICE_AQ_LINK_SPEED_UNKNOWN;
 
@@ -141,32 +141,20 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf)
 }
 
 /**
- * ice_get_vf_vector - get VF interrupt vector register offset
- * @vf_msix: number of MSIx vector per VF on a PF
- * @vf_id: VF identifier
- * @i: index of MSIx vector
- */
-static u32 ice_get_vf_vector(int vf_msix, int vf_id, int i)
-{
-	return ((i == 0) ? VFINT_DYN_CTLN(vf_id) :
-		 VFINT_DYN_CTLN(((vf_msix - 1) * (vf_id)) + (i - 1)));
-}
-
-/**
  * ice_free_vf_res - Free a VF's resources
  * @vf: pointer to the VF info
  */
 static void ice_free_vf_res(struct ice_vf *vf)
 {
 	struct ice_pf *pf = vf->pf;
-	int i, pf_vf_msix;
+	int i, last_vector_idx;
 
 	/* First, disable VF's configuration API to prevent OS from
 	 * accessing the VF's VSI after it's freed or invalidated.
 	 */
 	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
 
-	/* free vsi & disconnect it from the parent uplink */
+	/* free VSI and disconnect it from the parent uplink */
 	if (vf->lan_vsi_idx) {
 		ice_vsi_release(pf->vsi[vf->lan_vsi_idx]);
 		vf->lan_vsi_idx = 0;
@@ -174,13 +162,10 @@ static void ice_free_vf_res(struct ice_vf *vf)
 		vf->num_mac = 0;
 	}
 
-	pf_vf_msix = pf->num_vf_msix;
+	last_vector_idx = vf->first_vector_idx + pf->num_vf_msix - 1;
 	/* Disable interrupts so that VF starts in a known state */
-	for (i = 0; i < pf_vf_msix; i++) {
-		u32 reg_idx;
-
-		reg_idx = ice_get_vf_vector(pf_vf_msix, vf->vf_id, i);
-		wr32(&pf->hw, reg_idx, VFINT_DYN_CTLN_CLEARPBA_M);
+	for (i = vf->first_vector_idx; i <= last_vector_idx; i++) {
+		wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M);
 		ice_flush(&pf->hw);
 	}
 	/* reset some of the state variables keeping track of the resources */
@@ -205,8 +190,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
 	wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
 	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
 
-	first = vf->first_vector_idx +
-		hw->func_caps.common_cap.msix_vector_first_id;
+	first = vf->first_vector_idx;
 	last = first + pf->num_vf_msix - 1;
 	for (v = first; v <= last; v++) {
 		u32 reg;
@@ -232,6 +216,42 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
 }
 
 /**
+ * ice_sriov_free_msix_res - Reset/free any used MSIX resources
+ * @pf: pointer to the PF structure
+ *
+ * If MSIX entries from the pf->irq_tracker were needed then we need to
+ * reset the irq_tracker->end and give back the entries we needed to
+ * num_avail_sw_msix.
+ *
+ * If no MSIX entries were taken from the pf->irq_tracker then just clear
+ * the pf->sriov_base_vector.
+ *
+ * Returns 0 on success, and -EINVAL on error.
+ */
+static int ice_sriov_free_msix_res(struct ice_pf *pf)
+{
+	struct ice_res_tracker *res;
+
+	if (!pf)
+		return -EINVAL;
+
+	res = pf->irq_tracker;
+	if (!res)
+		return -EINVAL;
+
+	/* give back irq_tracker resources used */
+	if (pf->sriov_base_vector < res->num_entries) {
+		res->end = res->num_entries;
+		pf->num_avail_sw_msix +=
+			res->num_entries - pf->sriov_base_vector;
+	}
+
+	pf->sriov_base_vector = 0;
+
+	return 0;
+}
+
+/**
  * ice_free_vfs - Free all VFs
  * @pf: pointer to the PF structure
  */
@@ -246,15 +266,6 @@ void ice_free_vfs(struct ice_pf *pf)
 	while (test_and_set_bit(__ICE_VF_DIS, pf->state))
 		usleep_range(1000, 2000);
 
-	/* Disable IOV before freeing resources. This lets any VF drivers
-	 * running in the host get themselves cleaned up before we yank
-	 * the carpet out from underneath their feet.
-	 */
-	if (!pci_vfs_assigned(pf->pdev))
-		pci_disable_sriov(pf->pdev);
-	else
-		dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
-
 	/* Avoid wait time by stopping all VFs at the same time */
 	for (i = 0; i < pf->num_alloc_vfs; i++) {
 		struct ice_vsi *vsi;
@@ -270,6 +281,15 @@ void ice_free_vfs(struct ice_pf *pf)
 		clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states);
 	}
 
+	/* Disable IOV before freeing resources. This lets any VF drivers
+	 * running in the host get themselves cleaned up before we yank
+	 * the carpet out from underneath their feet.
+	 */
+	if (!pci_vfs_assigned(pf->pdev))
+		pci_disable_sriov(pf->pdev);
+	else
+		dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
+
 	tmp = pf->num_alloc_vfs;
 	pf->num_vf_qps = 0;
 	pf->num_alloc_vfs = 0;
@@ -288,6 +308,10 @@ void ice_free_vfs(struct ice_pf *pf)
 		}
 	}
 
+	if (ice_sriov_free_msix_res(pf))
+		dev_err(&pf->pdev->dev,
+			"Failed to free MSIX resources used by SR-IOV\n");
+
 	devm_kfree(&pf->pdev->dev, pf->vf);
 	pf->vf = NULL;
 
@@ -457,6 +481,22 @@ ice_vf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, u16 vf_id)
 }
 
 /**
+ * ice_calc_vf_first_vector_idx - Calculate absolute MSIX vector index in HW
+ * @pf: pointer to PF structure
+ * @vf: pointer to VF that the first MSIX vector index is being calculated for
+ *
+ * This returns the first MSIX vector index in HW that is used by this VF and
+ * this will always be the OICR index in the AVF driver so any functionality
+ * using vf->first_vector_idx for queue configuration will have to increment by
+ * 1 to avoid meddling with the OICR index.
+ */
+static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
+{
+	return pf->hw.func_caps.common_cap.msix_vector_first_id +
+		pf->sriov_base_vector + vf->vf_id * pf->num_vf_msix;
+}
+
+/**
  * ice_alloc_vsi_res - Setup VF VSI and its resources
  * @vf: pointer to the VF structure
  *
@@ -470,8 +510,10 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	struct ice_vsi *vsi;
 	int status = 0;
 
-	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
+	/* first vector index is the VFs OICR index */
+	vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
 
+	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
 	if (!vsi) {
 		dev_err(&pf->pdev->dev, "Failed to create VF VSI\n");
 		return -ENOMEM;
@@ -480,14 +522,6 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	vf->lan_vsi_idx = vsi->idx;
 	vf->lan_vsi_num = vsi->vsi_num;
 
-	/* first vector index is the VFs OICR index */
-	vf->first_vector_idx = vsi->hw_base_vector;
-	/* Since hw_base_vector holds the vector where data queue interrupts
-	 * starts, increment by 1 since VFs allocated vectors include OICR intr
-	 * as well.
-	 */
-	vsi->hw_base_vector += 1;
-
 	/* Check if port VLAN exist before, and restore it accordingly */
 	if (vf->port_vlan_id) {
 		ice_vsi_manage_pvid(vsi, vf->port_vlan_id, true);
@@ -580,8 +614,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
 
 	hw = &pf->hw;
 	vsi = pf->vsi[vf->lan_vsi_idx];
-	first = vf->first_vector_idx +
-		hw->func_caps.common_cap.msix_vector_first_id;
+	first = vf->first_vector_idx;
 	last = (first + pf->num_vf_msix) - 1;
 	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
 
@@ -687,6 +720,97 @@ ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res)
 }
 
 /**
+ * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space
+ * @vf: VF to calculate the register index for
+ * @q_vector: a q_vector associated to the VF
+ */
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf;
+
+	if (!vf || !q_vector)
+		return -EINVAL;
+
+	pf = vf->pf;
+
+	/* always add one to account for the OICR being the first MSIX */
+	return pf->sriov_base_vector + pf->num_vf_msix * vf->vf_id +
+		q_vector->v_idx + 1;
+}
+
+/**
+ * ice_get_max_valid_res_idx - Get the max valid resource index
+ * @res: pointer to the resource to find the max valid index for
+ *
+ * Start from the end of the ice_res_tracker and return right when we find the
+ * first res->list entry with the ICE_RES_VALID_BIT set. This function is only
+ * valid for SR-IOV because it is the only consumer that manipulates the
+ * res->end and this is always called when res->end is set to res->num_entries.
+ */
+static int ice_get_max_valid_res_idx(struct ice_res_tracker *res)
+{
+	int i;
+
+	if (!res)
+		return -EINVAL;
+
+	for (i = res->num_entries - 1; i >= 0; i--)
+		if (res->list[i] & ICE_RES_VALID_BIT)
+			return i;
+
+	return 0;
+}
+
+/**
+ * ice_sriov_set_msix_res - Set any used MSIX resources
+ * @pf: pointer to PF structure
+ * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
+ *
+ * This function allows SR-IOV resources to be taken from the end of the PF's
+ * allowed HW MSIX vectors so in many cases the irq_tracker will not
+ * be needed. In these cases we just set the pf->sriov_base_vector and return
+ * success.
+ *
+ * If SR-IOV needs to use any pf->irq_tracker entries it updates the
+ * irq_tracker->end based on the first entry needed for SR-IOV. This makes it
+ * so any calls to ice_get_res() using the irq_tracker will not try to use
+ * resources at or beyond the newly set value.
+ *
+ * Return 0 on success, and -EINVAL when there are not enough MSIX vectors in
+ * in the PF's space available for SR-IOV.
+ */
+static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
+{
+	int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+	u16 pf_total_msix_vectors =
+		pf->hw.func_caps.common_cap.num_msix_vectors;
+	struct ice_res_tracker *res = pf->irq_tracker;
+	int sriov_base_vector;
+
+	if (max_valid_res_idx < 0)
+		return max_valid_res_idx;
+
+	sriov_base_vector = pf_total_msix_vectors - num_msix_needed;
+
+	/* make sure we only grab irq_tracker entries from the list end and
+	 * that we have enough available MSIX vectors
+	 */
+	if (sriov_base_vector <= max_valid_res_idx)
+		return -EINVAL;
+
+	pf->sriov_base_vector = sriov_base_vector;
+
+	/* dip into irq_tracker entries and update used resources */
+	if (num_msix_needed > (pf_total_msix_vectors - res->num_entries)) {
+		pf->num_avail_sw_msix -=
+			res->num_entries - pf->sriov_base_vector;
+		res->end = pf->sriov_base_vector;
+	}
+
+	return 0;
+}
+
+/**
  * ice_check_avail_res - check if vectors and queues are available
  * @pf: pointer to the PF structure
  *
@@ -696,11 +820,16 @@ ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res)
  */
 static int ice_check_avail_res(struct ice_pf *pf)
 {
-	u16 num_msix, num_txq, num_rxq;
+	int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+	u16 num_msix, num_txq, num_rxq, num_avail_msix;
 
-	if (!pf->num_alloc_vfs)
+	if (!pf->num_alloc_vfs || max_valid_res_idx < 0)
 		return -EINVAL;
 
+	/* add 1 to max_valid_res_idx to account for it being 0-based */
+	num_avail_msix = pf->hw.func_caps.common_cap.num_msix_vectors -
+		(max_valid_res_idx + 1);
+
 	/* Grab from HW interrupts common pool
 	 * Note: By the time the user decides it needs more vectors in a VF
 	 * its already too late since one must decide this prior to creating the
@@ -717,11 +846,11 @@ static int ice_check_avail_res(struct ice_pf *pf)
 	 * grab default interrupt vectors (5 as supported by AVF driver).
 	 */
 	if (pf->num_alloc_vfs <= 16) {
-		num_msix = ice_determine_res(pf, pf->num_avail_hw_msix,
+		num_msix = ice_determine_res(pf, num_avail_msix,
 					     ICE_MAX_INTR_PER_VF,
 					     ICE_MIN_INTR_PER_VF);
 	} else if (pf->num_alloc_vfs <= ICE_MAX_VF_COUNT) {
-		num_msix = ice_determine_res(pf, pf->num_avail_hw_msix,
+		num_msix = ice_determine_res(pf, num_avail_msix,
 					     ICE_DFLT_INTR_PER_VF,
 					     ICE_MIN_INTR_PER_VF);
 	} else {
@@ -750,6 +879,9 @@ static int ice_check_avail_res(struct ice_pf *pf)
 	if (!num_txq || !num_rxq)
 		return -EIO;
 
+	if (ice_sriov_set_msix_res(pf, num_msix * pf->num_alloc_vfs))
+		return -EINVAL;
+
 	/* since AVF driver works with only queue pairs which means, it expects
 	 * to have equal number of Rx and Tx queues, so take the minimum of
 	 * available Tx or Rx queues
@@ -938,6 +1070,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 		vf->num_vf_qs = 0;
 	}
 
+	if (ice_sriov_free_msix_res(pf))
+		dev_err(&pf->pdev->dev,
+			"Failed to free MSIX resources used by SR-IOV\n");
+
 	if (ice_check_avail_res(pf)) {
 		dev_err(&pf->pdev->dev,
 			"Cannot allocate VF resources, try with fewer number of VFs\n");
@@ -1119,7 +1255,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 	int i, ret;
 
 	/* Disable global interrupt 0 so we don't try to handle the VFLR. */
-	wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx),
+	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
 	     ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
 
 	ice_flush(hw);
@@ -1134,7 +1270,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 			   GFP_KERNEL);
 	if (!vfs) {
 		ret = -ENOMEM;
-		goto err_unroll_sriov;
+		goto err_pci_disable_sriov;
 	}
 	pf->vf = vfs;
 
@@ -1154,12 +1290,19 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 	pf->num_alloc_vfs = num_alloc_vfs;
 
 	/* VF resources get allocated during reset */
-	if (!ice_reset_all_vfs(pf, true))
+	if (!ice_reset_all_vfs(pf, true)) {
+		ret = -EIO;
 		goto err_unroll_sriov;
+	}
 
 	goto err_unroll_intr;
 
 err_unroll_sriov:
+	pf->vf = NULL;
+	devm_kfree(&pf->pdev->dev, vfs);
+	vfs = NULL;
+	pf->num_alloc_vfs = 0;
+err_pci_disable_sriov:
 	pci_disable_sriov(pf->pdev);
 err_unroll_intr:
 	/* rearm interrupts here */
@@ -1168,8 +1311,8 @@ err_unroll_intr:
 }
 
 /**
- * ice_pf_state_is_nominal - checks the pf for nominal state
- * @pf: pointer to pf to check
+ * ice_pf_state_is_nominal - checks the PF for nominal state
+ * @pf: pointer to PF to check
  *
  * Check the PF's state for a collection of bits that would indicate
  * the PF is in a state that would inhibit normal operation for
@@ -1496,7 +1639,7 @@ static void ice_vc_reset_vf_msg(struct ice_vf *vf)
 
 /**
  * ice_find_vsi_from_id
- * @pf: the pf structure to search for the VSI
+ * @pf: the PF structure to search for the VSI
  * @id: ID of the VSI it is searching for
  *
  * searches for the VSI with the given ID
@@ -1807,28 +1950,37 @@ error_param:
 static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 {
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
-	struct virtchnl_irq_map_info *irqmap_info =
-	    (struct virtchnl_irq_map_info *)msg;
+	struct virtchnl_irq_map_info *irqmap_info;
 	u16 vsi_id, vsi_q_id, vector_id;
 	struct virtchnl_vector_map *map;
-	struct ice_vsi *vsi = NULL;
 	struct ice_pf *pf = vf->pf;
+	u16 num_q_vectors_mapped;
+	struct ice_vsi *vsi;
 	unsigned long qmap;
-	u16 num_q_vectors;
 	int i;
 
-	num_q_vectors = irqmap_info->num_vectors - ICE_NONQ_VECS_VF;
+	irqmap_info = (struct virtchnl_irq_map_info *)msg;
+	num_q_vectors_mapped = irqmap_info->num_vectors;
+
 	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
 
+	/* Check to make sure number of VF vectors mapped is not greater than
+	 * number of VF vectors originally allocated, and check that
+	 * there is actually at least a single VF queue vector mapped
+	 */
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
-	    !vsi || vsi->num_q_vectors < num_q_vectors ||
-	    irqmap_info->num_vectors == 0) {
+	    pf->num_vf_msix < num_q_vectors_mapped ||
+	    !irqmap_info->num_vectors) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
 	}
 
-	for (i = 0; i < num_q_vectors; i++) {
-		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+	for (i = 0; i < num_q_vectors_mapped; i++) {
+		struct ice_q_vector *q_vector;
 
 		map = &irqmap_info->vecmap[i];
 
@@ -1836,7 +1988,21 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 		vsi_id = map->vsi_id;
 		/* validate msg params */
 		if (!(vector_id < pf->hw.func_caps.common_cap
-		    .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id)) {
+		    .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+		    (!vector_id && (map->rxq_map || map->txq_map))) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* No need to map VF miscellaneous or rogue vector */
+		if (!vector_id)
+			continue;
+
+		/* Subtract non queue vector from vector_id passed by VF
+		 * to get actual number of VSI queue vector array index
+		 */
+		q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
+		if (!q_vector) {
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 			goto error_param;
 		}
@@ -1852,6 +2018,8 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 			q_vector->num_ring_rx++;
 			q_vector->rx.itr_idx = map->rxitr_idx;
 			vsi->rx_rings[vsi_q_id]->q_vector = q_vector;
+			ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id,
+					      q_vector->rx.itr_idx);
 		}
 
 		qmap = map->txq_map;
@@ -1864,11 +2032,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 			q_vector->num_ring_tx++;
 			q_vector->tx.itr_idx = map->txitr_idx;
 			vsi->tx_rings[vsi_q_id]->q_vector = q_vector;
+			ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id,
+					      q_vector->tx.itr_idx);
 		}
 	}
 
-	if (vsi)
-		ice_vsi_cfg_msix(vsi);
 error_param:
 	/* send the response to the VF */
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
@@ -1903,9 +2071,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 	}
 
 	vsi = pf->vsi[vf->lan_vsi_idx];
-	if (!vsi) {
+	if (!vsi)
 		goto error_param;
-	}
 
 	if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF) {
 		dev_err(&pf->pdev->dev,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 3725aea16840..c3ca522c245a 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -49,29 +49,34 @@ struct ice_vf {
 	struct ice_pf *pf;
 
 	s16 vf_id;			/* VF ID in the PF space */
-	u32 driver_caps;		/* reported by VF driver */
+	u16 lan_vsi_idx;		/* index into PF struct */
 	int first_vector_idx;		/* first vector index of this VF */
 	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
 	struct virtchnl_version_info vf_ver;
+	u32 driver_caps;		/* reported by VF driver */
 	struct virtchnl_ether_addr dflt_lan_addr;
 	u16 port_vlan_id;
-	u8 pf_set_mac;			/* VF MAC address set by VMM admin */
-	u8 trusted;
-	u16 lan_vsi_idx;		/* index into PF struct */
+	u8 pf_set_mac:1;		/* VF MAC address set by VMM admin */
+	u8 trusted:1;
+	u8 spoofchk:1;
+	u8 link_forced:1;
+	u8 link_up:1;			/* only valid if VF link is forced */
+	/* VSI indices - actual VSI pointers are maintained in the PF structure
+	 * When assigned, these will be non-zero, because VSI 0 is always
+	 * the main LAN VSI for the PF.
+	 */
 	u16 lan_vsi_num;		/* ID as used by firmware */
+	unsigned int tx_rate;		/* Tx bandwidth limit in Mbps */
+	DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS);	/* VF runtime states */
+
 	u64 num_mdd_events;		/* number of MDD events detected */
 	u64 num_inval_msgs;		/* number of continuous invalid msgs */
 	u64 num_valid_msgs;		/* number of valid msgs detected */
 	unsigned long vf_caps;		/* VF's adv. capabilities */
-	DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS);	/* VF runtime states */
-	unsigned int tx_rate;		/* Tx bandwidth limit in Mbps */
-	u8 link_forced;
-	u8 link_up;			/* only valid if VF link is forced */
-	u8 spoofchk;
+	u8 num_req_qs;			/* num of queue pairs requested by VF */
 	u16 num_mac;
 	u16 num_vlan;
 	u16 num_vf_qs;			/* num of queue configured per VF */
-	u8 num_req_qs;			/* num of queue pairs requested by VF */
 };
 
 #ifdef CONFIG_PCI_IOV
@@ -96,6 +101,8 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted);
 int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
 
 int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
+
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
 #else /* CONFIG_PCI_IOV */
 #define ice_process_vflr_event(pf) do {} while (0)
 #define ice_free_vfs(pf) do {} while (0)
@@ -161,5 +168,11 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev,
 	return -EOPNOTSUPP;
 }
 
+static inline int
+ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
+		    struct ice_q_vector __always_unused *q_vector)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI_IOV */
 #endif /* _ICE_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index bafdcf70a353..3ec2ce0725d5 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -638,7 +638,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw)
 			dev_spec->sgmii_active = true;
 			break;
 		}
-		/* fall through for I2C based SGMII */
+		/* fall through - for I2C based SGMII */
 	case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES:
 		/* read media type from SFP EEPROM */
 		ret_val = igb_set_sfp_media_type_82575(hw);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 39f33afc479c..fc925adbd9fa 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -753,6 +753,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg)
 		struct net_device *netdev = igb->netdev;
 		hw->hw_addr = NULL;
 		netdev_err(netdev, "PCIe link lost\n");
+		WARN(1, "igb: Failed to read reg 0x%x!\n", reg);
 	}
 
 	return value;
@@ -6695,7 +6696,7 @@ static int __igb_notify_dca(struct device *dev, void *data)
 			igb_setup_dca(adapter);
 			break;
 		}
-		/* Fall Through since DCA is disabled. */
+		/* Fall Through - since DCA is disabled. */
 	case DCA_PROVIDER_REMOVE:
 		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
 			/* without this a class_device is left
diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index 51a8b8769c67..59258d791106 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -10,50 +10,6 @@
 #include "igc.h"
 
 /**
- * igc_set_pcie_completion_timeout - set pci-e completion timeout
- * @hw: pointer to the HW structure
- */
-static s32 igc_set_pcie_completion_timeout(struct igc_hw *hw)
-{
-	u32 gcr = rd32(IGC_GCR);
-	u16 pcie_devctl2;
-	s32 ret_val = 0;
-
-	/* only take action if timeout value is defaulted to 0 */
-	if (gcr & IGC_GCR_CMPL_TMOUT_MASK)
-		goto out;
-
-	/* if capabilities version is type 1 we can write the
-	 * timeout of 10ms to 200ms through the GCR register
-	 */
-	if (!(gcr & IGC_GCR_CAP_VER2)) {
-		gcr |= IGC_GCR_CMPL_TMOUT_10ms;
-		goto out;
-	}
-
-	/* for version 2 capabilities we need to write the config space
-	 * directly in order to set the completion timeout value for
-	 * 16ms to 55ms
-	 */
-	ret_val = igc_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
-					&pcie_devctl2);
-	if (ret_val)
-		goto out;
-
-	pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
-
-	ret_val = igc_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
-					 &pcie_devctl2);
-out:
-	/* disable completion timeout resend */
-	gcr &= ~IGC_GCR_CMPL_TMOUT_RESEND;
-
-	wr32(IGC_GCR, gcr);
-
-	return ret_val;
-}
-
-/**
  * igc_reset_hw_base - Reset hardware
  * @hw: pointer to the HW structure
  *
@@ -72,11 +28,6 @@ static s32 igc_reset_hw_base(struct igc_hw *hw)
 	if (ret_val)
 		hw_dbg("PCI-E Master disable polling has failed.\n");
 
-	/* set the completion timeout for interface */
-	ret_val = igc_set_pcie_completion_timeout(hw);
-	if (ret_val)
-		hw_dbg("PCI-E Set completion timeout has failed.\n");
-
 	hw_dbg("Masking off all interrupts\n");
 	wr32(IGC_IMC, 0xffffffff);
 
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index a9a30268de59..fc0ccfe38a20 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -5,8 +5,8 @@
 #define _IGC_DEFINES_H_
 
 /* Number of Transmit and Receive Descriptors must be a multiple of 8 */
-#define REQ_TX_DESCRIPTOR_MULTIPLE  8
-#define REQ_RX_DESCRIPTOR_MULTIPLE  8
+#define REQ_TX_DESCRIPTOR_MULTIPLE	8
+#define REQ_RX_DESCRIPTOR_MULTIPLE	8
 
 #define IGC_CTRL_EXT_DRV_LOAD	0x10000000 /* Drv loaded bit for FW */
 
@@ -29,12 +29,6 @@
 /* Status of Master requests. */
 #define IGC_STATUS_GIO_MASTER_ENABLE	0x00080000
 
-/* PCI Express Control */
-#define IGC_GCR_CMPL_TMOUT_MASK		0x0000F000
-#define IGC_GCR_CMPL_TMOUT_10ms		0x00001000
-#define IGC_GCR_CMPL_TMOUT_RESEND	0x00010000
-#define IGC_GCR_CAP_VER2		0x00040000
-
 /* Receive Address
  * Number of high/low register pairs in the RAR. The RAR (Receive Address
  * Registers) holds the directed and multicast addresses that we monitor.
@@ -72,6 +66,9 @@
 
 #define IGC_CONNSW_AUTOSENSE_EN	0x1
 
+/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
+#define MAX_JUMBO_FRAME_SIZE	0x2600
+
 /* PBA constants */
 #define IGC_PBA_34K		0x0022
 
@@ -264,9 +261,6 @@
 #define IGC_TCTL_RTLC		0x01000000 /* Re-transmit on late collision */
 #define IGC_TCTL_MULR		0x10000000 /* Multiple request support */
 
-#define IGC_CT_SHIFT			4
-#define IGC_COLLISION_THRESHOLD		15
-
 /* Flow Control Constants */
 #define FLOW_CONTROL_ADDRESS_LOW	0x00C28001
 #define FLOW_CONTROL_ADDRESS_HIGH	0x00000100
@@ -398,7 +392,7 @@
 #define IGC_MDIC_ERROR		0x40000000
 #define IGC_MDIC_DEST		0x80000000
 
-#define IGC_N0_QUEUE -1
+#define IGC_N0_QUEUE		-1
 
 #define IGC_MAX_MAC_HDR_LEN	127
 #define IGC_MAX_NETWORK_HDR_LEN	511
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index 7c88b7bd4799..1039a224ac80 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -114,11 +114,8 @@ struct igc_nvm_operations {
 
 struct igc_phy_operations {
 	s32 (*acquire)(struct igc_hw *hw);
-	s32 (*check_polarity)(struct igc_hw *hw);
 	s32 (*check_reset_block)(struct igc_hw *hw);
 	s32 (*force_speed_duplex)(struct igc_hw *hw);
-	s32 (*get_cfg_done)(struct igc_hw *hw);
-	s32 (*get_cable_length)(struct igc_hw *hw);
 	s32 (*get_phy_info)(struct igc_hw *hw);
 	s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data);
 	void (*release)(struct igc_hw *hw);
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index f7683d3ae47c..ba4646737288 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -8,7 +8,6 @@
 #include "igc_hw.h"
 
 /* forward declaration */
-static s32 igc_set_default_fc(struct igc_hw *hw);
 static s32 igc_set_fc_watermarks(struct igc_hw *hw);
 
 /**
@@ -96,13 +95,10 @@ s32 igc_setup_link(struct igc_hw *hw)
 		goto out;
 
 	/* If requested flow control is set to default, set flow control
-	 * based on the EEPROM flow control settings.
+	 * to the both 'rx' and 'tx' pause frames.
 	 */
-	if (hw->fc.requested_mode == igc_fc_default) {
-		ret_val = igc_set_default_fc(hw);
-		if (ret_val)
-			goto out;
-	}
+	if (hw->fc.requested_mode == igc_fc_default)
+		hw->fc.requested_mode = igc_fc_full;
 
 	/* We want to save off the original Flow Control configuration just
 	 * in case we get disconnected and then reconnected into a different
@@ -136,19 +132,6 @@ out:
 }
 
 /**
- * igc_set_default_fc - Set flow control default values
- * @hw: pointer to the HW structure
- *
- * Read the EEPROM for the default values for flow control and store the
- * values.
- */
-static s32 igc_set_default_fc(struct igc_hw *hw)
-{
-	hw->fc.requested_mode = igc_fc_full;
-	return 0;
-}
-
-/**
  * igc_force_mac_fc - Force the MAC's flow control settings
  * @hw: pointer to the HW structure
  *
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 34fa0e60a780..93f3b4e6185b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -72,6 +72,27 @@ void igc_reset(struct igc_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
 	struct igc_hw *hw = &adapter->hw;
+	struct igc_fc_info *fc = &hw->fc;
+	u32 pba, hwm;
+
+	/* Repartition PBA for greater than 9k MTU if required */
+	pba = IGC_PBA_34K;
+
+	/* flow control settings
+	 * The high water mark must be low enough to fit one full frame
+	 * after transmitting the pause frame.  As such we must have enough
+	 * space to allow for us to complete our current transmit and then
+	 * receive the frame that is in progress from the link partner.
+	 * Set it to:
+	 * - the full Rx FIFO size minus one full Tx plus one full Rx frame
+	 */
+	hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
+
+	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
+	fc->low_water = fc->high_water - 16;
+	fc->pause_time = 0xFFFF;
+	fc->send_xon = 1;
+	fc->current_mode = fc->requested_mode;
 
 	hw->mac.ops.reset_hw(hw);
 
@@ -3934,6 +3955,7 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
 		hw->hw_addr = NULL;
 		netif_device_detach(netdev);
 		netdev_err(netdev, "PCIe link lost, device now detached\n");
+		WARN(1, "igc: Failed to read reg 0x%x!\n", reg);
 	}
 
 	return value;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 269bd73be1a0..94dc0a272644 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -437,6 +437,7 @@ struct mvneta_port {
 	struct device_node *dn;
 	unsigned int tx_csum_limit;
 	struct phylink *phylink;
+	struct phylink_config phylink_config;
 	struct phy *comphy;
 
 	struct mvneta_bm *bm_priv;
@@ -3356,9 +3357,11 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr)
 	return 0;
 }
 
-static void mvneta_validate(struct net_device *ndev, unsigned long *supported,
+static void mvneta_validate(struct phylink_config *config,
+			    unsigned long *supported,
 			    struct phylink_link_state *state)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
@@ -3408,9 +3411,10 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported,
 	phylink_helper_basex_speed(state);
 }
 
-static int mvneta_mac_link_state(struct net_device *ndev,
+static int mvneta_mac_link_state(struct phylink_config *config,
 				 struct phylink_link_state *state)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 gmac_stat;
 
@@ -3438,8 +3442,9 @@ static int mvneta_mac_link_state(struct net_device *ndev,
 	return 1;
 }
 
-static void mvneta_mac_an_restart(struct net_device *ndev)
+static void mvneta_mac_an_restart(struct phylink_config *config)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
 
@@ -3449,9 +3454,10 @@ static void mvneta_mac_an_restart(struct net_device *ndev)
 		    gmac_an & ~MVNETA_GMAC_INBAND_RESTART_AN);
 }
 
-static void mvneta_mac_config(struct net_device *ndev, unsigned int mode,
-	const struct phylink_link_state *state)
+static void mvneta_mac_config(struct phylink_config *config, unsigned int mode,
+			      const struct phylink_link_state *state)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 new_ctrl0, gmac_ctrl0 = mvreg_read(pp, MVNETA_GMAC_CTRL_0);
 	u32 new_ctrl2, gmac_ctrl2 = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
@@ -3581,9 +3587,10 @@ static void mvneta_set_eee(struct mvneta_port *pp, bool enable)
 	mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi_ctl1);
 }
 
-static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode,
-				 phy_interface_t interface)
+static void mvneta_mac_link_down(struct phylink_config *config,
+				 unsigned int mode, phy_interface_t interface)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 val;
 
@@ -3600,10 +3607,11 @@ static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode,
 	mvneta_set_eee(pp, false);
 }
 
-static void mvneta_mac_link_up(struct net_device *ndev, unsigned int mode,
+static void mvneta_mac_link_up(struct phylink_config *config, unsigned int mode,
 			       phy_interface_t interface,
 			       struct phy_device *phy)
 {
+	struct net_device *ndev = to_net_dev(config->dev);
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 val;
 
@@ -4500,8 +4508,14 @@ static int mvneta_probe(struct platform_device *pdev)
 		comphy = NULL;
 	}
 
-	phylink = phylink_create(dev, pdev->dev.fwnode, phy_mode,
-				 &mvneta_phylink_ops);
+	pp = netdev_priv(dev);
+	spin_lock_init(&pp->lock);
+
+	pp->phylink_config.dev = &dev->dev;
+	pp->phylink_config.type = PHYLINK_NETDEV;
+
+	phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode,
+				 phy_mode, &mvneta_phylink_ops);
 	if (IS_ERR(phylink)) {
 		err = PTR_ERR(phylink);
 		goto err_free_irq;
@@ -4513,8 +4527,6 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	dev->ethtool_ops = &mvneta_eth_tool_ops;
 
-	pp = netdev_priv(dev);
-	spin_lock_init(&pp->lock);
 	pp->phylink = phylink;
 	pp->comphy = comphy;
 	pp->phy_interface = phy_mode;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 6171270a016c..d67c970f02e5 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -148,6 +148,8 @@
 #define MVPP22_CLS_C2_ATTR2			0x1b6c
 #define     MVPP22_CLS_C2_ATTR2_RSS_EN		BIT(30)
 #define MVPP22_CLS_C2_ATTR3			0x1b70
+#define MVPP22_CLS_C2_TCAM_CTRL			0x1b90
+#define     MVPP22_CLS_C2_TCAM_BYPASS_FIFO	BIT(0)
 
 /* Descriptor Manager Top Registers */
 #define MVPP2_RXQ_NUM_REG			0x2040
@@ -624,6 +626,7 @@
 #define MVPP2_N_RFS_RULES		(MVPP2_N_RFS_ENTRIES_PER_FLOW * 7)
 
 /* RSS constants */
+#define MVPP22_N_RSS_TABLES		8
 #define MVPP22_RSS_TABLE_ENTRIES	32
 
 /* IPv6 max L3 address size */
@@ -725,6 +728,10 @@ enum mvpp2_prs_l3_cast {
 /* Definitions */
 struct mvpp2_dbgfs_entries;
 
+struct mvpp2_rss_table {
+	u32 indir[MVPP22_RSS_TABLE_ENTRIES];
+};
+
 /* Shared Packet Processor resources */
 struct mvpp2 {
 	/* Shared registers' base addresses */
@@ -788,6 +795,9 @@ struct mvpp2 {
 
 	/* Debugfs entries private data */
 	struct mvpp2_dbgfs_entries *dbgfs_entries;
+
+	/* RSS Indirection tables */
+	struct mvpp2_rss_table *rss_tables[MVPP22_N_RSS_TABLES];
 };
 
 struct mvpp2_pcpu_stats {
@@ -905,6 +915,7 @@ struct mvpp2_port {
 
 	phy_interface_t phy_interface;
 	struct phylink *phylink;
+	struct phylink_config phylink_config;
 	struct phy *comphy;
 
 	struct mvpp2_bm_pool *pool_long;
@@ -919,12 +930,14 @@ struct mvpp2_port {
 
 	u32 tx_time_coal;
 
-	/* RSS indirection table */
-	u32 indir[MVPP22_RSS_TABLE_ENTRIES];
-
 	/* List of steering rules active on that port */
-	struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_RULES];
+	struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_ENTRIES_PER_FLOW];
 	int n_rfs_rules;
+
+	/* Each port has its own view of the rss contexts, so that it can number
+	 * them from 0
+	 */
+	int rss_ctx[MVPP22_N_RSS_TABLES];
 };
 
 /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index a57d17ab91f0..e47c00c5f829 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -923,6 +923,12 @@ void mvpp2_cls_init(struct mvpp2 *priv)
 		mvpp2_cls_c2_write(priv, &c2);
 	}
 
+	/* Disable the FIFO stages in C2 engine, which are only used in BIST
+	 * mode
+	 */
+	mvpp2_write(priv, MVPP22_CLS_C2_TCAM_CTRL,
+		    MVPP22_CLS_C2_TCAM_BYPASS_FIFO);
+
 	mvpp2_cls_port_init_flows(priv);
 }
 
@@ -963,12 +969,22 @@ u32 mvpp2_cls_c2_hit_count(struct mvpp2 *priv, int c2_index)
 	return mvpp2_read(priv, MVPP22_CLS_C2_HIT_CTR);
 }
 
-static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
+static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port, u32 ctx)
 {
 	struct mvpp2_cls_c2_entry c2;
+	u8 qh, ql;
 
 	mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
 
+	/* The RxQ number is used to select the RSS table. It that case, we set
+	 * it to be the ctx number.
+	 */
+	qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+	ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+	c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+		     MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
 	c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
 
 	mvpp2_cls_c2_write(port->priv, &c2);
@@ -977,22 +993,45 @@ static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
 static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
 {
 	struct mvpp2_cls_c2_entry c2;
+	u8 qh, ql;
 
 	mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
 
+	/* Reset the default destination RxQ to the port's first rx queue. */
+	qh = (port->first_rxq >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+	ql = port->first_rxq & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+	c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+		      MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
 	c2.attr[2] &= ~MVPP22_CLS_C2_ATTR2_RSS_EN;
 
 	mvpp2_cls_c2_write(port->priv, &c2);
 }
 
-void mvpp22_port_rss_enable(struct mvpp2_port *port)
+static inline int mvpp22_rss_ctx(struct mvpp2_port *port, int port_rss_ctx)
+{
+	return port->rss_ctx[port_rss_ctx];
+}
+
+int mvpp22_port_rss_enable(struct mvpp2_port *port)
 {
-	mvpp2_rss_port_c2_enable(port);
+	if (mvpp22_rss_ctx(port, 0) < 0)
+		return -EINVAL;
+
+	mvpp2_rss_port_c2_enable(port, mvpp22_rss_ctx(port, 0));
+
+	return 0;
 }
 
-void mvpp22_port_rss_disable(struct mvpp2_port *port)
+int mvpp22_port_rss_disable(struct mvpp2_port *port)
 {
+	if (mvpp22_rss_ctx(port, 0) < 0)
+		return -EINVAL;
+
 	mvpp2_rss_port_c2_disable(port);
+
+	return 0;
 }
 
 static void mvpp22_port_c2_lookup_disable(struct mvpp2_port *port, int entry)
@@ -1029,7 +1068,7 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port,
 	struct flow_action_entry *act;
 	struct mvpp2_cls_c2_entry c2;
 	u8 qh, ql, pmap;
-	int index;
+	int index, ctx;
 
 	memset(&c2, 0, sizeof(c2));
 
@@ -1069,14 +1108,36 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port,
 		 */
 		c2.act = MVPP22_CLS_C2_ACT_COLOR(MVPP22_C2_COL_NO_UPD_LOCK);
 
+		/* Update RSS status after matching this entry */
+		if (act->queue.ctx)
+			c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
+
+		/* Always lock the RSS_EN decision. We might have high prio
+		 * rules steering to an RXQ, and a lower one steering to RSS,
+		 * we don't want the low prio RSS rule overwriting this flag.
+		 */
+		c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
+
 		/* Mark packet as "forwarded to software", needed for RSS */
 		c2.act |= MVPP22_CLS_C2_ACT_FWD(MVPP22_C2_FWD_SW_LOCK);
 
 		c2.act |= MVPP22_CLS_C2_ACT_QHIGH(MVPP22_C2_UPD_LOCK) |
 			   MVPP22_CLS_C2_ACT_QLOW(MVPP22_C2_UPD_LOCK);
 
-		qh = ((act->queue.index + port->first_rxq) >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
-		ql = (act->queue.index + port->first_rxq) & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+		if (act->queue.ctx) {
+			/* Get the global ctx number */
+			ctx = mvpp22_rss_ctx(port, act->queue.ctx);
+			if (ctx < 0)
+				return -EINVAL;
+
+			qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+			ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+		} else {
+			qh = ((act->queue.index + port->first_rxq) >> 3) &
+			      MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+			ql = (act->queue.index + port->first_rxq) &
+			      MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+		}
 
 		c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
 			      MVPP22_CLS_C2_ATTR0_QLOW(ql);
@@ -1196,6 +1257,13 @@ static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule)
 	if (act->id != FLOW_ACTION_QUEUE && act->id != FLOW_ACTION_DROP)
 		return -EOPNOTSUPP;
 
+	/* When both an RSS context and an queue index are set, the index
+	 * is considered as an offset to be added to the indirection table
+	 * entries. We don't support this, so reject this rule.
+	 */
+	if (act->queue.ctx && act->queue.index)
+		return -EOPNOTSUPP;
+
 	/* For now, only use the C2 engine which has a HEK size limited to 64
 	 * bits for TCAM matching.
 	 */
@@ -1212,7 +1280,7 @@ int mvpp2_ethtool_cls_rule_get(struct mvpp2_port *port,
 {
 	struct mvpp2_ethtool_fs *efs;
 
-	if (rxnfc->fs.location >= MVPP2_N_RFS_RULES)
+	if (rxnfc->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW)
 		return -EINVAL;
 
 	efs = port->rfs_rules[rxnfc->fs.location];
@@ -1232,8 +1300,7 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port,
 	struct mvpp2_ethtool_fs *efs, *old_efs;
 	int ret = 0;
 
-	if (info->fs.location >= 4 ||
-	    info->fs.location < 0)
+	if (info->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW)
 		return -EINVAL;
 
 	efs = kzalloc(sizeof(*efs), GFP_KERNEL);
@@ -1242,6 +1309,12 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port,
 
 	input.fs = &info->fs;
 
+	/* We need to manually set the rss_ctx, since this info isn't present
+	 * in info->fs
+	 */
+	if (info->fs.flow_type & FLOW_RSS)
+		input.rss_ctx = info->rss_context;
+
 	ethtool_rule = ethtool_rx_flow_rule_create(&input);
 	if (IS_ERR(ethtool_rule)) {
 		ret = PTR_ERR(ethtool_rule);
@@ -1328,19 +1401,160 @@ static inline u32 mvpp22_rxfh_indir(struct mvpp2_port *port, u32 rxq)
 	return port->first_rxq + ((rxq * nrxqs + rxq / cpus) % port->nrxqs);
 }
 
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table)
+static void mvpp22_rss_fill_table(struct mvpp2_port *port,
+				  struct mvpp2_rss_table *table,
+				  u32 rss_ctx)
 {
 	struct mvpp2 *priv = port->priv;
 	int i;
 
 	for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
-		u32 sel = MVPP22_RSS_INDEX_TABLE(table) |
+		u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
 			  MVPP22_RSS_INDEX_TABLE_ENTRY(i);
 		mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
 
 		mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY,
-			    mvpp22_rxfh_indir(port, port->indir[i]));
+			    mvpp22_rxfh_indir(port, table->indir[i]));
+	}
+}
+
+static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx)
+{
+	struct mvpp2 *priv = port->priv;
+	u32 ctx;
+
+	/* Find the first free RSS table */
+	for (ctx = 0; ctx < MVPP22_N_RSS_TABLES; ctx++) {
+		if (!priv->rss_tables[ctx])
+			break;
+	}
+
+	if (ctx == MVPP22_N_RSS_TABLES)
+		return -EINVAL;
+
+	priv->rss_tables[ctx] = kzalloc(sizeof(*priv->rss_tables[ctx]),
+					GFP_KERNEL);
+	if (!priv->rss_tables[ctx])
+		return -ENOMEM;
+
+	*rss_ctx = ctx;
+
+	/* Set the table width: replace the whole classifier Rx queue number
+	 * with the ones configured in RSS table entries.
+	 */
+	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(ctx));
+	mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+
+	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
+	mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
+
+	return 0;
+}
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *port_ctx)
+{
+	u32 rss_ctx;
+	int ret, i;
+
+	ret = mvpp22_rss_context_create(port, &rss_ctx);
+	if (ret)
+		return ret;
+
+	/* Find the first available context number in the port, starting from 1.
+	 * Context 0 on each port is reserved for the default context.
+	 */
+	for (i = 1; i < MVPP22_N_RSS_TABLES; i++) {
+		if (port->rss_ctx[i] < 0)
+			break;
+	}
+
+	if (i == MVPP22_N_RSS_TABLES)
+		return -EINVAL;
+
+	port->rss_ctx[i] = rss_ctx;
+	*port_ctx = i;
+
+	return 0;
+}
+
+static struct mvpp2_rss_table *mvpp22_rss_table_get(struct mvpp2 *priv,
+						    int rss_ctx)
+{
+	if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+		return NULL;
+
+	return priv->rss_tables[rss_ctx];
+}
+
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 port_ctx)
+{
+	struct mvpp2 *priv = port->priv;
+	struct ethtool_rxnfc *rxnfc;
+	int i, rss_ctx, ret;
+
+	rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+
+	if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+		return -EINVAL;
+
+	/* Invalidate any active classification rule that use this context */
+	for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
+		if (!port->rfs_rules[i])
+			continue;
+
+		rxnfc = &port->rfs_rules[i]->rxnfc;
+		if (!(rxnfc->fs.flow_type & FLOW_RSS) ||
+		    rxnfc->rss_context != port_ctx)
+			continue;
+
+		ret = mvpp2_ethtool_cls_rule_del(port, rxnfc);
+		if (ret) {
+			netdev_warn(port->dev,
+				    "couldn't remove classification rule %d associated to this context",
+				    rxnfc->fs.location);
+		}
 	}
+
+	kfree(priv->rss_tables[rss_ctx]);
+
+	priv->rss_tables[rss_ctx] = NULL;
+	port->rss_ctx[port_ctx] = -1;
+
+	return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 port_ctx,
+				  const u32 *indir)
+{
+	int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+	struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+								 rss_ctx);
+
+	if (!rss_table)
+		return -EINVAL;
+
+	memcpy(rss_table->indir, indir,
+	       MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+	mvpp22_rss_fill_table(port, rss_table, rss_ctx);
+
+	return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 port_ctx,
+				  u32 *indir)
+{
+	int rss_ctx =  mvpp22_rss_ctx(port, port_ctx);
+	struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+								 rss_ctx);
+
+	if (!rss_table)
+		return -EINVAL;
+
+	memcpy(indir, rss_table->indir,
+	       MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+	return 0;
 }
 
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info)
@@ -1424,32 +1638,32 @@ int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
 	return 0;
 }
 
-void mvpp22_port_rss_init(struct mvpp2_port *port)
+int mvpp22_port_rss_init(struct mvpp2_port *port)
 {
-	struct mvpp2 *priv = port->priv;
-	int i;
+	struct mvpp2_rss_table *table;
+	u32 context = 0;
+	int i, ret;
 
-	/* Set the table width: replace the whole classifier Rx queue number
-	 * with the ones configured in RSS table entries.
-	 */
-	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(port->id));
-	mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+	for (i = 0; i < MVPP22_N_RSS_TABLES; i++)
+		port->rss_ctx[i] = -1;
 
-	/* The default RxQ is used as a key to select the RSS table to use.
-	 * We use one RSS table per port.
-	 */
-	mvpp2_write(priv, MVPP22_RSS_INDEX,
-		    MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
-	mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
-		    MVPP22_RSS_TABLE_POINTER(port->id));
+	ret = mvpp22_rss_context_create(port, &context);
+	if (ret)
+		return ret;
+
+	table = mvpp22_rss_table_get(port->priv, context);
+	if (!table)
+		return -EINVAL;
+
+	port->rss_ctx[0] = context;
 
 	/* Configure the first table to evenly distribute the packets across
 	 * real Rx Queues. The table entries map a hash to a port Rx Queue.
 	 */
 	for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
-		port->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
+		table->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
 
-	mvpp22_rss_fill_table(port, port->id);
+	mvpp22_rss_fill_table(port, table, mvpp22_rss_ctx(port, 0));
 
 	/* Configure default flows */
 	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_IP4, MVPP22_CLS_HEK_IP4_2T);
@@ -1458,4 +1672,6 @@ void mvpp22_port_rss_init(struct mvpp2_port *port)
 	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_TCP6, MVPP22_CLS_HEK_IP6_5T);
 	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP4, MVPP22_CLS_HEK_IP4_5T);
 	mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP6, MVPP22_CLS_HEK_IP6_5T);
+
+	return 0;
 }
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
index 56b617375a65..26cc1176e758 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
@@ -249,11 +249,18 @@ struct mvpp2_cls_lookup_entry {
 	u32 data;
 };
 
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
-void mvpp22_port_rss_init(struct mvpp2_port *port);
+int mvpp22_port_rss_init(struct mvpp2_port *port);
 
-void mvpp22_port_rss_enable(struct mvpp2_port *port);
-void mvpp22_port_rss_disable(struct mvpp2_port *port);
+int mvpp22_port_rss_enable(struct mvpp2_port *port);
+int mvpp22_port_rss_disable(struct mvpp2_port *port);
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *rss_ctx);
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 rss_ctx);
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 rss_ctx,
+				  const u32 *indir);
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 rss_ctx,
+				  u32 *indir);
 
 int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 7a67e23a2c2b..772caf0bdecd 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -56,9 +56,9 @@ static struct {
 /* The prototype is added here to be used in start_dev when using ACPI. This
  * will be removed once phylink is used for all modes (dt+ACPI).
  */
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
 			     const struct phylink_link_state *state);
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
 			      phy_interface_t interface, struct phy_device *phy);
 
 /* Queue modes */
@@ -3237,9 +3237,9 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
 		struct phylink_link_state state = {
 			.interface = port->phy_interface,
 		};
-		mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state);
-		mvpp2_mac_link_up(port->dev, MLO_AN_INBAND, port->phy_interface,
-				  NULL);
+		mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state);
+		mvpp2_mac_link_up(&port->phylink_config, MLO_AN_INBAND,
+				  port->phy_interface, NULL);
 	}
 
 	netif_tx_start_all_queues(port->dev);
@@ -3954,7 +3954,7 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
 		ret = mvpp2_ethtool_cls_rule_get(port, info);
 		break;
 	case ETHTOOL_GRXCLSRLALL:
-		for (i = 0; i < MVPP2_N_RFS_RULES; i++) {
+		for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
 			if (port->rfs_rules[i])
 				rules[loc++] = i;
 		}
@@ -4000,24 +4000,25 @@ static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 				  u8 *hfunc)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
+	int ret = 0;
 
 	if (!mvpp22_rss_is_supported())
 		return -EOPNOTSUPP;
 
 	if (indir)
-		memcpy(indir, port->indir,
-		       ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+		ret = mvpp22_port_rss_ctx_indir_get(port, 0, indir);
 
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_CRC32;
 
-	return 0;
+	return ret;
 }
 
 static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
 				  const u8 *key, const u8 hfunc)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
+	int ret = 0;
 
 	if (!mvpp22_rss_is_supported())
 		return -EOPNOTSUPP;
@@ -4028,15 +4029,58 @@ static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
 	if (key)
 		return -EOPNOTSUPP;
 
-	if (indir) {
-		memcpy(port->indir, indir,
-		       ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
-		mvpp22_rss_fill_table(port, port->id);
-	}
+	if (indir)
+		ret = mvpp22_port_rss_ctx_indir_set(port, 0, indir);
 
-	return 0;
+	return ret;
+}
+
+static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir,
+					  u8 *key, u8 *hfunc, u32 rss_context)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int ret = 0;
+
+	if (!mvpp22_rss_is_supported())
+		return -EOPNOTSUPP;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_CRC32;
+
+	if (indir)
+		ret = mvpp22_port_rss_ctx_indir_get(port, rss_context, indir);
+
+	return ret;
 }
 
+static int mvpp2_ethtool_set_rxfh_context(struct net_device *dev,
+					  const u32 *indir, const u8 *key,
+					  const u8 hfunc, u32 *rss_context,
+					  bool delete)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int ret;
+
+	if (!mvpp22_rss_is_supported())
+		return -EOPNOTSUPP;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
+		return -EOPNOTSUPP;
+
+	if (key)
+		return -EOPNOTSUPP;
+
+	if (delete)
+		return mvpp22_port_rss_ctx_delete(port, *rss_context);
+
+	if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+		ret = mvpp22_port_rss_ctx_create(port, rss_context);
+		if (ret)
+			return ret;
+	}
+
+	return mvpp22_port_rss_ctx_indir_set(port, *rss_context, indir);
+}
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
@@ -4073,7 +4117,8 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
 	.get_rxfh_indir_size	= mvpp2_ethtool_get_rxfh_indir_size,
 	.get_rxfh		= mvpp2_ethtool_get_rxfh,
 	.set_rxfh		= mvpp2_ethtool_set_rxfh,
-
+	.get_rxfh_context	= mvpp2_ethtool_get_rxfh_context,
+	.set_rxfh_context	= mvpp2_ethtool_set_rxfh_context,
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -4416,11 +4461,12 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
 	eth_hw_addr_random(dev);
 }
 
-static void mvpp2_phylink_validate(struct net_device *dev,
+static void mvpp2_phylink_validate(struct phylink_config *config,
 				   unsigned long *supported,
 				   struct phylink_link_state *state)
 {
-	struct mvpp2_port *port = netdev_priv(dev);
+	struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+					       phylink_config);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
 	/* Invalid combinations */
@@ -4544,10 +4590,11 @@ static void mvpp2_gmac_link_state(struct mvpp2_port *port,
 		state->pause |= MLO_PAUSE_TX;
 }
 
-static int mvpp2_phylink_mac_link_state(struct net_device *dev,
+static int mvpp2_phylink_mac_link_state(struct phylink_config *config,
 					struct phylink_link_state *state)
 {
-	struct mvpp2_port *port = netdev_priv(dev);
+	struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+					       phylink_config);
 
 	if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
 		u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG);
@@ -4563,9 +4610,10 @@ static int mvpp2_phylink_mac_link_state(struct net_device *dev,
 	return 1;
 }
 
-static void mvpp2_mac_an_restart(struct net_device *dev)
+static void mvpp2_mac_an_restart(struct phylink_config *config)
 {
-	struct mvpp2_port *port = netdev_priv(dev);
+	struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+					       phylink_config);
 	u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 
 	writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN,
@@ -4750,9 +4798,10 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
 	}
 }
 
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
 			     const struct phylink_link_state *state)
 {
+	struct net_device *dev = to_net_dev(config->dev);
 	struct mvpp2_port *port = netdev_priv(dev);
 	bool change_interface = port->phy_interface != state->interface;
 
@@ -4792,9 +4841,10 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
 	mvpp2_port_enable(port);
 }
 
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
 			      phy_interface_t interface, struct phy_device *phy)
 {
+	struct net_device *dev = to_net_dev(config->dev);
 	struct mvpp2_port *port = netdev_priv(dev);
 	u32 val;
 
@@ -4819,9 +4869,10 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
 	netif_tx_wake_all_queues(dev);
 }
 
-static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode,
-				phy_interface_t interface)
+static void mvpp2_mac_link_down(struct phylink_config *config,
+				unsigned int mode, phy_interface_t interface)
 {
+	struct net_device *dev = to_net_dev(config->dev);
 	struct mvpp2_port *port = netdev_priv(dev);
 	u32 val;
 
@@ -5078,8 +5129,11 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
 	/* Phylink isn't used w/ ACPI as of now */
 	if (port_node) {
-		phylink = phylink_create(dev, port_fwnode, phy_mode,
-					 &mvpp2_phylink_ops);
+		port->phylink_config.dev = &dev->dev;
+		port->phylink_config.type = PHYLINK_NETDEV;
+
+		phylink = phylink_create(&port->phylink_config, port_fwnode,
+					 phy_mode, &mvpp2_phylink_ops);
 		if (IS_ERR(phylink)) {
 			err = PTR_ERR(phylink);
 			goto err_free_port_pcpu;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 6ee6de7f0160..182762898361 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1098,6 +1098,12 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 			goto err_register_params;
 	}
 
+	if (mlxsw_driver->init) {
+		err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+		if (err)
+			goto err_driver_init;
+	}
+
 	err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
 	if (err)
 		goto err_hwmon_init;
@@ -1107,22 +1113,17 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	if (err)
 		goto err_thermal_init;
 
-	if (mlxsw_driver->init) {
-		err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
-		if (err)
-			goto err_driver_init;
-	}
-
 	if (mlxsw_driver->params_register && !reload)
 		devlink_params_publish(devlink);
 
 	return 0;
 
-err_driver_init:
-	mlxsw_thermal_fini(mlxsw_core->thermal);
 err_thermal_init:
 	mlxsw_hwmon_fini(mlxsw_core->hwmon);
 err_hwmon_init:
+	if (mlxsw_core->driver->fini)
+		mlxsw_core->driver->fini(mlxsw_core);
+err_driver_init:
 	if (mlxsw_driver->params_unregister && !reload)
 		mlxsw_driver->params_unregister(mlxsw_core);
 err_register_params:
@@ -1187,10 +1188,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 
 	if (mlxsw_core->driver->params_unregister && !reload)
 		devlink_params_unpublish(devlink);
-	if (mlxsw_core->driver->fini)
-		mlxsw_core->driver->fini(mlxsw_core);
 	mlxsw_thermal_fini(mlxsw_core->thermal);
 	mlxsw_hwmon_fini(mlxsw_core->hwmon);
+	if (mlxsw_core->driver->fini)
+		mlxsw_core->driver->fini(mlxsw_core);
 	if (mlxsw_core->driver->params_unregister && !reload)
 		mlxsw_core->driver->params_unregister(mlxsw_core);
 	if (!reload)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 72539a9a3847..d2c7ce67c300 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -92,33 +92,20 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
 		u16 temp;
 	} temp_thresh;
 	char mcia_pl[MLXSW_REG_MCIA_LEN] = {0};
-	char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
-	u16 module_temp;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	unsigned int module_temp;
 	bool qsfp;
 	int err;
 
-	mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
-			    1);
-	err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl);
+	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+			    false, false);
+	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err)
 		return err;
-
-	/* Don't read temperature thresholds for module with no valid info. */
-	mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL);
-	switch (module_temp) {
-	case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */
-	case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
-	case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
-	case MLXSW_REG_MTBR_INDEX_NA:
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL);
+	if (!module_temp) {
 		*temp = 0;
 		return 0;
-	default:
-		/* Do not consider thresholds for zero temperature. */
-		if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) {
-			*temp = 0;
-			return 0;
-		}
-		break;
 	}
 
 	/* Read Free Side Device Temperature Thresholds from page 03h
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 496dc904c5ed..056e3f55ae6c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -23,6 +23,14 @@ struct mlxsw_hwmon_attr {
 	char name[32];
 };
 
+static int mlxsw_hwmon_get_attr_index(int index, int count)
+{
+	if (index >= count)
+		return index % count + MLXSW_REG_MTMP_GBOX_INDEX_MIN;
+
+	return index;
+}
+
 struct mlxsw_hwmon {
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
@@ -33,6 +41,7 @@ struct mlxsw_hwmon {
 	struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
 	unsigned int attrs_count;
 	u8 sensor_count;
+	u8 module_sensor_count;
 };
 
 static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
@@ -44,10 +53,12 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
 	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
 	unsigned int temp;
+	int index;
 	int err;
 
-	mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
-			    false, false);
+	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+					   mlxsw_hwmon->module_sensor_count);
+	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
 	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
@@ -66,10 +77,12 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
 	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
 	unsigned int temp_max;
+	int index;
 	int err;
 
-	mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
-			    false, false);
+	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+					   mlxsw_hwmon->module_sensor_count);
+	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
 	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
@@ -88,6 +101,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
 	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
 	unsigned long val;
+	int index;
 	int err;
 
 	err = kstrtoul(buf, 10, &val);
@@ -96,7 +110,9 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
 	if (val != 1)
 		return -EINVAL;
 
-	mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true);
+	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+					   mlxsw_hwmon->module_sensor_count);
+	mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true);
 	err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n");
@@ -198,38 +214,18 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev,
 	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
 			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
 	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
-	char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
-	u16 temp;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	unsigned int temp;
 	u8 module;
 	int err;
 
 	module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
-	mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
-			    1);
-	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
-	if (err) {
-		dev_err(dev, "Failed to query module temperature sensor\n");
+	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+			    false, false);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err)
 		return err;
-	}
-
-	mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
-	/* Update status and temperature cache. */
-	switch (temp) {
-	case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
-	case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
-	case MLXSW_REG_MTBR_INDEX_NA:
-		temp = 0;
-		break;
-	case MLXSW_REG_MTBR_BAD_SENS_INFO:
-		/* Untrusted cable is connected. Reading temperature from its
-		 * sensor is faulty.
-		 */
-		temp = 0;
-		break;
-	default:
-		temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
-		break;
-	}
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
 
 	return sprintf(buf, "%u\n", temp);
 }
@@ -333,6 +329,20 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev,
 		       mlwsw_hwmon_attr->type_index);
 }
 
+static ssize_t
+mlxsw_hwmon_gbox_temp_label_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	int index = mlwsw_hwmon_attr->type_index -
+		    mlxsw_hwmon->module_sensor_count + 1;
+
+	return sprintf(buf, "gearbox %03u\n", index);
+}
+
 enum mlxsw_hwmon_attr_type {
 	MLXSW_HWMON_ATTR_TYPE_TEMP,
 	MLXSW_HWMON_ATTR_TYPE_TEMP_MAX,
@@ -345,6 +355,7 @@ enum mlxsw_hwmon_attr_type {
 	MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
 	MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
 	MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
+	MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
 };
 
 static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
@@ -428,6 +439,13 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
 		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
 			 "temp%u_label", num + 1);
 		break;
+	case MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL:
+		mlxsw_hwmon_attr->dev_attr.show =
+			mlxsw_hwmon_gbox_temp_label_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_label", num + 1);
+		break;
 	default:
 		WARN_ON(1);
 	}
@@ -556,6 +574,54 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
 				     index, index);
 		index++;
 	}
+	mlxsw_hwmon->module_sensor_count = index;
+
+	return 0;
+}
+
+static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+	int index, max_index, sensor_index;
+	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	u8 gbox_num;
+	int err;
+
+	mlxsw_reg_mgpir_pack(mgpir_pl);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL);
+	if (!gbox_num)
+		return 0;
+
+	index = mlxsw_hwmon->module_sensor_count;
+	max_index = mlxsw_hwmon->module_sensor_count + gbox_num;
+	while (index < max_index) {
+		sensor_index = index % mlxsw_hwmon->module_sensor_count +
+			       MLXSW_REG_MTMP_GBOX_INDEX_MIN;
+		mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true);
+		err = mlxsw_reg_write(mlxsw_hwmon->core,
+				      MLXSW_REG(mtmp), mtmp_pl);
+		if (err) {
+			dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n",
+				sensor_index);
+			return err;
+		}
+		mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP,
+				     index, index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, index,
+				     index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_RST, index,
+				     index);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
+				     index, index);
+		index++;
+	}
 
 	return 0;
 }
@@ -586,6 +652,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 	if (err)
 		goto err_temp_module_init;
 
+	err = mlxsw_hwmon_gearbox_init(mlxsw_hwmon);
+	if (err)
+		goto err_temp_gearbox_init;
+
 	mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
 	mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
 
@@ -602,6 +672,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 	return 0;
 
 err_hwmon_register:
+err_temp_gearbox_init:
 err_temp_module_init:
 err_fans_init:
 err_temp_init:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index d3e851e7ca72..cfab0e330a47 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -449,39 +449,31 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
 	struct mlxsw_thermal_module *tz = tzdev->devdata;
 	struct mlxsw_thermal *thermal = tz->parent;
 	struct device *dev = thermal->bus_info->dev;
-	char mtbr_pl[MLXSW_REG_MTBR_LEN];
-	u16 temp;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	unsigned int temp;
 	int err;
 
 	/* Read module temperature. */
-	mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX +
-			    tz->module, 1);
-	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
-	/* Update temperature. */
-	switch (temp) {
-	case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
-	case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
-	case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */
-	case MLXSW_REG_MTBR_BAD_SENS_INFO:
+	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
+			    tz->module, false, false);
+	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err) {
+		/* Do not return error - in case of broken module's sensor
+		 * it will cause error message flooding.
+		 */
 		temp = 0;
-		break;
-	default:
-		temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
-		/* Reset all trip point. */
-		mlxsw_thermal_module_trips_reset(tz);
-		/* Update trip points. */
-		err = mlxsw_thermal_module_trips_update(dev, thermal->core,
-							tz);
-		if (err)
-			return err;
-		break;
+		*p_temp = (int) temp;
+		return 0;
 	}
-
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
 	*p_temp = (int) temp;
+
+	if (!temp)
+		return 0;
+
+	/* Update trip points. */
+	mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 06aea1999518..95f408d0e103 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -43,11 +43,10 @@
 #define MLXSW_I2C_PREP_SIZE		(MLXSW_I2C_ADDR_WIDTH + 28)
 #define MLXSW_I2C_MBOX_SIZE		20
 #define MLXSW_I2C_MBOX_OUT_PARAM_OFF	12
-#define MLXSW_I2C_MAX_BUFF_SIZE		32
 #define MLXSW_I2C_MBOX_OFFSET_BITS	20
 #define MLXSW_I2C_MBOX_SIZE_BITS	12
 #define MLXSW_I2C_ADDR_BUF_SIZE		4
-#define MLXSW_I2C_BLK_MAX		32
+#define MLXSW_I2C_BLK_DEF		32
 #define MLXSW_I2C_RETRY			5
 #define MLXSW_I2C_TIMEOUT_MSECS		5000
 #define MLXSW_I2C_MAX_DATA_SIZE		256
@@ -62,6 +61,7 @@
  * @dev: I2C device;
  * @core: switch core pointer;
  * @bus_info: bus info block;
+ * @block_size: maximum block size allowed to pass to under layer;
  */
 struct mlxsw_i2c {
 	struct {
@@ -74,6 +74,7 @@ struct mlxsw_i2c {
 	struct device *dev;
 	struct mlxsw_core *core;
 	struct mlxsw_bus_info bus_info;
+	u16 block_size;
 };
 
 #define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) {	\
@@ -315,20 +316,26 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
 	struct i2c_client *client = to_i2c_client(dev);
 	struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
 	unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
-	u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE];
 	int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j;
 	unsigned long end;
+	u8 *tran_buf;
 	struct i2c_msg write_tran =
-		MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE);
+		MLXSW_I2C_WRITE_MSG(client, NULL, MLXSW_I2C_PUSH_CMD_SIZE);
 	int err;
 
+	tran_buf = kmalloc(mlxsw_i2c->block_size + MLXSW_I2C_ADDR_BUF_SIZE,
+			   GFP_KERNEL);
+	if (!tran_buf)
+		return -ENOMEM;
+
+	write_tran.buf = tran_buf;
 	for (i = 0; i < num; i++) {
-		chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ?
-			     MLXSW_I2C_BLK_MAX : in_mbox_size;
+		chunk_size = (in_mbox_size > mlxsw_i2c->block_size) ?
+			     mlxsw_i2c->block_size : in_mbox_size;
 		write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size;
 		mlxsw_i2c_set_slave_addr(tran_buf, off);
 		memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox +
-		       MLXSW_I2C_BLK_MAX * i, chunk_size);
+		       mlxsw_i2c->block_size * i, chunk_size);
 
 		j = 0;
 		end = jiffies + timeout;
@@ -342,9 +349,10 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
 			 (j++ < MLXSW_I2C_RETRY));
 
 		if (err != 1) {
-			if (!err)
+			if (!err) {
 				err = -EIO;
-			return err;
+				goto mlxsw_i2c_write_exit;
+			}
 		}
 
 		off += chunk_size;
@@ -355,24 +363,27 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
 	err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0);
 	if (err) {
 		dev_err(&client->dev, "Could not start transaction");
-		return -EIO;
+		err = -EIO;
+		goto mlxsw_i2c_write_exit;
 	}
 
 	/* Wait until go bit is cleared. */
 	err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status);
 	if (err) {
 		dev_err(&client->dev, "HW semaphore is not released");
-		return err;
+		goto mlxsw_i2c_write_exit;
 	}
 
 	/* Validate transaction completion status. */
 	if (*p_status) {
 		dev_err(&client->dev, "Bad transaction completion status %x\n",
 			*p_status);
-		return -EIO;
+		err = -EIO;
 	}
 
-	return 0;
+mlxsw_i2c_write_exit:
+	kfree(tran_buf);
+	return err;
 }
 
 /* Routine executes I2C command. */
@@ -395,8 +406,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
 
 	if (in_mbox) {
 		reg_size = mlxsw_i2c_get_reg_size(in_mbox);
-		num = reg_size / MLXSW_I2C_BLK_MAX;
-		if (reg_size % MLXSW_I2C_BLK_MAX)
+		num = reg_size / mlxsw_i2c->block_size;
+		if (reg_size % mlxsw_i2c->block_size)
 			num++;
 
 		if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
@@ -416,7 +427,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
 	} else {
 		/* No input mailbox is case of initialization query command. */
 		reg_size = MLXSW_I2C_MAX_DATA_SIZE;
-		num = reg_size / MLXSW_I2C_BLK_MAX;
+		num = reg_size / mlxsw_i2c->block_size;
 
 		if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
 			dev_err(&client->dev, "Could not acquire lock");
@@ -432,8 +443,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
 	/* Send read transaction to get output mailbox content. */
 	read_tran[1].buf = out_mbox;
 	for (i = 0; i < num; i++) {
-		chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ?
-			     MLXSW_I2C_BLK_MAX : reg_size;
+		chunk_size = (reg_size > mlxsw_i2c->block_size) ?
+			     mlxsw_i2c->block_size : reg_size;
 		read_tran[1].len = chunk_size;
 		mlxsw_i2c_set_slave_addr(tran_buf, off);
 
@@ -509,8 +520,20 @@ mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 	if (!mbox)
 		return -ENOMEM;
 
+	err = mlxsw_cmd_query_fw(mlxsw_core, mbox);
+	if (err)
+		goto mbox_put;
+
+	mlxsw_i2c->bus_info.fw_rev.major =
+		mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox);
+	mlxsw_i2c->bus_info.fw_rev.minor =
+		mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox);
+	mlxsw_i2c->bus_info.fw_rev.subminor =
+		mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox);
+
 	err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
 
+mbox_put:
 	mlxsw_cmd_mbox_free(mbox);
 	return err;
 }
@@ -534,6 +557,7 @@ static const struct mlxsw_bus mlxsw_i2c_bus = {
 static int mlxsw_i2c_probe(struct i2c_client *client,
 			   const struct i2c_device_id *id)
 {
+	const struct i2c_adapter_quirks *quirks = client->adapter->quirks;
 	struct mlxsw_i2c *mlxsw_i2c;
 	u8 status;
 	int err;
@@ -542,6 +566,22 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
 	if (!mlxsw_i2c)
 		return -ENOMEM;
 
+	if (quirks) {
+		if ((quirks->max_read_len &&
+		     quirks->max_read_len < MLXSW_I2C_BLK_DEF) ||
+		    (quirks->max_write_len &&
+		     quirks->max_write_len < MLXSW_I2C_BLK_DEF)) {
+			dev_err(&client->dev, "Insufficient transaction buffer length\n");
+			return -EOPNOTSUPP;
+		}
+
+		mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF,
+					      min_t(u16, quirks->max_read_len,
+						    quirks->max_write_len));
+	} else {
+		mlxsw_i2c->block_size = MLXSW_I2C_BLK_DEF;
+	}
+
 	i2c_set_clientdata(client, mlxsw_i2c);
 	mutex_init(&mlxsw_i2c->cmd.lock);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index cf2114273b72..471b0ca6d69a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -67,6 +67,23 @@ static const struct net_device_ops mlxsw_m_port_netdev_ops = {
 	.ndo_get_devlink_port	= mlxsw_m_port_get_devlink_port,
 };
 
+static void mlxsw_m_module_get_drvinfo(struct net_device *dev,
+				       struct ethtool_drvinfo *drvinfo)
+{
+	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev);
+	struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m;
+
+	strlcpy(drvinfo->driver, mlxsw_m->bus_info->device_kind,
+		sizeof(drvinfo->driver));
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%d.%d.%d",
+		 mlxsw_m->bus_info->fw_rev.major,
+		 mlxsw_m->bus_info->fw_rev.minor,
+		 mlxsw_m->bus_info->fw_rev.subminor);
+	strlcpy(drvinfo->bus_info, mlxsw_m->bus_info->device_name,
+		sizeof(drvinfo->bus_info));
+}
+
 static int mlxsw_m_get_module_info(struct net_device *netdev,
 				   struct ethtool_modinfo *modinfo)
 {
@@ -88,6 +105,7 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee,
 }
 
 static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
+	.get_drvinfo		= mlxsw_m_module_get_drvinfo,
 	.get_module_info	= mlxsw_m_get_module_info,
 	.get_module_eeprom	= mlxsw_m_get_module_eeprom,
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index e8002bfc1e8f..7348c5a5ad6a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -8039,13 +8039,15 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7);
 
 MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN);
 
+#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64
+#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256
 /* reg_mtmp_sensor_index
  * Sensors index to access.
  * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially
  * (module 0 is mapped to sensor_index 64).
  * Access: Index
  */
-MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12);
 
 /* Convert to milli degrees Celsius */
 #define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125)
@@ -8107,7 +8109,7 @@ MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16);
  */
 MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE);
 
-static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index,
+static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index,
 				       bool max_temp_enable,
 				       bool max_temp_reset)
 {
@@ -8156,7 +8158,7 @@ MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN);
  * 64-127 are mapped to the SFP+/QSFP modules sequentially).
  * Access: Index
  */
-MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 12);
 
 /* reg_mtbr_num_rec
  * Request: Number of records to read
@@ -8183,7 +8185,7 @@ MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16,
 MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16,
 		     MLXSW_REG_MTBR_REC_LEN, 0x00, false);
 
-static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index,
+static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index,
 				       u8 num_rec)
 {
 	MLXSW_REG_ZERO(mtbr, payload);
@@ -9043,6 +9045,57 @@ static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth,
 	mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
 }
 
+/* MGPIR - Management General Peripheral Information Register
+ * ----------------------------------------------------------
+ * MGPIR register allows software to query the hardware and
+ * firmware general information of peripheral entities.
+ */
+#define MLXSW_REG_MGPIR_ID 0x9100
+#define MLXSW_REG_MGPIR_LEN 0xA0
+
+MLXSW_REG_DEFINE(mgpir, MLXSW_REG_MGPIR_ID, MLXSW_REG_MGPIR_LEN);
+
+enum mlxsw_reg_mgpir_device_type {
+	MLXSW_REG_MGPIR_DEVICE_TYPE_NONE,
+	MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE,
+};
+
+/* device_type
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4);
+
+/* devices_per_flash
+ * Number of devices of device_type per flash (can be shared by few devices).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
+
+/* num_of_devices
+ * Number of devices of device_type.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
+
+static inline void mlxsw_reg_mgpir_pack(char *payload)
+{
+	MLXSW_REG_ZERO(mgpir, payload);
+}
+
+static inline void
+mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
+		       enum mlxsw_reg_mgpir_device_type *device_type,
+		       u8 *devices_per_flash)
+{
+	if (num_of_devices)
+		*num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
+	if (device_type)
+		*device_type = mlxsw_reg_mgpir_device_type_get(payload);
+	if (devices_per_flash)
+		*devices_per_flash =
+				mlxsw_reg_mgpir_devices_per_flash_get(payload);
+}
+
 /* TNGCR - Tunneling NVE General Configuration Register
  * ----------------------------------------------------
  * The TNGCR register is used for setting up the NVE Tunneling configuration.
@@ -10058,6 +10111,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(mcda),
 	MLXSW_REG(mgpc),
 	MLXSW_REG(mprs),
+	MLXSW_REG(mgpir),
 	MLXSW_REG(tngcr),
 	MLXSW_REG(tnumt),
 	MLXSW_REG(tnqcr),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1cda8a248b12..0ec52be7cc33 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2886,7 +2886,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
 		return false;
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
+		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
 		struct in6_addr *gw;
 		int ifindex, weight;
 
@@ -2958,7 +2958,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
 	struct net_device *dev;
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev;
+		dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
 		val ^= dev->ifindex;
 	}
 
@@ -3960,9 +3960,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
-		if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev &&
+		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
-				    &rt->fib6_nh.fib_nh_gw6))
+				    &rt->fib6_nh->fib_nh_gw6))
 			return nh;
 		continue;
 	}
@@ -4022,13 +4022,13 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
-				 list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD;
+				 list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
 		return;
 	}
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
-		struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
+		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
 		struct mlxsw_sp_nexthop *nh;
 
 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
@@ -4050,7 +4050,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
-		rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD;
+		rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -4928,7 +4928,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
 {
 	/* RTF_CACHE routes are ignored */
-	return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
+	return !(rt->fib6_flags & RTF_ADDRCONF) &&
+		rt->fib6_nh->fib_nh_gw_family;
 }
 
 static struct fib6_info *
@@ -4987,8 +4988,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
 					const struct fib6_info *rt,
 					enum mlxsw_sp_ipip_type *ret)
 {
-	return rt->fib6_nh.fib_nh_dev &&
-	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret);
+	return rt->fib6_nh->fib_nh_dev &&
+	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
 }
 
 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
@@ -4998,7 +4999,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
-	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
+	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
 	struct mlxsw_sp_rif *rif;
 	int err;
 
@@ -5041,11 +5042,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_nexthop *nh,
 				  const struct fib6_info *rt)
 {
-	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
+	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
 
 	nh->nh_grp = nh_grp;
-	nh->nh_weight = rt->fib6_nh.fib_nh_weight;
-	memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr));
+	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
+	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
 
 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -5068,7 +5069,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
 				    const struct fib6_info *rt)
 {
-	return rt->fib6_nh.fib_nh_gw_family ||
+	return rt->fib6_nh->fib_nh_gw_family ||
 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
 }
 
diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile
index cb52a3b128ae..5e694dc1f7f8 100644
--- a/drivers/net/ethernet/mscc/Makefile
+++ b/drivers/net/ethernet/mscc/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: (GPL-2.0 OR MIT)
 obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o
 mscc_ocelot_common-y := ocelot.o ocelot_io.o
-mscc_ocelot_common-y += ocelot_regs.o
+mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o
 obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 02ad11e0b0d8..3e692755c02f 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -884,6 +884,13 @@ static int ocelot_set_features(struct net_device *dev,
 	struct ocelot_port *port = netdev_priv(dev);
 	netdev_features_t changed = dev->features ^ features;
 
+	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) &&
+	    port->tc.offload_cnt) {
+		netdev_err(dev,
+			   "Cannot disable HW TC offload while offloads active\n");
+		return -EBUSY;
+	}
+
 	if (changed & NETIF_F_HW_VLAN_CTAG_FILTER)
 		ocelot_vlan_mode(port, features);
 
@@ -917,6 +924,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = {
 	.ndo_vlan_rx_kill_vid		= ocelot_vlan_rx_kill_vid,
 	.ndo_set_features		= ocelot_set_features,
 	.ndo_get_port_parent_id		= ocelot_get_port_parent_id,
+	.ndo_setup_tc			= ocelot_setup_tc,
 };
 
 static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data)
@@ -1636,8 +1644,9 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
 	dev->netdev_ops = &ocelot_port_netdev_ops;
 	dev->ethtool_ops = &ocelot_ethtool_ops;
 
-	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS;
-	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS |
+		NETIF_F_HW_TC;
+	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC;
 
 	memcpy(dev->dev_addr, ocelot->base_mac, ETH_ALEN);
 	dev->dev_addr[ETH_ALEN - 1] += port;
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index 541fe41e60b0..04d87a7ecaba 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -22,6 +22,7 @@
 #include "ocelot_rew.h"
 #include "ocelot_sys.h"
 #include "ocelot_qs.h"
+#include "ocelot_tc.h"
 
 #define PGID_AGGR    64
 #define PGID_SRC     80
@@ -454,6 +455,8 @@ struct ocelot_port {
 
 	phy_interface_t phy_mode;
 	struct phy *serdes;
+
+	struct ocelot_port_tc tc;
 };
 
 u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
diff --git a/drivers/net/ethernet/mscc/ocelot_police.c b/drivers/net/ethernet/mscc/ocelot_police.c
new file mode 100644
index 000000000000..701e82dd749a
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_police.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#include "ocelot_police.h"
+
+enum mscc_qos_rate_mode {
+	MSCC_QOS_RATE_MODE_DISABLED, /* Policer/shaper disabled */
+	MSCC_QOS_RATE_MODE_LINE, /* Measure line rate in kbps incl. IPG */
+	MSCC_QOS_RATE_MODE_DATA, /* Measures data rate in kbps excl. IPG */
+	MSCC_QOS_RATE_MODE_FRAME, /* Measures frame rate in fps */
+	__MSCC_QOS_RATE_MODE_END,
+	NUM_MSCC_QOS_RATE_MODE = __MSCC_QOS_RATE_MODE_END,
+	MSCC_QOS_RATE_MODE_MAX = __MSCC_QOS_RATE_MODE_END - 1,
+};
+
+/* Types for ANA:POL[0-192]:POL_MODE_CFG.FRM_MODE */
+#define POL_MODE_LINERATE   0 /* Incl IPG. Unit: 33 1/3 kbps, 4096 bytes */
+#define POL_MODE_DATARATE   1 /* Excl IPG. Unit: 33 1/3 kbps, 4096 bytes  */
+#define POL_MODE_FRMRATE_HI 2 /* Unit: 33 1/3 fps, 32.8 frames */
+#define POL_MODE_FRMRATE_LO 3 /* Unit: 1/3 fps, 0.3 frames */
+
+/* Policer indexes */
+#define POL_IX_PORT    0    /* 0-11    : Port policers */
+#define POL_IX_QUEUE   32   /* 32-127  : Queue policers  */
+
+/* Default policer order */
+#define POL_ORDER 0x1d3 /* Ocelot policer order: Serial (QoS -> Port -> VCAP) */
+
+struct qos_policer_conf {
+	enum mscc_qos_rate_mode mode;
+	bool dlb; /* Enable DLB (dual leaky bucket mode */
+	bool cf;  /* Coupling flag (ignored in SLB mode) */
+	u32  cir; /* CIR in kbps/fps (ignored in SLB mode) */
+	u32  cbs; /* CBS in bytes/frames (ignored in SLB mode) */
+	u32  pir; /* PIR in kbps/fps */
+	u32  pbs; /* PBS in bytes/frames */
+	u8   ipg; /* Size of IPG when MSCC_QOS_RATE_MODE_LINE is chosen */
+};
+
+static int qos_policer_conf_set(struct ocelot_port *port, u32 pol_ix,
+				struct qos_policer_conf *conf)
+{
+	u32 cf = 0, cir_ena = 0, frm_mode = POL_MODE_LINERATE;
+	u32 cir = 0, cbs = 0, pir = 0, pbs = 0;
+	bool cir_discard = 0, pir_discard = 0;
+	struct ocelot *ocelot = port->ocelot;
+	u32 pbs_max = 0, cbs_max = 0;
+	u8 ipg = 20;
+	u32 value;
+
+	pir = conf->pir;
+	pbs = conf->pbs;
+
+	switch (conf->mode) {
+	case MSCC_QOS_RATE_MODE_LINE:
+	case MSCC_QOS_RATE_MODE_DATA:
+		if (conf->mode == MSCC_QOS_RATE_MODE_LINE) {
+			frm_mode = POL_MODE_LINERATE;
+			ipg = min_t(u8, GENMASK(4, 0), conf->ipg);
+		} else {
+			frm_mode = POL_MODE_DATARATE;
+		}
+		if (conf->dlb) {
+			cir_ena = 1;
+			cir = conf->cir;
+			cbs = conf->cbs;
+			if (cir == 0 && cbs == 0) {
+				/* Discard cir frames */
+				cir_discard = 1;
+			} else {
+				cir = DIV_ROUND_UP(cir, 100);
+				cir *= 3; /* 33 1/3 kbps */
+				cbs = DIV_ROUND_UP(cbs, 4096);
+				cbs = (cbs ? cbs : 1); /* No zero burst size */
+				cbs_max = 60; /* Limit burst size */
+				cf = conf->cf;
+				if (cf)
+					pir += conf->cir;
+			}
+		}
+		if (pir == 0 && pbs == 0) {
+			/* Discard PIR frames */
+			pir_discard = 1;
+		} else {
+			pir = DIV_ROUND_UP(pir, 100);
+			pir *= 3;  /* 33 1/3 kbps */
+			pbs = DIV_ROUND_UP(pbs, 4096);
+			pbs = (pbs ? pbs : 1); /* No zero burst size */
+			pbs_max = 60; /* Limit burst size */
+		}
+		break;
+	case MSCC_QOS_RATE_MODE_FRAME:
+		if (pir >= 100) {
+			frm_mode = POL_MODE_FRMRATE_HI;
+			pir = DIV_ROUND_UP(pir, 100);
+			pir *= 3;  /* 33 1/3 fps */
+			pbs = (pbs * 10) / 328; /* 32.8 frames */
+			pbs = (pbs ? pbs : 1); /* No zero burst size */
+			pbs_max = GENMASK(6, 0); /* Limit burst size */
+		} else {
+			frm_mode = POL_MODE_FRMRATE_LO;
+			if (pir == 0 && pbs == 0) {
+				/* Discard all frames */
+				pir_discard = 1;
+				cir_discard = 1;
+			} else {
+				pir *= 3; /* 1/3 fps */
+				pbs = (pbs * 10) / 3; /* 0.3 frames */
+				pbs = (pbs ? pbs : 1); /* No zero burst size */
+				pbs_max = 61; /* Limit burst size */
+			}
+		}
+		break;
+	default: /* MSCC_QOS_RATE_MODE_DISABLED */
+		/* Disable policer using maximum rate and zero burst */
+		pir = GENMASK(15, 0);
+		pbs = 0;
+		break;
+	}
+
+	/* Check limits */
+	if (pir > GENMASK(15, 0)) {
+		netdev_err(port->dev, "Invalid pir\n");
+		return -EINVAL;
+	}
+
+	if (cir > GENMASK(15, 0)) {
+		netdev_err(port->dev, "Invalid cir\n");
+		return -EINVAL;
+	}
+
+	if (pbs > pbs_max) {
+		netdev_err(port->dev, "Invalid pbs\n");
+		return -EINVAL;
+	}
+
+	if (cbs > cbs_max) {
+		netdev_err(port->dev, "Invalid cbs\n");
+		return -EINVAL;
+	}
+
+	value = (ANA_POL_MODE_CFG_IPG_SIZE(ipg) |
+		 ANA_POL_MODE_CFG_FRM_MODE(frm_mode) |
+		 (cf ? ANA_POL_MODE_CFG_DLB_COUPLED : 0) |
+		 (cir_ena ? ANA_POL_MODE_CFG_CIR_ENA : 0) |
+		 ANA_POL_MODE_CFG_OVERSHOOT_ENA);
+
+	ocelot_write_gix(ocelot, value, ANA_POL_MODE_CFG, pol_ix);
+
+	ocelot_write_gix(ocelot,
+			 ANA_POL_PIR_CFG_PIR_RATE(pir) |
+			 ANA_POL_PIR_CFG_PIR_BURST(pbs),
+			 ANA_POL_PIR_CFG, pol_ix);
+
+	ocelot_write_gix(ocelot,
+			 (pir_discard ? GENMASK(22, 0) : 0),
+			 ANA_POL_PIR_STATE, pol_ix);
+
+	ocelot_write_gix(ocelot,
+			 ANA_POL_CIR_CFG_CIR_RATE(cir) |
+			 ANA_POL_CIR_CFG_CIR_BURST(cbs),
+			 ANA_POL_CIR_CFG, pol_ix);
+
+	ocelot_write_gix(ocelot,
+			 (cir_discard ? GENMASK(22, 0) : 0),
+			 ANA_POL_CIR_STATE, pol_ix);
+
+	return 0;
+}
+
+int ocelot_port_policer_add(struct ocelot_port *port,
+			    struct ocelot_policer *pol)
+{
+	struct ocelot *ocelot = port->ocelot;
+	struct qos_policer_conf pp = { 0 };
+	int err;
+
+	if (!pol)
+		return -EINVAL;
+
+	pp.mode = MSCC_QOS_RATE_MODE_DATA;
+	pp.pir = pol->rate;
+	pp.pbs = pol->burst;
+
+	netdev_dbg(port->dev,
+		   "%s: port %u pir %u kbps, pbs %u bytes\n",
+		   __func__, port->chip_port, pp.pir, pp.pbs);
+
+	err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp);
+	if (err)
+		return err;
+
+	ocelot_rmw_gix(ocelot,
+		       ANA_PORT_POL_CFG_PORT_POL_ENA |
+		       ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER),
+		       ANA_PORT_POL_CFG_PORT_POL_ENA |
+		       ANA_PORT_POL_CFG_POL_ORDER_M,
+		       ANA_PORT_POL_CFG, port->chip_port);
+
+	return 0;
+}
+
+int ocelot_port_policer_del(struct ocelot_port *port)
+{
+	struct ocelot *ocelot = port->ocelot;
+	struct qos_policer_conf pp = { 0 };
+	int err;
+
+	netdev_dbg(port->dev, "%s: port %u\n", __func__, port->chip_port);
+
+	pp.mode = MSCC_QOS_RATE_MODE_DISABLED;
+
+	err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp);
+	if (err)
+		return err;
+
+	ocelot_rmw_gix(ocelot,
+		       ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER),
+		       ANA_PORT_POL_CFG_PORT_POL_ENA |
+		       ANA_PORT_POL_CFG_POL_ORDER_M,
+		       ANA_PORT_POL_CFG, port->chip_port);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mscc/ocelot_police.h b/drivers/net/ethernet/mscc/ocelot_police.h
new file mode 100644
index 000000000000..d1137f79efda
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_police.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_POLICE_H_
+#define _MSCC_OCELOT_POLICE_H_
+
+#include "ocelot.h"
+
+struct ocelot_policer {
+	u32 rate; /* kilobit per second */
+	u32 burst; /* bytes */
+};
+
+int ocelot_port_policer_add(struct ocelot_port *port,
+			    struct ocelot_policer *pol);
+
+int ocelot_port_policer_del(struct ocelot_port *port);
+
+#endif /* _MSCC_OCELOT_POLICE_H_ */
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c
new file mode 100644
index 000000000000..a0eaadccfecc
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_tc.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot Switch TC driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#include "ocelot_tc.h"
+#include "ocelot_police.h"
+#include <net/pkt_cls.h>
+
+static int ocelot_setup_tc_cls_matchall(struct ocelot_port *port,
+					struct tc_cls_matchall_offload *f,
+					bool ingress)
+{
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct ocelot_policer pol = { 0 };
+	struct flow_action_entry *action;
+	int err;
+
+	netdev_dbg(port->dev, "%s: port %u command %d cookie %lu\n",
+		   __func__, port->chip_port, f->command, f->cookie);
+
+	if (!ingress) {
+		NL_SET_ERR_MSG_MOD(extack, "Only ingress is supported");
+		return -EOPNOTSUPP;
+	}
+
+	switch (f->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		if (!flow_offload_has_one_action(&f->rule->action)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only one action is supported");
+			return -EOPNOTSUPP;
+		}
+
+		if (port->tc.block_shared) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Rate limit is not supported on shared blocks");
+			return -EOPNOTSUPP;
+		}
+
+		action = &f->rule->action.entries[0];
+
+		if (action->id != FLOW_ACTION_POLICE) {
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
+			return -EOPNOTSUPP;
+		}
+
+		if (port->tc.police_id && port->tc.police_id != f->cookie) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only one policer per port is supported\n");
+			return -EEXIST;
+		}
+
+		pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8;
+		pol.burst = (u32)div_u64(action->police.rate_bytes_ps *
+					 PSCHED_NS2TICKS(action->police.burst),
+					 PSCHED_TICKS_PER_SEC);
+
+		err = ocelot_port_policer_add(port, &pol);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Could not add policer\n");
+			return err;
+		}
+
+		port->tc.police_id = f->cookie;
+		port->tc.offload_cnt++;
+		return 0;
+	case TC_CLSMATCHALL_DESTROY:
+		if (port->tc.police_id != f->cookie)
+			return -ENOENT;
+
+		err = ocelot_port_policer_del(port);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Could not delete policer\n");
+			return err;
+		}
+		port->tc.police_id = 0;
+		port->tc.offload_cnt--;
+		return 0;
+	case TC_CLSMATCHALL_STATS: /* fall through */
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int ocelot_setup_tc_block_cb(enum tc_setup_type type,
+				    void *type_data,
+				    void *cb_priv, bool ingress)
+{
+	struct ocelot_port *port = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(port->dev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		netdev_dbg(port->dev, "tc_block_cb: TC_SETUP_CLSMATCHALL %s\n",
+			   ingress ? "ingress" : "egress");
+
+		return ocelot_setup_tc_cls_matchall(port, type_data, ingress);
+	case TC_SETUP_CLSFLOWER:
+		netdev_dbg(port->dev, "tc_block_cb: TC_SETUP_CLSFLOWER %s\n",
+			   ingress ? "ingress" : "egress");
+
+		return -EOPNOTSUPP;
+	default:
+		netdev_dbg(port->dev, "tc_block_cb: type %d %s\n",
+			   type,
+			   ingress ? "ingress" : "egress");
+
+		return -EOPNOTSUPP;
+	}
+}
+
+static int ocelot_setup_tc_block_cb_ig(enum tc_setup_type type,
+				       void *type_data,
+				       void *cb_priv)
+{
+	return ocelot_setup_tc_block_cb(type, type_data,
+					cb_priv, true);
+}
+
+static int ocelot_setup_tc_block_cb_eg(enum tc_setup_type type,
+				       void *type_data,
+				       void *cb_priv)
+{
+	return ocelot_setup_tc_block_cb(type, type_data,
+					cb_priv, false);
+}
+
+static int ocelot_setup_tc_block(struct ocelot_port *port,
+				 struct tc_block_offload *f)
+{
+	tc_setup_cb_t *cb;
+
+	netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n",
+		   f->command, f->binder_type);
+
+	if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
+		cb = ocelot_setup_tc_block_cb_ig;
+		port->tc.block_shared = tcf_block_shared(f->block);
+	} else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
+		cb = ocelot_setup_tc_block_cb_eg;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, cb, port,
+					     port, f->extack);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, cb, port);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		    void *type_data)
+{
+	struct ocelot_port *port = netdev_priv(dev);
+
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return ocelot_setup_tc_block(port, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.h b/drivers/net/ethernet/mscc/ocelot_tc.h
new file mode 100644
index 000000000000..61757c2250a6
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_tc.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_TC_H_
+#define _MSCC_OCELOT_TC_H_
+
+#include <linux/netdevice.h>
+
+struct ocelot_port_tc {
+	bool block_shared;
+	unsigned long offload_cnt;
+
+	unsigned long police_id;
+};
+
+int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		    void *type_data);
+
+#endif /* _MSCC_OCELOT_TC_H_ */
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index c5e96ce20f59..89fe091c958d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -140,6 +140,7 @@ struct qed_cxt_mngr;
 struct qed_sb_sp_info;
 struct qed_ll2_info;
 struct qed_mcp_info;
+struct qed_llh_info;
 
 struct qed_rt_data {
 	u32	*init_val;
@@ -741,6 +742,7 @@ struct qed_dev {
 #define QED_DEV_ID_MASK		0xff00
 #define QED_DEV_ID_MASK_BB	0x1600
 #define QED_DEV_ID_MASK_AH	0x8000
+#define QED_IS_E4(dev)  (QED_IS_BB(dev) || QED_IS_AH(dev))
 
 	u16	chip_num;
 #define CHIP_NUM_MASK                   0xffff
@@ -801,6 +803,11 @@ struct qed_dev {
 	u8				num_hwfns;
 	struct qed_hwfn			hwfns[MAX_HWFNS_PER_DEVICE];
 
+	/* Engine affinity */
+	u8				l2_affin_hint;
+	u8				fir_affin;
+	u8				iwarp_affin;
+
 	/* SRIOV */
 	struct qed_hw_sriov_info *p_iov_info;
 #define IS_QED_SRIOV(cdev)              (!!(cdev)->p_iov_info)
@@ -815,6 +822,10 @@ struct qed_dev {
 	/* Recovery */
 	bool recov_in_prog;
 
+	/* LLH info */
+	u8 ppfid_bitmap;
+	struct qed_llh_info *p_llh_info;
+
 	/* Linux specific here */
 	struct  qede_dev		*edev;
 	struct  pci_dev			*pdev;
@@ -852,6 +863,9 @@ struct qed_dev {
 	u32 rdma_max_inline;
 	u32 rdma_max_srq_sge;
 	u16 tunn_feature_mask;
+
+	struct devlink			*dl;
+	bool				iwarp_cmt;
 };
 
 #define NUM_OF_VFS(dev)         (QED_IS_BB(dev) ? MAX_NUM_VFS_BB \
@@ -904,6 +918,14 @@ void qed_set_fw_mac_addr(__le16 *fw_msb,
 			 __le16 *fw_mid, __le16 *fw_lsb, u8 *mac);
 
 #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
+#define QED_IS_CMT(dev)		((dev)->num_hwfns > 1)
+/* Macros for getting the engine-affinitized hwfn (FIR: fcoe,iscsi,roce) */
+#define QED_FIR_AFFIN_HWFN(dev)		(&(dev)->hwfns[dev->fir_affin])
+#define QED_IWARP_AFFIN_HWFN(dev)       (&(dev)->hwfns[dev->iwarp_affin])
+#define QED_AFFIN_HWFN(dev)				   \
+	(QED_IS_IWARP_PERSONALITY(QED_LEADING_HWFN(dev)) ? \
+	 QED_IWARP_AFFIN_HWFN(dev) : QED_FIR_AFFIN_HWFN(dev))
+#define QED_AFFIN_HWFN_IDX(dev) (IS_LEAD_HWFN(QED_AFFIN_HWFN(dev)) ? 0 : 1)
 
 /* Flags for indication of required queues */
 #define PQ_FLAGS_RLS    (BIT(0))
@@ -923,8 +945,6 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf);
 u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc);
 u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
 
-#define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
-
 /* doorbell recovery mechanism */
 void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
 void qed_db_recovery_execute(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index e61d1d905415..8e1bdf58b9e7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2351,7 +2351,8 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
 
 	/* Write via DMAE since the PSWRQ2_REG_ILT_MEMORY line is a wide-bus */
 	qed_dmae_host2grc(p_hwfn, p_ptt, (u64) (uintptr_t)&ilt_hw_entry,
-			  reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), 0);
+			  reg_offset, sizeof(ilt_hw_entry) / sizeof(u32),
+			  NULL);
 
 	if (elem_type == QED_ELEM_CXT) {
 		u32 last_cid_allocated = (1 + (iid / elems_per_p)) *
@@ -2457,7 +2458,7 @@ qed_cxt_free_ilt_range(struct qed_hwfn *p_hwfn,
 				  (u64) (uintptr_t) &ilt_hw_entry,
 				  reg_offset,
 				  sizeof(ilt_hw_entry) / sizeof(u32),
-				  0);
+				  NULL);
 	}
 
 	qed_ptt_release(p_hwfn, p_ptt);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 979f1e4bc18b..8525e6bf6ae5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -2537,7 +2537,7 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn,
 	    (len >= s_platform_defs[dev_data->platform_id].dmae_thresh ||
 	     wide_bus)) {
 		if (!qed_dmae_grc2host(p_hwfn, p_ptt, DWORDS_TO_BYTES(addr),
-				       (u64)(uintptr_t)(dump_buf), len, 0))
+				       (u64)(uintptr_t)(dump_buf), len, NULL))
 			return len;
 		dev_data->use_dmae = 0;
 		DP_VERBOSE(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index fccdb06fc5c5..eec7cb65c7e6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -361,6 +361,926 @@ void qed_db_recovery_execute(struct qed_hwfn *p_hwfn)
 
 /******************** Doorbell Recovery end ****************/
 
+/********************************** NIG LLH ***********************************/
+
+enum qed_llh_filter_type {
+	QED_LLH_FILTER_TYPE_MAC,
+	QED_LLH_FILTER_TYPE_PROTOCOL,
+};
+
+struct qed_llh_mac_filter {
+	u8 addr[ETH_ALEN];
+};
+
+struct qed_llh_protocol_filter {
+	enum qed_llh_prot_filter_type_t type;
+	u16 source_port_or_eth_type;
+	u16 dest_port;
+};
+
+union qed_llh_filter {
+	struct qed_llh_mac_filter mac;
+	struct qed_llh_protocol_filter protocol;
+};
+
+struct qed_llh_filter_info {
+	bool b_enabled;
+	u32 ref_cnt;
+	enum qed_llh_filter_type type;
+	union qed_llh_filter filter;
+};
+
+struct qed_llh_info {
+	/* Number of LLH filters banks */
+	u8 num_ppfid;
+
+#define MAX_NUM_PPFID   8
+	u8 ppfid_array[MAX_NUM_PPFID];
+
+	/* Array of filters arrays:
+	 * "num_ppfid" elements of filters banks, where each is an array of
+	 * "NIG_REG_LLH_FUNC_FILTER_EN_SIZE" filters.
+	 */
+	struct qed_llh_filter_info **pp_filters;
+};
+
+static void qed_llh_free(struct qed_dev *cdev)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	u32 i;
+
+	if (p_llh_info) {
+		if (p_llh_info->pp_filters)
+			for (i = 0; i < p_llh_info->num_ppfid; i++)
+				kfree(p_llh_info->pp_filters[i]);
+
+		kfree(p_llh_info->pp_filters);
+	}
+
+	kfree(p_llh_info);
+	cdev->p_llh_info = NULL;
+}
+
+static int qed_llh_alloc(struct qed_dev *cdev)
+{
+	struct qed_llh_info *p_llh_info;
+	u32 size, i;
+
+	p_llh_info = kzalloc(sizeof(*p_llh_info), GFP_KERNEL);
+	if (!p_llh_info)
+		return -ENOMEM;
+	cdev->p_llh_info = p_llh_info;
+
+	for (i = 0; i < MAX_NUM_PPFID; i++) {
+		if (!(cdev->ppfid_bitmap & (0x1 << i)))
+			continue;
+
+		p_llh_info->ppfid_array[p_llh_info->num_ppfid] = i;
+		DP_VERBOSE(cdev, QED_MSG_SP, "ppfid_array[%d] = %hhd\n",
+			   p_llh_info->num_ppfid, i);
+		p_llh_info->num_ppfid++;
+	}
+
+	size = p_llh_info->num_ppfid * sizeof(*p_llh_info->pp_filters);
+	p_llh_info->pp_filters = kzalloc(size, GFP_KERNEL);
+	if (!p_llh_info->pp_filters)
+		return -ENOMEM;
+
+	size = NIG_REG_LLH_FUNC_FILTER_EN_SIZE *
+	    sizeof(**p_llh_info->pp_filters);
+	for (i = 0; i < p_llh_info->num_ppfid; i++) {
+		p_llh_info->pp_filters[i] = kzalloc(size, GFP_KERNEL);
+		if (!p_llh_info->pp_filters[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int qed_llh_shadow_sanity(struct qed_dev *cdev,
+				 u8 ppfid, u8 filter_idx, const char *action)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+
+	if (ppfid >= p_llh_info->num_ppfid) {
+		DP_NOTICE(cdev,
+			  "LLH shadow [%s]: using ppfid %d while only %d ppfids are available\n",
+			  action, ppfid, p_llh_info->num_ppfid);
+		return -EINVAL;
+	}
+
+	if (filter_idx >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
+		DP_NOTICE(cdev,
+			  "LLH shadow [%s]: using filter_idx %d while only %d filters are available\n",
+			  action, filter_idx, NIG_REG_LLH_FUNC_FILTER_EN_SIZE);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define QED_LLH_INVALID_FILTER_IDX      0xff
+
+static int
+qed_llh_shadow_search_filter(struct qed_dev *cdev,
+			     u8 ppfid,
+			     union qed_llh_filter *p_filter, u8 *p_filter_idx)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+	u8 i;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "search");
+	if (rc)
+		return rc;
+
+	*p_filter_idx = QED_LLH_INVALID_FILTER_IDX;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+		if (!memcmp(p_filter, &p_filters[i].filter,
+			    sizeof(*p_filter))) {
+			*p_filter_idx = i;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int
+qed_llh_shadow_get_free_idx(struct qed_dev *cdev, u8 ppfid, u8 *p_filter_idx)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+	u8 i;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "get_free_idx");
+	if (rc)
+		return rc;
+
+	*p_filter_idx = QED_LLH_INVALID_FILTER_IDX;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+		if (!p_filters[i].b_enabled) {
+			*p_filter_idx = i;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int
+__qed_llh_shadow_add_filter(struct qed_dev *cdev,
+			    u8 ppfid,
+			    u8 filter_idx,
+			    enum qed_llh_filter_type type,
+			    union qed_llh_filter *p_filter, u32 *p_ref_cnt)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "add");
+	if (rc)
+		return rc;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	if (!p_filters[filter_idx].ref_cnt) {
+		p_filters[filter_idx].b_enabled = true;
+		p_filters[filter_idx].type = type;
+		memcpy(&p_filters[filter_idx].filter, p_filter,
+		       sizeof(p_filters[filter_idx].filter));
+	}
+
+	*p_ref_cnt = ++p_filters[filter_idx].ref_cnt;
+
+	return 0;
+}
+
+static int
+qed_llh_shadow_add_filter(struct qed_dev *cdev,
+			  u8 ppfid,
+			  enum qed_llh_filter_type type,
+			  union qed_llh_filter *p_filter,
+			  u8 *p_filter_idx, u32 *p_ref_cnt)
+{
+	int rc;
+
+	/* Check if the same filter already exist */
+	rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx);
+	if (rc)
+		return rc;
+
+	/* Find a new entry in case of a new filter */
+	if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+		rc = qed_llh_shadow_get_free_idx(cdev, ppfid, p_filter_idx);
+		if (rc)
+			return rc;
+	}
+
+	/* No free entry was found */
+	if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+		DP_NOTICE(cdev,
+			  "Failed to find an empty LLH filter to utilize [ppfid %d]\n",
+			  ppfid);
+		return -EINVAL;
+	}
+
+	return __qed_llh_shadow_add_filter(cdev, ppfid, *p_filter_idx, type,
+					   p_filter, p_ref_cnt);
+}
+
+static int
+__qed_llh_shadow_remove_filter(struct qed_dev *cdev,
+			       u8 ppfid, u8 filter_idx, u32 *p_ref_cnt)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "remove");
+	if (rc)
+		return rc;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	if (!p_filters[filter_idx].ref_cnt) {
+		DP_NOTICE(cdev,
+			  "LLH shadow: trying to remove a filter with ref_cnt=0\n");
+		return -EINVAL;
+	}
+
+	*p_ref_cnt = --p_filters[filter_idx].ref_cnt;
+	if (!p_filters[filter_idx].ref_cnt)
+		memset(&p_filters[filter_idx],
+		       0, sizeof(p_filters[filter_idx]));
+
+	return 0;
+}
+
+static int
+qed_llh_shadow_remove_filter(struct qed_dev *cdev,
+			     u8 ppfid,
+			     union qed_llh_filter *p_filter,
+			     u8 *p_filter_idx, u32 *p_ref_cnt)
+{
+	int rc;
+
+	rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx);
+	if (rc)
+		return rc;
+
+	/* No matching filter was found */
+	if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+		DP_NOTICE(cdev, "Failed to find a filter in the LLH shadow\n");
+		return -EINVAL;
+	}
+
+	return __qed_llh_shadow_remove_filter(cdev, ppfid, *p_filter_idx,
+					      p_ref_cnt);
+}
+
+static int qed_llh_abs_ppfid(struct qed_dev *cdev, u8 ppfid, u8 *p_abs_ppfid)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+
+	if (ppfid >= p_llh_info->num_ppfid) {
+		DP_NOTICE(cdev,
+			  "ppfid %d is not valid, available indices are 0..%hhd\n",
+			  ppfid, p_llh_info->num_ppfid - 1);
+		return -EINVAL;
+	}
+
+	*p_abs_ppfid = p_llh_info->ppfid_array[ppfid];
+
+	return 0;
+}
+
+static int
+qed_llh_set_engine_affin(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	enum qed_eng eng;
+	u8 ppfid;
+	int rc;
+
+	rc = qed_mcp_get_engine_config(p_hwfn, p_ptt);
+	if (rc != 0 && rc != -EOPNOTSUPP) {
+		DP_NOTICE(p_hwfn,
+			  "Failed to get the engine affinity configuration\n");
+		return rc;
+	}
+
+	/* RoCE PF is bound to a single engine */
+	if (QED_IS_ROCE_PERSONALITY(p_hwfn)) {
+		eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+		rc = qed_llh_set_roce_affinity(cdev, eng);
+		if (rc) {
+			DP_NOTICE(cdev,
+				  "Failed to set the RoCE engine affinity\n");
+			return rc;
+		}
+
+		DP_VERBOSE(cdev,
+			   QED_MSG_SP,
+			   "LLH: Set the engine affinity of RoCE packets as %d\n",
+			   eng);
+	}
+
+	/* Storage PF is bound to a single engine while L2 PF uses both */
+	if (QED_IS_FCOE_PERSONALITY(p_hwfn) || QED_IS_ISCSI_PERSONALITY(p_hwfn))
+		eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+	else			/* L2_PERSONALITY */
+		eng = QED_BOTH_ENG;
+
+	for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+		rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng);
+		if (rc) {
+			DP_NOTICE(cdev,
+				  "Failed to set the engine affinity of ppfid %d\n",
+				  ppfid);
+			return rc;
+		}
+	}
+
+	DP_VERBOSE(cdev, QED_MSG_SP,
+		   "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+		   eng);
+
+	return 0;
+}
+
+static int qed_llh_hw_init_pf(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u8 ppfid, abs_ppfid;
+	int rc;
+
+	for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+		u32 addr;
+
+		rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+		if (rc)
+			return rc;
+
+		addr = NIG_REG_LLH_PPFID2PFID_TBL_0 + abs_ppfid * 0x4;
+		qed_wr(p_hwfn, p_ptt, addr, p_hwfn->rel_pf_id);
+	}
+
+	if (test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) &&
+	    !QED_IS_FCOE_PERSONALITY(p_hwfn)) {
+		rc = qed_llh_add_mac_filter(cdev, 0,
+					    p_hwfn->hw_info.hw_mac_addr);
+		if (rc)
+			DP_NOTICE(cdev,
+				  "Failed to add an LLH filter with the primary MAC\n");
+	}
+
+	if (QED_IS_CMT(cdev)) {
+		rc = qed_llh_set_engine_affin(p_hwfn, p_ptt);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+u8 qed_llh_get_num_ppfid(struct qed_dev *cdev)
+{
+	return cdev->p_llh_info->num_ppfid;
+}
+
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_MASK             0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_SHIFT            0
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_MASK         0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_SHIFT        2
+
+int qed_llh_set_ppfid_affinity(struct qed_dev *cdev, u8 ppfid, enum qed_eng eng)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u32 addr, val, eng_sel;
+	u8 abs_ppfid;
+	int rc = 0;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (!QED_IS_CMT(cdev))
+		goto out;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto out;
+
+	switch (eng) {
+	case QED_ENG0:
+		eng_sel = 0;
+		break;
+	case QED_ENG1:
+		eng_sel = 1;
+		break;
+	case QED_BOTH_ENG:
+		eng_sel = 2;
+		break;
+	default:
+		DP_NOTICE(cdev, "Invalid affinity value for ppfid [%d]\n", eng);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+	val = qed_rd(p_hwfn, p_ptt, addr);
+	SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE, eng_sel);
+	qed_wr(p_hwfn, p_ptt, addr, val);
+
+	/* The iWARP affinity is set as the affinity of ppfid 0 */
+	if (!ppfid && QED_IS_IWARP_PERSONALITY(p_hwfn))
+		cdev->iwarp_affin = (eng == QED_ENG1) ? 1 : 0;
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u32 addr, val, eng_sel;
+	u8 ppfid, abs_ppfid;
+	int rc = 0;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (!QED_IS_CMT(cdev))
+		goto out;
+
+	switch (eng) {
+	case QED_ENG0:
+		eng_sel = 0;
+		break;
+	case QED_ENG1:
+		eng_sel = 1;
+		break;
+	case QED_BOTH_ENG:
+		eng_sel = 2;
+		qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL,
+		       0xf);  /* QP bit 15 */
+		break;
+	default:
+		DP_NOTICE(cdev, "Invalid affinity value for RoCE [%d]\n", eng);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+		rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+		if (rc)
+			goto out;
+
+		addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+		val = qed_rd(p_hwfn, p_ptt, addr);
+		SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_ROCE, eng_sel);
+		qed_wr(p_hwfn, p_ptt, addr, val);
+	}
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+struct qed_llh_filter_details {
+	u64 value;
+	u32 mode;
+	u32 protocol_type;
+	u32 hdr_sel;
+	u32 enable;
+};
+
+static int
+qed_llh_access_filter(struct qed_hwfn *p_hwfn,
+		      struct qed_ptt *p_ptt,
+		      u8 abs_ppfid,
+		      u8 filter_idx,
+		      struct qed_llh_filter_details *p_details)
+{
+	struct qed_dmae_params params = {0};
+	u32 addr;
+	u8 pfid;
+	int rc;
+
+	/* The NIG/LLH registers that are accessed in this function have only 16
+	 * rows which are exposed to a PF. I.e. only the 16 filters of its
+	 * default ppfid. Accessing filters of other ppfids requires pretending
+	 * to another PFs.
+	 * The calculation of PPFID->PFID in AH is based on the relative index
+	 * of a PF on its port.
+	 * For BB the pfid is actually the abs_ppfid.
+	 */
+	if (QED_IS_BB(p_hwfn->cdev))
+		pfid = abs_ppfid;
+	else
+		pfid = abs_ppfid * p_hwfn->cdev->num_ports_in_engine +
+		    MFW_PORT(p_hwfn);
+
+	/* Filter enable - should be done first when removing a filter */
+	if (!p_details->enable) {
+		qed_fid_pretend(p_hwfn, p_ptt,
+				pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+		addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+		qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+
+		qed_fid_pretend(p_hwfn, p_ptt,
+				p_hwfn->rel_pf_id <<
+				PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+	}
+
+	/* Filter value */
+	addr = NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * filter_idx * 0x4;
+
+	params.flags = QED_DMAE_FLAG_PF_DST;
+	params.dst_pfid = pfid;
+	rc = qed_dmae_host2grc(p_hwfn,
+			       p_ptt,
+			       (u64)(uintptr_t)&p_details->value,
+			       addr, 2 /* size_in_dwords */,
+			       &params);
+	if (rc)
+		return rc;
+
+	qed_fid_pretend(p_hwfn, p_ptt,
+			pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+	/* Filter mode */
+	addr = NIG_REG_LLH_FUNC_FILTER_MODE + filter_idx * 0x4;
+	qed_wr(p_hwfn, p_ptt, addr, p_details->mode);
+
+	/* Filter protocol type */
+	addr = NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + filter_idx * 0x4;
+	qed_wr(p_hwfn, p_ptt, addr, p_details->protocol_type);
+
+	/* Filter header select */
+	addr = NIG_REG_LLH_FUNC_FILTER_HDR_SEL + filter_idx * 0x4;
+	qed_wr(p_hwfn, p_ptt, addr, p_details->hdr_sel);
+
+	/* Filter enable - should be done last when adding a filter */
+	if (p_details->enable) {
+		addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+		qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+	}
+
+	qed_fid_pretend(p_hwfn, p_ptt,
+			p_hwfn->rel_pf_id <<
+			PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+	return 0;
+}
+
+static int
+qed_llh_add_filter(struct qed_hwfn *p_hwfn,
+		   struct qed_ptt *p_ptt,
+		   u8 abs_ppfid,
+		   u8 filter_idx, u8 filter_prot_type, u32 high, u32 low)
+{
+	struct qed_llh_filter_details filter_details;
+
+	filter_details.enable = 1;
+	filter_details.value = ((u64)high << 32) | low;
+	filter_details.hdr_sel = 0;
+	filter_details.protocol_type = filter_prot_type;
+	/* Mode: 0: MAC-address classification 1: protocol classification */
+	filter_details.mode = filter_prot_type ? 1 : 0;
+
+	return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+				     &filter_details);
+}
+
+static int
+qed_llh_remove_filter(struct qed_hwfn *p_hwfn,
+		      struct qed_ptt *p_ptt, u8 abs_ppfid, u8 filter_idx)
+{
+	struct qed_llh_filter_details filter_details = {0};
+
+	return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+				     &filter_details);
+}
+
+int qed_llh_add_mac_filter(struct qed_dev *cdev,
+			   u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	union qed_llh_filter filter = {};
+	u8 filter_idx, abs_ppfid;
+	u32 high, low, ref_cnt;
+	int rc = 0;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+		goto out;
+
+	memcpy(filter.mac.addr, mac_addr, ETH_ALEN);
+	rc = qed_llh_shadow_add_filter(cdev, ppfid,
+				       QED_LLH_FILTER_TYPE_MAC,
+				       &filter, &filter_idx, &ref_cnt);
+	if (rc)
+		goto err;
+
+	/* Configure the LLH only in case of a new the filter */
+	if (ref_cnt == 1) {
+		rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+		if (rc)
+			goto err;
+
+		high = mac_addr[1] | (mac_addr[0] << 8);
+		low = mac_addr[5] | (mac_addr[4] << 8) | (mac_addr[3] << 16) |
+		      (mac_addr[2] << 24);
+		rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+					0, high, low);
+		if (rc)
+			goto err;
+	}
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_SP,
+		   "LLH: Added MAC filter [%pM] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+		   mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+	goto out;
+
+err:	DP_NOTICE(cdev,
+		  "LLH: Failed to add MAC filter [%pM] to ppfid %hhd\n",
+		  mac_addr, ppfid);
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+static int
+qed_llh_protocol_filter_stringify(struct qed_dev *cdev,
+				  enum qed_llh_prot_filter_type_t type,
+				  u16 source_port_or_eth_type,
+				  u16 dest_port, u8 *str, size_t str_len)
+{
+	switch (type) {
+	case QED_LLH_FILTER_ETHERTYPE:
+		snprintf(str, str_len, "Ethertype 0x%04x",
+			 source_port_or_eth_type);
+		break;
+	case QED_LLH_FILTER_TCP_SRC_PORT:
+		snprintf(str, str_len, "TCP src port 0x%04x",
+			 source_port_or_eth_type);
+		break;
+	case QED_LLH_FILTER_UDP_SRC_PORT:
+		snprintf(str, str_len, "UDP src port 0x%04x",
+			 source_port_or_eth_type);
+		break;
+	case QED_LLH_FILTER_TCP_DEST_PORT:
+		snprintf(str, str_len, "TCP dst port 0x%04x", dest_port);
+		break;
+	case QED_LLH_FILTER_UDP_DEST_PORT:
+		snprintf(str, str_len, "UDP dst port 0x%04x", dest_port);
+		break;
+	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+		snprintf(str, str_len, "TCP src/dst ports 0x%04x/0x%04x",
+			 source_port_or_eth_type, dest_port);
+		break;
+	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+		snprintf(str, str_len, "UDP src/dst ports 0x%04x/0x%04x",
+			 source_port_or_eth_type, dest_port);
+		break;
+	default:
+		DP_NOTICE(cdev,
+			  "Non valid LLH protocol filter type %d\n", type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+qed_llh_protocol_filter_to_hilo(struct qed_dev *cdev,
+				enum qed_llh_prot_filter_type_t type,
+				u16 source_port_or_eth_type,
+				u16 dest_port, u32 *p_high, u32 *p_low)
+{
+	*p_high = 0;
+	*p_low = 0;
+
+	switch (type) {
+	case QED_LLH_FILTER_ETHERTYPE:
+		*p_high = source_port_or_eth_type;
+		break;
+	case QED_LLH_FILTER_TCP_SRC_PORT:
+	case QED_LLH_FILTER_UDP_SRC_PORT:
+		*p_low = source_port_or_eth_type << 16;
+		break;
+	case QED_LLH_FILTER_TCP_DEST_PORT:
+	case QED_LLH_FILTER_UDP_DEST_PORT:
+		*p_low = dest_port;
+		break;
+	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+		*p_low = (source_port_or_eth_type << 16) | dest_port;
+		break;
+	default:
+		DP_NOTICE(cdev,
+			  "Non valid LLH protocol filter type %d\n", type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int
+qed_llh_add_protocol_filter(struct qed_dev *cdev,
+			    u8 ppfid,
+			    enum qed_llh_prot_filter_type_t type,
+			    u16 source_port_or_eth_type, u16 dest_port)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u8 filter_idx, abs_ppfid, str[32], type_bitmap;
+	union qed_llh_filter filter = {};
+	u32 high, low, ref_cnt;
+	int rc = 0;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+		goto out;
+
+	rc = qed_llh_protocol_filter_stringify(cdev, type,
+					       source_port_or_eth_type,
+					       dest_port, str, sizeof(str));
+	if (rc)
+		goto err;
+
+	filter.protocol.type = type;
+	filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+	filter.protocol.dest_port = dest_port;
+	rc = qed_llh_shadow_add_filter(cdev,
+				       ppfid,
+				       QED_LLH_FILTER_TYPE_PROTOCOL,
+				       &filter, &filter_idx, &ref_cnt);
+	if (rc)
+		goto err;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto err;
+
+	/* Configure the LLH only in case of a new the filter */
+	if (ref_cnt == 1) {
+		rc = qed_llh_protocol_filter_to_hilo(cdev, type,
+						     source_port_or_eth_type,
+						     dest_port, &high, &low);
+		if (rc)
+			goto err;
+
+		type_bitmap = 0x1 << type;
+		rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid,
+					filter_idx, type_bitmap, high, low);
+		if (rc)
+			goto err;
+	}
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_SP,
+		   "LLH: Added protocol filter [%s] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+		   str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+	goto out;
+
+err:	DP_NOTICE(p_hwfn,
+		  "LLH: Failed to add protocol filter [%s] to ppfid %hhd\n",
+		  str, ppfid);
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+			       u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	union qed_llh_filter filter = {};
+	u8 filter_idx, abs_ppfid;
+	int rc = 0;
+	u32 ref_cnt;
+
+	if (!p_ptt)
+		return;
+
+	if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+		goto out;
+
+	ether_addr_copy(filter.mac.addr, mac_addr);
+	rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
+					  &ref_cnt);
+	if (rc)
+		goto err;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto err;
+
+	/* Remove from the LLH in case the filter is not in use */
+	if (!ref_cnt) {
+		rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+					   filter_idx);
+		if (rc)
+			goto err;
+	}
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_SP,
+		   "LLH: Removed MAC filter [%pM] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+		   mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+	goto out;
+
+err:	DP_NOTICE(cdev,
+		  "LLH: Failed to remove MAC filter [%pM] from ppfid %hhd\n",
+		  mac_addr, ppfid);
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+}
+
+void qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+				    u8 ppfid,
+				    enum qed_llh_prot_filter_type_t type,
+				    u16 source_port_or_eth_type, u16 dest_port)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u8 filter_idx, abs_ppfid, str[32];
+	union qed_llh_filter filter = {};
+	int rc = 0;
+	u32 ref_cnt;
+
+	if (!p_ptt)
+		return;
+
+	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+		goto out;
+
+	rc = qed_llh_protocol_filter_stringify(cdev, type,
+					       source_port_or_eth_type,
+					       dest_port, str, sizeof(str));
+	if (rc)
+		goto err;
+
+	filter.protocol.type = type;
+	filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+	filter.protocol.dest_port = dest_port;
+	rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
+					  &ref_cnt);
+	if (rc)
+		goto err;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto err;
+
+	/* Remove from the LLH in case the filter is not in use */
+	if (!ref_cnt) {
+		rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+					   filter_idx);
+		if (rc)
+			goto err;
+	}
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_SP,
+		   "LLH: Removed protocol filter [%s] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+		   str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+	goto out;
+
+err:	DP_NOTICE(cdev,
+		  "LLH: Failed to remove protocol filter [%s] from ppfid %hhd\n",
+		  str, ppfid);
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+}
+
+/******************************* NIG LLH - End ********************************/
+
 #define QED_MIN_DPIS            (4)
 #define QED_MIN_PWM_REGION      (QED_WID_SIZE * QED_MIN_DPIS)
 
@@ -461,6 +1381,8 @@ void qed_resc_free(struct qed_dev *cdev)
 	kfree(cdev->reset_stats);
 	cdev->reset_stats = NULL;
 
+	qed_llh_free(cdev);
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -1428,6 +2350,13 @@ int qed_resc_alloc(struct qed_dev *cdev)
 			goto alloc_err;
 	}
 
+	rc = qed_llh_alloc(cdev);
+	if (rc) {
+		DP_NOTICE(cdev,
+			  "Failed to allocate memory for the llh_info structure\n");
+		goto alloc_err;
+	}
+
 	cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL);
 	if (!cdev->reset_stats)
 		goto alloc_no_mem;
@@ -1879,6 +2808,10 @@ static int qed_hw_init_port(struct qed_hwfn *p_hwfn,
 {
 	int rc = 0;
 
+	/* In CMT the gate should be cleared by the 2nd hwfn */
+	if (!QED_IS_CMT(p_hwfn->cdev) || !IS_LEAD_HWFN(p_hwfn))
+		STORE_RT_REG(p_hwfn, NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET, 0);
+
 	rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode);
 	if (rc)
 		return rc;
@@ -1964,6 +2897,13 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
 	if (rc)
 		return rc;
 
+	/* Use the leading hwfn since in CMT only NIG #0 is operational */
+	if (IS_LEAD_HWFN(p_hwfn)) {
+		rc = qed_llh_hw_init_pf(p_hwfn, p_ptt);
+		if (rc)
+			return rc;
+	}
+
 	if (b_hw_start) {
 		/* enable interrupts */
 		qed_int_igu_enable(p_hwfn, p_ptt, int_mode);
@@ -2393,6 +3333,12 @@ int qed_hw_stop(struct qed_dev *cdev)
 		qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
 		qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
 
+		if (IS_LEAD_HWFN(p_hwfn) &&
+		    test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) &&
+		    !QED_IS_FCOE_PERSONALITY(p_hwfn))
+			qed_llh_remove_mac_filter(cdev, 0,
+						  p_hwfn->hw_info.hw_mac_addr);
+
 		if (!cdev->recov_in_prog) {
 			rc = qed_mcp_unload_done(p_hwfn, p_ptt);
 			if (rc) {
@@ -2868,6 +3814,36 @@ static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn)
 	return 0;
 }
 
+static int qed_hw_get_ppfid_bitmap(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u8 native_ppfid_idx;
+	int rc;
+
+	/* Calculation of BB/AH is different for native_ppfid_idx */
+	if (QED_IS_BB(cdev))
+		native_ppfid_idx = p_hwfn->rel_pf_id;
+	else
+		native_ppfid_idx = p_hwfn->rel_pf_id /
+		    cdev->num_ports_in_engine;
+
+	rc = qed_mcp_get_ppfid_bitmap(p_hwfn, p_ptt);
+	if (rc != 0 && rc != -EOPNOTSUPP)
+		return rc;
+	else if (rc == -EOPNOTSUPP)
+		cdev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+
+	if (!(cdev->ppfid_bitmap & (0x1 << native_ppfid_idx))) {
+		DP_INFO(p_hwfn,
+			"Fix the PPFID bitmap to include the native PPFID [native_ppfid_idx %hhd, orig_bitmap 0x%hhx]\n",
+			native_ppfid_idx, cdev->ppfid_bitmap);
+		cdev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+	}
+
+	return 0;
+}
+
 static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_resc_unlock_params resc_unlock_params;
@@ -2925,6 +3901,13 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 				"Failed to release the resource lock for the resource allocation commands\n");
 	}
 
+	/* PPFID bitmap */
+	if (IS_LEAD_HWFN(p_hwfn)) {
+		rc = qed_hw_get_ppfid_bitmap(p_hwfn, p_ptt);
+		if (rc)
+			return rc;
+	}
+
 	/* Sanity for ILT */
 	if ((b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_K2)) ||
 	    (!b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB))) {
@@ -3443,6 +4426,7 @@ static void qed_nvm_info_free(struct qed_hwfn *p_hwfn)
 static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 				 void __iomem *p_regview,
 				 void __iomem *p_doorbells,
+				 u64 db_phys_addr,
 				 enum qed_pci_personality personality)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
@@ -3451,6 +4435,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 	/* Split PCI bars evenly between hwfns */
 	p_hwfn->regview = p_regview;
 	p_hwfn->doorbells = p_doorbells;
+	p_hwfn->db_phys_addr = db_phys_addr;
 
 	if (IS_VF(p_hwfn->cdev))
 		return qed_vf_hw_prepare(p_hwfn);
@@ -3546,7 +4531,9 @@ int qed_hw_prepare(struct qed_dev *cdev,
 	/* Initialize the first hwfn - will learn number of hwfns */
 	rc = qed_hw_prepare_single(p_hwfn,
 				   cdev->regview,
-				   cdev->doorbells, personality);
+				   cdev->doorbells,
+				   cdev->db_phys_addr,
+				   personality);
 	if (rc)
 		return rc;
 
@@ -3555,22 +4542,25 @@ int qed_hw_prepare(struct qed_dev *cdev,
 	/* Initialize the rest of the hwfns */
 	if (cdev->num_hwfns > 1) {
 		void __iomem *p_regview, *p_doorbell;
-		u8 __iomem *addr;
+		u64 db_phys_addr;
+		u32 offset;
 
 		/* adjust bar offset for second engine */
-		addr = cdev->regview +
-		       qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
-				       BAR_ID_0) / 2;
-		p_regview = addr;
+		offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+					 BAR_ID_0) / 2;
+		p_regview = cdev->regview + offset;
+
+		offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+					 BAR_ID_1) / 2;
 
-		addr = cdev->doorbells +
-		       qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
-				       BAR_ID_1) / 2;
-		p_doorbell = addr;
+		p_doorbell = cdev->doorbells + offset;
+
+		db_phys_addr = cdev->db_phys_addr + offset;
 
 		/* prepare second hw function */
 		rc = qed_hw_prepare_single(&cdev->hwfns[1], p_regview,
-					   p_doorbell, personality);
+					   p_doorbell, db_phys_addr,
+					   personality);
 
 		/* in case of error, need to free the previously
 		 * initiliazed hwfn 0.
@@ -3951,269 +4941,6 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id)
 	return 0;
 }
 
-static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low,
-				  u8 *p_filter)
-{
-	*p_high = p_filter[1] | (p_filter[0] << 8);
-	*p_low = p_filter[5] | (p_filter[4] << 8) |
-		 (p_filter[3] << 16) | (p_filter[2] << 24);
-}
-
-int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
-			   struct qed_ptt *p_ptt, u8 *p_filter)
-{
-	u32 high = 0, low = 0, en;
-	int i;
-
-	if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits))
-		return 0;
-
-	qed_llh_mac_to_filter(&high, &low, p_filter);
-
-	/* Find a free entry and utilize it */
-	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-		en = qed_rd(p_hwfn, p_ptt,
-			    NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32));
-		if (en)
-			continue;
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       2 * i * sizeof(u32), low);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       (2 * i + 1) * sizeof(u32), high);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
-		       i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1);
-		break;
-	}
-	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
-		DP_NOTICE(p_hwfn,
-			  "Failed to find an empty LLH filter to utilize\n");
-		return -EINVAL;
-	}
-
-	DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-		   "mac: %pM is added at %d\n",
-		   p_filter, i);
-
-	return 0;
-}
-
-void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt, u8 *p_filter)
-{
-	u32 high = 0, low = 0;
-	int i;
-
-	if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits))
-		return;
-
-	qed_llh_mac_to_filter(&high, &low, p_filter);
-
-	/* Find the entry and clean it */
-	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-		if (qed_rd(p_hwfn, p_ptt,
-			   NIG_REG_LLH_FUNC_FILTER_VALUE +
-			   2 * i * sizeof(u32)) != low)
-			continue;
-		if (qed_rd(p_hwfn, p_ptt,
-			   NIG_REG_LLH_FUNC_FILTER_VALUE +
-			   (2 * i + 1) * sizeof(u32)) != high)
-			continue;
-
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       (2 * i + 1) * sizeof(u32), 0);
-
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "mac: %pM is removed from %d\n",
-			   p_filter, i);
-		break;
-	}
-	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
-		DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n");
-}
-
-int
-qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    u16 source_port_or_eth_type,
-			    u16 dest_port, enum qed_llh_port_filter_type_t type)
-{
-	u32 high = 0, low = 0, en;
-	int i;
-
-	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits))
-		return 0;
-
-	switch (type) {
-	case QED_LLH_FILTER_ETHERTYPE:
-		high = source_port_or_eth_type;
-		break;
-	case QED_LLH_FILTER_TCP_SRC_PORT:
-	case QED_LLH_FILTER_UDP_SRC_PORT:
-		low = source_port_or_eth_type << 16;
-		break;
-	case QED_LLH_FILTER_TCP_DEST_PORT:
-	case QED_LLH_FILTER_UDP_DEST_PORT:
-		low = dest_port;
-		break;
-	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-		low = (source_port_or_eth_type << 16) | dest_port;
-		break;
-	default:
-		DP_NOTICE(p_hwfn,
-			  "Non valid LLH protocol filter type %d\n", type);
-		return -EINVAL;
-	}
-	/* Find a free entry and utilize it */
-	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-		en = qed_rd(p_hwfn, p_ptt,
-			    NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32));
-		if (en)
-			continue;
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       2 * i * sizeof(u32), low);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       (2 * i + 1) * sizeof(u32), high);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 1);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
-		       i * sizeof(u32), 1 << type);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1);
-		break;
-	}
-	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
-		DP_NOTICE(p_hwfn,
-			  "Failed to find an empty LLH filter to utilize\n");
-		return -EINVAL;
-	}
-	switch (type) {
-	case QED_LLH_FILTER_ETHERTYPE:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "ETH type %x is added at %d\n",
-			   source_port_or_eth_type, i);
-		break;
-	case QED_LLH_FILTER_TCP_SRC_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "TCP src port %x is added at %d\n",
-			   source_port_or_eth_type, i);
-		break;
-	case QED_LLH_FILTER_UDP_SRC_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "UDP src port %x is added at %d\n",
-			   source_port_or_eth_type, i);
-		break;
-	case QED_LLH_FILTER_TCP_DEST_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "TCP dst port %x is added at %d\n", dest_port, i);
-		break;
-	case QED_LLH_FILTER_UDP_DEST_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "UDP dst port %x is added at %d\n", dest_port, i);
-		break;
-	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "TCP src/dst ports %x/%x are added at %d\n",
-			   source_port_or_eth_type, dest_port, i);
-		break;
-	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
-			   "UDP src/dst ports %x/%x are added at %d\n",
-			   source_port_or_eth_type, dest_port, i);
-		break;
-	}
-	return 0;
-}
-
-void
-qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt,
-			       u16 source_port_or_eth_type,
-			       u16 dest_port,
-			       enum qed_llh_port_filter_type_t type)
-{
-	u32 high = 0, low = 0;
-	int i;
-
-	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits))
-		return;
-
-	switch (type) {
-	case QED_LLH_FILTER_ETHERTYPE:
-		high = source_port_or_eth_type;
-		break;
-	case QED_LLH_FILTER_TCP_SRC_PORT:
-	case QED_LLH_FILTER_UDP_SRC_PORT:
-		low = source_port_or_eth_type << 16;
-		break;
-	case QED_LLH_FILTER_TCP_DEST_PORT:
-	case QED_LLH_FILTER_UDP_DEST_PORT:
-		low = dest_port;
-		break;
-	case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-	case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-		low = (source_port_or_eth_type << 16) | dest_port;
-		break;
-	default:
-		DP_NOTICE(p_hwfn,
-			  "Non valid LLH protocol filter type %d\n", type);
-		return;
-	}
-
-	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-		if (!qed_rd(p_hwfn, p_ptt,
-			    NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)))
-			continue;
-		if (!qed_rd(p_hwfn, p_ptt,
-			    NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32)))
-			continue;
-		if (!(qed_rd(p_hwfn, p_ptt,
-			     NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
-			     i * sizeof(u32)) & BIT(type)))
-			continue;
-		if (qed_rd(p_hwfn, p_ptt,
-			   NIG_REG_LLH_FUNC_FILTER_VALUE +
-			   2 * i * sizeof(u32)) != low)
-			continue;
-		if (qed_rd(p_hwfn, p_ptt,
-			   NIG_REG_LLH_FUNC_FILTER_VALUE +
-			   (2 * i + 1) * sizeof(u32)) != high)
-			continue;
-
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
-		       i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0);
-		qed_wr(p_hwfn, p_ptt,
-		       NIG_REG_LLH_FUNC_FILTER_VALUE +
-		       (2 * i + 1) * sizeof(u32), 0);
-		break;
-	}
-
-	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
-		DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n");
-}
-
 static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			    u32 hw_addr, void *p_eth_qzone,
 			    size_t eth_qzone_size, u8 timeset)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index e4b4e3b78e8a..47376d4d071f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -241,11 +241,17 @@ enum qed_dmae_address_type_t {
 #define QED_DMAE_FLAG_VF_SRC		0x00000002
 #define QED_DMAE_FLAG_VF_DST		0x00000004
 #define QED_DMAE_FLAG_COMPLETION_DST	0x00000008
+#define QED_DMAE_FLAG_PORT		0x00000010
+#define QED_DMAE_FLAG_PF_SRC		0x00000020
+#define QED_DMAE_FLAG_PF_DST		0x00000040
 
 struct qed_dmae_params {
 	u32 flags; /* consists of QED_DMAE_FLAG_* values */
 	u8 src_vfid;
 	u8 dst_vfid;
+	u8 port_id;
+	u8 src_pfid;
+	u8 dst_pfid;
 };
 
 /**
@@ -257,7 +263,7 @@ struct qed_dmae_params {
  * @param source_addr
  * @param grc_addr (dmae_data_offset)
  * @param size_in_dwords
- * @param flags (one of the flags defined above)
+ * @param p_params (default parameters will be used in case of NULL)
  */
 int
 qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
@@ -265,7 +271,7 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 		  u64 source_addr,
 		  u32 grc_addr,
 		  u32 size_in_dwords,
-		  u32 flags);
+		  struct qed_dmae_params *p_params);
 
  /**
  * @brief qed_dmae_grc2host - Read data from dmae data offset
@@ -275,11 +281,11 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
  * @param grc_addr (dmae_data_offset)
  * @param dest_addr
  * @param size_in_dwords
- * @param flags - one of the flags defined above
+ * @param p_params (default parameters will be used in case of NULL)
  */
 int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 		      u32 grc_addr, dma_addr_t dest_addr, u32 size_in_dwords,
-		      u32 flags);
+		      struct qed_dmae_params *p_params);
 
 /**
  * @brief qed_dmae_host2host - copy data from to source address
@@ -290,7 +296,7 @@ int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
  * @param source_addr
  * @param dest_addr
  * @param size_in_dwords
- * @param params
+ * @param p_params (default parameters will be used in case of NULL)
  */
 int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
@@ -368,26 +374,66 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn,
 		   u8 *dst_id);
 
 /**
- * @brief qed_llh_add_mac_filter - configures a MAC filter in llh
+ * @brief qed_llh_get_num_ppfid - Return the allocated number of LLH filter
+ *	banks that are allocated to the PF.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_filter - MAC to add
+ * @param cdev
+ *
+ * @return u8 - Number of LLH filter banks
  */
-int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
-			   struct qed_ptt *p_ptt, u8 *p_filter);
+u8 qed_llh_get_num_ppfid(struct qed_dev *cdev);
+
+enum qed_eng {
+	QED_ENG0,
+	QED_ENG1,
+	QED_BOTH_ENG,
+};
 
 /**
- * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh
+ * @brief qed_llh_set_ppfid_affinity - Set the engine affinity for the given
+ *	LLH filter bank.
+ *
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param eng
+ *
+ * @return int
+ */
+int qed_llh_set_ppfid_affinity(struct qed_dev *cdev,
+			       u8 ppfid, enum qed_eng eng);
+
+/**
+ * @brief qed_llh_set_roce_affinity - Set the RoCE engine affinity
+ *
+ * @param cdev
+ * @param eng
+ *
+ * @return int
+ */
+int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng);
+
+/**
+ * @brief qed_llh_add_mac_filter - Add a LLH MAC filter into the given filter
+ *	bank.
+ *
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param mac_addr - MAC to add
+ */
+int qed_llh_add_mac_filter(struct qed_dev *cdev,
+			   u8 ppfid, u8 mac_addr[ETH_ALEN]);
+
+/**
+ * @brief qed_llh_remove_mac_filter - Remove a LLH MAC filter from the given
+ *	filter bank.
  *
- * @param p_hwfn
  * @param p_ptt
  * @param p_filter - MAC to remove
  */
-void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt, u8 *p_filter);
+void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+			       u8 ppfid, u8 mac_addr[ETH_ALEN]);
 
-enum qed_llh_port_filter_type_t {
+enum qed_llh_prot_filter_type_t {
 	QED_LLH_FILTER_ETHERTYPE,
 	QED_LLH_FILTER_TCP_SRC_PORT,
 	QED_LLH_FILTER_TCP_DEST_PORT,
@@ -398,36 +444,37 @@ enum qed_llh_port_filter_type_t {
 };
 
 /**
- * @brief qed_llh_add_protocol_filter - configures a protocol filter in llh
+ * @brief qed_llh_add_protocol_filter - Add a LLH protocol filter into the
+ *	given filter bank.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param type - type of filters and comparing
  * @param source_port_or_eth_type - source port or ethertype to add
  * @param dest_port - destination port to add
  * @param type - type of filters and comparing
  */
 int
-qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    u16 source_port_or_eth_type,
-			    u16 dest_port,
-			    enum qed_llh_port_filter_type_t type);
+qed_llh_add_protocol_filter(struct qed_dev *cdev,
+			    u8 ppfid,
+			    enum qed_llh_prot_filter_type_t type,
+			    u16 source_port_or_eth_type, u16 dest_port);
 
 /**
- * @brief qed_llh_remove_protocol_filter - remove a protocol filter in llh
+ * @brief qed_llh_remove_protocol_filter - Remove a LLH protocol filter from
+ *	the given filter bank.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param type - type of filters and comparing
  * @param source_port_or_eth_type - source port or ethertype to add
  * @param dest_port - destination port to add
- * @param type - type of filters and comparing
  */
 void
-qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt,
-			       u16 source_port_or_eth_type,
-			       u16 dest_port,
-			       enum qed_llh_port_filter_type_t type);
+qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+			       u8 ppfid,
+			       enum qed_llh_prot_filter_type_t type,
+			       u16 source_port_or_eth_type, u16 dest_port);
 
 /**
  * *@brief Cleanup of previous driver remains prior to load
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index 46dc93d3b9b5..de31a382f58e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -745,7 +745,7 @@ struct qed_hash_fcoe_con {
 static int qed_fill_fcoe_dev_info(struct qed_dev *cdev,
 				  struct qed_dev_fcoe_info *info)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
 	int rc;
 
 	memset(info, 0, sizeof(*info));
@@ -806,15 +806,15 @@ static int qed_fcoe_stop(struct qed_dev *cdev)
 		return -EINVAL;
 	}
 
-	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
+	p_ptt = qed_ptt_acquire(QED_AFFIN_HWFN(cdev));
 	if (!p_ptt)
 		return -EAGAIN;
 
 	/* Stop the fcoe */
-	rc = qed_sp_fcoe_func_stop(QED_LEADING_HWFN(cdev), p_ptt,
+	rc = qed_sp_fcoe_func_stop(QED_AFFIN_HWFN(cdev), p_ptt,
 				   QED_SPQ_MODE_EBLOCK, NULL);
 	cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
-	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+	qed_ptt_release(QED_AFFIN_HWFN(cdev), p_ptt);
 
 	return rc;
 }
@@ -828,8 +828,8 @@ static int qed_fcoe_start(struct qed_dev *cdev, struct qed_fcoe_tid *tasks)
 		return 0;
 	}
 
-	rc = qed_sp_fcoe_func_start(QED_LEADING_HWFN(cdev),
-				    QED_SPQ_MODE_EBLOCK, NULL);
+	rc = qed_sp_fcoe_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+				    NULL);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to start fcoe\n");
 		return rc;
@@ -849,7 +849,7 @@ static int qed_fcoe_start(struct qed_dev *cdev, struct qed_fcoe_tid *tasks)
 			return -ENOMEM;
 		}
 
-		rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev), tid_info);
+		rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info);
 		if (rc) {
 			DP_NOTICE(cdev, "Failed to gather task information\n");
 			qed_fcoe_stop(cdev);
@@ -884,7 +884,7 @@ static int qed_fcoe_acquire_conn(struct qed_dev *cdev,
 	}
 
 	/* Acquire the connection */
-	rc = qed_fcoe_acquire_connection(QED_LEADING_HWFN(cdev), NULL,
+	rc = qed_fcoe_acquire_connection(QED_AFFIN_HWFN(cdev), NULL,
 					 &hash_con->con);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to acquire Connection\n");
@@ -898,7 +898,7 @@ static int qed_fcoe_acquire_conn(struct qed_dev *cdev,
 	hash_add(cdev->connections, &hash_con->node, *handle);
 
 	if (p_doorbell)
-		*p_doorbell = qed_fcoe_get_db_addr(QED_LEADING_HWFN(cdev),
+		*p_doorbell = qed_fcoe_get_db_addr(QED_AFFIN_HWFN(cdev),
 						   *handle);
 
 	return 0;
@@ -916,7 +916,7 @@ static int qed_fcoe_release_conn(struct qed_dev *cdev, u32 handle)
 	}
 
 	hlist_del(&hash_con->node);
-	qed_fcoe_release_connection(QED_LEADING_HWFN(cdev), hash_con->con);
+	qed_fcoe_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con);
 	kfree(hash_con);
 
 	return 0;
@@ -971,7 +971,7 @@ static int qed_fcoe_offload_conn(struct qed_dev *cdev,
 	con->d_id.addr_mid = conn_info->d_id.addr_mid;
 	con->d_id.addr_lo = conn_info->d_id.addr_lo;
 
-	return qed_sp_fcoe_conn_offload(QED_LEADING_HWFN(cdev), con,
+	return qed_sp_fcoe_conn_offload(QED_AFFIN_HWFN(cdev), con,
 					QED_SPQ_MODE_EBLOCK, NULL);
 }
 
@@ -992,13 +992,13 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
 	con = hash_con->con;
 	con->terminate_params = terminate_params;
 
-	return qed_sp_fcoe_conn_destroy(QED_LEADING_HWFN(cdev), con,
+	return qed_sp_fcoe_conn_destroy(QED_AFFIN_HWFN(cdev), con,
 					QED_SPQ_MODE_EBLOCK, NULL);
 }
 
 static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
 {
-	return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats);
+	return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats);
 }
 
 void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 37edaa847512..e054f6c69e3a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12612,8 +12612,10 @@ struct public_drv_mb {
 
 #define DRV_MSG_CODE_BIST_TEST			0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE		0x00200000
-#define DRV_MSG_CODE_RESOURCE_CMD	0x00230000
+#define DRV_MSG_CODE_RESOURCE_CMD		0x00230000
 #define DRV_MSG_CODE_GET_TLV_DONE		0x002f0000
+#define DRV_MSG_CODE_GET_ENGINE_CONFIG		0x00370000
+#define DRV_MSG_CODE_GET_PPFID_BITMAP		0x43000000
 
 #define RESOURCE_CMD_REQ_RESC_MASK		0x0000001F
 #define RESOURCE_CMD_REQ_RESC_SHIFT		0
@@ -12802,6 +12804,18 @@ struct public_drv_mb {
 
 #define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR	(1 << 0)
 
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_MASK   0x00000001
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_SHIFT 0
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_MASK   0x00000002
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_SHIFT 1
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_MASK    0x00000004
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_SHIFT  2
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_MASK    0x00000008
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_SHIFT  3
+
+#define FW_MB_PARAM_PPFID_BITMAP_MASK	0xFF
+#define FW_MB_PARAM_PPFID_BITMAP_SHIFT	0
+
 	u32 drv_pulse_mb;
 #define DRV_PULSE_SEQ_MASK			0x00007fff
 #define DRV_PULSE_SYSTEM_TIME_MASK		0xffff0000
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 72ec1c6bdf70..a4de9e3ef72c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -392,11 +392,15 @@ u32 qed_vfid_to_concrete(struct qed_hwfn *p_hwfn, u8 vfid)
 }
 
 /* DMAE */
+#define QED_DMAE_FLAGS_IS_SET(params, flag) \
+	((params) != NULL && ((params)->flags & QED_DMAE_FLAG_##flag))
+
 static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 			    const u8 is_src_type_grc,
 			    const u8 is_dst_type_grc,
 			    struct qed_dmae_params *p_params)
 {
+	u8 src_pfid, dst_pfid, port_id;
 	u16 opcode_b = 0;
 	u32 opcode = 0;
 
@@ -407,14 +411,18 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 	opcode |= (is_src_type_grc ? DMAE_CMD_SRC_MASK_GRC
 				   : DMAE_CMD_SRC_MASK_PCIE) <<
 		   DMAE_CMD_SRC_SHIFT;
-	opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_SRC_PF_ID_MASK) <<
+	src_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_SRC) ?
+		   p_params->src_pfid : p_hwfn->rel_pf_id;
+	opcode |= ((src_pfid & DMAE_CMD_SRC_PF_ID_MASK) <<
 		   DMAE_CMD_SRC_PF_ID_SHIFT);
 
 	/* The destination of the DMA can be: 0-None 1-PCIe 2-GRC 3-None */
 	opcode |= (is_dst_type_grc ? DMAE_CMD_DST_MASK_GRC
 				   : DMAE_CMD_DST_MASK_PCIE) <<
 		   DMAE_CMD_DST_SHIFT;
-	opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_DST_PF_ID_MASK) <<
+	dst_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_DST) ?
+		   p_params->dst_pfid : p_hwfn->rel_pf_id;
+	opcode |= ((dst_pfid & DMAE_CMD_DST_PF_ID_MASK) <<
 		   DMAE_CMD_DST_PF_ID_SHIFT);
 
 	/* Whether to write a completion word to the completion destination:
@@ -425,12 +433,14 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 	opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
 		   DMAE_CMD_SRC_ADDR_RESET_SHIFT);
 
-	if (p_params->flags & QED_DMAE_FLAG_COMPLETION_DST)
+	if (QED_DMAE_FLAGS_IS_SET(p_params, COMPLETION_DST))
 		opcode |= (1 << DMAE_CMD_COMP_FUNC_SHIFT);
 
 	opcode |= (DMAE_CMD_ENDIANITY << DMAE_CMD_ENDIANITY_MODE_SHIFT);
 
-	opcode |= ((p_hwfn->port_id) << DMAE_CMD_PORT_ID_SHIFT);
+	port_id = (QED_DMAE_FLAGS_IS_SET(p_params, PORT)) ?
+		   p_params->port_id : p_hwfn->port_id;
+	opcode |= (port_id << DMAE_CMD_PORT_ID_SHIFT);
 
 	/* reset source address in next go */
 	opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
@@ -441,7 +451,7 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 		   DMAE_CMD_DST_ADDR_RESET_SHIFT);
 
 	/* SRC/DST VFID: all 1's - pf, otherwise VF id */
-	if (p_params->flags & QED_DMAE_FLAG_VF_SRC) {
+	if (QED_DMAE_FLAGS_IS_SET(p_params, VF_SRC)) {
 		opcode |= 1 << DMAE_CMD_SRC_VF_ID_VALID_SHIFT;
 		opcode_b |= p_params->src_vfid << DMAE_CMD_SRC_VF_ID_SHIFT;
 	} else {
@@ -449,7 +459,7 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 			    DMAE_CMD_SRC_VF_ID_SHIFT;
 	}
 
-	if (p_params->flags & QED_DMAE_FLAG_VF_DST) {
+	if (QED_DMAE_FLAGS_IS_SET(p_params, VF_DST)) {
 		opcode |= 1 << DMAE_CMD_DST_VF_ID_VALID_SHIFT;
 		opcode_b |= p_params->dst_vfid << DMAE_CMD_DST_VF_ID_SHIFT;
 	} else {
@@ -733,7 +743,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 	for (i = 0; i <= cnt_split; i++) {
 		offset = length_limit * i;
 
-		if (!(p_params->flags & QED_DMAE_FLAG_RW_REPL_SRC)) {
+		if (!QED_DMAE_FLAGS_IS_SET(p_params, RW_REPL_SRC)) {
 			if (src_type == QED_DMAE_ADDRESS_GRC)
 				src_addr_split = src_addr + offset;
 			else
@@ -771,14 +781,12 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 
 int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt,
-		  u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags)
+		      u64 source_addr, u32 grc_addr, u32 size_in_dwords,
+		      struct qed_dmae_params *p_params)
 {
 	u32 grc_addr_in_dw = grc_addr / sizeof(u32);
-	struct qed_dmae_params params;
 	int rc;
 
-	memset(&params, 0, sizeof(struct qed_dmae_params));
-	params.flags = flags;
 
 	mutex_lock(&p_hwfn->dmae_info.mutex);
 
@@ -786,7 +794,7 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 				      grc_addr_in_dw,
 				      QED_DMAE_ADDRESS_HOST_VIRT,
 				      QED_DMAE_ADDRESS_GRC,
-				      size_in_dwords, &params);
+				      size_in_dwords, p_params);
 
 	mutex_unlock(&p_hwfn->dmae_info.mutex);
 
@@ -796,21 +804,19 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 int qed_dmae_grc2host(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt,
 		      u32 grc_addr,
-		      dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+		      dma_addr_t dest_addr, u32 size_in_dwords,
+		      struct qed_dmae_params *p_params)
 {
 	u32 grc_addr_in_dw = grc_addr / sizeof(u32);
-	struct qed_dmae_params params;
 	int rc;
 
-	memset(&params, 0, sizeof(struct qed_dmae_params));
-	params.flags = flags;
 
 	mutex_lock(&p_hwfn->dmae_info.mutex);
 
 	rc = qed_dmae_execute_command(p_hwfn, p_ptt, grc_addr_in_dw,
 				      dest_addr, QED_DMAE_ADDRESS_GRC,
 				      QED_DMAE_ADDRESS_HOST_VIRT,
-				      size_in_dwords, &params);
+				      size_in_dwords, p_params);
 
 	mutex_unlock(&p_hwfn->dmae_info.mutex);
 
@@ -842,7 +848,6 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, const char *phase)
 {
 	u32 size = PAGE_SIZE / 2, val;
-	struct qed_dmae_params params;
 	int rc = 0;
 	dma_addr_t p_phys;
 	void *p_virt;
@@ -875,9 +880,8 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 		   (u64)p_phys,
 		   p_virt, (u64)(p_phys + size), (u8 *)p_virt + size, size);
 
-	memset(&params, 0, sizeof(params));
 	rc = qed_dmae_host2host(p_hwfn, p_ptt, p_phys, p_phys + size,
-				size / 4 /* size_in_dwords */, &params);
+				size / 4, NULL);
 	if (rc) {
 		DP_NOTICE(p_hwfn,
 			  "DMAE sanity [%s]: qed_dmae_host2host() failed. rc = %d.\n",
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index 34193c2f1699..a868d7f88601 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -131,7 +131,7 @@ static int qed_init_rt(struct qed_hwfn	*p_hwfn,
 
 		rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 				       (uintptr_t)(p_init_val + i),
-				       addr + (i << 2), segment, 0);
+				       addr + (i << 2), segment, NULL);
 		if (rc)
 			return rc;
 
@@ -194,7 +194,7 @@ static int qed_init_array_dmae(struct qed_hwfn *p_hwfn,
 	} else {
 		rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 				       (uintptr_t)(buf + dmae_data_offset),
-				       addr, size, 0);
+				       addr, size, NULL);
 	}
 
 	return rc;
@@ -205,6 +205,7 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 			      u32 addr, u32 fill, u32 fill_count)
 {
 	static u32 zero_buffer[DMAE_MAX_RW_SIZE];
+	struct qed_dmae_params params = {};
 
 	memset(zero_buffer, 0, sizeof(u32) * DMAE_MAX_RW_SIZE);
 
@@ -214,10 +215,10 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 	 * 3. p_hwfb->temp_data,
 	 * 4. fill_count
 	 */
-
+	params.flags = QED_DMAE_FLAG_RW_REPL_SRC;
 	return qed_dmae_host2grc(p_hwfn, p_ptt,
 				 (uintptr_t)(&zero_buffer[0]),
-				 addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC);
+				 addr, fill_count, &params);
 }
 
 static void qed_init_fill(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index fdfedbc8e431..4e8118a08654 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1508,10 +1508,10 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
 
 		qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&phys_addr,
 				  CAU_REG_SB_ADDR_MEMORY +
-				  igu_sb_id * sizeof(u64), 2, 0);
+				  igu_sb_id * sizeof(u64), 2, NULL);
 		qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&sb_entry,
 				  CAU_REG_SB_VAR_MEMORY +
-				  igu_sb_id * sizeof(u64), 2, 0);
+				  igu_sb_id * sizeof(u64), 2, NULL);
 	} else {
 		/* Initialize Status Block Address */
 		STORE_RT_REG_AGG(p_hwfn,
@@ -2362,7 +2362,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 
 	rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
 			       sb_id * sizeof(u64),
-			       (u64)(uintptr_t)&sb_entry, 2, 0);
+			       (u64)(uintptr_t)&sb_entry, 2, NULL);
 	if (rc) {
 		DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
 		return rc;
@@ -2376,7 +2376,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 	rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 			       (u64)(uintptr_t)&sb_entry,
 			       CAU_REG_SB_VAR_MEMORY +
-			       sb_id * sizeof(u64), 2, 0);
+			       sb_id * sizeof(u64), 2, NULL);
 	if (rc) {
 		DP_ERR(p_hwfn, "dmae_host2grc failed %d\n", rc);
 		return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 4f8a685d1a55..5585c18053ec 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -1082,7 +1082,7 @@ struct qed_hash_iscsi_con {
 static int qed_fill_iscsi_dev_info(struct qed_dev *cdev,
 				   struct qed_dev_iscsi_info *info)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
 
 	int rc;
 
@@ -1141,8 +1141,8 @@ static int qed_iscsi_stop(struct qed_dev *cdev)
 	}
 
 	/* Stop the iscsi */
-	rc = qed_sp_iscsi_func_stop(QED_LEADING_HWFN(cdev),
-				    QED_SPQ_MODE_EBLOCK, NULL);
+	rc = qed_sp_iscsi_func_stop(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+				    NULL);
 	cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
 
 	return rc;
@@ -1161,9 +1161,8 @@ static int qed_iscsi_start(struct qed_dev *cdev,
 		return 0;
 	}
 
-	rc = qed_sp_iscsi_func_start(QED_LEADING_HWFN(cdev),
-				     QED_SPQ_MODE_EBLOCK, NULL, event_context,
-				     async_event_cb);
+	rc = qed_sp_iscsi_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+				     NULL, event_context, async_event_cb);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to start iscsi\n");
 		return rc;
@@ -1182,8 +1181,7 @@ static int qed_iscsi_start(struct qed_dev *cdev,
 		return -ENOMEM;
 	}
 
-	rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev),
-				      tid_info);
+	rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to gather task information\n");
 		qed_iscsi_stop(cdev);
@@ -1215,7 +1213,7 @@ static int qed_iscsi_acquire_conn(struct qed_dev *cdev,
 		return -ENOMEM;
 
 	/* Acquire the connection */
-	rc = qed_iscsi_acquire_connection(QED_LEADING_HWFN(cdev), NULL,
+	rc = qed_iscsi_acquire_connection(QED_AFFIN_HWFN(cdev), NULL,
 					  &hash_con->con);
 	if (rc) {
 		DP_NOTICE(cdev, "Failed to acquire Connection\n");
@@ -1229,7 +1227,7 @@ static int qed_iscsi_acquire_conn(struct qed_dev *cdev,
 	hash_add(cdev->connections, &hash_con->node, *handle);
 
 	if (p_doorbell)
-		*p_doorbell = qed_iscsi_get_db_addr(QED_LEADING_HWFN(cdev),
+		*p_doorbell = qed_iscsi_get_db_addr(QED_AFFIN_HWFN(cdev),
 						    *handle);
 
 	return 0;
@@ -1247,7 +1245,7 @@ static int qed_iscsi_release_conn(struct qed_dev *cdev, u32 handle)
 	}
 
 	hlist_del(&hash_con->node);
-	qed_iscsi_release_connection(QED_LEADING_HWFN(cdev), hash_con->con);
+	qed_iscsi_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con);
 	kfree(hash_con);
 
 	return 0;
@@ -1324,7 +1322,7 @@ static int qed_iscsi_offload_conn(struct qed_dev *cdev,
 	/* Set default values on other connection fields */
 	con->offl_flags = 0x1;
 
-	return qed_sp_iscsi_conn_offload(QED_LEADING_HWFN(cdev), con,
+	return qed_sp_iscsi_conn_offload(QED_AFFIN_HWFN(cdev), con,
 					 QED_SPQ_MODE_EBLOCK, NULL);
 }
 
@@ -1351,7 +1349,7 @@ static int qed_iscsi_update_conn(struct qed_dev *cdev,
 	con->first_seq_length = conn_info->first_seq_length;
 	con->exp_stat_sn = conn_info->exp_stat_sn;
 
-	return qed_sp_iscsi_conn_update(QED_LEADING_HWFN(cdev), con,
+	return qed_sp_iscsi_conn_update(QED_AFFIN_HWFN(cdev), con,
 					QED_SPQ_MODE_EBLOCK, NULL);
 }
 
@@ -1366,8 +1364,7 @@ static int qed_iscsi_clear_conn_sq(struct qed_dev *cdev, u32 handle)
 		return -EINVAL;
 	}
 
-	return qed_sp_iscsi_conn_clear_sq(QED_LEADING_HWFN(cdev),
-					  hash_con->con,
+	return qed_sp_iscsi_conn_clear_sq(QED_AFFIN_HWFN(cdev), hash_con->con,
 					  QED_SPQ_MODE_EBLOCK, NULL);
 }
 
@@ -1385,14 +1382,13 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
 
 	hash_con->con->abortive_dsconnect = abrt_conn;
 
-	return qed_sp_iscsi_conn_terminate(QED_LEADING_HWFN(cdev),
-					   hash_con->con,
+	return qed_sp_iscsi_conn_terminate(QED_AFFIN_HWFN(cdev), hash_con->con,
 					   QED_SPQ_MODE_EBLOCK, NULL);
 }
 
 static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
 {
-	return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
+	return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats);
 }
 
 static int qed_iscsi_change_mac(struct qed_dev *cdev,
@@ -1407,8 +1403,7 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
 		return -EINVAL;
 	}
 
-	return qed_sp_iscsi_mac_update(QED_LEADING_HWFN(cdev),
-				       hash_con->con,
+	return qed_sp_iscsi_mac_update(QED_AFFIN_HWFN(cdev), hash_con->con,
 				       QED_SPQ_MODE_EBLOCK, NULL);
 }
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ded556b7bab5..7c71ea15251f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -2528,7 +2528,7 @@ qed_iwarp_ll2_slowpath(void *cxt,
 		memset(fpdu, 0, sizeof(*fpdu));
 }
 
-static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn)
 {
 	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
 	int rc = 0;
@@ -2563,8 +2563,9 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 		iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
 	}
 
-	qed_llh_remove_mac_filter(p_hwfn,
-				  p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
+	qed_llh_remove_mac_filter(p_hwfn->cdev, 0,
+				  p_hwfn->p_rdma_info->iwarp.mac_addr);
+
 	return rc;
 }
 
@@ -2608,8 +2609,7 @@ qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn,
 
 static int
 qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
-		    struct qed_rdma_start_in_params *params,
-		    struct qed_ptt *p_ptt)
+		    struct qed_rdma_start_in_params *params)
 {
 	struct qed_iwarp_info *iwarp_info;
 	struct qed_ll2_acquire_data data;
@@ -2628,7 +2628,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 
 	ether_addr_copy(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr);
 
-	rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+	rc = qed_llh_add_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
 	if (rc)
 		return rc;
 
@@ -2653,7 +2653,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 	rc = qed_ll2_acquire_connection(p_hwfn, &data);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n");
-		qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+		qed_llh_remove_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
 		return rc;
 	}
 
@@ -2757,12 +2757,12 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 			      &iwarp_info->mpa_buf_list);
 	return rc;
 err:
-	qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+	qed_iwarp_ll2_stop(p_hwfn);
 
 	return rc;
 }
 
-int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn,
 		    struct qed_rdma_start_in_params *params)
 {
 	struct qed_iwarp_info *iwarp_info;
@@ -2794,10 +2794,10 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 				  qed_iwarp_async_event);
 	qed_ooo_setup(p_hwfn);
 
-	return qed_iwarp_ll2_start(p_hwfn, params, p_ptt);
+	return qed_iwarp_ll2_start(p_hwfn, params);
 }
 
-int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn)
 {
 	int rc;
 
@@ -2808,7 +2808,7 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
 	qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP);
 
-	return qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+	return qed_iwarp_ll2_stop(p_hwfn);
 }
 
 static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
index 7ac959038324..c1b2057d23b8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -183,13 +183,13 @@ struct qed_iwarp_listener {
 
 int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
 
-int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn,
 		    struct qed_rdma_start_in_params *params);
 
 void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
 			      struct iwarp_init_func_ramrod_data *p_ramrod);
 
-int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn);
 
 void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 57641728df69..9f36e7948222 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2111,7 +2111,7 @@ int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn,
 
 	rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
 			       p_cid->sb_igu_id * sizeof(u64),
-			       (u64)(uintptr_t)&sb_entry, 2, 0);
+			       (u64)(uintptr_t)&sb_entry, 2, NULL);
 	if (rc) {
 		DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
 		return rc;
@@ -2144,7 +2144,7 @@ int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn,
 
 	rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
 			       p_cid->sb_igu_id * sizeof(u64),
-			       (u64)(uintptr_t)&sb_entry, 2, 0);
+			       (u64)(uintptr_t)&sb_entry, 2, NULL);
 	if (rc) {
 		DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
 		return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index b5f419b71287..19a1a58d60f8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -239,9 +239,8 @@ out_post1:
 	buffer->phys_addr = new_phys_addr;
 
 out_post:
-	rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle,
-				    buffer->phys_addr, 0,  buffer, 1);
-
+	rc = qed_ll2_post_rx_buffer(p_hwfn, cdev->ll2->handle,
+				    buffer->phys_addr, 0, buffer, 1);
 	if (rc)
 		qed_ll2_dealloc_buffer(cdev, buffer);
 }
@@ -926,16 +925,15 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
 	return 0;
 }
 
-static void qed_ll2_stop_ooo(struct qed_dev *cdev)
+static void qed_ll2_stop_ooo(struct qed_hwfn *p_hwfn)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+	u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
 
-	DP_VERBOSE(cdev, QED_MSG_STORAGE, "Stopping LL2 OOO queue [%02x]\n",
-		   *handle);
+	DP_VERBOSE(p_hwfn, (QED_MSG_STORAGE | QED_MSG_LL2),
+		   "Stopping LL2 OOO queue [%02x]\n", *handle);
 
-	qed_ll2_terminate_connection(hwfn, *handle);
-	qed_ll2_release_connection(hwfn, *handle);
+	qed_ll2_terminate_connection(p_hwfn, *handle);
+	qed_ll2_release_connection(p_hwfn, *handle);
 	*handle = QED_LL2_UNUSED_HANDLE;
 }
 
@@ -1574,12 +1572,12 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
 
 	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
 		if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
-			qed_llh_add_protocol_filter(p_hwfn, p_ptt,
-						    ETH_P_FCOE, 0,
-						    QED_LLH_FILTER_ETHERTYPE);
-		qed_llh_add_protocol_filter(p_hwfn, p_ptt,
-					    ETH_P_FIP, 0,
-					    QED_LLH_FILTER_ETHERTYPE);
+			qed_llh_add_protocol_filter(p_hwfn->cdev, 0,
+						    QED_LLH_FILTER_ETHERTYPE,
+						    ETH_P_FCOE, 0);
+		qed_llh_add_protocol_filter(p_hwfn->cdev, 0,
+					    QED_LLH_FILTER_ETHERTYPE,
+					    ETH_P_FIP, 0);
 	}
 
 out:
@@ -1980,12 +1978,12 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
 
 	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
 		if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
-			qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
-						       ETH_P_FCOE, 0,
-						      QED_LLH_FILTER_ETHERTYPE);
-		qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
-					       ETH_P_FIP, 0,
-					       QED_LLH_FILTER_ETHERTYPE);
+			qed_llh_remove_protocol_filter(p_hwfn->cdev, 0,
+						       QED_LLH_FILTER_ETHERTYPE,
+						       ETH_P_FCOE, 0);
+		qed_llh_remove_protocol_filter(p_hwfn->cdev, 0,
+					       QED_LLH_FILTER_ETHERTYPE,
+					       ETH_P_FIP, 0);
 	}
 
 out:
@@ -2086,12 +2084,12 @@ static void _qed_ll2_get_port_stats(struct qed_hwfn *p_hwfn,
 			TSTORM_LL2_PORT_STAT_OFFSET(MFW_PORT(p_hwfn)),
 			sizeof(port_stats));
 
-	p_stats->gsi_invalid_hdr = HILO_64_REGPAIR(port_stats.gsi_invalid_hdr);
-	p_stats->gsi_invalid_pkt_length =
+	p_stats->gsi_invalid_hdr += HILO_64_REGPAIR(port_stats.gsi_invalid_hdr);
+	p_stats->gsi_invalid_pkt_length +=
 	    HILO_64_REGPAIR(port_stats.gsi_invalid_pkt_length);
-	p_stats->gsi_unsupported_pkt_typ =
+	p_stats->gsi_unsupported_pkt_typ +=
 	    HILO_64_REGPAIR(port_stats.gsi_unsupported_pkt_typ);
-	p_stats->gsi_crcchksm_error =
+	p_stats->gsi_crcchksm_error +=
 	    HILO_64_REGPAIR(port_stats.gsi_crcchksm_error);
 }
 
@@ -2109,9 +2107,9 @@ static void _qed_ll2_get_tstats(struct qed_hwfn *p_hwfn,
 		      CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid);
 	qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats));
 
-	p_stats->packet_too_big_discard =
+	p_stats->packet_too_big_discard +=
 			HILO_64_REGPAIR(tstats.packet_too_big_discard);
-	p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard);
+	p_stats->no_buff_discard += HILO_64_REGPAIR(tstats.no_buff_discard);
 }
 
 static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn,
@@ -2128,12 +2126,12 @@ static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn,
 		      CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid);
 	qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats));
 
-	p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
-	p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
-	p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
-	p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
-	p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
-	p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
+	p_stats->rcv_ucast_bytes += HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
+	p_stats->rcv_mcast_bytes += HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
+	p_stats->rcv_bcast_bytes += HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
+	p_stats->rcv_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
+	p_stats->rcv_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
+	p_stats->rcv_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
 }
 
 static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn,
@@ -2150,23 +2148,21 @@ static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn,
 		      CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id);
 	qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats));
 
-	p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes);
-	p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes);
-	p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes);
-	p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts);
-	p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts);
-	p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts);
+	p_stats->sent_ucast_bytes += HILO_64_REGPAIR(pstats.sent_ucast_bytes);
+	p_stats->sent_mcast_bytes += HILO_64_REGPAIR(pstats.sent_mcast_bytes);
+	p_stats->sent_bcast_bytes += HILO_64_REGPAIR(pstats.sent_bcast_bytes);
+	p_stats->sent_ucast_pkts += HILO_64_REGPAIR(pstats.sent_ucast_pkts);
+	p_stats->sent_mcast_pkts += HILO_64_REGPAIR(pstats.sent_mcast_pkts);
+	p_stats->sent_bcast_pkts += HILO_64_REGPAIR(pstats.sent_bcast_pkts);
 }
 
-int qed_ll2_get_stats(void *cxt,
-		      u8 connection_handle, struct qed_ll2_stats *p_stats)
+static int __qed_ll2_get_stats(void *cxt, u8 connection_handle,
+			       struct qed_ll2_stats *p_stats)
 {
 	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_info *p_ll2_conn = NULL;
 	struct qed_ptt *p_ptt;
 
-	memset(p_stats, 0, sizeof(*p_stats));
-
 	if ((connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) ||
 	    !p_hwfn->p_ll2_info)
 		return -EINVAL;
@@ -2181,15 +2177,26 @@ int qed_ll2_get_stats(void *cxt,
 
 	if (p_ll2_conn->input.gsi_enable)
 		_qed_ll2_get_port_stats(p_hwfn, p_ptt, p_stats);
+
 	_qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+
 	_qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+
 	if (p_ll2_conn->tx_stats_en)
 		_qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
 
 	qed_ptt_release(p_hwfn, p_ptt);
+
 	return 0;
 }
 
+int qed_ll2_get_stats(void *cxt,
+		      u8 connection_handle, struct qed_ll2_stats *p_stats)
+{
+	memset(p_stats, 0, sizeof(*p_stats));
+	return __qed_ll2_get_stats(cxt, connection_handle, p_stats);
+}
+
 static void qed_ll2b_release_rx_packet(void *cxt,
 				       u8 connection_handle,
 				       void *cookie,
@@ -2216,7 +2223,7 @@ struct qed_ll2_cbs ll2_cbs = {
 	.tx_release_cb = &qed_ll2b_complete_tx_packet,
 };
 
-static void qed_ll2_set_conn_data(struct qed_dev *cdev,
+static void qed_ll2_set_conn_data(struct qed_hwfn *p_hwfn,
 				  struct qed_ll2_acquire_data *data,
 				  struct qed_ll2_params *params,
 				  enum qed_ll2_conn_type conn_type,
@@ -2232,7 +2239,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
 	data->input.tx_num_desc = QED_LL2_TX_SIZE;
 	data->p_connection_handle = handle;
 	data->cbs = &ll2_cbs;
-	ll2_cbs.cookie = QED_LEADING_HWFN(cdev);
+	ll2_cbs.cookie = p_hwfn;
 
 	if (lb) {
 		data->input.tx_tc = PKT_LB_TC;
@@ -2243,74 +2250,102 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
 	}
 }
 
-static int qed_ll2_start_ooo(struct qed_dev *cdev,
+static int qed_ll2_start_ooo(struct qed_hwfn *p_hwfn,
 			     struct qed_ll2_params *params)
 {
-	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
-	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+	u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
 	struct qed_ll2_acquire_data data;
 	int rc;
 
-	qed_ll2_set_conn_data(cdev, &data, params,
+	qed_ll2_set_conn_data(p_hwfn, &data, params,
 			      QED_LL2_TYPE_OOO, handle, true);
 
-	rc = qed_ll2_acquire_connection(hwfn, &data);
+	rc = qed_ll2_acquire_connection(p_hwfn, &data);
 	if (rc) {
-		DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n");
+		DP_INFO(p_hwfn, "Failed to acquire LL2 OOO connection\n");
 		goto out;
 	}
 
-	rc = qed_ll2_establish_connection(hwfn, *handle);
+	rc = qed_ll2_establish_connection(p_hwfn, *handle);
 	if (rc) {
-		DP_INFO(cdev, "Failed to establist LL2 OOO connection\n");
+		DP_INFO(p_hwfn, "Failed to establish LL2 OOO connection\n");
 		goto fail;
 	}
 
 	return 0;
 
 fail:
-	qed_ll2_release_connection(hwfn, *handle);
+	qed_ll2_release_connection(p_hwfn, *handle);
 out:
 	*handle = QED_LL2_UNUSED_HANDLE;
 	return rc;
 }
 
-static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
+static bool qed_ll2_is_storage_eng1(struct qed_dev *cdev)
 {
-	struct qed_ll2_buffer *buffer, *tmp_buffer;
-	enum qed_ll2_conn_type conn_type;
-	struct qed_ll2_acquire_data data;
-	struct qed_ptt *p_ptt;
-	int rc, i;
+	return (QED_IS_FCOE_PERSONALITY(QED_LEADING_HWFN(cdev)) ||
+		QED_IS_ISCSI_PERSONALITY(QED_LEADING_HWFN(cdev))) &&
+		(QED_AFFIN_HWFN(cdev) != QED_LEADING_HWFN(cdev));
+}
 
+static int __qed_ll2_stop(struct qed_hwfn *p_hwfn)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	int rc;
 
-	/* Initialize LL2 locks & lists */
-	INIT_LIST_HEAD(&cdev->ll2->list);
-	spin_lock_init(&cdev->ll2->lock);
-	cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
-			     L1_CACHE_BYTES + params->mtu;
+	rc = qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle);
+	if (rc)
+		DP_INFO(cdev, "Failed to terminate LL2 connection\n");
 
-	/*Allocate memory for LL2 */
-	DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n",
-		cdev->ll2->rx_size);
-	for (i = 0; i < QED_LL2_RX_SIZE; i++) {
-		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
-		if (!buffer) {
-			DP_INFO(cdev, "Failed to allocate LL2 buffers\n");
-			goto fail;
-		}
+	qed_ll2_release_connection(p_hwfn, cdev->ll2->handle);
 
-		rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data,
-					  &buffer->phys_addr);
-		if (rc) {
-			kfree(buffer);
-			goto fail;
-		}
+	return rc;
+}
 
-		list_add_tail(&buffer->list, &cdev->ll2->list);
+static int qed_ll2_stop(struct qed_dev *cdev)
+{
+	bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+	int rc = 0, rc2 = 0;
+
+	if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE)
+		return 0;
+
+	qed_llh_remove_mac_filter(cdev, 0, cdev->ll2_mac_address);
+	eth_zero_addr(cdev->ll2_mac_address);
+
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
+		qed_ll2_stop_ooo(p_hwfn);
+
+	/* In CMT mode, LL2 is always started on engine 0 for a storage PF */
+	if (b_is_storage_eng1) {
+		rc2 = __qed_ll2_stop(QED_LEADING_HWFN(cdev));
+		if (rc2)
+			DP_NOTICE(QED_LEADING_HWFN(cdev),
+				  "Failed to stop LL2 on engine 0\n");
 	}
 
-	switch (QED_LEADING_HWFN(cdev)->hw_info.personality) {
+	rc = __qed_ll2_stop(p_hwfn);
+	if (rc)
+		DP_NOTICE(p_hwfn, "Failed to stop LL2\n");
+
+	qed_ll2_kill_buffers(cdev);
+
+	cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
+
+	return rc | rc2;
+}
+
+static int __qed_ll2_start(struct qed_hwfn *p_hwfn,
+			   struct qed_ll2_params *params)
+{
+	struct qed_ll2_buffer *buffer, *tmp_buffer;
+	struct qed_dev *cdev = p_hwfn->cdev;
+	enum qed_ll2_conn_type conn_type;
+	struct qed_ll2_acquire_data data;
+	int rc, rx_cnt;
+
+	switch (p_hwfn->hw_info.personality) {
 	case QED_PCI_FCOE:
 		conn_type = QED_LL2_TYPE_FCOE;
 		break;
@@ -2321,33 +2356,34 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 		conn_type = QED_LL2_TYPE_ROCE;
 		break;
 	default:
+
 		conn_type = QED_LL2_TYPE_TEST;
 	}
 
-	qed_ll2_set_conn_data(cdev, &data, params, conn_type,
+	qed_ll2_set_conn_data(p_hwfn, &data, params, conn_type,
 			      &cdev->ll2->handle, false);
 
-	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &data);
+	rc = qed_ll2_acquire_connection(p_hwfn, &data);
 	if (rc) {
-		DP_INFO(cdev, "Failed to acquire LL2 connection\n");
-		goto fail;
+		DP_INFO(p_hwfn, "Failed to acquire LL2 connection\n");
+		return rc;
 	}
 
-	rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev),
-					  cdev->ll2->handle);
+	rc = qed_ll2_establish_connection(p_hwfn, cdev->ll2->handle);
 	if (rc) {
-		DP_INFO(cdev, "Failed to establish LL2 connection\n");
-		goto release_fail;
+		DP_INFO(p_hwfn, "Failed to establish LL2 connection\n");
+		goto release_conn;
 	}
 
 	/* Post all Rx buffers to FW */
 	spin_lock_bh(&cdev->ll2->lock);
+	rx_cnt = cdev->ll2->rx_cnt;
 	list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) {
-		rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev),
+		rc = qed_ll2_post_rx_buffer(p_hwfn,
 					    cdev->ll2->handle,
 					    buffer->phys_addr, 0, buffer, 1);
 		if (rc) {
-			DP_INFO(cdev,
+			DP_INFO(p_hwfn,
 				"Failed to post an Rx buffer; Deleting it\n");
 			dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
 					 cdev->ll2->rx_size, DMA_FROM_DEVICE);
@@ -2355,100 +2391,127 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 			list_del(&buffer->list);
 			kfree(buffer);
 		} else {
-			cdev->ll2->rx_cnt++;
+			rx_cnt++;
 		}
 	}
 	spin_unlock_bh(&cdev->ll2->lock);
 
-	if (!cdev->ll2->rx_cnt) {
-		DP_INFO(cdev, "Failed passing even a single Rx buffer\n");
-		goto release_terminate;
+	if (rx_cnt == cdev->ll2->rx_cnt) {
+		DP_NOTICE(p_hwfn, "Failed passing even a single Rx buffer\n");
+		goto terminate_conn;
 	}
+	cdev->ll2->rx_cnt = rx_cnt;
+
+	return 0;
+
+terminate_conn:
+	qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle);
+release_conn:
+	qed_ll2_release_connection(p_hwfn, cdev->ll2->handle);
+	return rc;
+}
+
+static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
+{
+	bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+	struct qed_ll2_buffer *buffer;
+	int rx_num_desc, i, rc;
 
 	if (!is_valid_ether_addr(params->ll2_mac_address)) {
-		DP_INFO(cdev, "Invalid Ethernet address\n");
-		goto release_terminate;
+		DP_NOTICE(cdev, "Invalid Ethernet address\n");
+		return -EINVAL;
 	}
 
-	if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI) {
-		DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
-		rc = qed_ll2_start_ooo(cdev, params);
+	WARN_ON(!cdev->ll2->cbs);
+
+	/* Initialize LL2 locks & lists */
+	INIT_LIST_HEAD(&cdev->ll2->list);
+	spin_lock_init(&cdev->ll2->lock);
+
+	cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
+			     L1_CACHE_BYTES + params->mtu;
+
+	/* Allocate memory for LL2.
+	 * In CMT mode, in case of a storage PF which is affintized to engine 1,
+	 * LL2 is started also on engine 0 and thus we need twofold buffers.
+	 */
+	rx_num_desc = QED_LL2_RX_SIZE * (b_is_storage_eng1 ? 2 : 1);
+	DP_INFO(cdev, "Allocating %d LL2 buffers of size %08x bytes\n",
+		rx_num_desc, cdev->ll2->rx_size);
+	for (i = 0; i < rx_num_desc; i++) {
+		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+		if (!buffer) {
+			DP_INFO(cdev, "Failed to allocate LL2 buffers\n");
+			rc = -ENOMEM;
+			goto err0;
+		}
+
+		rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data,
+					  &buffer->phys_addr);
 		if (rc) {
-			DP_INFO(cdev,
-				"Failed to initialize the OOO LL2 queue\n");
-			goto release_terminate;
+			kfree(buffer);
+			goto err0;
 		}
-	}
 
-	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
-	if (!p_ptt) {
-		DP_INFO(cdev, "Failed to acquire PTT\n");
-		goto release_terminate;
+		list_add_tail(&buffer->list, &cdev->ll2->list);
 	}
 
-	rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
-				    params->ll2_mac_address);
-	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+	rc = __qed_ll2_start(p_hwfn, params);
 	if (rc) {
-		DP_ERR(cdev, "Failed to allocate LLH filter\n");
-		goto release_terminate_all;
+		DP_NOTICE(cdev, "Failed to start LL2\n");
+		goto err0;
 	}
 
-	ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address);
-	return 0;
-
-release_terminate_all:
-
-release_terminate:
-	qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
-release_fail:
-	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
-fail:
-	qed_ll2_kill_buffers(cdev);
-	cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
-	return -EINVAL;
-}
-
-static int qed_ll2_stop(struct qed_dev *cdev)
-{
-	struct qed_ptt *p_ptt;
-	int rc;
-
-	if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE)
-		return 0;
+	/* In CMT mode, always need to start LL2 on engine 0 for a storage PF,
+	 * since broadcast/mutlicast packets are routed to engine 0.
+	 */
+	if (b_is_storage_eng1) {
+		rc = __qed_ll2_start(QED_LEADING_HWFN(cdev), params);
+		if (rc) {
+			DP_NOTICE(QED_LEADING_HWFN(cdev),
+				  "Failed to start LL2 on engine 0\n");
+			goto err1;
+		}
+	}
 
-	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
-	if (!p_ptt) {
-		DP_INFO(cdev, "Failed to acquire PTT\n");
-		goto fail;
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) {
+		DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
+		rc = qed_ll2_start_ooo(p_hwfn, params);
+		if (rc) {
+			DP_NOTICE(cdev, "Failed to start OOO LL2\n");
+			goto err2;
+		}
 	}
 
-	qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
-				  cdev->ll2_mac_address);
-	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
-	eth_zero_addr(cdev->ll2_mac_address);
+	rc = qed_llh_add_mac_filter(cdev, 0, params->ll2_mac_address);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to add an LLH filter\n");
+		goto err3;
+	}
 
-	if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI)
-		qed_ll2_stop_ooo(cdev);
+	ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address);
 
-	rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev),
-					  cdev->ll2->handle);
-	if (rc)
-		DP_INFO(cdev, "Failed to terminate LL2 connection\n");
+	return 0;
 
+err3:
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
+		qed_ll2_stop_ooo(p_hwfn);
+err2:
+	if (b_is_storage_eng1)
+		__qed_ll2_stop(QED_LEADING_HWFN(cdev));
+err1:
+	__qed_ll2_stop(p_hwfn);
+err0:
 	qed_ll2_kill_buffers(cdev);
-
-	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
 	cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
-
 	return rc;
-fail:
-	return -EINVAL;
 }
 
 static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
 			      unsigned long xmit_flags)
 {
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
 	struct qed_ll2_tx_pkt_info pkt;
 	const skb_frag_t *frag;
 	u8 flags = 0, nr_frags;
@@ -2506,7 +2569,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
 	 * routine may run and free the SKB, so no dereferencing the SKB
 	 * beyond this point unless skb has any fragments.
 	 */
-	rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle,
+	rc = qed_ll2_prepare_tx_packet(p_hwfn, cdev->ll2->handle,
 				       &pkt, 1);
 	if (rc)
 		goto err;
@@ -2524,13 +2587,13 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
 			goto err;
 		}
 
-		rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev),
+		rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn,
 						       cdev->ll2->handle,
 						       mapping,
 						       skb_frag_size(frag));
 
 		/* if failed not much to do here, partial packet has been posted
-		 * we can't free memory, will need to wait for completion.
+		 * we can't free memory, will need to wait for completion
 		 */
 		if (rc)
 			goto err2;
@@ -2540,18 +2603,37 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
 
 err:
 	dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE);
-
 err2:
 	return rc;
 }
 
 static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats)
 {
+	bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+	int rc;
+
 	if (!cdev->ll2)
 		return -EINVAL;
 
-	return qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
-				 cdev->ll2->handle, stats);
+	rc = qed_ll2_get_stats(p_hwfn, cdev->ll2->handle, stats);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed to get LL2 stats\n");
+		return rc;
+	}
+
+	/* In CMT mode, LL2 is always started on engine 0 for a storage PF */
+	if (b_is_storage_eng1) {
+		rc = __qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
+					 cdev->ll2->handle, stats);
+		if (rc) {
+			DP_NOTICE(QED_LEADING_HWFN(cdev),
+				  "Failed to get LL2 stats on engine 0\n");
+			return rc;
+		}
+	}
+
+	return 0;
 }
 
 const struct qed_ll2_ops qed_ll2_ops_pass = {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 6de23b56b294..829dd60ab937 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -48,6 +48,7 @@
 #include <linux/crc32.h>
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_ll2_if.h>
+#include <net/devlink.h>
 
 #include "qed.h"
 #include "qed_sriov.h"
@@ -342,6 +343,107 @@ static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state)
 	return 0;
 }
 
+struct qed_devlink {
+	struct qed_dev *cdev;
+};
+
+enum qed_devlink_param_id {
+	QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+	QED_DEVLINK_PARAM_ID_IWARP_CMT,
+};
+
+static int qed_dl_param_get(struct devlink *dl, u32 id,
+			    struct devlink_param_gset_ctx *ctx)
+{
+	struct qed_devlink *qed_dl;
+	struct qed_dev *cdev;
+
+	qed_dl = devlink_priv(dl);
+	cdev = qed_dl->cdev;
+	ctx->val.vbool = cdev->iwarp_cmt;
+
+	return 0;
+}
+
+static int qed_dl_param_set(struct devlink *dl, u32 id,
+			    struct devlink_param_gset_ctx *ctx)
+{
+	struct qed_devlink *qed_dl;
+	struct qed_dev *cdev;
+
+	qed_dl = devlink_priv(dl);
+	cdev = qed_dl->cdev;
+	cdev->iwarp_cmt = ctx->val.vbool;
+
+	return 0;
+}
+
+static const struct devlink_param qed_devlink_params[] = {
+	DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT,
+			     "iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL,
+			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+			     qed_dl_param_get, qed_dl_param_set, NULL),
+};
+
+static const struct devlink_ops qed_dl_ops;
+
+static int qed_devlink_register(struct qed_dev *cdev)
+{
+	union devlink_param_value value;
+	struct qed_devlink *qed_dl;
+	struct devlink *dl;
+	int rc;
+
+	dl = devlink_alloc(&qed_dl_ops, sizeof(*qed_dl));
+	if (!dl)
+		return -ENOMEM;
+
+	qed_dl = devlink_priv(dl);
+
+	cdev->dl = dl;
+	qed_dl->cdev = cdev;
+
+	rc = devlink_register(dl, &cdev->pdev->dev);
+	if (rc)
+		goto err_free;
+
+	rc = devlink_params_register(dl, qed_devlink_params,
+				     ARRAY_SIZE(qed_devlink_params));
+	if (rc)
+		goto err_unregister;
+
+	value.vbool = false;
+	devlink_param_driverinit_value_set(dl,
+					   QED_DEVLINK_PARAM_ID_IWARP_CMT,
+					   value);
+
+	devlink_params_publish(dl);
+	cdev->iwarp_cmt = false;
+
+	return 0;
+
+err_unregister:
+	devlink_unregister(dl);
+
+err_free:
+	cdev->dl = NULL;
+	devlink_free(dl);
+
+	return rc;
+}
+
+static void qed_devlink_unregister(struct qed_dev *cdev)
+{
+	if (!cdev->dl)
+		return;
+
+	devlink_params_unregister(cdev->dl, qed_devlink_params,
+				  ARRAY_SIZE(qed_devlink_params));
+
+	devlink_unregister(cdev->dl);
+	devlink_free(cdev->dl);
+}
+
 /* probing */
 static struct qed_dev *qed_probe(struct pci_dev *pdev,
 				 struct qed_probe_params *params)
@@ -370,6 +472,12 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
 	}
 	DP_INFO(cdev, "PCI init completed successfully\n");
 
+	rc = qed_devlink_register(cdev);
+	if (rc) {
+		DP_INFO(cdev, "Failed to register devlink.\n");
+		goto err2;
+	}
+
 	rc = qed_hw_prepare(cdev, QED_PCI_DEFAULT);
 	if (rc) {
 		DP_ERR(cdev, "hw prepare failed\n");
@@ -399,6 +507,8 @@ static void qed_remove(struct qed_dev *cdev)
 
 	qed_set_power_state(cdev, PCI_D3hot);
 
+	qed_devlink_unregister(cdev);
+
 	qed_free_cdev(cdev);
 }
 
@@ -1301,26 +1411,21 @@ static u32 qed_sb_init(struct qed_dev *cdev,
 {
 	struct qed_hwfn *p_hwfn;
 	struct qed_ptt *p_ptt;
-	int hwfn_index;
 	u16 rel_sb_id;
-	u8 n_hwfns;
 	u32 rc;
 
-	/* RoCE uses single engine and CMT uses two engines. When using both
-	 * we force only a single engine. Storage uses only engine 0 too.
-	 */
-	if (type == QED_SB_TYPE_L2_QUEUE)
-		n_hwfns = cdev->num_hwfns;
-	else
-		n_hwfns = 1;
-
-	hwfn_index = sb_id % n_hwfns;
-	p_hwfn = &cdev->hwfns[hwfn_index];
-	rel_sb_id = sb_id / n_hwfns;
+	/* RoCE/Storage use a single engine in CMT mode while L2 uses both */
+	if (type == QED_SB_TYPE_L2_QUEUE) {
+		p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns];
+		rel_sb_id = sb_id / cdev->num_hwfns;
+	} else {
+		p_hwfn = QED_AFFIN_HWFN(cdev);
+		rel_sb_id = sb_id;
+	}
 
 	DP_VERBOSE(cdev, NETIF_MSG_INTR,
 		   "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n",
-		   hwfn_index, rel_sb_id, sb_id);
+		   IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id);
 
 	if (IS_PF(p_hwfn->cdev)) {
 		p_ptt = qed_ptt_acquire(p_hwfn);
@@ -1339,20 +1444,26 @@ static u32 qed_sb_init(struct qed_dev *cdev,
 }
 
 static u32 qed_sb_release(struct qed_dev *cdev,
-			  struct qed_sb_info *sb_info, u16 sb_id)
+			  struct qed_sb_info *sb_info,
+			  u16 sb_id,
+			  enum qed_sb_type type)
 {
 	struct qed_hwfn *p_hwfn;
-	int hwfn_index;
 	u16 rel_sb_id;
 	u32 rc;
 
-	hwfn_index = sb_id % cdev->num_hwfns;
-	p_hwfn = &cdev->hwfns[hwfn_index];
-	rel_sb_id = sb_id / cdev->num_hwfns;
+	/* RoCE/Storage use a single engine in CMT mode while L2 uses both */
+	if (type == QED_SB_TYPE_L2_QUEUE) {
+		p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns];
+		rel_sb_id = sb_id / cdev->num_hwfns;
+	} else {
+		p_hwfn = QED_AFFIN_HWFN(cdev);
+		rel_sb_id = sb_id;
+	}
 
 	DP_VERBOSE(cdev, NETIF_MSG_INTR,
 		   "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n",
-		   hwfn_index, rel_sb_id, sb_id);
+		   IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id);
 
 	rc = qed_int_sb_release(p_hwfn, sb_info, rel_sb_id);
 
@@ -2372,6 +2483,11 @@ static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf,
 	return rc;
 }
 
+static u8 qed_get_affin_hwfn_idx(struct qed_dev *cdev)
+{
+	return QED_AFFIN_HWFN_IDX(cdev);
+}
+
 static struct qed_selftest_ops qed_selftest_ops_pass = {
 	.selftest_memory = &qed_selftest_memory,
 	.selftest_interrupt = &qed_selftest_interrupt,
@@ -2419,6 +2535,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.db_recovery_add = &qed_db_recovery_add,
 	.db_recovery_del = &qed_db_recovery_del,
 	.read_module_eeprom = &qed_read_module_eeprom,
+	.get_affin_hwfn_idx = &qed_get_affin_hwfn_idx,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index cc27fd60d689..758702c1ce9c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -3685,3 +3685,68 @@ int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT,
 			   features, &mcp_resp, &mcp_param);
 }
+
+int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_mcp_mb_params mb_params = {0};
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u8 fir_valid, l2_valid;
+	int rc;
+
+	mb_params.cmd = DRV_MSG_CODE_GET_ENGINE_CONFIG;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+		DP_INFO(p_hwfn,
+			"The get_engine_config command is unsupported by the MFW\n");
+		return -EOPNOTSUPP;
+	}
+
+	fir_valid = QED_MFW_GET_FIELD(mb_params.mcp_param,
+				      FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID);
+	if (fir_valid)
+		cdev->fir_affin =
+		    QED_MFW_GET_FIELD(mb_params.mcp_param,
+				      FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE);
+
+	l2_valid = QED_MFW_GET_FIELD(mb_params.mcp_param,
+				     FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID);
+	if (l2_valid)
+		cdev->l2_affin_hint =
+		    QED_MFW_GET_FIELD(mb_params.mcp_param,
+				      FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE);
+
+	DP_INFO(p_hwfn,
+		"Engine affinity config: FIR={valid %hhd, value %hhd}, L2_hint={valid %hhd, value %hhd}\n",
+		fir_valid, cdev->fir_affin, l2_valid, cdev->l2_affin_hint);
+
+	return 0;
+}
+
+int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_mcp_mb_params mb_params = {0};
+	struct qed_dev *cdev = p_hwfn->cdev;
+	int rc;
+
+	mb_params.cmd = DRV_MSG_CODE_GET_PPFID_BITMAP;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+		DP_INFO(p_hwfn,
+			"The get_ppfid_bitmap command is unsupported by the MFW\n");
+		return -EOPNOTSUPP;
+	}
+
+	cdev->ppfid_bitmap = QED_MFW_GET_FIELD(mb_params.mcp_param,
+					       FW_MB_PARAM_PPFID_BITMAP);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_SP, "PPFID bitmap 0x%hhx\n",
+		   cdev->ppfid_bitmap);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 261c1a392e2c..e4f8fe4bd062 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -1186,4 +1186,20 @@ void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
  */
 int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn);
 
+/**
+ * @brief Get the engine affinity configuration.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief Get the PPFID bitmap.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
index 1302b308bd87..f3ebdc5e8f85 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
@@ -157,7 +157,8 @@ static int qed_ptp_hw_read_tx_ts(struct qed_dev *cdev, u64 *timestamp)
 	*timestamp = 0;
 	val = qed_rd(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_SEQID);
 	if (!(val & QED_TIMESTAMP_MASK)) {
-		DP_INFO(p_hwfn, "Invalid Tx timestamp, buf_seqid = %d\n", val);
+		DP_VERBOSE(p_hwfn, QED_MSG_DEBUG,
+			   "Invalid Tx timestamp, buf_seqid = %08x\n", val);
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 7873d6dfd91f..f900fde448db 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -700,7 +700,7 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
 		return rc;
 
 	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
-		rc = qed_iwarp_setup(p_hwfn, p_ptt, params);
+		rc = qed_iwarp_setup(p_hwfn, params);
 		if (rc)
 			return rc;
 	} else {
@@ -742,7 +742,7 @@ static int qed_rdma_stop(void *rdma_cxt)
 	       (ll2_ethertype_en & 0xFFFE));
 
 	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
-		rc = qed_iwarp_stop(p_hwfn, p_ptt);
+		rc = qed_iwarp_stop(p_hwfn);
 		if (rc) {
 			qed_ptt_release(p_hwfn, p_ptt);
 			return rc;
@@ -803,7 +803,7 @@ static int qed_rdma_add_user(void *rdma_cxt,
 				     dpi_start_offset +
 				     ((out_params->dpi) * p_hwfn->dpi_size));
 
-	out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr +
+	out_params->dpi_phys_addr = p_hwfn->db_phys_addr +
 				    dpi_start_offset +
 				    ((out_params->dpi) * p_hwfn->dpi_size);
 
@@ -818,14 +818,17 @@ static struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt)
 {
 	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
 	struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port;
+	struct qed_mcp_link_state *p_link_output;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n");
 
-	/* Link may have changed */
-	p_port->port_state = p_hwfn->mcp_info->link_output.link_up ?
-			     QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN;
+	/* The link state is saved only for the leading hwfn */
+	p_link_output = &QED_LEADING_HWFN(p_hwfn->cdev)->mcp_info->link_output;
 
-	p_port->link_speed = p_hwfn->mcp_info->link_output.speed;
+	p_port->port_state = p_link_output->link_up ? QED_RDMA_PORT_UP
+	    : QED_RDMA_PORT_DOWN;
+
+	p_port->link_speed = p_link_output->speed;
 
 	p_port->max_msg_size = RDMA_MAX_DATA_SIZE_IN_WQE;
 
@@ -870,7 +873,7 @@ static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
 static int qed_fill_rdma_dev_info(struct qed_dev *cdev,
 				  struct qed_dev_rdma_info *info)
 {
-	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
 
 	memset(info, 0, sizeof(*info));
 
@@ -889,9 +892,9 @@ static int qed_rdma_get_sb_start(struct qed_dev *cdev)
 	int feat_num;
 
 	if (cdev->num_hwfns > 1)
-		feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE);
+		feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE);
 	else
-		feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) *
+		feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE) *
 			   cdev->num_hwfns;
 
 	return feat_num;
@@ -899,7 +902,7 @@ static int qed_rdma_get_sb_start(struct qed_dev *cdev)
 
 static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev)
 {
-	int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ);
+	int n_cnq = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_RDMA_CNQ);
 	int n_msix = cdev->int_params.rdma_msix_cnt;
 
 	return min_t(int, n_cnq, n_msix);
@@ -1653,7 +1656,7 @@ static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid)
 
 static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev)
 {
-	return QED_LEADING_HWFN(cdev);
+	return QED_AFFIN_HWFN(cdev);
 }
 
 static int qed_rdma_modify_srq(void *rdma_cxt,
@@ -1881,7 +1884,7 @@ err:
 static int qed_rdma_init(struct qed_dev *cdev,
 			 struct qed_rdma_start_in_params *params)
 {
-	return qed_rdma_start(QED_LEADING_HWFN(cdev), params);
+	return qed_rdma_start(QED_AFFIN_HWFN(cdev), params);
 }
 
 static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi)
@@ -1899,23 +1902,12 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev,
 				       u8 *old_mac_address,
 				       u8 *new_mac_address)
 {
-	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
-	struct qed_ptt *p_ptt;
 	int rc = 0;
 
-	p_ptt = qed_ptt_acquire(p_hwfn);
-	if (!p_ptt) {
-		DP_ERR(cdev,
-		       "qed roce ll2 mac filter set: failed to acquire PTT\n");
-		return -EINVAL;
-	}
-
 	if (old_mac_address)
-		qed_llh_remove_mac_filter(p_hwfn, p_ptt, old_mac_address);
+		qed_llh_remove_mac_filter(cdev, 0, old_mac_address);
 	if (new_mac_address)
-		rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, new_mac_address);
-
-	qed_ptt_release(p_hwfn, p_ptt);
+		rc = qed_llh_add_mac_filter(cdev, 0, new_mac_address);
 
 	if (rc)
 		DP_ERR(cdev,
@@ -1924,6 +1916,36 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev,
 	return rc;
 }
 
+static int qed_iwarp_set_engine_affin(struct qed_dev *cdev, bool b_reset)
+{
+	enum qed_eng eng;
+	u8 ppfid = 0;
+	int rc;
+
+	/* Make sure iwarp cmt mode is enabled before setting affinity */
+	if (!cdev->iwarp_cmt)
+		return -EINVAL;
+
+	if (b_reset)
+		eng = QED_BOTH_ENG;
+	else
+		eng = cdev->l2_affin_hint ? QED_ENG1 : QED_ENG0;
+
+	rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng);
+	if (rc) {
+		DP_NOTICE(cdev,
+			  "Failed to set the engine affinity of ppfid %d\n",
+			  ppfid);
+		return rc;
+	}
+
+	DP_VERBOSE(cdev, (QED_MSG_RDMA | QED_MSG_SP),
+		   "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+		   eng);
+
+	return 0;
+}
+
 static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.common = &qed_common_ops_pass,
 	.fill_dev_info = &qed_fill_rdma_dev_info,
@@ -1963,6 +1985,7 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
 	.ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
 	.ll2_get_stats = &qed_ll2_get_stats,
+	.iwarp_set_engine_affin = &qed_iwarp_set_engine_affin,
 	.iwarp_connect = &qed_iwarp_connect,
 	.iwarp_create_listen = &qed_iwarp_create_listen,
 	.iwarp_destroy_listen = &qed_iwarp_destroy_listen,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 5ce825ca5f24..60f850c3bdd6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -254,6 +254,10 @@
 	0x500840UL
 #define NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR \
 	0x50196cUL
+#define NIG_REG_LLH_PPFID2PFID_TBL_0 \
+	0x501970UL
+#define NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL	\
+	0x50
 #define NIG_REG_LLH_CLS_TYPE_DUALMODE \
 	0x501964UL
 #define NIG_REG_LLH_FUNC_TAG_EN 0x5019b0UL
@@ -1626,6 +1630,8 @@
 #define PHY_PCIE_REG_PHY1_K2_E5 \
 	0x624000UL
 #define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL
+#define NIG_REG_PPF_TO_ENGINE_SEL 0x508900UL
+#define NIG_REG_PPF_TO_ENGINE_SEL_SIZE 8
 #define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL
 #define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL
 #define DORQ_REG_PF_DPM_ENABLE 0x100510UL
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 2f318aaf2b05..78f77b712b10 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -917,10 +917,11 @@ static u8 qed_iov_alloc_vf_igu_sbs(struct qed_hwfn *p_hwfn,
 		/* Configure igu sb in CAU which were marked valid */
 		qed_init_cau_sb_entry(p_hwfn, &sb_entry,
 				      p_hwfn->rel_pf_id, vf->abs_vf_id, 1);
+
 		qed_dmae_host2grc(p_hwfn, p_ptt,
 				  (u64)(uintptr_t)&sb_entry,
 				  CAU_REG_SB_VAR_MEMORY +
-				  p_block->igu_sb_id * sizeof(u64), 2, 0);
+				  p_block->igu_sb_id * sizeof(u64), 2, NULL);
 	}
 
 	vf->num_sbs = (u8) num_rx_queues;
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 92fe226980fd..b972ab07c18b 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -92,6 +92,7 @@ struct qede_stats_common {
 	u64 non_coalesced_pkts;
 	u64 coalesced_bytes;
 	u64 link_change_count;
+	u64 ptp_skip_txts;
 
 	/* port */
 	u64 rx_64_byte_packets;
@@ -189,6 +190,7 @@ struct qede_dev {
 
 	const struct qed_eth_ops	*ops;
 	struct qede_ptp			*ptp;
+	u64				ptp_skip_txts;
 
 	struct qed_dev_eth_info dev_info;
 #define QEDE_MAX_RSS_CNT(edev)	((edev)->dev_info.num_queues)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 8911a97ab0ca..e85f9fef930c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -174,6 +174,7 @@ static const struct {
 	QEDE_STAT(coalesced_bytes),
 
 	QEDE_STAT(link_change_count),
+	QEDE_STAT(ptp_skip_txts),
 };
 
 #define QEDE_NUM_STATS	ARRAY_SIZE(qede_stats_arr)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 02a97c659e29..741377b7c00d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -390,6 +390,7 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
 	p_common->brb_discards = stats.common.brb_discards;
 	p_common->tx_mac_ctrl_frames = stats.common.tx_mac_ctrl_frames;
 	p_common->link_change_count = stats.common.link_change_count;
+	p_common->ptp_skip_txts = edev->ptp_skip_txts;
 
 	if (QEDE_IS_BB(edev)) {
 		struct qede_stats_bb *p_bb = &edev->stats.bb;
@@ -1306,7 +1307,8 @@ static void qede_free_mem_sb(struct qede_dev *edev, struct qed_sb_info *sb_info,
 			     u16 sb_id)
 {
 	if (sb_info->sb_virt) {
-		edev->ops->common->sb_release(edev->cdev, sb_info, sb_id);
+		edev->ops->common->sb_release(edev->cdev, sb_info, sb_id,
+					      QED_SB_TYPE_L2_QUEUE);
 		dma_free_coherent(&edev->pdev->dev, sizeof(*sb_info->sb_virt),
 				  (void *)sb_info->sb_virt, sb_info->sb_phys);
 		memset(sb_info, 0, sizeof(*sb_info));
@@ -2231,6 +2233,8 @@ out:
 	if (mode != QEDE_UNLOAD_RECOVERY)
 		DP_NOTICE(edev, "Link is down\n");
 
+	edev->ptp_skip_txts = 0;
+
 	DP_INFO(edev, "Ending qede unload\n");
 }
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
index bddb2b5982dc..f815435cf106 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 #include "qede_ptp.h"
+#define QEDE_PTP_TX_TIMEOUT (2 * HZ)
 
 struct qede_ptp {
 	const struct qed_eth_ptp_ops	*ops;
@@ -38,6 +39,7 @@ struct qede_ptp {
 	struct timecounter		tc;
 	struct ptp_clock		*clock;
 	struct work_struct		work;
+	unsigned long			ptp_tx_start;
 	struct qede_dev			*edev;
 	struct sk_buff			*tx_skb;
 
@@ -160,18 +162,30 @@ static void qede_ptp_task(struct work_struct *work)
 	struct qede_dev *edev;
 	struct qede_ptp *ptp;
 	u64 timestamp, ns;
+	bool timedout;
 	int rc;
 
 	ptp = container_of(work, struct qede_ptp, work);
 	edev = ptp->edev;
+	timedout = time_is_before_jiffies(ptp->ptp_tx_start +
+					  QEDE_PTP_TX_TIMEOUT);
 
 	/* Read Tx timestamp registers */
 	spin_lock_bh(&ptp->lock);
 	rc = ptp->ops->read_tx_ts(edev->cdev, &timestamp);
 	spin_unlock_bh(&ptp->lock);
 	if (rc) {
-		/* Reschedule to keep checking for a valid timestamp value */
-		schedule_work(&ptp->work);
+		if (unlikely(timedout)) {
+			DP_INFO(edev, "Tx timestamp is not recorded\n");
+			dev_kfree_skb_any(ptp->tx_skb);
+			ptp->tx_skb = NULL;
+			clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS,
+					 &edev->flags);
+			edev->ptp_skip_txts++;
+		} else {
+			/* Reschedule to keep checking for a valid TS value */
+			schedule_work(&ptp->work);
+		}
 		return;
 	}
 
@@ -514,19 +528,28 @@ void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb)
 	if (!ptp)
 		return;
 
-	if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags))
+	if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS,
+				  &edev->flags)) {
+		DP_ERR(edev, "Timestamping in progress\n");
+		edev->ptp_skip_txts++;
 		return;
+	}
 
 	if (unlikely(!test_bit(QEDE_FLAGS_TX_TIMESTAMPING_EN, &edev->flags))) {
-		DP_NOTICE(edev,
-			  "Tx timestamping was not enabled, this packet will not be timestamped\n");
+		DP_ERR(edev,
+		       "Tx timestamping was not enabled, this packet will not be timestamped\n");
+		clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags);
+		edev->ptp_skip_txts++;
 	} else if (unlikely(ptp->tx_skb)) {
-		DP_NOTICE(edev,
-			  "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n");
+		DP_ERR(edev,
+		       "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n");
+		clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags);
+		edev->ptp_skip_txts++;
 	} else {
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 		/* schedule check for Tx timestamp */
 		ptp->tx_skb = skb_get(skb);
+		ptp->ptp_tx_start = jiffies;
 		schedule_work(&ptp->work);
 	}
 }
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 884f1f52dcc2..991d7e285736 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -12,6 +12,7 @@
 
 #ifndef _RMNET_MAP_H_
 #define _RMNET_MAP_H_
+#include <linux/if_rmnet.h>
 
 struct rmnet_map_control_command {
 	u8  command_name;
@@ -39,30 +40,6 @@ enum rmnet_map_commands {
 	RMNET_MAP_COMMAND_ENUM_LENGTH
 };
 
-struct rmnet_map_header {
-	u8  pad_len:6;
-	u8  reserved_bit:1;
-	u8  cd_bit:1;
-	u8  mux_id;
-	__be16 pkt_len;
-}  __aligned(1);
-
-struct rmnet_map_dl_csum_trailer {
-	u8  reserved1;
-	u8  valid:1;
-	u8  reserved2:7;
-	u16 csum_start_offset;
-	u16 csum_length;
-	__be16 csum_value;
-} __aligned(1);
-
-struct rmnet_map_ul_csum_header {
-	__be16 csum_start_offset;
-	u16 csum_insert_offset:14;
-	u16 udp_ip4_ind:1;
-	u16 csum_enabled:1;
-} __aligned(1);
-
 #define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
 				 (Y)->data)->mux_id)
 #define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index d06a61f00e78..962ef3d8c507 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -81,7 +81,7 @@ static const int multicast_filter_limit = 32;
 #define RTL_R32(tp, reg)		readl(tp->mmio_addr + (reg))
 
 enum mac_version {
-	RTL_GIGA_MAC_VER_01 = 0,
+	/* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
 	RTL_GIGA_MAC_VER_02,
 	RTL_GIGA_MAC_VER_03,
 	RTL_GIGA_MAC_VER_04,
@@ -132,7 +132,7 @@ enum mac_version {
 	RTL_GIGA_MAC_VER_49,
 	RTL_GIGA_MAC_VER_50,
 	RTL_GIGA_MAC_VER_51,
-	RTL_GIGA_MAC_NONE   = 0xff,
+	RTL_GIGA_MAC_NONE
 };
 
 #define JUMBO_1K	ETH_DATA_LEN
@@ -146,7 +146,6 @@ static const struct {
 	const char *fw_name;
 } rtl_chip_infos[] = {
 	/* PCI devices. */
-	[RTL_GIGA_MAC_VER_01] = {"RTL8169"				},
 	[RTL_GIGA_MAC_VER_02] = {"RTL8169s"				},
 	[RTL_GIGA_MAC_VER_03] = {"RTL8110s"				},
 	[RTL_GIGA_MAC_VER_04] = {"RTL8169sb/8110sb"			},
@@ -406,8 +405,6 @@ enum rtl_register_content {
 	RxOK		= 0x0001,
 
 	/* RxStatusDesc */
-	RxBOVF	= (1 << 24),
-	RxFOVF	= (1 << 23),
 	RxRWT	= (1 << 22),
 	RxRES	= (1 << 21),
 	RxRUNT	= (1 << 20),
@@ -503,9 +500,6 @@ enum rtl_register_content {
 	LinkStatus	= 0x02,
 	FullDup		= 0x01,
 
-	/* _TBICSRBit */
-	TBILinkOK	= 0x02000000,
-
 	/* ResetCounterCommand */
 	CounterReset	= 0x1,
 
@@ -632,6 +626,10 @@ struct rtl8169_stats {
 	struct u64_stats_sync	syncp;
 };
 
+struct rtl8169_private;
+typedef void (*rtl_fw_write_t)(struct rtl8169_private *tp, int reg, int val);
+typedef int (*rtl_fw_read_t)(struct rtl8169_private *tp, int reg);
+
 struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;
@@ -639,7 +637,7 @@ struct rtl8169_private {
 	struct phy_device *phydev;
 	struct napi_struct napi;
 	u32 msg_enable;
-	u16 mac_version;
+	enum mac_version mac_version;
 	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
 	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
 	u32 dirty_tx;
@@ -685,6 +683,10 @@ struct rtl8169_private {
 
 	const char *fw_name;
 	struct rtl_fw {
+		rtl_fw_write_t phy_write;
+		rtl_fw_read_t phy_read;
+		rtl_fw_write_t mac_mcu_write;
+		rtl_fw_read_t mac_mcu_read;
 		const struct firmware *fw;
 
 #define RTL_VER_SIZE		32
@@ -1015,7 +1017,7 @@ static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
 	return value;
 }
 
-static void rtl_writephy(struct rtl8169_private *tp, int location, u32 val)
+static void rtl_writephy(struct rtl8169_private *tp, int location, int val)
 {
 	tp->mdio_ops.write(tp, location, val);
 }
@@ -1424,7 +1426,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 	}
 
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_17:
 		options = RTL_R8(tp, Config1) & ~PMEnable;
 		if (wolopts)
 			options |= PMEnable;
@@ -2252,7 +2254,6 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp)
 		{ 0xfc8, 0x100,	RTL_GIGA_MAC_VER_04 },
 		{ 0xfc8, 0x040,	RTL_GIGA_MAC_VER_03 },
 		{ 0xfc8, 0x008,	RTL_GIGA_MAC_VER_02 },
-		{ 0xfc8, 0x000,	RTL_GIGA_MAC_VER_01 },
 
 		/* Catch-all */
 		{ 0x000, 0x000,	RTL_GIGA_MAC_NONE   }
@@ -2321,50 +2322,45 @@ static bool rtl_fw_format_ok(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
 	const struct firmware *fw = rtl_fw->fw;
 	struct fw_info *fw_info = (struct fw_info *)fw->data;
 	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
-	char *version = rtl_fw->version;
-	bool rc = false;
 
 	if (fw->size < FW_OPCODE_SIZE)
-		goto out;
+		return false;
 
 	if (!fw_info->magic) {
 		size_t i, size, start;
 		u8 checksum = 0;
 
 		if (fw->size < sizeof(*fw_info))
-			goto out;
+			return false;
 
 		for (i = 0; i < fw->size; i++)
 			checksum += fw->data[i];
 		if (checksum != 0)
-			goto out;
+			return false;
 
 		start = le32_to_cpu(fw_info->fw_start);
 		if (start > fw->size)
-			goto out;
+			return false;
 
 		size = le32_to_cpu(fw_info->fw_len);
 		if (size > (fw->size - start) / FW_OPCODE_SIZE)
-			goto out;
+			return false;
 
-		memcpy(version, fw_info->version, RTL_VER_SIZE);
+		strscpy(rtl_fw->version, fw_info->version, RTL_VER_SIZE);
 
 		pa->code = (__le32 *)(fw->data + start);
 		pa->size = size;
 	} else {
 		if (fw->size % FW_OPCODE_SIZE)
-			goto out;
+			return false;
 
-		strlcpy(version, tp->fw_name, RTL_VER_SIZE);
+		strscpy(rtl_fw->version, tp->fw_name, RTL_VER_SIZE);
 
 		pa->code = (__le32 *)fw->data;
 		pa->size = fw->size / FW_OPCODE_SIZE;
 	}
-	version[RTL_VER_SIZE - 1] = 0;
 
-	rc = true;
-out:
-	return rc;
+	return true;
 }
 
 static bool rtl_fw_data_ok(struct rtl8169_private *tp, struct net_device *dev,
@@ -2439,17 +2435,15 @@ out:
 	return rc;
 }
 
-static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
+static void rtl_fw_write_firmware(struct rtl8169_private *tp,
+				  struct rtl_fw *rtl_fw)
 {
 	struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
-	struct mdio_ops org, *ops = &tp->mdio_ops;
-	u32 predata, count;
+	rtl_fw_write_t fw_write = rtl_fw->phy_write;
+	rtl_fw_read_t fw_read = rtl_fw->phy_read;
+	int predata = 0, count = 0;
 	size_t index;
 
-	predata = count = 0;
-	org.write = ops->write;
-	org.read = ops->read;
-
 	for (index = 0; index < pa->size; ) {
 		u32 action = le32_to_cpu(pa->code[index]);
 		u32 data = action & 0x0000ffff;
@@ -2460,7 +2454,7 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
 
 		switch(action & 0xf0000000) {
 		case PHY_READ:
-			predata = rtl_readphy(tp, regno);
+			predata = fw_read(tp, regno);
 			count++;
 			index++;
 			break;
@@ -2477,11 +2471,11 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
 			break;
 		case PHY_MDIO_CHG:
 			if (data == 0) {
-				ops->write = org.write;
-				ops->read = org.read;
+				fw_write = rtl_fw->phy_write;
+				fw_read = rtl_fw->phy_read;
 			} else if (data == 1) {
-				ops->write = mac_mcu_write;
-				ops->read = mac_mcu_read;
+				fw_write = rtl_fw->mac_mcu_write;
+				fw_read = rtl_fw->mac_mcu_read;
 			}
 
 			index++;
@@ -2491,7 +2485,7 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
 			index++;
 			break;
 		case PHY_WRITE:
-			rtl_writephy(tp, regno, data);
+			fw_write(tp, regno, data);
 			index++;
 			break;
 		case PHY_READCOUNT_EQ_SKIP:
@@ -2508,7 +2502,7 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
 			index++;
 			break;
 		case PHY_WRITE_PREVIOUS:
-			rtl_writephy(tp, regno, predata);
+			fw_write(tp, regno, predata);
 			index++;
 			break;
 		case PHY_SKIPN:
@@ -2523,9 +2517,6 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
 			BUG();
 		}
 	}
-
-	ops->write = org.write;
-	ops->read = org.read;
 }
 
 static void rtl_release_firmware(struct rtl8169_private *tp)
@@ -2539,9 +2530,9 @@ static void rtl_release_firmware(struct rtl8169_private *tp)
 
 static void rtl_apply_firmware(struct rtl8169_private *tp)
 {
-	/* TODO: release firmware once rtl_phy_write_fw signals failures. */
+	/* TODO: release firmware if rtl_fw_write_firmware signals failure. */
 	if (tp->rtl_fw)
-		rtl_phy_write_fw(tp, tp->rtl_fw);
+		rtl_fw_write_firmware(tp, tp->rtl_fw);
 }
 
 static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
@@ -2910,50 +2901,59 @@ static void rtl8168c_4_hw_phy_config(struct rtl8169_private *tp)
 	rtl8168c_3_hw_phy_config(tp);
 }
 
-static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
-{
-	static const struct phy_reg phy_reg_init_0[] = {
-		/* Channel Estimation */
-		{ 0x1f, 0x0001 },
-		{ 0x06, 0x4064 },
-		{ 0x07, 0x2863 },
-		{ 0x08, 0x059c },
-		{ 0x09, 0x26b4 },
-		{ 0x0a, 0x6a19 },
-		{ 0x0b, 0xdcc8 },
-		{ 0x10, 0xf06d },
-		{ 0x14, 0x7f68 },
-		{ 0x18, 0x7fd9 },
-		{ 0x1c, 0xf0ff },
-		{ 0x1d, 0x3d9c },
-		{ 0x1f, 0x0003 },
-		{ 0x12, 0xf49f },
-		{ 0x13, 0x070b },
-		{ 0x1a, 0x05ad },
-		{ 0x14, 0x94c0 },
+static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = {
+	/* Channel Estimation */
+	{ 0x1f, 0x0001 },
+	{ 0x06, 0x4064 },
+	{ 0x07, 0x2863 },
+	{ 0x08, 0x059c },
+	{ 0x09, 0x26b4 },
+	{ 0x0a, 0x6a19 },
+	{ 0x0b, 0xdcc8 },
+	{ 0x10, 0xf06d },
+	{ 0x14, 0x7f68 },
+	{ 0x18, 0x7fd9 },
+	{ 0x1c, 0xf0ff },
+	{ 0x1d, 0x3d9c },
+	{ 0x1f, 0x0003 },
+	{ 0x12, 0xf49f },
+	{ 0x13, 0x070b },
+	{ 0x1a, 0x05ad },
+	{ 0x14, 0x94c0 },
 
-		/*
-		 * Tx Error Issue
-		 * Enhance line driver power
-		 */
-		{ 0x1f, 0x0002 },
-		{ 0x06, 0x5561 },
-		{ 0x1f, 0x0005 },
-		{ 0x05, 0x8332 },
-		{ 0x06, 0x5561 },
+	/*
+	 * Tx Error Issue
+	 * Enhance line driver power
+	 */
+	{ 0x1f, 0x0002 },
+	{ 0x06, 0x5561 },
+	{ 0x1f, 0x0005 },
+	{ 0x05, 0x8332 },
+	{ 0x06, 0x5561 },
 
-		/*
-		 * Can not link to 1Gbps with bad cable
-		 * Decrease SNR threshold form 21.07dB to 19.04dB
-		 */
-		{ 0x1f, 0x0001 },
-		{ 0x17, 0x0cc0 },
+	/*
+	 * Can not link to 1Gbps with bad cable
+	 * Decrease SNR threshold form 21.07dB to 19.04dB
+	 */
+	{ 0x1f, 0x0001 },
+	{ 0x17, 0x0cc0 },
 
-		{ 0x1f, 0x0000 },
-		{ 0x0d, 0xf880 }
-	};
+	{ 0x1f, 0x0000 },
+	{ 0x0d, 0xf880 }
+};
 
-	rtl_writephy_batch(tp, phy_reg_init_0);
+static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = {
+	{ 0x1f, 0x0002 },
+	{ 0x05, 0x669a },
+	{ 0x1f, 0x0005 },
+	{ 0x05, 0x8330 },
+	{ 0x06, 0x669a },
+	{ 0x1f, 0x0002 }
+};
+
+static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
+{
+	rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0);
 
 	/*
 	 * Rx Error Issue
@@ -2964,17 +2964,9 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
 	rtl_w0w1_phy(tp, 0x0c, 0xa200, 0x5d00);
 
 	if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
-		static const struct phy_reg phy_reg_init[] = {
-			{ 0x1f, 0x0002 },
-			{ 0x05, 0x669a },
-			{ 0x1f, 0x0005 },
-			{ 0x05, 0x8330 },
-			{ 0x06, 0x669a },
-			{ 0x1f, 0x0002 }
-		};
 		int val;
 
-		rtl_writephy_batch(tp, phy_reg_init);
+		rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1);
 
 		val = rtl_readphy(tp, 0x0d);
 
@@ -3023,62 +3015,12 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
 
 static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp)
 {
-	static const struct phy_reg phy_reg_init_0[] = {
-		/* Channel Estimation */
-		{ 0x1f, 0x0001 },
-		{ 0x06, 0x4064 },
-		{ 0x07, 0x2863 },
-		{ 0x08, 0x059c },
-		{ 0x09, 0x26b4 },
-		{ 0x0a, 0x6a19 },
-		{ 0x0b, 0xdcc8 },
-		{ 0x10, 0xf06d },
-		{ 0x14, 0x7f68 },
-		{ 0x18, 0x7fd9 },
-		{ 0x1c, 0xf0ff },
-		{ 0x1d, 0x3d9c },
-		{ 0x1f, 0x0003 },
-		{ 0x12, 0xf49f },
-		{ 0x13, 0x070b },
-		{ 0x1a, 0x05ad },
-		{ 0x14, 0x94c0 },
-
-		/*
-		 * Tx Error Issue
-		 * Enhance line driver power
-		 */
-		{ 0x1f, 0x0002 },
-		{ 0x06, 0x5561 },
-		{ 0x1f, 0x0005 },
-		{ 0x05, 0x8332 },
-		{ 0x06, 0x5561 },
-
-		/*
-		 * Can not link to 1Gbps with bad cable
-		 * Decrease SNR threshold form 21.07dB to 19.04dB
-		 */
-		{ 0x1f, 0x0001 },
-		{ 0x17, 0x0cc0 },
-
-		{ 0x1f, 0x0000 },
-		{ 0x0d, 0xf880 }
-	};
-
-	rtl_writephy_batch(tp, phy_reg_init_0);
+	rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0);
 
 	if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
-		static const struct phy_reg phy_reg_init[] = {
-			{ 0x1f, 0x0002 },
-			{ 0x05, 0x669a },
-			{ 0x1f, 0x0005 },
-			{ 0x05, 0x8330 },
-			{ 0x06, 0x669a },
-
-			{ 0x1f, 0x0002 }
-		};
 		int val;
 
-		rtl_writephy_batch(tp, phy_reg_init);
+		rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1);
 
 		val = rtl_readphy(tp, 0x0d);
 		if ((val & 0x00ff) != 0x006c) {
@@ -3985,7 +3927,6 @@ static void rtl_hw_phy_config(struct net_device *dev)
 {
 	static const rtl_generic_fct phy_configs[] = {
 		/* PCI devices. */
-		[RTL_GIGA_MAC_VER_01] = NULL,
 		[RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config,
 		[RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config,
 		[RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config,
@@ -4050,12 +3991,6 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
 		schedule_work(&tp->wk.work);
 }
 
-static bool rtl_tbi_enabled(struct rtl8169_private *tp)
-{
-	return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
-	       (RTL_R8(tp, PHYstatus) & TBI_Enable);
-}
-
 static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 {
 	rtl_hw_phy_config(dev);
@@ -4168,7 +4103,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 	}
 }
 
-static void r8168_pll_power_down(struct rtl8169_private *tp)
+static void rtl_pll_power_down(struct rtl8169_private *tp)
 {
 	if (r8168_check_dash(tp))
 		return;
@@ -4203,10 +4138,12 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
 		rtl_eri_clear_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000);
 		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
 		break;
+	default:
+		break;
 	}
 }
 
-static void r8168_pll_power_up(struct rtl8169_private *tp)
+static void rtl_pll_power_up(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
@@ -4230,6 +4167,8 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
 		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
 		rtl_eri_set_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000);
 		break;
+	default:
+		break;
 	}
 
 	phy_resume(tp->phydev);
@@ -4237,32 +4176,10 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
 	msleep(20);
 }
 
-static void rtl_pll_power_down(struct rtl8169_private *tp)
-{
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
-	case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
-		break;
-	default:
-		r8168_pll_power_down(tp);
-	}
-}
-
-static void rtl_pll_power_up(struct rtl8169_private *tp)
-{
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
-	case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
-		break;
-	default:
-		r8168_pll_power_up(tp);
-	}
-}
-
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
 	case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
 		RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
 		break;
@@ -4445,6 +4362,11 @@ static void rtl_request_firmware(struct rtl8169_private *tp)
 	if (!rtl_fw)
 		goto err_warn;
 
+	rtl_fw->phy_write = rtl_writephy;
+	rtl_fw->phy_read = rtl_readphy;
+	rtl_fw->mac_mcu_write = mac_mcu_write;
+	rtl_fw->mac_mcu_read = mac_mcu_read;
+
 	rc = request_firmware(&rtl_fw->fw, tp->fw_name, tp_to_dev(tp));
 	if (rc < 0)
 		goto err_free;
@@ -6264,14 +6186,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
 				dev->stats.rx_length_errors++;
 			if (status & RxCRC)
 				dev->stats.rx_crc_errors++;
-			/* RxFOVF is a reserved bit on later chip versions */
-			if (tp->mac_version == RTL_GIGA_MAC_VER_01 &&
-			    status & RxFOVF) {
-				rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
-				dev->stats.rx_fifo_errors++;
-			} else if (status & (RxRUNT | RxCRC) &&
-				   !(status & RxRWT) &&
-				   dev->features & NETIF_F_RXALL) {
+			if (status & (RxRUNT | RxCRC) && !(status & RxRWT) &&
+			    dev->features & NETIF_F_RXALL) {
 				goto process_pkt;
 			}
 		} else {
@@ -6451,7 +6367,10 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
 	if (ret)
 		return ret;
 
-	if (!tp->supports_gmii)
+	if (tp->supports_gmii)
+		phy_remove_link_mode(phydev,
+				     ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
+	else
 		phy_set_max_speed(phydev, SPEED_100);
 
 	phy_support_asym_pause(phydev);
@@ -7046,25 +6965,18 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 	data |= (1 << 15);
 	r8168_mac_ocp_write(tp, 0xe8de, data);
 
-	if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
-		return;
-}
-
-static void rtl_hw_init_8168ep(struct rtl8169_private *tp)
-{
-	rtl8168ep_stop_cmac(tp);
-	rtl_hw_init_8168g(tp);
+	rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
 }
 
 static void rtl_hw_initialize(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
+		rtl8168ep_stop_cmac(tp);
+		/* fall through */
 	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
 		rtl_hw_init_8168g(tp);
 		break;
-	case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
-		rtl_hw_init_8168ep(tp);
-		break;
 	default:
 		break;
 	}
@@ -7074,7 +6986,7 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
 static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
 	case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
 		return false;
 	default:
@@ -7090,7 +7002,7 @@ static int rtl_jumbo_max(struct rtl8169_private *tp)
 
 	switch (tp->mac_version) {
 	/* RTL8169 */
-	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
 		return JUMBO_7K;
 	/* RTL8168b */
 	case RTL_GIGA_MAC_VER_11:
@@ -7204,11 +7116,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (tp->mac_version == RTL_GIGA_MAC_NONE)
 		return -ENODEV;
 
-	if (rtl_tbi_enabled(tp)) {
-		dev_err(&pdev->dev, "TBI fiber mode not supported\n");
-		return -ENODEV;
-	}
-
 	tp->cp_cmd = RTL_R16(tp, CPlusCmd);
 
 	if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 &&
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 06545d7399fc..0b5c8d74c683 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -13,6 +13,16 @@ config STMMAC_ETH
 
 if STMMAC_ETH
 
+config STMMAC_SELFTESTS
+	bool "Support for STMMAC Selftests"
+	depends on INET
+	depends on STMMAC_ETH
+	default n
+	---help---
+	  This adds support for STMMAC Selftests using ethtool. Enable this
+	  feature if you are facing problems with your HW and submit the test
+	  results to the netdev Mailing List.
+
 config STMMAC_PLATFORM
 	tristate "STMMAC Platform bus support"
 	depends on STMMAC_ETH
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index c529c21e9bdd..c59926d96bcc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -8,6 +8,8 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o	\
 	      stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \
 	      $(stmmac-y)
 
+stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o
+
 # Ordering matters. Generic driver must be last.
 obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
 obj-$(CONFIG_DWMAC_ANARION)	+= dwmac-anarion.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 272b9ca66314..1961fe9144ca 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -424,6 +424,7 @@ struct mac_device_info {
 	const struct stmmac_mode_ops *mode;
 	const struct stmmac_hwtimestamp *ptp;
 	const struct stmmac_tc_ops *tc;
+	const struct stmmac_mmc_ops *mmc;
 	struct mii_regs mii;	/* MII register Addresses */
 	struct mac_link link;
 	void __iomem *pcsr;     /* vpointer to device CSRs */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index 7fdd1760a74c..5ae474ebaaed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -1,14 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Amlogic Meson6 and Meson8 DWMAC glue layer
  *
  * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/device.h>
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index c5979569fd60..c06295ec1ef0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -1,14 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Amlogic Meson8b, Meson8m2 and GXBB DWMAC glue layer
  *
  * Copyright (C) 2016 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index ba124a4da793..3c7b779dcd4e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -986,6 +986,18 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
 		regulator_disable(gmac->regulator);
 }
 
+static void sun8i_dwmac_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + EMAC_BASIC_CTL0);
+
+	if (enable)
+		value |= EMAC_LOOPBACK;
+	else
+		value &= ~EMAC_LOOPBACK;
+
+	writel(value, ioaddr + EMAC_BASIC_CTL0);
+}
+
 static const struct stmmac_ops sun8i_dwmac_ops = {
 	.core_init = sun8i_dwmac_core_init,
 	.set_mac = sun8i_dwmac_set_mac,
@@ -995,6 +1007,7 @@ static const struct stmmac_ops sun8i_dwmac_ops = {
 	.flow_ctrl = sun8i_dwmac_flow_ctrl,
 	.set_umac_addr = sun8i_dwmac_set_umac_addr,
 	.get_umac_addr = sun8i_dwmac_get_umac_addr,
+	.set_mac_loopback = sun8i_dwmac_set_mac_loopback,
 };
 
 static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 184ca13c8f79..56a69fb6f0b9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -146,6 +146,7 @@ enum inter_frame_gap {
 #define GMAC_FRAME_FILTER_DAIF	0x00000008	/* DA Inverse Filtering */
 #define GMAC_FRAME_FILTER_PM	0x00000010	/* Pass all multicast */
 #define GMAC_FRAME_FILTER_DBF	0x00000020	/* Disable Broadcast frames */
+#define GMAC_FRAME_FILTER_PCF	0x00000080	/* Pass Control frames */
 #define GMAC_FRAME_FILTER_SAIF	0x00000100	/* Inverse Filtering */
 #define GMAC_FRAME_FILTER_SAF	0x00000200	/* Source Address Filter */
 #define GMAC_FRAME_FILTER_HPF	0x00000400	/* Hash or perfect Filter */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 0877bde6e860..ebe41dd09bab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -172,7 +172,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
 	memset(mc_filter, 0, sizeof(mc_filter));
 
 	if (dev->flags & IFF_PROMISC) {
-		value = GMAC_FRAME_FILTER_PR;
+		value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF;
 	} else if (dev->flags & IFF_ALLMULTI) {
 		value = GMAC_FRAME_FILTER_PM;	/* pass all multi */
 	} else if (!netdev_mc_empty(dev)) {
@@ -198,6 +198,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
 		}
 	}
 
+	value |= GMAC_FRAME_FILTER_HPF;
 	dwmac1000_set_mchash(ioaddr, mc_filter, mcbitslog2);
 
 	/* Handle multiple unicast addresses (perfect filtering) */
@@ -216,6 +217,12 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
 					    GMAC_ADDR_LOW(reg));
 			reg++;
 		}
+
+		while (reg <= perfect_addr_number) {
+			writel(0, ioaddr + GMAC_ADDR_HIGH(reg));
+			writel(0, ioaddr + GMAC_ADDR_LOW(reg));
+			reg++;
+		}
 	}
 
 #ifdef FRAME_FILTER_DEBUG
@@ -499,6 +506,18 @@ static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
 		x->mac_gmii_rx_proto_engine++;
 }
 
+static void dwmac1000_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + GMAC_CONTROL);
+
+	if (enable)
+		value |= GMAC_CONTROL_LM;
+	else
+		value &= ~GMAC_CONTROL_LM;
+
+	writel(value, ioaddr + GMAC_CONTROL);
+}
+
 const struct stmmac_ops dwmac1000_ops = {
 	.core_init = dwmac1000_core_init,
 	.set_mac = stmmac_set_mac,
@@ -518,6 +537,7 @@ const struct stmmac_ops dwmac1000_ops = {
 	.pcs_ctrl_ane = dwmac1000_ctrl_ane,
 	.pcs_rane = dwmac1000_rane,
 	.pcs_get_adv_lp = dwmac1000_get_adv_lp,
+	.set_mac_loopback = dwmac1000_set_mac_loopback,
 };
 
 int dwmac1000_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index b735143987e1..d621b5189c41 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -160,6 +160,18 @@ static void dwmac100_pmt(struct mac_device_info *hw, unsigned long mode)
 	return;
 }
 
+static void dwmac100_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + MAC_CONTROL);
+
+	if (enable)
+		value |= MAC_CONTROL_OM;
+	else
+		value &= ~MAC_CONTROL_OM;
+
+	writel(value, ioaddr + MAC_CONTROL);
+}
+
 const struct stmmac_ops dwmac100_ops = {
 	.core_init = dwmac100_core_init,
 	.set_mac = stmmac_set_mac,
@@ -171,6 +183,7 @@ const struct stmmac_ops dwmac100_ops = {
 	.pmt = dwmac100_pmt,
 	.set_umac_addr = dwmac100_set_umac_addr,
 	.get_umac_addr = dwmac100_get_umac_addr,
+	.set_mac_loopback = dwmac100_set_mac_loopback,
 };
 
 int dwmac100_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index eb013d54025a..01c10893b7a5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -64,6 +64,8 @@
 #define GMAC_PACKET_FILTER_PR		BIT(0)
 #define GMAC_PACKET_FILTER_HMC		BIT(2)
 #define GMAC_PACKET_FILTER_PM		BIT(4)
+#define GMAC_PACKET_FILTER_PCF		BIT(7)
+#define GMAC_PACKET_FILTER_HPF		BIT(10)
 
 #define GMAC_MAX_PERFECT_ADDRESSES	128
 
@@ -160,6 +162,7 @@ enum power_event {
 #define GMAC_CONFIG_PS			BIT(15)
 #define GMAC_CONFIG_FES			BIT(14)
 #define GMAC_CONFIG_DM			BIT(13)
+#define GMAC_CONFIG_LM			BIT(12)
 #define GMAC_CONFIG_DCRS		BIT(9)
 #define GMAC_CONFIG_TE			BIT(1)
 #define GMAC_CONFIG_RE			BIT(0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index b4bb5629de38..5e98da4e14f9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -406,7 +406,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
 	unsigned int value = 0;
 
 	if (dev->flags & IFF_PROMISC) {
-		value = GMAC_PACKET_FILTER_PR;
+		value = GMAC_PACKET_FILTER_PR | GMAC_PACKET_FILTER_PCF;
 	} else if ((dev->flags & IFF_ALLMULTI) ||
 			(netdev_mc_count(dev) > HASH_TABLE_SIZE)) {
 		/* Pass all multi */
@@ -440,20 +440,28 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
 		writel(mc_filter[1], ioaddr + GMAC_HASH_TAB_32_63);
 	}
 
+	value |= GMAC_PACKET_FILTER_HPF;
+
 	/* Handle multiple unicast addresses */
 	if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) {
 		/* Switch to promiscuous mode if more than 128 addrs
 		 * are required
 		 */
 		value |= GMAC_PACKET_FILTER_PR;
-	} else if (!netdev_uc_empty(dev)) {
-		int reg = 1;
+	} else {
 		struct netdev_hw_addr *ha;
+		int reg = 1;
 
 		netdev_for_each_uc_addr(ha, dev) {
 			dwmac4_set_umac_addr(hw, ha->addr, reg);
 			reg++;
 		}
+
+		while (reg <= GMAC_MAX_PERFECT_ADDRESSES) {
+			writel(0, ioaddr + GMAC_ADDR_HIGH(reg));
+			writel(0, ioaddr + GMAC_ADDR_LOW(reg));
+			reg++;
+		}
 	}
 
 	writel(value, ioaddr + GMAC_PACKET_FILTER);
@@ -703,6 +711,18 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
 		x->mac_gmii_rx_proto_engine++;
 }
 
+static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + GMAC_CONFIG);
+
+	if (enable)
+		value |= GMAC_CONFIG_LM;
+	else
+		value &= ~GMAC_CONFIG_LM;
+
+	writel(value, ioaddr + GMAC_CONFIG);
+}
+
 const struct stmmac_ops dwmac4_ops = {
 	.core_init = dwmac4_core_init,
 	.set_mac = stmmac_set_mac,
@@ -732,6 +752,7 @@ const struct stmmac_ops dwmac4_ops = {
 	.pcs_get_adv_lp = dwmac4_get_adv_lp,
 	.debug = dwmac4_debug,
 	.set_filter = dwmac4_set_filter,
+	.set_mac_loopback = dwmac4_set_mac_loopback,
 };
 
 const struct stmmac_ops dwmac410_ops = {
@@ -763,6 +784,7 @@ const struct stmmac_ops dwmac410_ops = {
 	.pcs_get_adv_lp = dwmac4_get_adv_lp,
 	.debug = dwmac4_debug,
 	.set_filter = dwmac4_set_filter,
+	.set_mac_loopback = dwmac4_set_mac_loopback,
 };
 
 const struct stmmac_ops dwmac510_ops = {
@@ -799,6 +821,7 @@ const struct stmmac_ops dwmac510_ops = {
 	.safety_feat_dump = dwmac5_safety_feat_dump,
 	.rxp_config = dwmac5_rxp_config,
 	.flex_pps_config = dwmac5_flex_pps_config,
+	.set_mac_loopback = dwmac4_set_mac_loopback,
 };
 
 int dwmac4_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 545cb9c47433..99f8a391964c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -88,10 +88,6 @@ void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 
 	value &= ~DMA_CONTROL_SR;
 	writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
-
-	value = readl(ioaddr + GMAC_CONFIG);
-	value &= ~GMAC_CONFIG_RE;
-	writel(value, ioaddr + GMAC_CONFIG);
 }
 
 void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 085b700a4994..b8296eb41011 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -29,6 +29,7 @@
 #define XGMAC_CONFIG_GPSL		GENMASK(29, 16)
 #define XGMAC_CONFIG_GPSL_SHIFT		16
 #define XGMAC_CONFIG_S2KP		BIT(11)
+#define XGMAC_CONFIG_LM			BIT(10)
 #define XGMAC_CONFIG_IPC		BIT(9)
 #define XGMAC_CONFIG_JE			BIT(8)
 #define XGMAC_CONFIG_WD			BIT(7)
@@ -39,6 +40,7 @@
 #define XGMAC_CORE_INIT_RX		0
 #define XGMAC_PACKET_FILTER		0x00000008
 #define XGMAC_FILTER_RA			BIT(31)
+#define XGMAC_FILTER_PCF		BIT(7)
 #define XGMAC_FILTER_PM			BIT(4)
 #define XGMAC_FILTER_HMC		BIT(2)
 #define XGMAC_FILTER_PR			BIT(0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 64b8cb88ea45..bfa7d6913fd4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -310,7 +310,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw,
 	u32 value = XGMAC_FILTER_RA;
 
 	if (dev->flags & IFF_PROMISC) {
-		value |= XGMAC_FILTER_PR;
+		value |= XGMAC_FILTER_PR | XGMAC_FILTER_PCF;
 	} else if ((dev->flags & IFF_ALLMULTI) ||
 		   (netdev_mc_count(dev) > HASH_TABLE_SIZE)) {
 		value |= XGMAC_FILTER_PM;
@@ -321,6 +321,18 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw,
 	writel(value, ioaddr + XGMAC_PACKET_FILTER);
 }
 
+static void dwxgmac2_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+	u32 value = readl(ioaddr + XGMAC_RX_CONFIG);
+
+	if (enable)
+		value |= XGMAC_CONFIG_LM;
+	else
+		value &= ~XGMAC_CONFIG_LM;
+
+	writel(value, ioaddr + XGMAC_RX_CONFIG);
+}
+
 const struct stmmac_ops dwxgmac210_ops = {
 	.core_init = dwxgmac2_core_init,
 	.set_mac = dwxgmac2_set_mac,
@@ -350,6 +362,7 @@ const struct stmmac_ops dwxgmac210_ops = {
 	.pcs_get_adv_lp = NULL,
 	.debug = NULL,
 	.set_filter = dwxgmac2_set_filter,
+	.set_mac_loopback = dwxgmac2_set_mac_loopback,
 };
 
 int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index e79037f511e1..7861a938420a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -299,10 +299,6 @@ static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 	value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
 	value &= ~XGMAC_RXST;
 	writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
-
-	value = readl(ioaddr + XGMAC_RX_CONFIG);
-	value &= ~XGMAC_CONFIG_RE;
-	writel(value, ioaddr + XGMAC_RX_CONFIG);
 }
 
 static int dwxgmac2_dma_interrupt(void __iomem *ioaddr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 81b966a8261b..6c61b753b55e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -81,6 +81,7 @@ static const struct stmmac_hwif_entry {
 	const void *hwtimestamp;
 	const void *mode;
 	const void *tc;
+	const void *mmc;
 	int (*setup)(struct stmmac_priv *priv);
 	int (*quirks)(struct stmmac_priv *priv);
 } stmmac_hw[] = {
@@ -100,6 +101,7 @@ static const struct stmmac_hwif_entry {
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
 		.tc = NULL,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac100_setup,
 		.quirks = stmmac_dwmac1_quirks,
 	}, {
@@ -117,6 +119,7 @@ static const struct stmmac_hwif_entry {
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
 		.tc = NULL,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac1000_setup,
 		.quirks = stmmac_dwmac1_quirks,
 	}, {
@@ -134,6 +137,7 @@ static const struct stmmac_hwif_entry {
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
 		.tc = &dwmac510_tc_ops,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac4_setup,
 		.quirks = stmmac_dwmac4_quirks,
 	}, {
@@ -151,6 +155,7 @@ static const struct stmmac_hwif_entry {
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
 		.tc = &dwmac510_tc_ops,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}, {
@@ -168,6 +173,7 @@ static const struct stmmac_hwif_entry {
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
 		.tc = &dwmac510_tc_ops,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}, {
@@ -185,6 +191,7 @@ static const struct stmmac_hwif_entry {
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
 		.tc = &dwmac510_tc_ops,
+		.mmc = &dwmac_mmc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}, {
@@ -202,6 +209,7 @@ static const struct stmmac_hwif_entry {
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
 		.tc = &dwmac510_tc_ops,
+		.mmc = NULL,
 		.setup = dwxgmac2_setup,
 		.quirks = NULL,
 	},
@@ -267,6 +275,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
 		mac->ptp = mac->ptp ? : entry->hwtimestamp;
 		mac->mode = mac->mode ? : entry->mode;
 		mac->tc = mac->tc ? : entry->tc;
+		mac->mmc = mac->mmc ? : entry->mmc;
 
 		priv->hw = mac;
 		priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 5bb00234d961..2acfbc70e3c8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -6,6 +6,7 @@
 #define __STMMAC_HWIF_H__
 
 #include <linux/netdevice.h>
+#include <linux/stmmac.h>
 
 #define stmmac_do_void_callback(__priv, __module, __cname,  __arg0, __args...) \
 ({ \
@@ -324,6 +325,8 @@ struct stmmac_ops {
 	int (*flex_pps_config)(void __iomem *ioaddr, int index,
 			       struct stmmac_pps_cfg *cfg, bool enable,
 			       u32 sub_second_inc, u32 systime_flags);
+	/* Loopback for selftests */
+	void (*set_mac_loopback)(void __iomem *ioaddr, bool enable);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -392,6 +395,8 @@ struct stmmac_ops {
 	stmmac_do_callback(__priv, mac, rxp_config, __args)
 #define stmmac_flex_pps_config(__priv, __args...) \
 	stmmac_do_callback(__priv, mac, flex_pps_config, __args)
+#define stmmac_set_mac_loopback(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, set_mac_loopback, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
@@ -464,6 +469,21 @@ struct stmmac_tc_ops {
 #define stmmac_tc_setup_cbs(__priv, __args...) \
 	stmmac_do_callback(__priv, tc, setup_cbs, __args)
 
+struct stmmac_counters;
+
+struct stmmac_mmc_ops {
+	void (*ctrl)(void __iomem *ioaddr, unsigned int mode);
+	void (*intr_all_mask)(void __iomem *ioaddr);
+	void (*read)(void __iomem *ioaddr, struct stmmac_counters *mmc);
+};
+
+#define stmmac_mmc_ctrl(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mmc, ctrl, __args)
+#define stmmac_mmc_intr_all_mask(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mmc, intr_all_mask, __args)
+#define stmmac_mmc_read(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mmc, read, __args)
+
 struct stmmac_regs_off {
 	u32 ptp_off;
 	u32 mmc_off;
@@ -482,6 +502,7 @@ extern const struct stmmac_tc_ops dwmac510_tc_ops;
 extern const struct stmmac_ops dwxgmac210_ops;
 extern const struct stmmac_dma_ops dwxgmac210_dma_ops;
 extern const struct stmmac_desc_ops dwxgmac210_desc_ops;
+extern const struct stmmac_mmc_ops dwmac_mmc_ops;
 
 #define GMAC_VERSION		0x00000020	/* GMAC CORE Version */
 #define GMAC4_VERSION		0x00000110	/* GMAC4+ CORE Version */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index c037326331f5..e2bd90a4d34f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -128,8 +128,4 @@ struct stmmac_counters {
 	unsigned int mmc_rx_icmp_err_octets;
 };
 
-void dwmac_mmc_ctrl(void __iomem *ioaddr, unsigned int mode);
-void dwmac_mmc_intr_all_mask(void __iomem *ioaddr);
-void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc);
-
 #endif /* __MMC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index e9b04c28980f..b8c598125cfe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -20,6 +20,7 @@
 
 #include <linux/kernel.h>
 #include <linux/io.h>
+#include "hwif.h"
 #include "mmc.h"
 
 /* MAC Management Counters register offset */
@@ -128,7 +129,7 @@
 #define MMC_RX_ICMP_GD_OCTETS		0x180
 #define MMC_RX_ICMP_ERR_OCTETS		0x184
 
-void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
+static void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
 {
 	u32 value = readl(mmcaddr + MMC_CNTRL);
 
@@ -141,7 +142,7 @@ void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
 }
 
 /* To mask all all interrupts.*/
-void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr)
+static void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr)
 {
 	writel(MMC_DEFAULT_MASK, mmcaddr + MMC_RX_INTR_MASK);
 	writel(MMC_DEFAULT_MASK, mmcaddr + MMC_TX_INTR_MASK);
@@ -153,7 +154,7 @@ void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr)
  * counter after a read. So all the field of the mmc struct
  * have to be incremented.
  */
-void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
+static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
 {
 	mmc->mmc_tx_octetcount_gb += readl(mmcaddr + MMC_TX_OCTETCOUNT_GB);
 	mmc->mmc_tx_framecount_gb += readl(mmcaddr + MMC_TX_FRAMECOUNT_GB);
@@ -266,3 +267,9 @@ void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
 	mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS);
 	mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS);
 }
+
+const struct stmmac_mmc_ops dwmac_mmc_ops = {
+	.ctrl = dwmac_mmc_ctrl,
+	.intr_all_mask = dwmac_mmc_intr_all_mask,
+	.read = dwmac_mmc_read,
+};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index dd95d959c1ce..a16ada8b8507 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -229,4 +229,26 @@ int stmmac_dvr_probe(struct device *device,
 void stmmac_disable_eee_mode(struct stmmac_priv *priv);
 bool stmmac_eee_init(struct stmmac_priv *priv);
 
+#if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
+void stmmac_selftest_run(struct net_device *dev,
+			 struct ethtool_test *etest, u64 *buf);
+void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data);
+int stmmac_selftest_get_count(struct stmmac_priv *priv);
+#else
+static inline void stmmac_selftest_run(struct net_device *dev,
+				       struct ethtool_test *etest, u64 *buf)
+{
+	/* Not enabled */
+}
+static inline void stmmac_selftest_get_strings(struct stmmac_priv *priv,
+					       u8 *data)
+{
+	/* Not enabled */
+}
+static inline int stmmac_selftest_get_count(struct stmmac_priv *priv)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_STMMAC_SELFTESTS */
+
 #endif /* __STMMAC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index e09522c5509a..cec51ba34296 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -537,7 +537,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 	if (ret) {
 		/* If supported, for new GMAC chips expose the MMC counters */
 		if (priv->dma_cap.rmon) {
-			dwmac_mmc_read(priv->mmcaddr, &priv->mmc);
+			stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc);
 
 			for (i = 0; i < STMMAC_MMC_STATS_LEN; i++) {
 				char *p;
@@ -589,6 +589,8 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
 		}
 
 		return len;
+	case ETH_SS_TEST:
+		return stmmac_selftest_get_count(priv);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -625,6 +627,9 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 			p += ETH_GSTRING_LEN;
 		}
 		break;
+	case ETH_SS_TEST:
+		stmmac_selftest_get_strings(priv, p);
+		break;
 	default:
 		WARN_ON(1);
 		break;
@@ -890,6 +895,7 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
 	.nway_reset = phy_ethtool_nway_reset,
 	.get_pauseparam = stmmac_get_pauseparam,
 	.set_pauseparam = stmmac_set_pauseparam,
+	.self_test = stmmac_selftest_run,
 	.get_ethtool_stats = stmmac_get_ethtool_stats,
 	.get_strings = stmmac_get_strings,
 	.get_wol = stmmac_get_wol,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 65e57b9f6887..268af79e2632 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2058,6 +2058,9 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
 						 &priv->xstats, chan);
 	struct stmmac_channel *ch = &priv->channel[chan];
 
+	if (status)
+		status |= handle_rx | handle_tx;
+
 	if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
 		stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
 		napi_schedule_irqoff(&ch->rx_napi);
@@ -2128,10 +2131,10 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
 	unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET |
 			    MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET;
 
-	dwmac_mmc_intr_all_mask(priv->mmcaddr);
+	stmmac_mmc_intr_all_mask(priv, priv->mmcaddr);
 
 	if (priv->dma_cap.rmon) {
-		dwmac_mmc_ctrl(priv->mmcaddr, mode);
+		stmmac_mmc_ctrl(priv, priv->mmcaddr, mode);
 		memset(&priv->mmc, 0, sizeof(struct stmmac_counters));
 	} else
 		netdev_info(priv->dev, "No MAC Management Counters available\n");
@@ -2164,8 +2167,8 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv)
 		stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0);
 		if (!is_valid_ether_addr(priv->dev->dev_addr))
 			eth_hw_addr_random(priv->dev);
-		netdev_info(priv->dev, "device MAC address %pM\n",
-			    priv->dev->dev_addr);
+		dev_info(priv->device, "device MAC address %pM\n",
+			 priv->dev->dev_addr);
 	}
 }
 
@@ -4241,9 +4244,8 @@ int stmmac_dvr_probe(struct device *device,
 	u32 queue, maxq;
 	int ret = 0;
 
-	ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv),
-				  MTL_MAX_TX_QUEUES,
-				  MTL_MAX_RX_QUEUES);
+	ndev = devm_alloc_etherdev_mqs(device, sizeof(struct stmmac_priv),
+				       MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES);
 	if (!ndev)
 		return -ENOMEM;
 
@@ -4275,8 +4277,7 @@ int stmmac_dvr_probe(struct device *device,
 	priv->wq = create_singlethread_workqueue("stmmac_wq");
 	if (!priv->wq) {
 		dev_err(priv->device, "failed to create workqueue\n");
-		ret = -ENOMEM;
-		goto error_wq;
+		return -ENOMEM;
 	}
 
 	INIT_WORK(&priv->service_task, stmmac_service_task);
@@ -4432,8 +4433,6 @@ error_mdio_register:
 	}
 error_hw_init:
 	destroy_workqueue(priv->wq);
-error_wq:
-	free_netdev(ndev);
 
 	return ret;
 }
@@ -4470,7 +4469,6 @@ int stmmac_dvr_remove(struct device *dev)
 		stmmac_mdio_unregister(ndev);
 	destroy_workqueue(priv->wq);
 	mutex_destroy(&priv->lock);
-	free_netdev(ndev);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
new file mode 100644
index 000000000000..a97b1ea76438
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -0,0 +1,850 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Synopsys, Inc. and/or its affiliates.
+ * stmmac Selftests Support
+ *
+ * Author: Jose Abreu <joabreu@synopsys.com>
+ */
+
+#include <linux/completion.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/phy.h>
+#include <linux/udp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include "stmmac.h"
+
+struct stmmachdr {
+	__be32 version;
+	__be64 magic;
+	u8 id;
+} __packed;
+
+#define STMMAC_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+			      sizeof(struct stmmachdr))
+#define STMMAC_TEST_PKT_MAGIC	0xdeadcafecafedeadULL
+#define STMMAC_LB_TIMEOUT	msecs_to_jiffies(200)
+
+struct stmmac_packet_attrs {
+	int vlan;
+	int vlan_id_in;
+	int vlan_id_out;
+	unsigned char *src;
+	unsigned char *dst;
+	u32 ip_src;
+	u32 ip_dst;
+	int tcp;
+	int sport;
+	int dport;
+	u32 exp_hash;
+	int dont_wait;
+	int timeout;
+	int size;
+	int remove_sa;
+	u8 id;
+};
+
+static u8 stmmac_test_next_id;
+
+static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
+					       struct stmmac_packet_attrs *attr)
+{
+	struct sk_buff *skb = NULL;
+	struct udphdr *uhdr = NULL;
+	struct tcphdr *thdr = NULL;
+	struct stmmachdr *shdr;
+	struct ethhdr *ehdr;
+	struct iphdr *ihdr;
+	int iplen, size;
+
+	size = attr->size + STMMAC_TEST_PKT_SIZE;
+	if (attr->vlan) {
+		size += 4;
+		if (attr->vlan > 1)
+			size += 4;
+	}
+
+	if (attr->tcp)
+		size += sizeof(struct tcphdr);
+	else
+		size += sizeof(struct udphdr);
+
+	skb = netdev_alloc_skb(priv->dev, size);
+	if (!skb)
+		return NULL;
+
+	prefetchw(skb->data);
+	skb_reserve(skb, NET_IP_ALIGN);
+
+	if (attr->vlan > 1)
+		ehdr = skb_push(skb, ETH_HLEN + 8);
+	else if (attr->vlan)
+		ehdr = skb_push(skb, ETH_HLEN + 4);
+	else if (attr->remove_sa)
+		ehdr = skb_push(skb, ETH_HLEN - 6);
+	else
+		ehdr = skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+
+	skb_set_network_header(skb, skb->len);
+	ihdr = skb_put(skb, sizeof(*ihdr));
+
+	skb_set_transport_header(skb, skb->len);
+	if (attr->tcp)
+		thdr = skb_put(skb, sizeof(*thdr));
+	else
+		uhdr = skb_put(skb, sizeof(*uhdr));
+
+	if (!attr->remove_sa)
+		eth_zero_addr(ehdr->h_source);
+	eth_zero_addr(ehdr->h_dest);
+	if (attr->src && !attr->remove_sa)
+		ether_addr_copy(ehdr->h_source, attr->src);
+	if (attr->dst)
+		ether_addr_copy(ehdr->h_dest, attr->dst);
+
+	if (!attr->remove_sa) {
+		ehdr->h_proto = htons(ETH_P_IP);
+	} else {
+		__be16 *ptr = (__be16 *)ehdr;
+
+		/* HACK */
+		ptr[3] = htons(ETH_P_IP);
+	}
+
+	if (attr->vlan) {
+		__be16 *tag, *proto;
+
+		if (!attr->remove_sa) {
+			tag = (void *)ehdr + ETH_HLEN;
+			proto = (void *)ehdr + (2 * ETH_ALEN);
+		} else {
+			tag = (void *)ehdr + ETH_HLEN - 6;
+			proto = (void *)ehdr + ETH_ALEN;
+		}
+
+		proto[0] = htons(ETH_P_8021Q);
+		tag[0] = htons(attr->vlan_id_out);
+		tag[1] = htons(ETH_P_IP);
+		if (attr->vlan > 1) {
+			proto[0] = htons(ETH_P_8021AD);
+			tag[1] = htons(ETH_P_8021Q);
+			tag[2] = htons(attr->vlan_id_in);
+			tag[3] = htons(ETH_P_IP);
+		}
+	}
+
+	if (attr->tcp) {
+		thdr->source = htons(attr->sport);
+		thdr->dest = htons(attr->dport);
+		thdr->doff = sizeof(struct tcphdr) / 4;
+		thdr->check = 0;
+	} else {
+		uhdr->source = htons(attr->sport);
+		uhdr->dest = htons(attr->dport);
+		uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size);
+		uhdr->check = 0;
+	}
+
+	ihdr->ihl = 5;
+	ihdr->ttl = 32;
+	ihdr->version = 4;
+	if (attr->tcp)
+		ihdr->protocol = IPPROTO_TCP;
+	else
+		ihdr->protocol = IPPROTO_UDP;
+	iplen = sizeof(*ihdr) + sizeof(*shdr) + attr->size;
+	if (attr->tcp)
+		iplen += sizeof(*thdr);
+	else
+		iplen += sizeof(*uhdr);
+	ihdr->tot_len = htons(iplen);
+	ihdr->frag_off = 0;
+	ihdr->saddr = 0;
+	ihdr->daddr = htonl(attr->ip_dst);
+	ihdr->tos = 0;
+	ihdr->id = 0;
+	ip_send_check(ihdr);
+
+	shdr = skb_put(skb, sizeof(*shdr));
+	shdr->version = 0;
+	shdr->magic = cpu_to_be64(STMMAC_TEST_PKT_MAGIC);
+	attr->id = stmmac_test_next_id;
+	shdr->id = stmmac_test_next_id++;
+
+	if (attr->size)
+		skb_put(skb, attr->size);
+
+	skb->csum = 0;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	if (attr->tcp) {
+		thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, ihdr->daddr, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct tcphdr, check);
+	} else {
+		udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr);
+	}
+
+	skb->protocol = htons(ETH_P_IP);
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = priv->dev;
+
+	return skb;
+}
+
+struct stmmac_test_priv {
+	struct stmmac_packet_attrs *packet;
+	struct packet_type pt;
+	struct completion comp;
+	int double_vlan;
+	int vlan_id;
+	int ok;
+};
+
+static int stmmac_test_loopback_validate(struct sk_buff *skb,
+					 struct net_device *ndev,
+					 struct packet_type *pt,
+					 struct net_device *orig_ndev)
+{
+	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	struct stmmachdr *shdr;
+	struct ethhdr *ehdr;
+	struct udphdr *uhdr;
+	struct tcphdr *thdr;
+	struct iphdr *ihdr;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
+		goto out;
+
+	if (skb_linearize(skb))
+		goto out;
+	if (skb_headlen(skb) < (STMMAC_TEST_PKT_SIZE - ETH_HLEN))
+		goto out;
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (tpriv->packet->dst) {
+		if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->dst))
+			goto out;
+	}
+	if (tpriv->packet->src) {
+		if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr))
+			goto out;
+	}
+
+	ihdr = ip_hdr(skb);
+	if (tpriv->double_vlan)
+		ihdr = (struct iphdr *)(skb_network_header(skb) + 4);
+
+	if (tpriv->packet->tcp) {
+		if (ihdr->protocol != IPPROTO_TCP)
+			goto out;
+
+		thdr = (struct tcphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+		if (thdr->dest != htons(tpriv->packet->dport))
+			goto out;
+
+		shdr = (struct stmmachdr *)((u8 *)thdr + sizeof(*thdr));
+	} else {
+		if (ihdr->protocol != IPPROTO_UDP)
+			goto out;
+
+		uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+		if (uhdr->dest != htons(tpriv->packet->dport))
+			goto out;
+
+		shdr = (struct stmmachdr *)((u8 *)uhdr + sizeof(*uhdr));
+	}
+
+	if (shdr->magic != cpu_to_be64(STMMAC_TEST_PKT_MAGIC))
+		goto out;
+	if (tpriv->packet->exp_hash && !skb->hash)
+		goto out;
+	if (tpriv->packet->id != shdr->id)
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int __stmmac_test_loopback(struct stmmac_priv *priv,
+				  struct stmmac_packet_attrs *attr)
+{
+	struct stmmac_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	int ret = 0;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = htons(ETH_P_IP);
+	tpriv->pt.func = stmmac_test_loopback_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = attr;
+	dev_add_pack(&tpriv->pt);
+
+	skb = stmmac_test_get_udp_skb(priv, attr);
+	if (!skb) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	skb_set_queue_mapping(skb, 0);
+	ret = dev_queue_xmit(skb);
+	if (ret)
+		goto cleanup;
+
+	if (attr->dont_wait)
+		goto cleanup;
+
+	if (!attr->timeout)
+		attr->timeout = STMMAC_LB_TIMEOUT;
+
+	wait_for_completion_timeout(&tpriv->comp, attr->timeout);
+	ret = !tpriv->ok;
+
+cleanup:
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+static int stmmac_test_mac_loopback(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+
+	attr.dst = priv->dev->dev_addr;
+	return __stmmac_test_loopback(priv, &attr);
+}
+
+static int stmmac_test_phy_loopback(struct stmmac_priv *priv)
+{
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (!priv->dev->phydev)
+		return -EBUSY;
+
+	ret = phy_loopback(priv->dev->phydev, true);
+	if (ret)
+		return ret;
+
+	attr.dst = priv->dev->dev_addr;
+	ret = __stmmac_test_loopback(priv, &attr);
+
+	phy_loopback(priv->dev->phydev, false);
+	return ret;
+}
+
+static int stmmac_test_mmc(struct stmmac_priv *priv)
+{
+	struct stmmac_counters initial, final;
+	int ret;
+
+	memset(&initial, 0, sizeof(initial));
+	memset(&final, 0, sizeof(final));
+
+	if (!priv->dma_cap.rmon)
+		return -EOPNOTSUPP;
+
+	/* Save previous results into internal struct */
+	stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc);
+
+	ret = stmmac_test_mac_loopback(priv);
+	if (ret)
+		return ret;
+
+	/* These will be loopback results so no need to save them */
+	stmmac_mmc_read(priv, priv->mmcaddr, &final);
+
+	/*
+	 * The number of MMC counters available depends on HW configuration
+	 * so we just use this one to validate the feature. I hope there is
+	 * not a version without this counter.
+	 */
+	if (final.mmc_tx_framecount_g <= initial.mmc_tx_framecount_g)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int stmmac_test_eee(struct stmmac_priv *priv)
+{
+	struct stmmac_extra_stats *initial, *final;
+	int retries = 10;
+	int ret;
+
+	if (!priv->dma_cap.eee || !priv->eee_active)
+		return -EOPNOTSUPP;
+
+	initial = kzalloc(sizeof(*initial), GFP_KERNEL);
+	if (!initial)
+		return -ENOMEM;
+
+	final = kzalloc(sizeof(*final), GFP_KERNEL);
+	if (!final) {
+		ret = -ENOMEM;
+		goto out_free_initial;
+	}
+
+	memcpy(initial, &priv->xstats, sizeof(*initial));
+
+	ret = stmmac_test_mac_loopback(priv);
+	if (ret)
+		goto out_free_final;
+
+	/* We have no traffic in the line so, sooner or later it will go LPI */
+	while (--retries) {
+		memcpy(final, &priv->xstats, sizeof(*final));
+
+		if (final->irq_tx_path_in_lpi_mode_n >
+		    initial->irq_tx_path_in_lpi_mode_n)
+			break;
+		msleep(100);
+	}
+
+	if (!retries) {
+		ret = -ETIMEDOUT;
+		goto out_free_final;
+	}
+
+	if (final->irq_tx_path_in_lpi_mode_n <=
+	    initial->irq_tx_path_in_lpi_mode_n) {
+		ret = -EINVAL;
+		goto out_free_final;
+	}
+
+	if (final->irq_tx_path_exit_lpi_mode_n <=
+	    initial->irq_tx_path_exit_lpi_mode_n) {
+		ret = -EINVAL;
+		goto out_free_final;
+	}
+
+out_free_final:
+	kfree(final);
+out_free_initial:
+	kfree(initial);
+	return ret;
+}
+
+static int stmmac_filter_check(struct stmmac_priv *priv)
+{
+	if (!(priv->dev->flags & IFF_PROMISC))
+		return 0;
+
+	netdev_warn(priv->dev, "Test can't be run in promiscuous mode!\n");
+	return -EOPNOTSUPP;
+}
+
+static int stmmac_test_hfilt(struct stmmac_priv *priv)
+{
+	unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd};
+	unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb};
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	ret = stmmac_filter_check(priv);
+	if (ret)
+		return ret;
+
+	ret = dev_mc_add(priv->dev, gd_addr);
+	if (ret)
+		return ret;
+
+	attr.dst = gd_addr;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = bd_addr;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = !ret;
+
+cleanup:
+	dev_mc_del(priv->dev, gd_addr);
+	return ret;
+}
+
+static int stmmac_test_pfilt(struct stmmac_priv *priv)
+{
+	unsigned char gd_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
+	unsigned char bd_addr[ETH_ALEN] = {0x08, 0x00, 0x22, 0x33, 0x44, 0x55};
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (stmmac_filter_check(priv))
+		return -EOPNOTSUPP;
+
+	ret = dev_uc_add(priv->dev, gd_addr);
+	if (ret)
+		return ret;
+
+	attr.dst = gd_addr;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = bd_addr;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = !ret;
+
+cleanup:
+	dev_uc_del(priv->dev, gd_addr);
+	return ret;
+}
+
+static int stmmac_dummy_sync(struct net_device *netdev, const u8 *addr)
+{
+	return 0;
+}
+
+static void stmmac_test_set_rx_mode(struct net_device *netdev)
+{
+	/* As we are in test mode of ethtool we already own the rtnl lock
+	 * so no address will change from user. We can just call the
+	 * ndo_set_rx_mode() callback directly */
+	if (netdev->netdev_ops->ndo_set_rx_mode)
+		netdev->netdev_ops->ndo_set_rx_mode(netdev);
+}
+
+static int stmmac_test_mcfilt(struct stmmac_priv *priv)
+{
+	unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
+	unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77};
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (stmmac_filter_check(priv))
+		return -EOPNOTSUPP;
+
+	/* Remove all MC addresses */
+	__dev_mc_unsync(priv->dev, NULL);
+	stmmac_test_set_rx_mode(priv->dev);
+
+	ret = dev_uc_add(priv->dev, uc_addr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = uc_addr;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = mc_addr;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = !ret;
+
+cleanup:
+	dev_uc_del(priv->dev, uc_addr);
+	__dev_mc_sync(priv->dev, stmmac_dummy_sync, NULL);
+	stmmac_test_set_rx_mode(priv->dev);
+	return ret;
+}
+
+static int stmmac_test_ucfilt(struct stmmac_priv *priv)
+{
+	unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
+	unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77};
+	struct stmmac_packet_attrs attr = { };
+	int ret;
+
+	if (stmmac_filter_check(priv))
+		return -EOPNOTSUPP;
+
+	/* Remove all UC addresses */
+	__dev_uc_unsync(priv->dev, NULL);
+	stmmac_test_set_rx_mode(priv->dev);
+
+	ret = dev_mc_add(priv->dev, mc_addr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = mc_addr;
+
+	/* Shall receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	if (ret)
+		goto cleanup;
+
+	attr.dst = uc_addr;
+
+	/* Shall NOT receive packet */
+	ret = __stmmac_test_loopback(priv, &attr);
+	ret = !ret;
+
+cleanup:
+	dev_mc_del(priv->dev, mc_addr);
+	__dev_uc_sync(priv->dev, stmmac_dummy_sync, NULL);
+	stmmac_test_set_rx_mode(priv->dev);
+	return ret;
+}
+
+static int stmmac_test_flowctrl_validate(struct sk_buff *skb,
+					 struct net_device *ndev,
+					 struct packet_type *pt,
+					 struct net_device *orig_ndev)
+{
+	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	struct ethhdr *ehdr;
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr))
+		goto out;
+	if (ehdr->h_proto != htons(ETH_P_PAUSE))
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int stmmac_test_flowctrl(struct stmmac_priv *priv)
+{
+	unsigned char paddr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x01};
+	struct phy_device *phydev = priv->dev->phydev;
+	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	struct stmmac_test_priv *tpriv;
+	unsigned int pkt_count;
+	int i, ret = 0;
+
+	if (!phydev || !phydev->pause)
+		return -EOPNOTSUPP;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+	tpriv->pt.type = htons(ETH_P_PAUSE);
+	tpriv->pt.func = stmmac_test_flowctrl_validate;
+	tpriv->pt.dev = priv->dev;
+	tpriv->pt.af_packet_priv = tpriv;
+	dev_add_pack(&tpriv->pt);
+
+	/* Compute minimum number of packets to make FIFO full */
+	pkt_count = priv->plat->rx_fifo_size;
+	if (!pkt_count)
+		pkt_count = priv->dma_cap.rx_fifo_size;
+	pkt_count /= 1400;
+	pkt_count *= 2;
+
+	for (i = 0; i < rx_cnt; i++)
+		stmmac_stop_rx(priv, priv->ioaddr, i);
+
+	ret = dev_set_promiscuity(priv->dev, 1);
+	if (ret)
+		goto cleanup;
+
+	ret = dev_mc_add(priv->dev, paddr);
+	if (ret)
+		goto cleanup;
+
+	for (i = 0; i < pkt_count; i++) {
+		struct stmmac_packet_attrs attr = { };
+
+		attr.dst = priv->dev->dev_addr;
+		attr.dont_wait = true;
+		attr.size = 1400;
+
+		ret = __stmmac_test_loopback(priv, &attr);
+		if (ret)
+			goto cleanup;
+		if (tpriv->ok)
+			break;
+	}
+
+	/* Wait for some time in case RX Watchdog is enabled */
+	msleep(200);
+
+	for (i = 0; i < rx_cnt; i++) {
+		struct stmmac_channel *ch = &priv->channel[i];
+
+		stmmac_start_rx(priv, priv->ioaddr, i);
+		local_bh_disable();
+		napi_reschedule(&ch->rx_napi);
+		local_bh_enable();
+	}
+
+	wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+	ret = !tpriv->ok;
+
+cleanup:
+	dev_mc_del(priv->dev, paddr);
+	dev_set_promiscuity(priv->dev, -1);
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+#define STMMAC_LOOPBACK_NONE	0
+#define STMMAC_LOOPBACK_MAC	1
+#define STMMAC_LOOPBACK_PHY	2
+
+static const struct stmmac_test {
+	char name[ETH_GSTRING_LEN];
+	int lb;
+	int (*fn)(struct stmmac_priv *priv);
+} stmmac_selftests[] = {
+	{
+		.name = "MAC Loopback         ",
+		.lb = STMMAC_LOOPBACK_MAC,
+		.fn = stmmac_test_mac_loopback,
+	}, {
+		.name = "PHY Loopback         ",
+		.lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */
+		.fn = stmmac_test_phy_loopback,
+	}, {
+		.name = "MMC Counters         ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_mmc,
+	}, {
+		.name = "EEE                  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_eee,
+	}, {
+		.name = "Hash Filter MC       ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_hfilt,
+	}, {
+		.name = "Perfect Filter UC    ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_pfilt,
+	}, {
+		.name = "MC Filter            ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_mcfilt,
+	}, {
+		.name = "UC Filter            ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_ucfilt,
+	}, {
+		.name = "Flow Control         ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_flowctrl,
+	},
+};
+
+void stmmac_selftest_run(struct net_device *dev,
+			 struct ethtool_test *etest, u64 *buf)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int count = stmmac_selftest_get_count(priv);
+	int carrier = netif_carrier_ok(dev);
+	int i, ret;
+
+	memset(buf, 0, sizeof(*buf) * count);
+	stmmac_test_next_id = 0;
+
+	if (etest->flags != ETH_TEST_FL_OFFLINE) {
+		netdev_err(priv->dev, "Only offline tests are supported\n");
+		etest->flags |= ETH_TEST_FL_FAILED;
+		return;
+	} else if (!carrier) {
+		netdev_err(priv->dev, "You need valid Link to execute tests\n");
+		etest->flags |= ETH_TEST_FL_FAILED;
+		return;
+	}
+
+	/* We don't want extra traffic */
+	netif_carrier_off(dev);
+
+	/* Wait for queues drain */
+	msleep(200);
+
+	for (i = 0; i < count; i++) {
+		ret = 0;
+
+		switch (stmmac_selftests[i].lb) {
+		case STMMAC_LOOPBACK_PHY:
+			ret = -EOPNOTSUPP;
+			if (dev->phydev)
+				ret = phy_loopback(dev->phydev, true);
+			if (!ret)
+				break;
+			/* Fallthrough */
+		case STMMAC_LOOPBACK_MAC:
+			ret = stmmac_set_mac_loopback(priv, priv->ioaddr, true);
+			break;
+		case STMMAC_LOOPBACK_NONE:
+			break;
+		default:
+			ret = -EOPNOTSUPP;
+			break;
+		}
+
+		/*
+		 * First tests will always be MAC / PHY loobpack. If any of
+		 * them is not supported we abort earlier.
+		 */
+		if (ret) {
+			netdev_err(priv->dev, "Loopback is not supported\n");
+			etest->flags |= ETH_TEST_FL_FAILED;
+			break;
+		}
+
+		ret = stmmac_selftests[i].fn(priv);
+		if (ret && (ret != -EOPNOTSUPP))
+			etest->flags |= ETH_TEST_FL_FAILED;
+		buf[i] = ret;
+
+		switch (stmmac_selftests[i].lb) {
+		case STMMAC_LOOPBACK_PHY:
+			ret = -EOPNOTSUPP;
+			if (dev->phydev)
+				ret = phy_loopback(dev->phydev, false);
+			if (!ret)
+				break;
+			/* Fallthrough */
+		case STMMAC_LOOPBACK_MAC:
+			stmmac_set_mac_loopback(priv, priv->ioaddr, false);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Restart everything */
+	if (carrier)
+		netif_carrier_on(dev);
+}
+
+void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	for (i = 0; i < stmmac_selftest_get_count(priv); i++) {
+		snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1,
+			 stmmac_selftests[i].name);
+		p += ETH_GSTRING_LEN;
+	}
+}
+
+int stmmac_selftest_get_count(struct stmmac_priv *priv)
+{
+	return ARRAY_SIZE(stmmac_selftests);
+}
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 634fc484a0b3..6d3f1f3f90cb 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1423,8 +1423,11 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	return 0;
 
 err_cleanup:
-	cpdma_ctlr_stop(cpsw->dma);
-	for_each_slave(priv, cpsw_slave_stop, cpsw);
+	if (!cpsw->usage_count) {
+		cpdma_ctlr_stop(cpsw->dma);
+		for_each_slave(priv, cpsw_slave_stop, cpsw);
+	}
+
 	pm_runtime_put_sync(cpsw->dev);
 	netif_carrier_off(priv->ndev);
 	return ret;
diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 1aeda084b8f1..276292bca334 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -361,7 +361,7 @@ struct temac_local {
 	/* For synchronization of indirect register access.  Must be
 	 * shared mutex between interfaces in same TEMAC block.
 	 */
-	struct mutex *indirect_mutex;
+	spinlock_t *indirect_lock;
 	u32 options;			/* Current options word */
 	int last_link;
 	unsigned int temac_features;
@@ -388,8 +388,9 @@ struct temac_local {
 /* xilinx_temac.c */
 int temac_indirect_busywait(struct temac_local *lp);
 u32 temac_indirect_in32(struct temac_local *lp, int reg);
+u32 temac_indirect_in32_locked(struct temac_local *lp, int reg);
 void temac_indirect_out32(struct temac_local *lp, int reg, u32 value);
-
+void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value);
 
 /* xilinx_temac_mdio.c */
 int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 14870d659f7d..21c1b4322ea7 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -22,7 +22,6 @@
  *
  * TODO:
  * - Factor out locallink DMA code into separate driver
- * - Fix multicast assignment.
  * - Fix support for hardware checksumming.
  * - Testing.  Lots and lots of testing.
  *
@@ -53,6 +52,7 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
+#include <linux/processor.h>
 #include <linux/platform_data/xilinx-ll-temac.h>
 
 #include "ll_temac.h"
@@ -84,51 +84,118 @@ static void _temac_iow_le(struct temac_local *lp, int offset, u32 value)
 	return iowrite32(value, lp->regs + offset);
 }
 
+static bool hard_acs_rdy(struct temac_local *lp)
+{
+	return temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK;
+}
+
+static bool hard_acs_rdy_or_timeout(struct temac_local *lp, ktime_t timeout)
+{
+	ktime_t cur = ktime_get();
+
+	return hard_acs_rdy(lp) || ktime_after(cur, timeout);
+}
+
+/* Poll for maximum 20 ms.  This is similar to the 2 jiffies @ 100 Hz
+ * that was used before, and should cover MDIO bus speed down to 3200
+ * Hz.
+ */
+#define HARD_ACS_RDY_POLL_NS (20 * NSEC_PER_MSEC)
+
+/**
+ * temac_indirect_busywait - Wait for current indirect register access
+ * to complete.
+ */
 int temac_indirect_busywait(struct temac_local *lp)
 {
-	unsigned long end = jiffies + 2;
+	ktime_t timeout = ktime_add_ns(ktime_get(), HARD_ACS_RDY_POLL_NS);
 
-	while (!(temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK)) {
-		if (time_before_eq(end, jiffies)) {
-			WARN_ON(1);
-			return -ETIMEDOUT;
-		}
-		usleep_range(500, 1000);
-	}
-	return 0;
+	spin_until_cond(hard_acs_rdy_or_timeout(lp, timeout));
+	if (WARN_ON(!hard_acs_rdy(lp)))
+		return -ETIMEDOUT;
+	else
+		return 0;
 }
 
 /**
- * temac_indirect_in32
- *
- * lp->indirect_mutex must be held when calling this function
+ * temac_indirect_in32 - Indirect register read access.  This function
+ * must be called without lp->indirect_lock being held.
  */
 u32 temac_indirect_in32(struct temac_local *lp, int reg)
 {
-	u32 val;
+	unsigned long flags;
+	int val;
+
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	val = temac_indirect_in32_locked(lp, reg);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
+	return val;
+}
 
-	if (temac_indirect_busywait(lp))
+/**
+ * temac_indirect_in32_locked - Indirect register read access.  This
+ * function must be called with lp->indirect_lock being held.  Use
+ * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid
+ * repeated lock/unlock and to ensure uninterrupted access to indirect
+ * registers.
+ */
+u32 temac_indirect_in32_locked(struct temac_local *lp, int reg)
+{
+	/* This initial wait should normally not spin, as we always
+	 * try to wait for indirect access to complete before
+	 * releasing the indirect_lock.
+	 */
+	if (WARN_ON(temac_indirect_busywait(lp)))
 		return -ETIMEDOUT;
+	/* Initiate read from indirect register */
 	temac_iow(lp, XTE_CTL0_OFFSET, reg);
-	if (temac_indirect_busywait(lp))
+	/* Wait for indirect register access to complete.  We really
+	 * should not see timeouts, and could even end up causing
+	 * problem for following indirect access, so let's make a bit
+	 * of WARN noise.
+	 */
+	if (WARN_ON(temac_indirect_busywait(lp)))
 		return -ETIMEDOUT;
-	val = temac_ior(lp, XTE_LSW0_OFFSET);
-
-	return val;
+	/* Value is ready now */
+	return temac_ior(lp, XTE_LSW0_OFFSET);
 }
 
 /**
- * temac_indirect_out32
- *
- * lp->indirect_mutex must be held when calling this function
+ * temac_indirect_out32 - Indirect register write access.  This function
+ * must be called without lp->indirect_lock being held.
  */
 void temac_indirect_out32(struct temac_local *lp, int reg, u32 value)
 {
-	if (temac_indirect_busywait(lp))
+	unsigned long flags;
+
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	temac_indirect_out32_locked(lp, reg, value);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
+}
+
+/**
+ * temac_indirect_out32_locked - Indirect register write access.  This
+ * function must be called with lp->indirect_lock being held.  Use
+ * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid
+ * repeated lock/unlock and to ensure uninterrupted access to indirect
+ * registers.
+ */
+void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value)
+{
+	/* As in temac_indirect_in32_locked(), we should normally not
+	 * spin here.  And if it happens, we actually end up silently
+	 * ignoring the write request.  Ouch.
+	 */
+	if (WARN_ON(temac_indirect_busywait(lp)))
 		return;
+	/* Initiate write to indirect register */
 	temac_iow(lp, XTE_LSW0_OFFSET, value);
 	temac_iow(lp, XTE_CTL0_OFFSET, CNTLREG_WRITE_ENABLE_MASK | reg);
-	temac_indirect_busywait(lp);
+	/* As in temac_indirect_in32_locked(), we should not see timeouts
+	 * here.  And if it happens, we continue before the write has
+	 * completed.  Not good.
+	 */
+	WARN_ON(temac_indirect_busywait(lp));
 }
 
 /**
@@ -344,20 +411,21 @@ out:
 static void temac_do_set_mac_address(struct net_device *ndev)
 {
 	struct temac_local *lp = netdev_priv(ndev);
+	unsigned long flags;
 
 	/* set up unicast MAC address filter set its mac address */
-	mutex_lock(lp->indirect_mutex);
-	temac_indirect_out32(lp, XTE_UAW0_OFFSET,
-			     (ndev->dev_addr[0]) |
-			     (ndev->dev_addr[1] << 8) |
-			     (ndev->dev_addr[2] << 16) |
-			     (ndev->dev_addr[3] << 24));
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	temac_indirect_out32_locked(lp, XTE_UAW0_OFFSET,
+				    (ndev->dev_addr[0]) |
+				    (ndev->dev_addr[1] << 8) |
+				    (ndev->dev_addr[2] << 16) |
+				    (ndev->dev_addr[3] << 24));
 	/* There are reserved bits in EUAW1
 	 * so don't affect them Set MAC bits [47:32] in EUAW1 */
-	temac_indirect_out32(lp, XTE_UAW1_OFFSET,
-			     (ndev->dev_addr[4] & 0x000000ff) |
-			     (ndev->dev_addr[5] << 8));
-	mutex_unlock(lp->indirect_mutex);
+	temac_indirect_out32_locked(lp, XTE_UAW1_OFFSET,
+				    (ndev->dev_addr[4] & 0x000000ff) |
+				    (ndev->dev_addr[5] << 8));
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 }
 
 static int temac_init_mac_address(struct net_device *ndev, const void *address)
@@ -383,49 +451,58 @@ static int temac_set_mac_address(struct net_device *ndev, void *p)
 static void temac_set_multicast_list(struct net_device *ndev)
 {
 	struct temac_local *lp = netdev_priv(ndev);
-	u32 multi_addr_msw, multi_addr_lsw, val;
-	int i;
+	u32 multi_addr_msw, multi_addr_lsw;
+	int i = 0;
+	unsigned long flags;
+	bool promisc_mode_disabled = false;
 
-	mutex_lock(lp->indirect_mutex);
-	if (ndev->flags & (IFF_ALLMULTI | IFF_PROMISC) ||
-	    netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM) {
-		/*
-		 *	We must make the kernel realise we had to move
-		 *	into promisc mode or we start all out war on
-		 *	the cable. If it was a promisc request the
-		 *	flag is already set. If not we assert it.
-		 */
-		ndev->flags |= IFF_PROMISC;
+	if (ndev->flags & (IFF_PROMISC | IFF_ALLMULTI) ||
+	    (netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM)) {
 		temac_indirect_out32(lp, XTE_AFM_OFFSET, XTE_AFM_EPPRM_MASK);
 		dev_info(&ndev->dev, "Promiscuous mode enabled.\n");
-	} else if (!netdev_mc_empty(ndev)) {
+		return;
+	}
+
+	spin_lock_irqsave(lp->indirect_lock, flags);
+
+	if (!netdev_mc_empty(ndev)) {
 		struct netdev_hw_addr *ha;
 
-		i = 0;
 		netdev_for_each_mc_addr(ha, ndev) {
-			if (i >= MULTICAST_CAM_TABLE_NUM)
+			if (WARN_ON(i >= MULTICAST_CAM_TABLE_NUM))
 				break;
 			multi_addr_msw = ((ha->addr[3] << 24) |
 					  (ha->addr[2] << 16) |
 					  (ha->addr[1] << 8) |
 					  (ha->addr[0]));
-			temac_indirect_out32(lp, XTE_MAW0_OFFSET,
-					     multi_addr_msw);
+			temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET,
+						    multi_addr_msw);
 			multi_addr_lsw = ((ha->addr[5] << 8) |
 					  (ha->addr[4]) | (i << 16));
-			temac_indirect_out32(lp, XTE_MAW1_OFFSET,
-					     multi_addr_lsw);
+			temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET,
+						    multi_addr_lsw);
 			i++;
 		}
-	} else {
-		val = temac_indirect_in32(lp, XTE_AFM_OFFSET);
-		temac_indirect_out32(lp, XTE_AFM_OFFSET,
-				     val & ~XTE_AFM_EPPRM_MASK);
-		temac_indirect_out32(lp, XTE_MAW0_OFFSET, 0);
-		temac_indirect_out32(lp, XTE_MAW1_OFFSET, 0);
-		dev_info(&ndev->dev, "Promiscuous mode disabled.\n");
 	}
-	mutex_unlock(lp->indirect_mutex);
+
+	/* Clear all or remaining/unused address table entries */
+	while (i < MULTICAST_CAM_TABLE_NUM) {
+		temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, 0);
+		temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, i << 16);
+		i++;
+	}
+
+	/* Enable address filter block if currently disabled */
+	if (temac_indirect_in32_locked(lp, XTE_AFM_OFFSET)
+	    & XTE_AFM_EPPRM_MASK) {
+		temac_indirect_out32_locked(lp, XTE_AFM_OFFSET, 0);
+		promisc_mode_disabled = true;
+	}
+
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
+
+	if (promisc_mode_disabled)
+		dev_info(&ndev->dev, "Promiscuous mode disabled.\n");
 }
 
 static struct temac_option {
@@ -516,17 +593,19 @@ static u32 temac_setoptions(struct net_device *ndev, u32 options)
 	struct temac_local *lp = netdev_priv(ndev);
 	struct temac_option *tp = &temac_options[0];
 	int reg;
+	unsigned long flags;
 
-	mutex_lock(lp->indirect_mutex);
+	spin_lock_irqsave(lp->indirect_lock, flags);
 	while (tp->opt) {
-		reg = temac_indirect_in32(lp, tp->reg) & ~tp->m_or;
-		if (options & tp->opt)
+		reg = temac_indirect_in32_locked(lp, tp->reg) & ~tp->m_or;
+		if (options & tp->opt) {
 			reg |= tp->m_or;
-		temac_indirect_out32(lp, tp->reg, reg);
+			temac_indirect_out32_locked(lp, tp->reg, reg);
+		}
 		tp++;
 	}
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 	lp->options |= options;
-	mutex_unlock(lp->indirect_mutex);
 
 	return 0;
 }
@@ -537,6 +616,7 @@ static void temac_device_reset(struct net_device *ndev)
 	struct temac_local *lp = netdev_priv(ndev);
 	u32 timeout;
 	u32 val;
+	unsigned long flags;
 
 	/* Perform a software reset */
 
@@ -545,7 +625,6 @@ static void temac_device_reset(struct net_device *ndev)
 
 	dev_dbg(&ndev->dev, "%s()\n", __func__);
 
-	mutex_lock(lp->indirect_mutex);
 	/* Reset the receiver and wait for it to finish reset */
 	temac_indirect_out32(lp, XTE_RXC1_OFFSET, XTE_RXC1_RXRST_MASK);
 	timeout = 1000;
@@ -571,8 +650,11 @@ static void temac_device_reset(struct net_device *ndev)
 	}
 
 	/* Disable the receiver */
-	val = temac_indirect_in32(lp, XTE_RXC1_OFFSET);
-	temac_indirect_out32(lp, XTE_RXC1_OFFSET, val & ~XTE_RXC1_RXEN_MASK);
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	val = temac_indirect_in32_locked(lp, XTE_RXC1_OFFSET);
+	temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET,
+				    val & ~XTE_RXC1_RXEN_MASK);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 
 	/* Reset Local Link (DMA) */
 	lp->dma_out(lp, DMA_CONTROL_REG, DMA_CONTROL_RST);
@@ -592,12 +674,12 @@ static void temac_device_reset(struct net_device *ndev)
 				"temac_device_reset descriptor allocation failed\n");
 	}
 
-	temac_indirect_out32(lp, XTE_RXC0_OFFSET, 0);
-	temac_indirect_out32(lp, XTE_RXC1_OFFSET, 0);
-	temac_indirect_out32(lp, XTE_TXC_OFFSET, 0);
-	temac_indirect_out32(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK);
-
-	mutex_unlock(lp->indirect_mutex);
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	temac_indirect_out32_locked(lp, XTE_RXC0_OFFSET, 0);
+	temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, 0);
+	temac_indirect_out32_locked(lp, XTE_TXC_OFFSET, 0);
+	temac_indirect_out32_locked(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 
 	/* Sync default options with HW
 	 * but leave receiver and transmitter disabled.  */
@@ -621,13 +703,14 @@ static void temac_adjust_link(struct net_device *ndev)
 	struct phy_device *phy = ndev->phydev;
 	u32 mii_speed;
 	int link_state;
+	unsigned long flags;
 
 	/* hash together the state values to decide if something has changed */
 	link_state = phy->speed | (phy->duplex << 1) | phy->link;
 
-	mutex_lock(lp->indirect_mutex);
 	if (lp->last_link != link_state) {
-		mii_speed = temac_indirect_in32(lp, XTE_EMCFG_OFFSET);
+		spin_lock_irqsave(lp->indirect_lock, flags);
+		mii_speed = temac_indirect_in32_locked(lp, XTE_EMCFG_OFFSET);
 		mii_speed &= ~XTE_EMCFG_LINKSPD_MASK;
 
 		switch (phy->speed) {
@@ -637,11 +720,12 @@ static void temac_adjust_link(struct net_device *ndev)
 		}
 
 		/* Write new speed setting out to TEMAC */
-		temac_indirect_out32(lp, XTE_EMCFG_OFFSET, mii_speed);
+		temac_indirect_out32_locked(lp, XTE_EMCFG_OFFSET, mii_speed);
+		spin_unlock_irqrestore(lp->indirect_lock, flags);
+
 		lp->last_link = link_state;
 		phy_print_status(phy);
 	}
-	mutex_unlock(lp->indirect_mutex);
 }
 
 #ifdef CONFIG_64BIT
@@ -1011,6 +1095,7 @@ static const struct net_device_ops temac_netdev_ops = {
 	.ndo_open = temac_open,
 	.ndo_stop = temac_stop,
 	.ndo_start_xmit = temac_start_xmit,
+	.ndo_set_rx_mode = temac_set_multicast_list,
 	.ndo_set_mac_address = temac_set_mac_address,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_do_ioctl = temac_ioctl,
@@ -1076,7 +1161,6 @@ static int temac_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ndev);
 	SET_NETDEV_DEV(ndev, &pdev->dev);
-	ndev->flags &= ~IFF_MULTICAST;  /* clear multicast */
 	ndev->features = NETIF_F_SG;
 	ndev->netdev_ops = &temac_netdev_ops;
 	ndev->ethtool_ops = &temac_ethtool_ops;
@@ -1103,17 +1187,17 @@ static int temac_probe(struct platform_device *pdev)
 
 	/* Setup mutex for synchronization of indirect register access */
 	if (pdata) {
-		if (!pdata->indirect_mutex) {
+		if (!pdata->indirect_lock) {
 			dev_err(&pdev->dev,
-				"indirect_mutex missing in platform_data\n");
+				"indirect_lock missing in platform_data\n");
 			return -EINVAL;
 		}
-		lp->indirect_mutex = pdata->indirect_mutex;
+		lp->indirect_lock = pdata->indirect_lock;
 	} else {
-		lp->indirect_mutex = devm_kmalloc(&pdev->dev,
-						  sizeof(*lp->indirect_mutex),
-						  GFP_KERNEL);
-		mutex_init(lp->indirect_mutex);
+		lp->indirect_lock = devm_kmalloc(&pdev->dev,
+						 sizeof(*lp->indirect_lock),
+						 GFP_KERNEL);
+		spin_lock_init(lp->indirect_lock);
 	}
 
 	/* map device registers */
diff --git a/drivers/net/ethernet/xilinx/ll_temac_mdio.c b/drivers/net/ethernet/xilinx/ll_temac_mdio.c
index a4667326f745..6fd2dea4e60f 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_mdio.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_mdio.c
@@ -25,14 +25,15 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
 {
 	struct temac_local *lp = bus->priv;
 	u32 rc;
+	unsigned long flags;
 
 	/* Write the PHY address to the MIIM Access Initiator register.
 	 * When the transfer completes, the PHY register value will appear
 	 * in the LSW0 register */
-	mutex_lock(lp->indirect_mutex);
+	spin_lock_irqsave(lp->indirect_lock, flags);
 	temac_iow(lp, XTE_LSW0_OFFSET, (phy_id << 5) | reg);
-	rc = temac_indirect_in32(lp, XTE_MIIMAI_OFFSET);
-	mutex_unlock(lp->indirect_mutex);
+	rc = temac_indirect_in32_locked(lp, XTE_MIIMAI_OFFSET);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 
 	dev_dbg(lp->dev, "temac_mdio_read(phy_id=%i, reg=%x) == %x\n",
 		phy_id, reg, rc);
@@ -43,6 +44,7 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
 static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
 {
 	struct temac_local *lp = bus->priv;
+	unsigned long flags;
 
 	dev_dbg(lp->dev, "temac_mdio_write(phy_id=%i, reg=%x, val=%x)\n",
 		phy_id, reg, val);
@@ -50,10 +52,10 @@ static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
 	/* First write the desired value into the write data register
 	 * and then write the address into the access initiator register
 	 */
-	mutex_lock(lp->indirect_mutex);
-	temac_indirect_out32(lp, XTE_MGTDR_OFFSET, val);
-	temac_indirect_out32(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg);
-	mutex_unlock(lp->indirect_mutex);
+	spin_lock_irqsave(lp->indirect_lock, flags);
+	temac_indirect_out32_locked(lp, XTE_MGTDR_OFFSET, val);
+	temac_indirect_out32_locked(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg);
+	spin_unlock_irqrestore(lp->indirect_lock, flags);
 
 	return 0;
 }
@@ -87,9 +89,7 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev)
 
 	/* Enable the MDIO bus by asserting the enable bit and writing
 	 * in the clock config */
-	mutex_lock(lp->indirect_mutex);
 	temac_indirect_out32(lp, XTE_MC_OFFSET, 1 << 6 | clk_div);
-	mutex_unlock(lp->indirect_mutex);
 
 	bus = devm_mdiobus_alloc(&pdev->dev);
 	if (!bus)
@@ -116,10 +116,8 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	mutex_lock(lp->indirect_mutex);
 	dev_dbg(lp->dev, "MDIO bus registered;  MC:%x\n",
 		temac_indirect_in32(lp, XTE_MC_OFFSET));
-	mutex_unlock(lp->indirect_mutex);
 	return 0;
 }
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 61550122b563..0ccabde8e9c9 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -831,7 +831,7 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	struct ifreq ifrr;
 	int err = -EOPNOTSUPP;
 
-	strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
+	strscpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
 	ifrr.ifr_ifru = ifr->ifr_ifru;
 
 	switch (cmd) {
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index f99f27800fdb..db5645b0c898 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -416,6 +416,12 @@ config NATIONAL_PHY
 	---help---
 	  Currently supports the DP83865 PHY.
 
+config NXP_TJA11XX_PHY
+	tristate "NXP TJA11xx PHYs support"
+	depends on HWMON
+	---help---
+	  Currently supports the NXP TJA1100 and TJA1101 PHY.
+
 config QSEMI_PHY
 	tristate "Quality Semiconductor PHYs"
 	---help---
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 27d7f9f3b0de..bac339e09042 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_MICROCHIP_PHY)	+= microchip.o
 obj-$(CONFIG_MICROCHIP_T1_PHY)	+= microchip_t1.o
 obj-$(CONFIG_MICROSEMI_PHY)	+= mscc.o
 obj-$(CONFIG_NATIONAL_PHY)	+= national.o
+obj-$(CONFIG_NXP_TJA11XX_PHY)	+= nxp-tja11xx.o
 obj-$(CONFIG_QSEMI_PHY)		+= qsemi.o
 obj-$(CONFIG_REALTEK_PHY)	+= realtek.o
 obj-$(CONFIG_RENESAS_PHY)	+= uPD60620.o
diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
index 0fedd28fdb6e..3b29d381116f 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia_main.c
@@ -27,6 +27,7 @@
 #define MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK	GENMASK(7, 3)
 #define MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR	0
 #define MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI	2
+#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII	3
 #define MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII	6
 #define MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII	10
 
@@ -360,6 +361,9 @@ static int aqr107_read_status(struct phy_device *phydev)
 	case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI:
 		phydev->interface = PHY_INTERFACE_MODE_10GKR;
 		break;
+	case MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII:
+		phydev->interface = PHY_INTERFACE_MODE_USXGMII;
+		break;
 	case MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII:
 		phydev->interface = PHY_INTERFACE_MODE_SGMII;
 		break;
@@ -488,9 +492,13 @@ static int aqr107_config_init(struct phy_device *phydev)
 	if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
 	    phydev->interface != PHY_INTERFACE_MODE_2500BASEX &&
 	    phydev->interface != PHY_INTERFACE_MODE_XGMII &&
+	    phydev->interface != PHY_INTERFACE_MODE_USXGMII &&
 	    phydev->interface != PHY_INTERFACE_MODE_10GKR)
 		return -ENODEV;
 
+	WARN(phydev->interface == PHY_INTERFACE_MODE_XGMII,
+	     "Your devicetree is out of date, please update it. The AQR107 family doesn't support XGMII, maybe you mean USXGMII.\n");
+
 	ret = aqr107_wait_reset_complete(phydev);
 	if (!ret)
 		aqr107_chip_info(phydev);
diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c
index f0c0eefe2202..f6dce6850850 100644
--- a/drivers/net/phy/bcm87xx.c
+++ b/drivers/net/phy/bcm87xx.c
@@ -81,22 +81,18 @@ static int bcm87xx_of_reg_init(struct phy_device *phydev)
 }
 #endif /* CONFIG_OF_MDIO */
 
-static int bcm87xx_config_init(struct phy_device *phydev)
+static int bcm87xx_get_features(struct phy_device *phydev)
 {
-	linkmode_zero(phydev->supported);
 	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
 			 phydev->supported);
-	linkmode_zero(phydev->advertising);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
-			 phydev->advertising);
-	phydev->state = PHY_NOLINK;
-	phydev->autoneg = AUTONEG_DISABLE;
-
-	bcm87xx_of_reg_init(phydev);
-
 	return 0;
 }
 
+static int bcm87xx_config_init(struct phy_device *phydev)
+{
+	return bcm87xx_of_reg_init(phydev);
+}
+
 static int bcm87xx_config_aneg(struct phy_device *phydev)
 {
 	return -EINVAL;
@@ -194,7 +190,7 @@ static struct phy_driver bcm87xx_driver[] = {
 	.phy_id		= PHY_ID_BCM8706,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Broadcom BCM8706",
-	.features	= PHY_10GBIT_FEC_FEATURES,
+	.get_features	= bcm87xx_get_features,
 	.config_init	= bcm87xx_config_init,
 	.config_aneg	= bcm87xx_config_aneg,
 	.read_status	= bcm87xx_read_status,
@@ -206,7 +202,7 @@ static struct phy_driver bcm87xx_driver[] = {
 	.phy_id		= PHY_ID_BCM8727,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Broadcom BCM8727",
-	.features	= PHY_10GBIT_FEC_FEATURES,
+	.get_features	= bcm87xx_get_features,
 	.config_init	= bcm87xx_config_init,
 	.config_aneg	= bcm87xx_config_aneg,
 	.read_status	= bcm87xx_read_status,
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index c71c7d0f53f0..1f1ecee0ee2f 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -34,6 +34,7 @@
 
 #define DP83867_RGMIICTL	0x0032
 #define DP83867_STRAP_STS1	0x006E
+#define DP83867_STRAP_STS2	0x006f
 #define DP83867_RGMIIDCTL	0x0086
 #define DP83867_IO_MUX_CFG	0x0170
 #define DP83867_10M_SGMII_CFG   0x016F
@@ -63,19 +64,30 @@
 /* STRAP_STS1 bits */
 #define DP83867_STRAP_STS1_RESERVED		BIT(11)
 
+/* STRAP_STS2 bits */
+#define DP83867_STRAP_STS2_CLK_SKEW_TX_MASK	GENMASK(6, 4)
+#define DP83867_STRAP_STS2_CLK_SKEW_TX_SHIFT	4
+#define DP83867_STRAP_STS2_CLK_SKEW_RX_MASK	GENMASK(2, 0)
+#define DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT	0
+#define DP83867_STRAP_STS2_CLK_SKEW_NONE	BIT(2)
+
 /* PHY CTRL bits */
 #define DP83867_PHYCR_FIFO_DEPTH_SHIFT		14
-#define DP83867_PHYCR_FIFO_DEPTH_MASK		(3 << 14)
+#define DP83867_PHYCR_FIFO_DEPTH_MAX		0x03
+#define DP83867_PHYCR_FIFO_DEPTH_MASK		GENMASK(15, 14)
 #define DP83867_PHYCR_RESERVED_MASK		BIT(11)
 
 /* RGMIIDCTL bits */
+#define DP83867_RGMII_TX_CLK_DELAY_MAX		0xf
 #define DP83867_RGMII_TX_CLK_DELAY_SHIFT	4
+#define DP83867_RGMII_RX_CLK_DELAY_MAX		0xf
+#define DP83867_RGMII_RX_CLK_DELAY_SHIFT	0
 
 /* IO_MUX_CFG bits */
-#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL	0x1f
-
+#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MASK	0x1f
 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX	0x0
 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN	0x1f
+#define DP83867_IO_MUX_CFG_CLK_O_DISABLE	BIT(6)
 #define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK	(0x1f << 8)
 #define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT	8
 
@@ -89,13 +101,14 @@ enum {
 };
 
 struct dp83867_private {
-	int rx_id_delay;
-	int tx_id_delay;
-	int fifo_depth;
+	u32 rx_id_delay;
+	u32 tx_id_delay;
+	u32 fifo_depth;
 	int io_impedance;
 	int port_mirroring;
 	bool rxctrl_strap_quirk;
-	int clk_output_sel;
+	bool set_clk_output;
+	u32 clk_output_sel;
 };
 
 static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -157,38 +170,83 @@ static int dp83867_of_init(struct phy_device *phydev)
 	if (!of_node)
 		return -ENODEV;
 
-	dp83867->io_impedance = -EINVAL;
-
 	/* Optional configuration */
 	ret = of_property_read_u32(of_node, "ti,clk-output-sel",
 				   &dp83867->clk_output_sel);
-	if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK)
-		/* Keep the default value if ti,clk-output-sel is not set
-		 * or too high
+	/* If not set, keep default */
+	if (!ret) {
+		dp83867->set_clk_output = true;
+		/* Valid values are 0 to DP83867_CLK_O_SEL_REF_CLK or
+		 * DP83867_CLK_O_SEL_OFF.
 		 */
-		dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK;
+		if (dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK &&
+		    dp83867->clk_output_sel != DP83867_CLK_O_SEL_OFF) {
+			phydev_err(phydev, "ti,clk-output-sel value %u out of range\n",
+				   dp83867->clk_output_sel);
+			return -EINVAL;
+		}
+	}
 
 	if (of_property_read_bool(of_node, "ti,max-output-impedance"))
 		dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
 	else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
 		dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN;
+	else
+		dp83867->io_impedance = -1; /* leave at default */
 
 	dp83867->rxctrl_strap_quirk = of_property_read_bool(of_node,
 					"ti,dp83867-rxctrl-strap-quirk");
 
-	ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
-				   &dp83867->rx_id_delay);
-	if (ret &&
-	    (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
-	     phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID))
-		return ret;
+	/* Existing behavior was to use default pin strapping delay in rgmii
+	 * mode, but rgmii should have meant no delay.  Warn existing users.
+	 */
+	if (phydev->interface == PHY_INTERFACE_MODE_RGMII) {
+		const u16 val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_STRAP_STS2);
+		const u16 txskew = (val & DP83867_STRAP_STS2_CLK_SKEW_TX_MASK) >>
+				   DP83867_STRAP_STS2_CLK_SKEW_TX_SHIFT;
+		const u16 rxskew = (val & DP83867_STRAP_STS2_CLK_SKEW_RX_MASK) >>
+				   DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT;
+
+		if (txskew != DP83867_STRAP_STS2_CLK_SKEW_NONE ||
+		    rxskew != DP83867_STRAP_STS2_CLK_SKEW_NONE)
+			phydev_warn(phydev,
+				    "PHY has delays via pin strapping, but phy-mode = 'rgmii'\n"
+				    "Should be 'rgmii-id' to use internal delays\n");
+	}
 
-	ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
-				   &dp83867->tx_id_delay);
-	if (ret &&
-	    (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
-	     phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID))
-		return ret;
+	/* RX delay *must* be specified if internal delay of RX is used. */
+	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+	    phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+		ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
+					   &dp83867->rx_id_delay);
+		if (ret) {
+			phydev_err(phydev, "ti,rx-internal-delay must be specified\n");
+			return ret;
+		}
+		if (dp83867->rx_id_delay > DP83867_RGMII_RX_CLK_DELAY_MAX) {
+			phydev_err(phydev,
+				   "ti,rx-internal-delay value of %u out of range\n",
+				   dp83867->rx_id_delay);
+			return -EINVAL;
+		}
+	}
+
+	/* TX delay *must* be specified if internal delay of RX is used. */
+	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+	    phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+		ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
+					   &dp83867->tx_id_delay);
+		if (ret) {
+			phydev_err(phydev, "ti,tx-internal-delay must be specified\n");
+			return ret;
+		}
+		if (dp83867->tx_id_delay > DP83867_RGMII_TX_CLK_DELAY_MAX) {
+			phydev_err(phydev,
+				   "ti,tx-internal-delay value of %u out of range\n",
+				   dp83867->tx_id_delay);
+			return -EINVAL;
+		}
+	}
 
 	if (of_property_read_bool(of_node, "enet-phy-lane-swap"))
 		dp83867->port_mirroring = DP83867_PORT_MIRROING_EN;
@@ -196,8 +254,20 @@ static int dp83867_of_init(struct phy_device *phydev)
 	if (of_property_read_bool(of_node, "enet-phy-lane-no-swap"))
 		dp83867->port_mirroring = DP83867_PORT_MIRROING_DIS;
 
-	return of_property_read_u32(of_node, "ti,fifo-depth",
+	ret = of_property_read_u32(of_node, "ti,fifo-depth",
 				   &dp83867->fifo_depth);
+	if (ret) {
+		phydev_err(phydev,
+			   "ti,fifo-depth property is required\n");
+		return ret;
+	}
+	if (dp83867->fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) {
+		phydev_err(phydev,
+			   "ti,fifo-depth value %u out of range\n",
+			   dp83867->fifo_depth);
+		return -EINVAL;
+	}
+	return 0;
 }
 #else
 static int dp83867_of_init(struct phy_device *phydev)
@@ -206,25 +276,29 @@ static int dp83867_of_init(struct phy_device *phydev)
 }
 #endif /* CONFIG_OF_MDIO */
 
-static int dp83867_config_init(struct phy_device *phydev)
+static int dp83867_probe(struct phy_device *phydev)
 {
 	struct dp83867_private *dp83867;
+
+	dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867),
+			       GFP_KERNEL);
+	if (!dp83867)
+		return -ENOMEM;
+
+	phydev->priv = dp83867;
+
+	return 0;
+}
+
+static int dp83867_config_init(struct phy_device *phydev)
+{
+	struct dp83867_private *dp83867 = phydev->priv;
 	int ret, val, bs;
 	u16 delay;
 
-	if (!phydev->priv) {
-		dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867),
-				       GFP_KERNEL);
-		if (!dp83867)
-			return -ENOMEM;
-
-		phydev->priv = dp83867;
-		ret = dp83867_of_init(phydev);
-		if (ret)
-			return ret;
-	} else {
-		dp83867 = (struct dp83867_private *)phydev->priv;
-	}
+	ret = dp83867_of_init(phydev);
+	if (ret)
+		return ret;
 
 	/* RX_DV/RX_CTRL strapped in mode 1 or mode 2 workaround */
 	if (dp83867->rxctrl_strap_quirk)
@@ -256,9 +330,16 @@ static int dp83867_config_init(struct phy_device *phydev)
 		if (ret)
 			return ret;
 
-		/* Set up RGMII delays */
+		/* If rgmii mode with no internal delay is selected, we do NOT use
+		 * aligned mode as one might expect.  Instead we use the PHY's default
+		 * based on pin strapping.  And the "mode 0" default is to *use*
+		 * internal delay with a value of 7 (2.00 ns).
+		 *
+		 * Set up RGMII delays
+		 */
 		val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIICTL);
 
+		val &= ~(DP83867_RGMII_TX_CLK_DELAY_EN | DP83867_RGMII_RX_CLK_DELAY_EN);
 		if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
 			val |= (DP83867_RGMII_TX_CLK_DELAY_EN | DP83867_RGMII_RX_CLK_DELAY_EN);
 
@@ -275,14 +356,14 @@ static int dp83867_config_init(struct phy_device *phydev)
 
 		phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIIDCTL,
 			      delay);
-
-		if (dp83867->io_impedance >= 0)
-			phy_modify_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG,
-				       DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL,
-				       dp83867->io_impedance &
-				       DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL);
 	}
 
+	/* If specified, set io impedance */
+	if (dp83867->io_impedance >= 0)
+		phy_modify_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG,
+			       DP83867_IO_MUX_CFG_IO_IMPEDANCE_MASK,
+			       dp83867->io_impedance);
+
 	if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
 		/* For support SPEED_10 in SGMII mode
 		 * DP83867_10M_SGMII_RATE_ADAPT bit
@@ -321,11 +402,20 @@ static int dp83867_config_init(struct phy_device *phydev)
 		dp83867_config_port_mirroring(phydev);
 
 	/* Clock output selection if muxing property is set */
-	if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK)
+	if (dp83867->set_clk_output) {
+		u16 mask = DP83867_IO_MUX_CFG_CLK_O_DISABLE;
+
+		if (dp83867->clk_output_sel == DP83867_CLK_O_SEL_OFF) {
+			val = DP83867_IO_MUX_CFG_CLK_O_DISABLE;
+		} else {
+			mask |= DP83867_IO_MUX_CFG_CLK_O_SEL_MASK;
+			val = dp83867->clk_output_sel <<
+			      DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT;
+		}
+
 		phy_modify_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG,
-			       DP83867_IO_MUX_CFG_CLK_O_SEL_MASK,
-			       dp83867->clk_output_sel <<
-			       DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT);
+			       mask, val);
+	}
 
 	return 0;
 }
@@ -350,6 +440,7 @@ static struct phy_driver dp83867_driver[] = {
 		.name		= "TI DP83867",
 		/* PHY_GBIT_FEATURES */
 
+		.probe          = dp83867_probe,
 		.config_init	= dp83867_config_init,
 		.soft_reset	= dp83867_phy_reset,
 
diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c
index 314486288119..356bd6472f49 100644
--- a/drivers/net/phy/lxt.c
+++ b/drivers/net/phy/lxt.c
@@ -262,6 +262,8 @@ static struct phy_driver lxt97x_driver[] = {
 	/* PHY_BASIC_FEATURES */
 	.ack_interrupt	= lxt971_ack_interrupt,
 	.config_intr	= lxt971_config_intr,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
 }, {
 	.phy_id		= 0x00137a10,
 	.name		= "LXT973-A2",
@@ -271,6 +273,8 @@ static struct phy_driver lxt97x_driver[] = {
 	.probe		= lxt973_probe,
 	.config_aneg	= lxt973_config_aneg,
 	.read_status	= lxt973a2_read_status,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
 }, {
 	.phy_id		= 0x00137a10,
 	.name		= "LXT973",
@@ -279,6 +283,8 @@ static struct phy_driver lxt97x_driver[] = {
 	.flags		= 0,
 	.probe		= lxt973_probe,
 	.config_aneg	= lxt973_config_aneg,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
 } };
 
 module_phy_driver(lxt97x_driver);
diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
new file mode 100644
index 000000000000..b705d0bd798b
--- /dev/null
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+/* NXP TJA1100 BroadRReach PHY driver
+ *
+ * Copyright (C) 2018 Marek Vasut <marex@denx.de>
+ */
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/hwmon.h>
+#include <linux/bitfield.h>
+
+#define PHY_ID_MASK			0xfffffff0
+#define PHY_ID_TJA1100			0x0180dc40
+#define PHY_ID_TJA1101			0x0180dd00
+
+#define MII_ECTRL			17
+#define MII_ECTRL_LINK_CONTROL		BIT(15)
+#define MII_ECTRL_POWER_MODE_MASK	GENMASK(14, 11)
+#define MII_ECTRL_POWER_MODE_NO_CHANGE	(0x0 << 11)
+#define MII_ECTRL_POWER_MODE_NORMAL	(0x3 << 11)
+#define MII_ECTRL_POWER_MODE_STANDBY	(0xc << 11)
+#define MII_ECTRL_CONFIG_EN		BIT(2)
+#define MII_ECTRL_WAKE_REQUEST		BIT(0)
+
+#define MII_CFG1			18
+#define MII_CFG1_AUTO_OP		BIT(14)
+#define MII_CFG1_SLEEP_CONFIRM		BIT(6)
+#define MII_CFG1_LED_MODE_MASK		GENMASK(5, 4)
+#define MII_CFG1_LED_MODE_LINKUP	0
+#define MII_CFG1_LED_ENABLE		BIT(3)
+
+#define MII_CFG2			19
+#define MII_CFG2_SLEEP_REQUEST_TO	GENMASK(1, 0)
+#define MII_CFG2_SLEEP_REQUEST_TO_16MS	0x3
+
+#define MII_INTSRC			21
+#define MII_INTSRC_TEMP_ERR		BIT(1)
+#define MII_INTSRC_UV_ERR		BIT(3)
+
+#define MII_COMMSTAT			23
+#define MII_COMMSTAT_LINK_UP		BIT(15)
+
+#define MII_GENSTAT			24
+#define MII_GENSTAT_PLL_LOCKED		BIT(14)
+
+#define MII_COMMCFG			27
+#define MII_COMMCFG_AUTO_OP		BIT(15)
+
+struct tja11xx_priv {
+	char		*hwmon_name;
+	struct device	*hwmon_dev;
+};
+
+struct tja11xx_phy_stats {
+	const char	*string;
+	u8		reg;
+	u8		off;
+	u16		mask;
+};
+
+static struct tja11xx_phy_stats tja11xx_hw_stats[] = {
+	{ "phy_symbol_error_count", 20, 0, GENMASK(15, 0) },
+	{ "phy_polarity_detect", 25, 6, BIT(6) },
+	{ "phy_open_detect", 25, 7, BIT(7) },
+	{ "phy_short_detect", 25, 8, BIT(8) },
+	{ "phy_rem_rcvr_count", 26, 0, GENMASK(7, 0) },
+	{ "phy_loc_rcvr_count", 26, 8, GENMASK(15, 8) },
+};
+
+static int tja11xx_check(struct phy_device *phydev, u8 reg, u16 mask, u16 set)
+{
+	int i, ret;
+
+	for (i = 0; i < 200; i++) {
+		ret = phy_read(phydev, reg);
+		if (ret < 0)
+			return ret;
+
+		if ((ret & mask) == set)
+			return 0;
+
+		usleep_range(100, 150);
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int phy_modify_check(struct phy_device *phydev, u8 reg,
+			    u16 mask, u16 set)
+{
+	int ret;
+
+	ret = phy_modify(phydev, reg, mask, set);
+	if (ret)
+		return ret;
+
+	return tja11xx_check(phydev, reg, mask, set);
+}
+
+static int tja11xx_enable_reg_write(struct phy_device *phydev)
+{
+	return phy_set_bits(phydev, MII_ECTRL, MII_ECTRL_CONFIG_EN);
+}
+
+static int tja11xx_enable_link_control(struct phy_device *phydev)
+{
+	return phy_set_bits(phydev, MII_ECTRL, MII_ECTRL_LINK_CONTROL);
+}
+
+static int tja11xx_wakeup(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_read(phydev, MII_ECTRL);
+	if (ret < 0)
+		return ret;
+
+	switch (ret & MII_ECTRL_POWER_MODE_MASK) {
+	case MII_ECTRL_POWER_MODE_NO_CHANGE:
+		break;
+	case MII_ECTRL_POWER_MODE_NORMAL:
+		ret = phy_set_bits(phydev, MII_ECTRL, MII_ECTRL_WAKE_REQUEST);
+		if (ret)
+			return ret;
+
+		ret = phy_clear_bits(phydev, MII_ECTRL, MII_ECTRL_WAKE_REQUEST);
+		if (ret)
+			return ret;
+		break;
+	case MII_ECTRL_POWER_MODE_STANDBY:
+		ret = phy_modify_check(phydev, MII_ECTRL,
+				       MII_ECTRL_POWER_MODE_MASK,
+				       MII_ECTRL_POWER_MODE_STANDBY);
+		if (ret)
+			return ret;
+
+		ret = phy_modify(phydev, MII_ECTRL, MII_ECTRL_POWER_MODE_MASK,
+				 MII_ECTRL_POWER_MODE_NORMAL);
+		if (ret)
+			return ret;
+
+		ret = phy_modify_check(phydev, MII_GENSTAT,
+				       MII_GENSTAT_PLL_LOCKED,
+				       MII_GENSTAT_PLL_LOCKED);
+		if (ret)
+			return ret;
+
+		return tja11xx_enable_link_control(phydev);
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int tja11xx_soft_reset(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = tja11xx_enable_reg_write(phydev);
+	if (ret)
+		return ret;
+
+	return genphy_soft_reset(phydev);
+}
+
+static int tja11xx_config_init(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = tja11xx_enable_reg_write(phydev);
+	if (ret)
+		return ret;
+
+	phydev->autoneg = AUTONEG_DISABLE;
+	phydev->speed = SPEED_100;
+	phydev->duplex = DUPLEX_FULL;
+
+	switch (phydev->phy_id & PHY_ID_MASK) {
+	case PHY_ID_TJA1100:
+		ret = phy_modify(phydev, MII_CFG1,
+				 MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_MASK |
+				 MII_CFG1_LED_ENABLE,
+				 MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_LINKUP |
+				 MII_CFG1_LED_ENABLE);
+		if (ret)
+			return ret;
+		break;
+	case PHY_ID_TJA1101:
+		ret = phy_set_bits(phydev, MII_COMMCFG, MII_COMMCFG_AUTO_OP);
+		if (ret)
+			return ret;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = phy_clear_bits(phydev, MII_CFG1, MII_CFG1_SLEEP_CONFIRM);
+	if (ret)
+		return ret;
+
+	ret = phy_modify(phydev, MII_CFG2, MII_CFG2_SLEEP_REQUEST_TO,
+			 MII_CFG2_SLEEP_REQUEST_TO_16MS);
+	if (ret)
+		return ret;
+
+	ret = tja11xx_wakeup(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* ACK interrupts by reading the status register */
+	ret = phy_read(phydev, MII_INTSRC);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int tja11xx_read_status(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = genphy_update_link(phydev);
+	if (ret)
+		return ret;
+
+	if (phydev->link) {
+		ret = phy_read(phydev, MII_COMMSTAT);
+		if (ret < 0)
+			return ret;
+
+		if (!(ret & MII_COMMSTAT_LINK_UP))
+			phydev->link = 0;
+	}
+
+	return 0;
+}
+
+static int tja11xx_get_sset_count(struct phy_device *phydev)
+{
+	return ARRAY_SIZE(tja11xx_hw_stats);
+}
+
+static void tja11xx_get_strings(struct phy_device *phydev, u8 *data)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tja11xx_hw_stats); i++) {
+		strncpy(data + i * ETH_GSTRING_LEN,
+			tja11xx_hw_stats[i].string, ETH_GSTRING_LEN);
+	}
+}
+
+static void tja11xx_get_stats(struct phy_device *phydev,
+			      struct ethtool_stats *stats, u64 *data)
+{
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(tja11xx_hw_stats); i++) {
+		ret = phy_read(phydev, tja11xx_hw_stats[i].reg);
+		if (ret < 0)
+			data[i] = U64_MAX;
+		else {
+			data[i] = ret & tja11xx_hw_stats[i].mask;
+			data[i] >>= tja11xx_hw_stats[i].off;
+		}
+	}
+}
+
+static int tja11xx_hwmon_read(struct device *dev,
+			      enum hwmon_sensor_types type,
+			      u32 attr, int channel, long *value)
+{
+	struct phy_device *phydev = dev_get_drvdata(dev);
+	int ret;
+
+	if (type == hwmon_in && attr == hwmon_in_lcrit_alarm) {
+		ret = phy_read(phydev, MII_INTSRC);
+		if (ret < 0)
+			return ret;
+
+		*value = !!(ret & MII_INTSRC_TEMP_ERR);
+		return 0;
+	}
+
+	if (type == hwmon_temp && attr == hwmon_temp_crit_alarm) {
+		ret = phy_read(phydev, MII_INTSRC);
+		if (ret < 0)
+			return ret;
+
+		*value = !!(ret & MII_INTSRC_UV_ERR);
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static umode_t tja11xx_hwmon_is_visible(const void *data,
+					enum hwmon_sensor_types type,
+					u32 attr, int channel)
+{
+	if (type == hwmon_in && attr == hwmon_in_lcrit_alarm)
+		return 0444;
+
+	if (type == hwmon_temp && attr == hwmon_temp_crit_alarm)
+		return 0444;
+
+	return 0;
+}
+
+static const struct hwmon_channel_info *tja11xx_hwmon_info[] = {
+	HWMON_CHANNEL_INFO(in, HWMON_I_LCRIT_ALARM),
+	HWMON_CHANNEL_INFO(temp, HWMON_T_CRIT_ALARM),
+	NULL
+};
+
+static const struct hwmon_ops tja11xx_hwmon_hwmon_ops = {
+	.is_visible	= tja11xx_hwmon_is_visible,
+	.read		= tja11xx_hwmon_read,
+};
+
+static const struct hwmon_chip_info tja11xx_hwmon_chip_info = {
+	.ops		= &tja11xx_hwmon_hwmon_ops,
+	.info		= tja11xx_hwmon_info,
+};
+
+static int tja11xx_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct tja11xx_priv *priv;
+	int i;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->hwmon_name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
+	if (!priv->hwmon_name)
+		return -ENOMEM;
+
+	for (i = 0; priv->hwmon_name[i]; i++)
+		if (hwmon_is_bad_char(priv->hwmon_name[i]))
+			priv->hwmon_name[i] = '_';
+
+	priv->hwmon_dev =
+		devm_hwmon_device_register_with_info(dev, priv->hwmon_name,
+						     phydev,
+						     &tja11xx_hwmon_chip_info,
+						     NULL);
+
+	return PTR_ERR_OR_ZERO(priv->hwmon_dev);
+}
+
+static struct phy_driver tja11xx_driver[] = {
+	{
+		PHY_ID_MATCH_MODEL(PHY_ID_TJA1100),
+		.name		= "NXP TJA1100",
+		.features       = PHY_BASIC_T1_FEATURES,
+		.probe		= tja11xx_probe,
+		.soft_reset	= tja11xx_soft_reset,
+		.config_init	= tja11xx_config_init,
+		.read_status	= tja11xx_read_status,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+		.set_loopback   = genphy_loopback,
+		/* Statistics */
+		.get_sset_count = tja11xx_get_sset_count,
+		.get_strings	= tja11xx_get_strings,
+		.get_stats	= tja11xx_get_stats,
+	}, {
+		PHY_ID_MATCH_MODEL(PHY_ID_TJA1101),
+		.name		= "NXP TJA1101",
+		.features       = PHY_BASIC_T1_FEATURES,
+		.probe		= tja11xx_probe,
+		.soft_reset	= tja11xx_soft_reset,
+		.config_init	= tja11xx_config_init,
+		.read_status	= tja11xx_read_status,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+		.set_loopback   = genphy_loopback,
+		/* Statistics */
+		.get_sset_count = tja11xx_get_sset_count,
+		.get_strings	= tja11xx_get_strings,
+		.get_stats	= tja11xx_get_stats,
+	}
+};
+
+module_phy_driver(tja11xx_driver);
+
+static struct mdio_device_id __maybe_unused tja11xx_tbl[] = {
+	{ PHY_ID_MATCH_MODEL(PHY_ID_TJA1100) },
+	{ PHY_ID_MATCH_MODEL(PHY_ID_TJA1101) },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(mdio, tja11xx_tbl);
+
+MODULE_AUTHOR("Marek Vasut <marex@denx.de>");
+MODULE_DESCRIPTION("NXP TJA11xx BoardR-Reach PHY driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 3daf0214a242..16667fbac8bf 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -8,7 +8,7 @@
 
 const char *phy_speed_to_str(int speed)
 {
-	BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 67,
+	BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 69,
 		"Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
 		"If a speed or mode has been added please update phy_speed_to_str "
 		"and the PHY settings array.\n");
@@ -131,9 +131,11 @@ static const struct phy_setting settings[] = {
 	PHY_SETTING(   1000, FULL,   1000baseKX_Full		),
 	PHY_SETTING(   1000, FULL,   1000baseT_Full		),
 	PHY_SETTING(   1000, HALF,   1000baseT_Half		),
+	PHY_SETTING(   1000, FULL,   1000baseT1_Full		),
 	PHY_SETTING(   1000, FULL,   1000baseX_Full		),
 	/* 100M */
 	PHY_SETTING(    100, FULL,    100baseT_Full		),
+	PHY_SETTING(    100, FULL,    100baseT1_Full		),
 	PHY_SETTING(    100, HALF,    100baseT_Half		),
 	/* 10M */
 	PHY_SETTING(     10, FULL,     10baseT_Full		),
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e8885429293a..84671d868a80 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -29,6 +29,8 @@
 #include <linux/uaccess.h>
 #include <linux/atomic.h>
 
+#define PHY_STATE_TIME	HZ
+
 #define PHY_STATE_STR(_state)			\
 	case PHY_##_state:			\
 		return __stringify(_state);	\
@@ -478,12 +480,12 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
 }
 EXPORT_SYMBOL(phy_mii_ioctl);
 
-static void phy_queue_state_machine(struct phy_device *phydev,
-				    unsigned int secs)
+void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies)
 {
 	mod_delayed_work(system_power_efficient_wq, &phydev->state_queue,
-			 secs * HZ);
+			 jiffies);
 }
+EXPORT_SYMBOL(phy_queue_state_machine);
 
 static void phy_trigger_machine(struct phy_device *phydev)
 {
@@ -772,8 +774,13 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 	if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev))
 		return IRQ_NONE;
 
-	/* reschedule state queue work to run as soon as possible */
-	phy_trigger_machine(phydev);
+	if (phydev->drv->handle_interrupt) {
+		if (phydev->drv->handle_interrupt(phydev))
+			goto phy_err;
+	} else {
+		/* reschedule state queue work to run as soon as possible */
+		phy_trigger_machine(phydev);
+	}
 
 	if (phy_clear_interrupt(phydev))
 		goto phy_err;
@@ -799,10 +806,10 @@ static int phy_enable_interrupts(struct phy_device *phydev)
 }
 
 /**
- * phy_request_interrupt - request interrupt for a PHY device
+ * phy_request_interrupt - request and enable interrupt for a PHY device
  * @phydev: target phy_device struct
  *
- * Description: Request the interrupt for the given PHY.
+ * Description: Request and enable the interrupt for the given PHY.
  *   If this fails, then we set irq to PHY_POLL.
  *   This should only be called with a valid IRQ number.
  */
@@ -817,11 +824,31 @@ void phy_request_interrupt(struct phy_device *phydev)
 		phydev_warn(phydev, "Error %d requesting IRQ %d, falling back to polling\n",
 			    err, phydev->irq);
 		phydev->irq = PHY_POLL;
+	} else {
+		if (phy_enable_interrupts(phydev)) {
+			phydev_warn(phydev, "Can't enable interrupt, falling back to polling\n");
+			phy_free_interrupt(phydev);
+			phydev->irq = PHY_POLL;
+		}
 	}
 }
 EXPORT_SYMBOL(phy_request_interrupt);
 
 /**
+ * phy_free_interrupt - disable and free interrupt for a PHY device
+ * @phydev: target phy_device struct
+ *
+ * Description: Disable and free the interrupt for the given PHY.
+ *   This should only be called with a valid IRQ number.
+ */
+void phy_free_interrupt(struct phy_device *phydev)
+{
+	phy_disable_interrupts(phydev);
+	free_irq(phydev->irq, phydev);
+}
+EXPORT_SYMBOL(phy_free_interrupt);
+
+/**
  * phy_stop - Bring down the PHY link, and stop checking the status
  * @phydev: target phy_device struct
  */
@@ -835,9 +862,6 @@ void phy_stop(struct phy_device *phydev)
 
 	mutex_lock(&phydev->lock);
 
-	if (phy_interrupt_is_valid(phydev))
-		phy_disable_interrupts(phydev);
-
 	phydev->state = PHY_HALTED;
 
 	mutex_unlock(&phydev->lock);
@@ -864,8 +888,6 @@ EXPORT_SYMBOL(phy_stop);
  */
 void phy_start(struct phy_device *phydev)
 {
-	int err;
-
 	mutex_lock(&phydev->lock);
 
 	if (phydev->state != PHY_READY && phydev->state != PHY_HALTED) {
@@ -877,13 +899,6 @@ void phy_start(struct phy_device *phydev)
 	/* if phy was suspended, bring the physical link up again */
 	__phy_resume(phydev);
 
-	/* make sure interrupts are enabled for the PHY */
-	if (phy_interrupt_is_valid(phydev)) {
-		err = phy_enable_interrupts(phydev);
-		if (err < 0)
-			goto out;
-	}
-
 	phydev->state = PHY_UP;
 
 	phy_start_machine(phydev);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index dcc93a873174..2c879ba01f35 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(phy_10_100_features_array);
 
 const int phy_basic_t1_features_array[2] = {
 	ETHTOOL_LINK_MODE_TP_BIT,
-	ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_100baseT1_Full_BIT,
 };
 EXPORT_SYMBOL_GPL(phy_basic_t1_features_array);
 
@@ -948,6 +948,9 @@ int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
 {
 	int rc;
 
+	if (!dev)
+		return -EINVAL;
+
 	rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
 	if (rc)
 		return rc;
@@ -1013,7 +1016,7 @@ void phy_disconnect(struct phy_device *phydev)
 		phy_stop(phydev);
 
 	if (phy_interrupt_is_valid(phydev))
-		free_irq(phydev->irq, phydev);
+		phy_free_interrupt(phydev);
 
 	phydev->adjust_link = NULL;
 
@@ -1133,6 +1136,44 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
 }
 EXPORT_SYMBOL(phy_attached_print);
 
+static void phy_sysfs_create_links(struct phy_device *phydev)
+{
+	struct net_device *dev = phydev->attached_dev;
+	int err;
+
+	if (!dev)
+		return;
+
+	err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
+				"attached_dev");
+	if (err)
+		return;
+
+	err = sysfs_create_link_nowarn(&dev->dev.kobj,
+				       &phydev->mdio.dev.kobj,
+				       "phydev");
+	if (err) {
+		dev_err(&dev->dev, "could not add device link to %s err %d\n",
+			kobject_name(&phydev->mdio.dev.kobj),
+			err);
+		/* non-fatal - some net drivers can use one netdevice
+		 * with more then one phy
+		 */
+	}
+
+	phydev->sysfs_links = true;
+}
+
+static ssize_t
+phy_standalone_show(struct device *dev, struct device_attribute *attr,
+		    char *buf)
+{
+	struct phy_device *phydev = to_phy_device(dev);
+
+	return sprintf(buf, "%d\n", !phydev->attached_dev);
+}
+static DEVICE_ATTR_RO(phy_standalone);
+
 /**
  * phy_attach_direct - attach a network device to a given PHY device pointer
  * @dev: network device to attach
@@ -1151,9 +1192,9 @@ EXPORT_SYMBOL(phy_attached_print);
 int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		      u32 flags, phy_interface_t interface)
 {
-	struct module *ndev_owner = dev->dev.parent->driver->owner;
 	struct mii_bus *bus = phydev->mdio.bus;
 	struct device *d = &phydev->mdio.dev;
+	struct module *ndev_owner = NULL;
 	bool using_genphy = false;
 	int err;
 
@@ -1162,8 +1203,10 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	 * our own module->refcnt here, otherwise we would not be able to
 	 * unload later on.
 	 */
+	if (dev)
+		ndev_owner = dev->dev.parent->driver->owner;
 	if (ndev_owner != bus->owner && !try_module_get(bus->owner)) {
-		dev_err(&dev->dev, "failed to get the bus module\n");
+		phydev_err(phydev, "failed to get the bus module\n");
 		return -EIO;
 	}
 
@@ -1182,7 +1225,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	}
 
 	if (!try_module_get(d->driver->owner)) {
-		dev_err(&dev->dev, "failed to get the device driver module\n");
+		phydev_err(phydev, "failed to get the device driver module\n");
 		err = -EIO;
 		goto error_put_device;
 	}
@@ -1203,8 +1246,10 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	}
 
 	phydev->phy_link_change = phy_link_change;
-	phydev->attached_dev = dev;
-	dev->phydev = phydev;
+	if (dev) {
+		phydev->attached_dev = dev;
+		dev->phydev = phydev;
+	}
 
 	/* Some Ethernet drivers try to connect to a PHY device before
 	 * calling register_netdevice() -> netdev_register_kobject() and
@@ -1216,22 +1261,13 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	 */
 	phydev->sysfs_links = false;
 
-	err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
-				"attached_dev");
-	if (!err) {
-		err = sysfs_create_link_nowarn(&dev->dev.kobj,
-					       &phydev->mdio.dev.kobj,
-					       "phydev");
-		if (err) {
-			dev_err(&dev->dev, "could not add device link to %s err %d\n",
-				kobject_name(&phydev->mdio.dev.kobj),
-				err);
-			/* non-fatal - some net drivers can use one netdevice
-			 * with more then one phy
-			 */
-		}
+	phy_sysfs_create_links(phydev);
 
-		phydev->sysfs_links = true;
+	if (!phydev->attached_dev) {
+		err = sysfs_create_file(&phydev->mdio.dev.kobj,
+					&dev_attr_phy_standalone.attr);
+		if (err)
+			phydev_err(phydev, "error creating 'phy_standalone' sysfs entry\n");
 	}
 
 	phydev->dev_flags = flags;
@@ -1243,7 +1279,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	/* Initial carrier state is off as the phy is about to be
 	 * (re)initialized.
 	 */
-	netif_carrier_off(phydev->attached_dev);
+	if (dev)
+		netif_carrier_off(phydev->attached_dev);
 
 	/* Do initial configuration here, now that
 	 * we have certain key parameters
@@ -1290,6 +1327,9 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
 	struct device *d;
 	int rc;
 
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
 	/* Search the list of PHY devices on the mdio bus for the
 	 * PHY with the requested name
 	 */
@@ -1349,16 +1389,24 @@ EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g);
 void phy_detach(struct phy_device *phydev)
 {
 	struct net_device *dev = phydev->attached_dev;
-	struct module *ndev_owner = dev->dev.parent->driver->owner;
+	struct module *ndev_owner = NULL;
 	struct mii_bus *bus;
 
 	if (phydev->sysfs_links) {
-		sysfs_remove_link(&dev->dev.kobj, "phydev");
+		if (dev)
+			sysfs_remove_link(&dev->dev.kobj, "phydev");
 		sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev");
 	}
+
+	if (!phydev->attached_dev)
+		sysfs_remove_file(&phydev->mdio.dev.kobj,
+				  &dev_attr_phy_standalone.attr);
+
 	phy_suspend(phydev);
-	phydev->attached_dev->phydev = NULL;
-	phydev->attached_dev = NULL;
+	if (dev) {
+		phydev->attached_dev->phydev = NULL;
+		phydev->attached_dev = NULL;
+	}
 	phydev->phylink = NULL;
 
 	phy_led_triggers_unregister(phydev);
@@ -1381,6 +1429,8 @@ void phy_detach(struct phy_device *phydev)
 	bus = phydev->mdio.bus;
 
 	put_device(&phydev->mdio.dev);
+	if (dev)
+		ndev_owner = dev->dev.parent->driver->owner;
 	if (ndev_owner != bus->owner)
 		module_put(bus->owner);
 
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 9044b95d2afe..b7300ea2663c 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -41,6 +41,9 @@ struct phylink {
 	/* private: */
 	struct net_device *netdev;
 	const struct phylink_mac_ops *ops;
+	struct phylink_config *config;
+	struct device *dev;
+	unsigned int old_link_state:1;
 
 	unsigned long phylink_disable_state; /* bitmask of disables */
 	struct phy_device *phydev;
@@ -69,6 +72,23 @@ struct phylink {
 	struct sfp_bus *sfp_bus;
 };
 
+#define phylink_printk(level, pl, fmt, ...) \
+	do { \
+		if ((pl)->config->type == PHYLINK_NETDEV) \
+			netdev_printk(level, (pl)->netdev, fmt, ##__VA_ARGS__); \
+		else if ((pl)->config->type == PHYLINK_DEV) \
+			dev_printk(level, (pl)->dev, fmt, ##__VA_ARGS__); \
+	} while (0)
+
+#define phylink_err(pl, fmt, ...) \
+	phylink_printk(KERN_ERR, pl, fmt, ##__VA_ARGS__)
+#define phylink_warn(pl, fmt, ...) \
+	phylink_printk(KERN_WARNING, pl, fmt, ##__VA_ARGS__)
+#define phylink_info(pl, fmt, ...) \
+	phylink_printk(KERN_INFO, pl, fmt, ##__VA_ARGS__)
+#define phylink_dbg(pl, fmt, ...) \
+	phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__)
+
 /**
  * phylink_set_port_modes() - set the port type modes in the ethtool mask
  * @mask: ethtool link mode mask
@@ -115,7 +135,7 @@ static const char *phylink_an_mode_str(unsigned int mode)
 static int phylink_validate(struct phylink *pl, unsigned long *supported,
 			    struct phylink_link_state *state)
 {
-	pl->ops->validate(pl->netdev, supported, state);
+	pl->ops->validate(pl->config, supported, state);
 
 	return phylink_is_empty_linkmode(supported) ? -EINVAL : 0;
 }
@@ -165,7 +185,7 @@ static int phylink_parse_fixedlink(struct phylink *pl,
 		ret = fwnode_property_read_u32_array(fwnode, "fixed-link",
 						     NULL, 0);
 		if (ret != ARRAY_SIZE(prop)) {
-			netdev_err(pl->netdev, "broken fixed-link?\n");
+			phylink_err(pl, "broken fixed-link?\n");
 			return -EINVAL;
 		}
 
@@ -184,8 +204,8 @@ static int phylink_parse_fixedlink(struct phylink *pl,
 
 	if (pl->link_config.speed > SPEED_1000 &&
 	    pl->link_config.duplex != DUPLEX_FULL)
-		netdev_warn(pl->netdev, "fixed link specifies half duplex for %dMbps link?\n",
-			    pl->link_config.speed);
+		phylink_warn(pl, "fixed link specifies half duplex for %dMbps link?\n",
+			     pl->link_config.speed);
 
 	bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
 	linkmode_copy(pl->link_config.advertising, pl->supported);
@@ -198,9 +218,9 @@ static int phylink_parse_fixedlink(struct phylink *pl,
 	if (s) {
 		__set_bit(s->bit, pl->supported);
 	} else {
-		netdev_warn(pl->netdev, "fixed link %s duplex %dMbps not recognised\n",
-			    pl->link_config.duplex == DUPLEX_FULL ? "full" : "half",
-			    pl->link_config.speed);
+		phylink_warn(pl, "fixed link %s duplex %dMbps not recognised\n",
+			     pl->link_config.duplex == DUPLEX_FULL ? "full" : "half",
+			     pl->link_config.speed);
 	}
 
 	linkmode_and(pl->link_config.advertising, pl->link_config.advertising,
@@ -225,8 +245,8 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
 	if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 &&
 	    strcmp(managed, "in-band-status") == 0) {
 		if (pl->link_an_mode == MLO_AN_FIXED) {
-			netdev_err(pl->netdev,
-				   "can't use both fixed-link and in-band-status\n");
+			phylink_err(pl,
+				    "can't use both fixed-link and in-band-status\n");
 			return -EINVAL;
 		}
 
@@ -273,17 +293,17 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
 			break;
 
 		default:
-			netdev_err(pl->netdev,
-				   "incorrect link mode %s for in-band status\n",
-				   phy_modes(pl->link_config.interface));
+			phylink_err(pl,
+				    "incorrect link mode %s for in-band status\n",
+				    phy_modes(pl->link_config.interface));
 			return -EINVAL;
 		}
 
 		linkmode_copy(pl->link_config.advertising, pl->supported);
 
 		if (phylink_validate(pl, pl->supported, &pl->link_config)) {
-			netdev_err(pl->netdev,
-				   "failed to validate link configuration for in-band status\n");
+			phylink_err(pl,
+				    "failed to validate link configuration for in-band status\n");
 			return -EINVAL;
 		}
 	}
@@ -294,16 +314,16 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
 static void phylink_mac_config(struct phylink *pl,
 			       const struct phylink_link_state *state)
 {
-	netdev_dbg(pl->netdev,
-		   "%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n",
-		   __func__, phylink_an_mode_str(pl->link_an_mode),
-		   phy_modes(state->interface),
-		   phy_speed_to_str(state->speed),
-		   phy_duplex_to_str(state->duplex),
-		   __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising,
-		   state->pause, state->link, state->an_enabled);
-
-	pl->ops->mac_config(pl->netdev, pl->link_an_mode, state);
+	phylink_dbg(pl,
+		    "%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n",
+		    __func__, phylink_an_mode_str(pl->link_an_mode),
+		    phy_modes(state->interface),
+		    phy_speed_to_str(state->speed),
+		    phy_duplex_to_str(state->duplex),
+		    __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising,
+		    state->pause, state->link, state->an_enabled);
+
+	pl->ops->mac_config(pl->config, pl->link_an_mode, state);
 }
 
 static void phylink_mac_config_up(struct phylink *pl,
@@ -317,12 +337,11 @@ static void phylink_mac_an_restart(struct phylink *pl)
 {
 	if (pl->link_config.an_enabled &&
 	    phy_interface_mode_is_8023z(pl->link_config.interface))
-		pl->ops->mac_an_restart(pl->netdev);
+		pl->ops->mac_an_restart(pl->config);
 }
 
 static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *state)
 {
-	struct net_device *ndev = pl->netdev;
 
 	linkmode_copy(state->advertising, pl->link_config.advertising);
 	linkmode_zero(state->lp_advertising);
@@ -334,7 +353,7 @@ static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *
 	state->an_complete = 0;
 	state->link = 1;
 
-	return pl->ops->mac_link_state(ndev, state);
+	return pl->ops->mac_link_state(pl->config, state);
 }
 
 /* The fixed state is... fixed except for the link state,
@@ -399,11 +418,43 @@ static const char *phylink_pause_to_str(int pause)
 	}
 }
 
+static void phylink_mac_link_up(struct phylink *pl,
+				struct phylink_link_state link_state)
+{
+	struct net_device *ndev = pl->netdev;
+
+	pl->cur_interface = link_state.interface;
+	pl->ops->mac_link_up(pl->config, pl->link_an_mode,
+			     pl->phy_state.interface,
+			     pl->phydev);
+
+	if (ndev)
+		netif_carrier_on(ndev);
+
+	phylink_info(pl,
+		     "Link is Up - %s/%s - flow control %s\n",
+		     phy_speed_to_str(link_state.speed),
+		     phy_duplex_to_str(link_state.duplex),
+		     phylink_pause_to_str(link_state.pause));
+}
+
+static void phylink_mac_link_down(struct phylink *pl)
+{
+	struct net_device *ndev = pl->netdev;
+
+	if (ndev)
+		netif_carrier_off(ndev);
+	pl->ops->mac_link_down(pl->config, pl->link_an_mode,
+			       pl->cur_interface);
+	phylink_info(pl, "Link is Down\n");
+}
+
 static void phylink_resolve(struct work_struct *w)
 {
 	struct phylink *pl = container_of(w, struct phylink, resolve);
 	struct phylink_link_state link_state;
 	struct net_device *ndev = pl->netdev;
+	int link_changed;
 
 	mutex_lock(&pl->state_mutex);
 	if (pl->phylink_disable_state) {
@@ -446,25 +497,17 @@ static void phylink_resolve(struct work_struct *w)
 		}
 	}
 
-	if (link_state.link != netif_carrier_ok(ndev)) {
-		if (!link_state.link) {
-			netif_carrier_off(ndev);
-			pl->ops->mac_link_down(ndev, pl->link_an_mode,
-					       pl->cur_interface);
-			netdev_info(ndev, "Link is Down\n");
-		} else {
-			pl->cur_interface = link_state.interface;
-			pl->ops->mac_link_up(ndev, pl->link_an_mode,
-					     pl->cur_interface, pl->phydev);
-
-			netif_carrier_on(ndev);
-
-			netdev_info(ndev,
-				    "Link is Up - %s/%s - flow control %s\n",
-				    phy_speed_to_str(link_state.speed),
-				    phy_duplex_to_str(link_state.duplex),
-				    phylink_pause_to_str(link_state.pause));
-		}
+	if (pl->netdev)
+		link_changed = (link_state.link != netif_carrier_ok(ndev));
+	else
+		link_changed = (link_state.link != pl->old_link_state);
+
+	if (link_changed) {
+		pl->old_link_state = link_state.link;
+		if (!link_state.link)
+			phylink_mac_link_down(pl);
+		else
+			phylink_mac_link_up(pl, link_state);
 	}
 	if (!link_state.link && pl->mac_link_dropped) {
 		pl->mac_link_dropped = false;
@@ -516,8 +559,8 @@ static int phylink_register_sfp(struct phylink *pl,
 		if (ret == -ENOENT)
 			return 0;
 
-		netdev_err(pl->netdev, "unable to parse \"sfp\" node: %d\n",
-			   ret);
+		phylink_err(pl, "unable to parse \"sfp\" node: %d\n",
+			    ret);
 		return ret;
 	}
 
@@ -543,7 +586,7 @@ static int phylink_register_sfp(struct phylink *pl,
  * Returns a pointer to a &struct phylink, or an error-pointer value. Users
  * must use IS_ERR() to check for errors from this function.
  */
-struct phylink *phylink_create(struct net_device *ndev,
+struct phylink *phylink_create(struct phylink_config *config,
 			       struct fwnode_handle *fwnode,
 			       phy_interface_t iface,
 			       const struct phylink_mac_ops *ops)
@@ -557,7 +600,17 @@ struct phylink *phylink_create(struct net_device *ndev,
 
 	mutex_init(&pl->state_mutex);
 	INIT_WORK(&pl->resolve, phylink_resolve);
-	pl->netdev = ndev;
+
+	pl->config = config;
+	if (config->type == PHYLINK_NETDEV) {
+		pl->netdev = to_net_dev(config->dev);
+	} else if (config->type == PHYLINK_DEV) {
+		pl->dev = config->dev;
+	} else {
+		kfree(pl);
+		return ERR_PTR(-EINVAL);
+	}
+
 	pl->phy_state.interface = iface;
 	pl->link_interface = iface;
 	if (iface == PHY_INTERFACE_MODE_MOCA)
@@ -639,10 +692,10 @@ static void phylink_phy_change(struct phy_device *phydev, bool up,
 
 	phylink_run_resolve(pl);
 
-	netdev_dbg(pl->netdev, "phy link %s %s/%s/%s\n", up ? "up" : "down",
-		   phy_modes(phydev->interface),
-		   phy_speed_to_str(phydev->speed),
-		   phy_duplex_to_str(phydev->duplex));
+	phylink_dbg(pl, "phy link %s %s/%s/%s\n", up ? "up" : "down",
+		    phy_modes(phydev->interface),
+		    phy_speed_to_str(phydev->speed),
+		    phy_duplex_to_str(phydev->duplex));
 }
 
 static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
@@ -675,9 +728,9 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 	phy->phylink = pl;
 	phy->phy_link_change = phylink_phy_change;
 
-	netdev_info(pl->netdev,
-		    "PHY [%s] driver [%s]\n", dev_name(&phy->mdio.dev),
-		    phy->drv->name);
+	phylink_info(pl,
+		     "PHY [%s] driver [%s]\n", dev_name(&phy->mdio.dev),
+		     phy->drv->name);
 
 	mutex_lock(&phy->lock);
 	mutex_lock(&pl->state_mutex);
@@ -690,10 +743,10 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 	mutex_unlock(&pl->state_mutex);
 	mutex_unlock(&phy->lock);
 
-	netdev_dbg(pl->netdev,
-		   "phy: setting supported %*pb advertising %*pb\n",
-		   __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported,
-		   __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising);
+	phylink_dbg(pl,
+		    "phy: setting supported %*pb advertising %*pb\n",
+		    __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported,
+		    __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising);
 
 	if (phy_interrupt_is_valid(phy))
 		phy_request_interrupt(phy);
@@ -871,7 +924,7 @@ void phylink_mac_change(struct phylink *pl, bool up)
 	if (!up)
 		pl->mac_link_dropped = true;
 	phylink_run_resolve(pl);
-	netdev_dbg(pl->netdev, "mac link %s\n", up ? "up" : "down");
+	phylink_dbg(pl, "mac link %s\n", up ? "up" : "down");
 }
 EXPORT_SYMBOL_GPL(phylink_mac_change);
 
@@ -887,12 +940,13 @@ void phylink_start(struct phylink *pl)
 {
 	ASSERT_RTNL();
 
-	netdev_info(pl->netdev, "configuring for %s/%s link mode\n",
-		    phylink_an_mode_str(pl->link_an_mode),
-		    phy_modes(pl->link_config.interface));
+	phylink_info(pl, "configuring for %s/%s link mode\n",
+		     phylink_an_mode_str(pl->link_an_mode),
+		     phy_modes(pl->link_config.interface));
 
 	/* Always set the carrier off */
-	netif_carrier_off(pl->netdev);
+	if (pl->netdev)
+		netif_carrier_off(pl->netdev);
 
 	/* Apply the link configuration to the MAC when starting. This allows
 	 * a fixed-link to start with the correct parameters, and also
@@ -1237,7 +1291,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
 		switch (pl->link_an_mode) {
 		case MLO_AN_PHY:
 			/* Silently mark the carrier down, and then trigger a resolve */
-			netif_carrier_off(pl->netdev);
+			if (pl->netdev)
+				netif_carrier_off(pl->netdev);
 			phylink_run_resolve(pl);
 			break;
 
@@ -1598,33 +1653,33 @@ static int phylink_sfp_module_insert(void *upstream,
 	/* Ignore errors if we're expecting a PHY to attach later */
 	ret = phylink_validate(pl, support, &config);
 	if (ret) {
-		netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
-			   __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+		phylink_err(pl, "validation with support %*pb failed: %d\n",
+			    __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
 		return ret;
 	}
 
 	iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
 	if (iface == PHY_INTERFACE_MODE_NA) {
-		netdev_err(pl->netdev,
-			   "selection of interface failed, advertisement %*pb\n",
-			   __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+		phylink_err(pl,
+			    "selection of interface failed, advertisement %*pb\n",
+			    __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
 		return -EINVAL;
 	}
 
 	config.interface = iface;
 	ret = phylink_validate(pl, support, &config);
 	if (ret) {
-		netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
-			   phylink_an_mode_str(MLO_AN_INBAND),
-			   phy_modes(config.interface),
-			   __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+		phylink_err(pl, "validation of %s/%s with support %*pb failed: %d\n",
+			    phylink_an_mode_str(MLO_AN_INBAND),
+			    phy_modes(config.interface),
+			    __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
 		return ret;
 	}
 
-	netdev_dbg(pl->netdev, "requesting link mode %s/%s with support %*pb\n",
-		   phylink_an_mode_str(MLO_AN_INBAND),
-		   phy_modes(config.interface),
-		   __ETHTOOL_LINK_MODE_MASK_NBITS, support);
+	phylink_dbg(pl, "requesting link mode %s/%s with support %*pb\n",
+		    phylink_an_mode_str(MLO_AN_INBAND),
+		    phy_modes(config.interface),
+		    __ETHTOOL_LINK_MODE_MASK_NBITS, support);
 
 	if (phy_interface_mode_is_8023z(iface) && pl->phydev)
 		return -EINVAL;
@@ -1643,9 +1698,9 @@ static int phylink_sfp_module_insert(void *upstream,
 
 		changed = true;
 
-		netdev_info(pl->netdev, "switched to %s/%s link mode\n",
-			    phylink_an_mode_str(MLO_AN_INBAND),
-			    phy_modes(config.interface));
+		phylink_info(pl, "switched to %s/%s link mode\n",
+			     phylink_an_mode_str(MLO_AN_INBAND),
+			     phy_modes(config.interface));
 	}
 
 	pl->link_port = port;
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index d4635c2178d1..554acc869c25 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/acpi.h>
 #include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
@@ -1782,6 +1783,7 @@ static void sfp_cleanup(void *data)
 static int sfp_probe(struct platform_device *pdev)
 {
 	const struct sff_data *sff;
+	struct i2c_adapter *i2c;
 	struct sfp *sfp;
 	bool poll = false;
 	int irq, err, i;
@@ -1801,7 +1803,6 @@ static int sfp_probe(struct platform_device *pdev)
 	if (pdev->dev.of_node) {
 		struct device_node *node = pdev->dev.of_node;
 		const struct of_device_id *id;
-		struct i2c_adapter *i2c;
 		struct device_node *np;
 
 		id = of_match_node(sfp_of_match, node);
@@ -1818,14 +1819,32 @@ static int sfp_probe(struct platform_device *pdev)
 
 		i2c = of_find_i2c_adapter_by_node(np);
 		of_node_put(np);
-		if (!i2c)
-			return -EPROBE_DEFER;
-
-		err = sfp_i2c_configure(sfp, i2c);
-		if (err < 0) {
-			i2c_put_adapter(i2c);
-			return err;
+	} else if (has_acpi_companion(&pdev->dev)) {
+		struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+		struct fwnode_handle *fw = acpi_fwnode_handle(adev);
+		struct fwnode_reference_args args;
+		struct acpi_handle *acpi_handle;
+		int ret;
+
+		ret = acpi_node_get_property_reference(fw, "i2c-bus", 0, &args);
+		if (ACPI_FAILURE(ret) || !is_acpi_device_node(args.fwnode)) {
+			dev_err(&pdev->dev, "missing 'i2c-bus' property\n");
+			return -ENODEV;
 		}
+
+		acpi_handle = ACPI_HANDLE_FWNODE(args.fwnode);
+		i2c = i2c_acpi_find_adapter_by_handle(acpi_handle);
+	} else {
+		return -EINVAL;
+	}
+
+	if (!i2c)
+		return -EPROBE_DEFER;
+
+	err = sfp_i2c_configure(sfp, i2c);
+	if (err < 0) {
+		i2c_put_adapter(i2c);
+		return err;
 	}
 
 	for (i = 0; i < GPIO_MAX; i++)
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 5b07235497c6..47e40859960b 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2218,16 +2218,21 @@ static void qedf_simd_int_handler(void *cookie)
 static void qedf_sync_free_irqs(struct qedf_ctx *qedf)
 {
 	int i;
+	u16 vector_idx = 0;
+	u32 vector;
 
 	if (qedf->int_info.msix_cnt) {
 		for (i = 0; i < qedf->int_info.used_cnt; i++) {
-			synchronize_irq(qedf->int_info.msix[i].vector);
-			irq_set_affinity_hint(qedf->int_info.msix[i].vector,
-			    NULL);
-			irq_set_affinity_notifier(qedf->int_info.msix[i].vector,
-			    NULL);
-			free_irq(qedf->int_info.msix[i].vector,
-			    &qedf->fp_array[i]);
+			vector_idx = i * qedf->dev_info.common.num_hwfns +
+				qed_ops->common->get_affin_hwfn_idx(qedf->cdev);
+			QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+				  "Freeing IRQ #%d vector_idx=%d.\n",
+				  i, vector_idx);
+			vector = qedf->int_info.msix[vector_idx].vector;
+			synchronize_irq(vector);
+			irq_set_affinity_hint(vector, NULL);
+			irq_set_affinity_notifier(vector, NULL);
+			free_irq(vector, &qedf->fp_array[i]);
 		}
 	} else
 		qed_ops->common->simd_handler_clean(qedf->cdev,
@@ -2240,11 +2245,19 @@ static void qedf_sync_free_irqs(struct qedf_ctx *qedf)
 static int qedf_request_msix_irq(struct qedf_ctx *qedf)
 {
 	int i, rc, cpu;
+	u16 vector_idx = 0;
+	u32 vector;
 
 	cpu = cpumask_first(cpu_online_mask);
 	for (i = 0; i < qedf->num_queues; i++) {
-		rc = request_irq(qedf->int_info.msix[i].vector,
-		    qedf_msix_handler, 0, "qedf", &qedf->fp_array[i]);
+		vector_idx = i * qedf->dev_info.common.num_hwfns +
+			qed_ops->common->get_affin_hwfn_idx(qedf->cdev);
+		QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+			  "Requesting IRQ #%d vector_idx=%d.\n",
+			  i, vector_idx);
+		vector = qedf->int_info.msix[vector_idx].vector;
+		rc = request_irq(vector, qedf_msix_handler, 0, "qedf",
+				 &qedf->fp_array[i]);
 
 		if (rc) {
 			QEDF_WARN(&(qedf->dbg_ctx), "request_irq failed.\n");
@@ -2253,8 +2266,7 @@ static int qedf_request_msix_irq(struct qedf_ctx *qedf)
 		}
 
 		qedf->int_info.used_cnt++;
-		rc = irq_set_affinity_hint(qedf->int_info.msix[i].vector,
-		    get_cpu_mask(cpu));
+		rc = irq_set_affinity_hint(vector, get_cpu_mask(cpu));
 		cpu = cpumask_next(cpu, cpu_online_mask);
 	}
 
@@ -3211,6 +3223,11 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 		goto err1;
 	}
 
+	QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+		  "dev_info: num_hwfns=%d affin_hwfn_idx=%d.\n",
+		  qedf->dev_info.common.num_hwfns,
+		  qed_ops->common->get_affin_hwfn_idx(qedf->cdev));
+
 	/* queue allocation code should come here
 	 * order should be
 	 * 	slowpath_start
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index e5db9a9954dc..1a62ab0867ef 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1313,13 +1313,20 @@ static void qedi_simd_int_handler(void *cookie)
 static void qedi_sync_free_irqs(struct qedi_ctx *qedi)
 {
 	int i;
+	u16 idx;
 
 	if (qedi->int_info.msix_cnt) {
 		for (i = 0; i < qedi->int_info.used_cnt; i++) {
-			synchronize_irq(qedi->int_info.msix[i].vector);
-			irq_set_affinity_hint(qedi->int_info.msix[i].vector,
+			idx = i * qedi->dev_info.common.num_hwfns +
+			qedi_ops->common->get_affin_hwfn_idx(qedi->cdev);
+
+			QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+				  "Freeing IRQ #%d vector_idx=%d.\n", i, idx);
+
+			synchronize_irq(qedi->int_info.msix[idx].vector);
+			irq_set_affinity_hint(qedi->int_info.msix[idx].vector,
 					      NULL);
-			free_irq(qedi->int_info.msix[i].vector,
+			free_irq(qedi->int_info.msix[idx].vector,
 				 &qedi->fp_array[i]);
 		}
 	} else {
@@ -1334,20 +1341,28 @@ static void qedi_sync_free_irqs(struct qedi_ctx *qedi)
 static int qedi_request_msix_irq(struct qedi_ctx *qedi)
 {
 	int i, rc, cpu;
+	u16 idx;
 
 	cpu = cpumask_first(cpu_online_mask);
-	for (i = 0; i < qedi->int_info.msix_cnt; i++) {
-		rc = request_irq(qedi->int_info.msix[i].vector,
+	for (i = 0; i < MIN_NUM_CPUS_MSIX(qedi); i++) {
+		idx = i * qedi->dev_info.common.num_hwfns +
+			  qedi_ops->common->get_affin_hwfn_idx(qedi->cdev);
+
+		QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+			  "dev_info: num_hwfns=%d affin_hwfn_idx=%d.\n",
+			  qedi->dev_info.common.num_hwfns,
+			  qedi_ops->common->get_affin_hwfn_idx(qedi->cdev));
+
+		rc = request_irq(qedi->int_info.msix[idx].vector,
 				 qedi_msix_handler, 0, "qedi",
 				 &qedi->fp_array[i]);
-
 		if (rc) {
 			QEDI_WARN(&qedi->dbg_ctx, "request_irq failed.\n");
 			qedi_sync_free_irqs(qedi);
 			return rc;
 		}
 		qedi->int_info.used_cnt++;
-		rc = irq_set_affinity_hint(qedi->int_info.msix[i].vector,
+		rc = irq_set_affinity_hint(qedi->int_info.msix[idx].vector,
 					   get_cpu_mask(cpu));
 		cpu = cpumask_next(cpu, cpu_online_mask);
 	}
@@ -2415,6 +2430,11 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
 	if (rc)
 		goto free_host;
 
+	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+		  "dev_info: num_hwfns=%d affin_hwfn_idx=%d.\n",
+		  qedi->dev_info.common.num_hwfns,
+		  qedi_ops->common->get_affin_hwfn_idx(qedi->cdev));
+
 	if (mode != QEDI_MODE_RECOVERY) {
 		rc = qedi_set_iscsi_pf_param(qedi);
 		if (rc) {
diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h
index 7b1656427cbe..192b79439eb7 100644
--- a/include/dt-bindings/net/ti-dp83867.h
+++ b/include/dt-bindings/net/ti-dp83867.h
@@ -56,4 +56,6 @@
 #define DP83867_CLK_O_SEL_CHN_C_TCLK		0xA
 #define DP83867_CLK_O_SEL_CHN_D_TCLK		0xB
 #define DP83867_CLK_O_SEL_REF_CLK		0xC
+/* Special flag to indicate clock should be off */
+#define DP83867_CLK_O_SEL_OFF			0xFFFFFFFF
 #endif
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 1308126fc384..e982b8913b73 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -14,6 +14,7 @@
 #ifndef _LINUX_I2C_H
 #define _LINUX_I2C_H
 
+#include <linux/acpi.h>		/* for acpi_handle */
 #include <linux/mod_devicetable.h>
 #include <linux/device.h>	/* for struct device */
 #include <linux/sched.h>	/* for completion */
@@ -981,6 +982,7 @@ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
 u32 i2c_acpi_find_bus_speed(struct device *dev);
 struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
 				       struct i2c_board_info *info);
+struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle);
 #else
 static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
 					     struct acpi_resource_i2c_serialbus **i2c)
@@ -996,6 +998,10 @@ static inline struct i2c_client *i2c_acpi_new_device(struct device *dev,
 {
 	return NULL;
 }
+static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
+{
+	return NULL;
+}
 #endif /* CONFIG_ACPI */
 
 #endif /* _LINUX_I2C_H */
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
new file mode 100644
index 000000000000..b4f5403383fc
--- /dev/null
+++ b/include/linux/if_rmnet.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (c) 2013-2019, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _LINUX_IF_RMNET_H_
+#define _LINUX_IF_RMNET_H_
+
+struct rmnet_map_header {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	u8  pad_len:6;
+	u8  reserved_bit:1;
+	u8  cd_bit:1;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	u8  cd_bit:1;
+	u8  reserved_bit:1;
+	u8  pad_len:6;
+#else
+#error	"Please fix <asm/byteorder.h>"
+#endif
+	u8  mux_id;
+	__be16 pkt_len;
+}  __aligned(1);
+
+struct rmnet_map_dl_csum_trailer {
+	u8  reserved1;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	u8  valid:1;
+	u8  reserved2:7;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	u8  reserved2:7;
+	u8  valid:1;
+#else
+#error	"Please fix <asm/byteorder.h>"
+#endif
+	u16 csum_start_offset;
+	u16 csum_length;
+	__be16 csum_value;
+} __aligned(1);
+
+struct rmnet_map_ul_csum_header {
+	__be16 csum_start_offset;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	u16 csum_insert_offset:14;
+	u16 udp_ip4_ind:1;
+	u16 csum_enabled:1;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	u16 csum_enabled:1;
+	u16 udp_ip4_ind:1;
+	u16 csum_insert_offset:14;
+#else
+#error	"Please fix <asm/byteorder.h>"
+#endif
+} __aligned(1);
+
+#endif /* !(_LINUX_IF_RMNET_H_) */
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 9c94b2ea789c..6649cb78de4a 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -65,8 +65,8 @@ struct ip_mc_socklist {
 
 struct ip_sf_list {
 	struct ip_sf_list	*sf_next;
-	__be32			sf_inaddr;
 	unsigned long		sf_count[2];	/* include/exclude counts */
+	__be32			sf_inaddr;
 	unsigned char		sf_gsresp;	/* include in g & s response? */
 	unsigned char		sf_oldin;	/* change state */
 	unsigned char		sf_crcount;	/* retrans. left to send */
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 12113e502656..3a3dc4b1f0e7 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -19,6 +19,7 @@ struct ip6_rt_info {
 };
 
 struct nf_queue_entry;
+struct nf_ct_bridge_frag_data;
 
 /*
  * Hook functions for ipv6 to allow xt_* modules to be built-in even
@@ -39,6 +40,15 @@ struct nf_ipv6_ops {
 	int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
 			int (*output)(struct net *, struct sock *, struct sk_buff *));
 	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
+#if IS_MODULE(CONFIG_IPV6)
+	int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user);
+	int (*br_fragment)(struct net *net, struct sock *sk,
+			   struct sk_buff *skb,
+			   struct nf_ct_bridge_frag_data *data,
+			   int (*output)(struct net *, struct sock *sk,
+					 const struct nf_ct_bridge_frag_data *data,
+					 struct sk_buff *));
+#endif
 };
 
 #ifdef CONFIG_NETFILTER
@@ -86,6 +96,48 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst,
 #endif
 }
 
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb,
+				    u32 user)
+{
+#if IS_MODULE(CONFIG_IPV6)
+	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+	if (!v6_ops)
+		return 1;
+
+	return v6_ops->br_defrag(net, skb, user);
+#else
+	return nf_ct_frag6_gather(net, skb, user);
+#endif
+}
+
+int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		    struct nf_ct_bridge_frag_data *data,
+		    int (*output)(struct net *, struct sock *sk,
+				  const struct nf_ct_bridge_frag_data *data,
+				  struct sk_buff *));
+
+static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
+				     struct sk_buff *skb,
+				     struct nf_ct_bridge_frag_data *data,
+				     int (*output)(struct net *, struct sock *sk,
+						   const struct nf_ct_bridge_frag_data *data,
+						   struct sk_buff *))
+{
+#if IS_MODULE(CONFIG_IPV6)
+	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+	if (!v6_ops)
+		return 1;
+
+	return v6_ops->br_fragment(net, sk, skb, data, output);
+#else
+	return br_ip6_fragment(net, sk, skb, data, output);
+#endif
+}
+
 int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
 
 static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 073fb151b5a9..dc4b51060ebc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -103,6 +103,7 @@ typedef enum {
 	PHY_INTERFACE_MODE_XAUI,
 	/* 10GBASE-KR, XFI, SFI - single lane 10G Serdes */
 	PHY_INTERFACE_MODE_10GKR,
+	PHY_INTERFACE_MODE_USXGMII,
 	PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
@@ -178,6 +179,8 @@ static inline const char *phy_modes(phy_interface_t interface)
 		return "xaui";
 	case PHY_INTERFACE_MODE_10GKR:
 		return "10gbase-kr";
+	case PHY_INTERFACE_MODE_USXGMII:
+		return "usxgmii";
 	default:
 		return "unknown";
 	}
@@ -185,7 +188,6 @@ static inline const char *phy_modes(phy_interface_t interface)
 
 
 #define PHY_INIT_TIMEOUT	100000
-#define PHY_STATE_TIME		1
 #define PHY_FORCE_TIMEOUT	10
 
 #define PHY_MAX_ADDR	32
@@ -534,6 +536,9 @@ struct phy_driver {
 	 */
 	int (*did_interrupt)(struct phy_device *phydev);
 
+	/* Override default interrupt handling */
+	int (*handle_interrupt)(struct phy_device *phydev);
+
 	/* Clears up any memory if needed */
 	void (*remove)(struct phy_device *phydev);
 
@@ -1134,6 +1139,7 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
 int phy_drivers_register(struct phy_driver *new_driver, int n,
 			 struct module *owner);
 void phy_state_machine(struct work_struct *work);
+void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies);
 void phy_mac_interrupt(struct phy_device *phydev);
 void phy_start_machine(struct phy_device *phydev);
 void phy_stop_machine(struct phy_device *phydev);
@@ -1144,6 +1150,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
 			      const struct ethtool_link_ksettings *cmd);
 int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd);
 void phy_request_interrupt(struct phy_device *phydev);
+void phy_free_interrupt(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 6411c624f63a..0f6f65bb9d44 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -54,6 +54,21 @@ struct phylink_link_state {
 	unsigned int an_complete:1;
 };
 
+enum phylink_op_type {
+	PHYLINK_NETDEV = 0,
+	PHYLINK_DEV,
+};
+
+/**
+ * struct phylink_config - PHYLINK configuration structure
+ * @dev: a pointer to a struct device associated with the MAC
+ * @type: operation type of PHYLINK instance
+ */
+struct phylink_config {
+	struct device *dev;
+	enum phylink_op_type type;
+};
+
 /**
  * struct phylink_mac_ops - MAC operations structure.
  * @validate: Validate and update the link configuration.
@@ -66,16 +81,17 @@ struct phylink_link_state {
  * The individual methods are described more fully below.
  */
 struct phylink_mac_ops {
-	void (*validate)(struct net_device *ndev, unsigned long *supported,
+	void (*validate)(struct phylink_config *config,
+			 unsigned long *supported,
 			 struct phylink_link_state *state);
-	int (*mac_link_state)(struct net_device *ndev,
+	int (*mac_link_state)(struct phylink_config *config,
 			      struct phylink_link_state *state);
-	void (*mac_config)(struct net_device *ndev, unsigned int mode,
+	void (*mac_config)(struct phylink_config *config, unsigned int mode,
 			   const struct phylink_link_state *state);
-	void (*mac_an_restart)(struct net_device *ndev);
-	void (*mac_link_down)(struct net_device *ndev, unsigned int mode,
+	void (*mac_an_restart)(struct phylink_config *config);
+	void (*mac_link_down)(struct phylink_config *config, unsigned int mode,
 			      phy_interface_t interface);
-	void (*mac_link_up)(struct net_device *ndev, unsigned int mode,
+	void (*mac_link_up)(struct phylink_config *config, unsigned int mode,
 			    phy_interface_t interface,
 			    struct phy_device *phy);
 };
@@ -83,7 +99,7 @@ struct phylink_mac_ops {
 #if 0 /* For kernel-doc purposes only. */
 /**
  * validate - Validate and update the link configuration
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
  * @supported: ethtool bitmask for supported link modes.
  * @state: a pointer to a &struct phylink_link_state.
  *
@@ -100,12 +116,12 @@ struct phylink_mac_ops {
  * based on @state->advertising and/or @state->speed and update
  * @state->interface accordingly.
  */
-void validate(struct net_device *ndev, unsigned long *supported,
+void validate(struct phylink_config *config, unsigned long *supported,
 	      struct phylink_link_state *state);
 
 /**
  * mac_link_state() - Read the current link state from the hardware
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
  * @state: a pointer to a &struct phylink_link_state.
  *
  * Read the current link state from the MAC, reporting the current
@@ -114,12 +130,12 @@ void validate(struct net_device *ndev, unsigned long *supported,
  * negotiation completion state in @state->an_complete, and link
  * up state in @state->link.
  */
-int mac_link_state(struct net_device *ndev,
+int mac_link_state(struct phylink_config *config,
 		   struct phylink_link_state *state);
 
 /**
  * mac_config() - configure the MAC for the selected mode and state
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
  * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
  * @state: a pointer to a &struct phylink_link_state.
  *
@@ -157,18 +173,18 @@ int mac_link_state(struct net_device *ndev,
  * down.  This "update" behaviour is critical to avoid bouncing the
  * link up status.
  */
-void mac_config(struct net_device *ndev, unsigned int mode,
+void mac_config(struct phylink_config *config, unsigned int mode,
 		const struct phylink_link_state *state);
 
 /**
  * mac_an_restart() - restart 802.3z BaseX autonegotiation
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
  */
-void mac_an_restart(struct net_device *ndev);
+void mac_an_restart(struct phylink_config *config);
 
 /**
  * mac_link_down() - take the link down
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
  * @mode: link autonegotiation mode
  * @interface: link &typedef phy_interface_t mode
  *
@@ -177,12 +193,12 @@ void mac_an_restart(struct net_device *ndev);
  * Energy Efficient Ethernet MAC configuration. Interface type
  * selection must be done in mac_config().
  */
-void mac_link_down(struct net_device *ndev, unsigned int mode,
+void mac_link_down(struct phylink_config *config, unsigned int mode,
 		   phy_interface_t interface);
 
 /**
  * mac_link_up() - allow the link to come up
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
  * @mode: link autonegotiation mode
  * @interface: link &typedef phy_interface_t mode
  * @phy: any attached phy
@@ -193,13 +209,14 @@ void mac_link_down(struct net_device *ndev, unsigned int mode,
  * phy_init_eee() and perform appropriate MAC configuration for EEE.
  * Interface type selection must be done in mac_config().
  */
-void mac_link_up(struct net_device *ndev, unsigned int mode,
+void mac_link_up(struct phylink_config *config, unsigned int mode,
 		 phy_interface_t interface,
 		 struct phy_device *phy);
 #endif
 
-struct phylink *phylink_create(struct net_device *, struct fwnode_handle *,
-	phy_interface_t iface, const struct phylink_mac_ops *ops);
+struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *,
+			       phy_interface_t iface,
+			       const struct phylink_mac_ops *ops);
 void phylink_destroy(struct phylink *);
 
 int phylink_connect_phy(struct phylink *, struct phy_device *);
diff --git a/include/linux/platform_data/xilinx-ll-temac.h b/include/linux/platform_data/xilinx-ll-temac.h
index 368530f98176..f4a68136afa6 100644
--- a/include/linux/platform_data/xilinx-ll-temac.h
+++ b/include/linux/platform_data/xilinx-ll-temac.h
@@ -4,6 +4,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/phy.h>
+#include <linux/spinlock.h>
 
 struct ll_temac_platform_data {
 	bool txcsum;		/* Enable/disable TX checksum */
@@ -21,7 +22,7 @@ struct ll_temac_platform_data {
 	 * TEMAC IP block, the same mutex should be passed here, as
 	 * they share the same DCR bus bridge.
 	 */
-	struct mutex *indirect_mutex;
+	spinlock_t *indirect_lock;
 	/* DMA channel control setup */
 	u8 tx_irq_timeout;	/* TX Interrupt Delay Time-out */
 	u8 tx_irq_count;	/* TX Interrupt Coalescing Threshold Count */
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 48841e5dab90..eef02e64b422 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -907,7 +907,8 @@ struct qed_common_ops {
 
 	u32		(*sb_release)(struct qed_dev *cdev,
 				      struct qed_sb_info *sb_info,
-				      u16 sb_id);
+				      u16 sb_id,
+				      enum qed_sb_type type);
 
 	void		(*simd_handler_config)(struct qed_dev *cdev,
 					       void *token,
@@ -1123,6 +1124,13 @@ struct qed_common_ops {
  */
 	int (*read_module_eeprom)(struct qed_dev *cdev,
 				  char *buf, u8 dev_addr, u32 offset, u32 len);
+
+/**
+ * @brief get_affin_hwfn_idx
+ *
+ * @param cdev
+ */
+	u8 (*get_affin_hwfn_idx)(struct qed_dev *cdev);
 };
 
 #define MASK_FIELD(_name, _value) \
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index d15f8e4815e3..898f595ea3d6 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -670,6 +670,8 @@ struct qed_rdma_ops {
 	int (*ll2_set_mac_filter)(struct qed_dev *cdev,
 				  u8 *old_mac_address, u8 *new_mac_address);
 
+	int (*iwarp_set_engine_affin)(struct qed_dev *cdev, bool b_reset);
+
 	int (*iwarp_connect)(void *rdma_cxt,
 			     struct qed_iwarp_connect_in *iparams,
 			     struct qed_iwarp_connect_out *oparams);
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 9f8bc06d4136..beb9a9da1699 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -352,37 +352,38 @@ static inline void rht_unlock(struct bucket_table *tbl,
 static inline struct rhash_head __rcu *__rht_ptr(
 	struct rhash_lock_head *const *bkt)
 {
-	return (struct rhash_head __rcu *)((unsigned long)*bkt & ~BIT(0));
+	return (struct rhash_head __rcu *)
+		((unsigned long)*bkt & ~BIT(0) ?:
+		 (unsigned long)RHT_NULLS_MARKER(bkt));
 }
 
 /*
  * Where 'bkt' is a bucket and might be locked:
- *   rht_ptr() dereferences that pointer and clears the lock bit.
+ *   rht_ptr_rcu() dereferences that pointer and clears the lock bit.
+ *   rht_ptr() dereferences in a context where the bucket is locked.
  *   rht_ptr_exclusive() dereferences in a context where exclusive
  *            access is guaranteed, such as when destroying the table.
  */
+static inline struct rhash_head *rht_ptr_rcu(
+	struct rhash_lock_head *const *bkt)
+{
+	struct rhash_head __rcu *p = __rht_ptr(bkt);
+
+	return rcu_dereference(p);
+}
+
 static inline struct rhash_head *rht_ptr(
 	struct rhash_lock_head *const *bkt,
 	struct bucket_table *tbl,
 	unsigned int hash)
 {
-	struct rhash_head __rcu *p = __rht_ptr(bkt);
-
-	if (!p)
-		return RHT_NULLS_MARKER(bkt);
-
-	return rht_dereference_bucket_rcu(p, tbl, hash);
+	return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash);
 }
 
 static inline struct rhash_head *rht_ptr_exclusive(
 	struct rhash_lock_head *const *bkt)
 {
-	struct rhash_head __rcu *p = __rht_ptr(bkt);
-
-	if (!p)
-		return RHT_NULLS_MARKER(bkt);
-
-	return rcu_dereference_protected(p, 1);
+	return rcu_dereference_protected(__rht_ptr(bkt), 1);
 }
 
 static inline void rht_assign_locked(struct rhash_lock_head **bkt,
@@ -509,7 +510,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl,
  */
 #define rht_for_each_rcu(pos, tbl, hash)			\
 	for (({barrier(); }),					\
-	     pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash);	\
+	     pos = rht_ptr_rcu(rht_bucket(tbl, hash));		\
 	     !rht_is_a_nulls(pos);				\
 	     pos = rcu_dereference_raw(pos->next))
 
@@ -546,8 +547,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl,
  */
 #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
 	rht_for_each_entry_rcu_from(tpos, pos,				   \
-				    rht_ptr(rht_bucket(tbl, hash),	   \
-					    tbl, hash),			   \
+				    rht_ptr_rcu(rht_bucket(tbl, hash)),	   \
 				    tbl, hash, member)
 
 /**
@@ -603,7 +603,7 @@ restart:
 	hash = rht_key_hashfn(ht, tbl, key, params);
 	bkt = rht_bucket(tbl, hash);
 	do {
-		rht_for_each_rcu_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
+		rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
 			if (params.obj_cmpfn ?
 			    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
 			    rhashtable_compare(&arg, rht_obj(ht, he)))
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1c4adfb4195a..151eb930d329 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -17,6 +17,7 @@
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
+#include <linux/workqueue.h>
 #include <net/net_namespace.h>
 #include <uapi/linux/devlink.h>
 
@@ -64,6 +65,7 @@ struct devlink_port {
 	enum devlink_port_type desired_type;
 	void *type_dev;
 	struct devlink_port_attrs attrs;
+	struct delayed_work type_warn_dw;
 };
 
 struct devlink_sb_pool_info {
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 685294817712..a7f36219904f 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -22,6 +22,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/phy.h>
 #include <linux/platform_data/dsa.h>
+#include <linux/phylink.h>
 #include <net/devlink.h>
 #include <net/switchdev.h>
 
@@ -193,6 +194,7 @@ struct dsa_port {
 	struct net_device	*bridge_dev;
 	struct devlink_port	devlink_port;
 	struct phylink		*pl;
+	struct phylink_config	pl_config;
 
 	struct work_struct	xmit_work;
 	struct sk_buff_head	xmit_queue;
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b473df5b9512..eba8465e1d86 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -103,6 +103,7 @@ struct fib_rule_notifier_info {
 };
 
 #define FRA_GENERIC_POLICY \
+	[FRA_UNSPEC]	= { .strict_start_type = FRA_DPORT_RANGE + 1 }, \
 	[FRA_IIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
 	[FRA_OIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
 	[FRA_PRIORITY]	= { .type = NLA_U32 }, \
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 378904ee9129..e91b79ad4e4a 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -3,19 +3,24 @@
 #define __NET_FRAG_H__
 
 #include <linux/rhashtable-types.h>
+#include <linux/completion.h>
 
-struct netns_frags {
+/* Per netns frag queues directory */
+struct fqdir {
 	/* sysctls */
 	long			high_thresh;
 	long			low_thresh;
 	int			timeout;
 	int			max_dist;
 	struct inet_frags	*f;
+	struct net		*net;
+	bool			dead;
 
 	struct rhashtable       rhashtable ____cacheline_aligned_in_smp;
 
 	/* Keep atomic mem on separate cachelines in structs that include it */
 	atomic_long_t		mem ____cacheline_aligned_in_smp;
+	struct rcu_work		destroy_rwork;
 };
 
 /**
@@ -24,11 +29,13 @@ struct netns_frags {
  * @INET_FRAG_FIRST_IN: first fragment has arrived
  * @INET_FRAG_LAST_IN: final fragment has arrived
  * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
+ * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable
  */
 enum {
 	INET_FRAG_FIRST_IN	= BIT(0),
 	INET_FRAG_LAST_IN	= BIT(1),
 	INET_FRAG_COMPLETE	= BIT(2),
+	INET_FRAG_HASH_DEAD	= BIT(3),
 };
 
 struct frag_v4_compare_key {
@@ -64,7 +71,7 @@ struct frag_v6_compare_key {
  * @meat: length of received fragments so far
  * @flags: fragment queue flags
  * @max_size: maximum received fragment size
- * @net: namespace that this frag belongs to
+ * @fqdir: pointer to struct fqdir
  * @rcu: rcu head for freeing deferall
  */
 struct inet_frag_queue {
@@ -84,7 +91,7 @@ struct inet_frag_queue {
 	int			meat;
 	__u8			flags;
 	u16			max_size;
-	struct netns_frags      *net;
+	struct fqdir		*fqdir;
 	struct rcu_head		rcu;
 };
 
@@ -98,21 +105,19 @@ struct inet_frags {
 	struct kmem_cache	*frags_cachep;
 	const char		*frags_cache_name;
 	struct rhashtable_params rhash_params;
+	refcount_t		refcnt;
+	struct completion	completion;
 };
 
 int inet_frags_init(struct inet_frags *);
 void inet_frags_fini(struct inet_frags *);
 
-static inline int inet_frags_init_net(struct netns_frags *nf)
-{
-	atomic_long_set(&nf->mem, 0);
-	return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
-}
-void inet_frags_exit_net(struct netns_frags *nf);
+int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
+void fqdir_exit(struct fqdir *fqdir);
 
 void inet_frag_kill(struct inet_frag_queue *q);
 void inet_frag_destroy(struct inet_frag_queue *q);
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
 
 /* Free all skbs in the queue; return the sum of their truesizes. */
 unsigned int inet_frag_rbtree_purge(struct rb_root *root);
@@ -125,19 +130,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
 
 /* Memory Tracking Functions. */
 
-static inline long frag_mem_limit(const struct netns_frags *nf)
+static inline long frag_mem_limit(const struct fqdir *fqdir)
 {
-	return atomic_long_read(&nf->mem);
+	return atomic_long_read(&fqdir->mem);
 }
 
-static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
+static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
 {
-	atomic_long_sub(val, &nf->mem);
+	atomic_long_sub(val, &fqdir->mem);
 }
 
-static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
+static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
 {
-	atomic_long_add(val, &nf->mem);
+	atomic_long_add(val, &fqdir->mem);
 }
 
 /* RFC 3168 support :
diff --git a/include/net/ip.h b/include/net/ip.h
index 2d3cce7c3e8a..029cc3fd26bd 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -165,6 +165,45 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		   int (*output)(struct net *, struct sock *, struct sk_buff *));
+
+struct ip_fraglist_iter {
+	struct sk_buff	*frag_list;
+	struct sk_buff	*frag;
+	struct iphdr	*iph;
+	int		offset;
+	unsigned int	hlen;
+};
+
+void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
+		      unsigned int hlen, struct ip_fraglist_iter *iter);
+void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter);
+
+static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter)
+{
+	struct sk_buff *skb = iter->frag;
+
+	iter->frag = skb->next;
+	skb_mark_not_on_list(skb);
+
+	return skb;
+}
+
+struct ip_frag_state {
+	struct iphdr	*iph;
+	unsigned int	hlen;
+	unsigned int	ll_rs;
+	unsigned int	mtu;
+	unsigned int	left;
+	int		offset;
+	int		ptr;
+	__be16		not_last_frag;
+};
+
+void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs,
+		  unsigned int mtu, struct ip_frag_state *state);
+struct sk_buff *ip_frag_next(struct sk_buff *skb,
+			     struct ip_frag_state *state);
+
 void ip_send_check(struct iphdr *ip);
 int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 525f701653ca..ebe5d65f97e0 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -131,6 +131,9 @@ struct fib6_nh {
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	unsigned long		last_probe;
 #endif
+
+	struct rt6_info * __percpu *rt6i_pcpu;
+	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
 };
 
 struct fib6_info {
@@ -156,22 +159,18 @@ struct fib6_info {
 	struct rt6key			fib6_src;
 	struct rt6key			fib6_prefsrc;
 
-	struct rt6_info * __percpu	*rt6i_pcpu;
-	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
 	u32				fib6_metric;
 	u8				fib6_protocol;
 	u8				fib6_type;
-	u8				exception_bucket_flushed:1,
-					should_flush:1,
+	u8				should_flush:1,
 					dst_nocount:1,
 					dst_nopolicy:1,
 					dst_host:1,
 					fib6_destroying:1,
-					unused:2;
+					unused:3;
 
-	struct fib6_nh			fib6_nh;
 	struct rcu_head			rcu;
+	struct fib6_nh			fib6_nh[0];
 };
 
 struct rt6_info {
@@ -281,7 +280,7 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 	dst_release(&rt->dst);
 }
 
-struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh);
 void fib6_info_destroy_rcu(struct rcu_head *head);
 
 static inline void fib6_info_hold(struct fib6_info *f6i)
@@ -444,7 +443,7 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
 
 static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
 {
-	return f6i->fib6_nh.fib_nh_dev;
+	return f6i->fib6_nh->fib_nh_dev;
 }
 
 int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
@@ -452,6 +451,12 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 		 struct netlink_ext_ack *extack);
 void fib6_nh_release(struct fib6_nh *fib6_nh);
 
+int call_fib6_entry_notifiers(struct net *net,
+			      enum fib_event_type event_type,
+			      struct fib6_info *rt,
+			      struct netlink_ext_ack *extack);
+void fib6_rt_update(struct net *net, struct fib6_info *rt,
+		    struct nl_info *info);
 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int flags);
 
@@ -485,6 +490,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb);
 
 void fib6_update_sernum(struct net *net, struct fib6_info *rt);
 void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
+void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i);
 
 void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
 static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 4790beaa86e0..a6ce6ea856b9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -70,7 +70,7 @@ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
 {
 	/* the RTF_ADDRCONF flag filters out RA's */
 	return !(f6i->fib6_flags & RTF_ADDRCONF) &&
-		f6i->fib6_nh.fib_nh_gw_family;
+		f6i->fib6_nh->fib_nh_gw_family;
 }
 
 void ip6_route_input(struct sk_buff *skb);
@@ -275,7 +275,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
 
 static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
 {
-	struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh;
+	struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh;
 
 	return nha->fib_nh_dev == nhb->fib_nh_dev &&
 	       ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index d0e28f4ab099..70ba0302c8c9 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -150,6 +150,7 @@ struct fib_info {
 #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
 	int			fib_nhs;
 	bool			fib_nh_is_v6;
+	bool			nh_updated;
 	struct rcu_head		rcu;
 	struct fib_nh		fib_nh[0];
 #define fib_dev		fib_nh[0].fib_nh_dev
@@ -200,7 +201,8 @@ static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
 #define FIB_TABLE_HASHSZ 2
 #endif
 
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
+__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
+				unsigned char scope);
 __be32 fib_result_prefsrc(struct net *net, struct fib_result *res);
 
 #define FIB_RES_NHC(res)		((res).nhc)
@@ -231,6 +233,7 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
 int __net_init fib4_notifier_init(struct net *net);
 void __net_exit fib4_notifier_exit(struct net *net);
 
+void fib_info_notify_update(struct net *net, struct nl_info *info);
 void fib_notify(struct net *net, struct notifier_block *nb);
 
 struct fib_table {
@@ -429,11 +432,14 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
 int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
+void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys);
 #endif
+int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
+		 struct netlink_ext_ack *extack);
 void fib_select_multipath(struct fib_result *res, int hash);
 void fib_select_path(struct net *net, struct fib_result *res,
 		     struct flowi4 *fl4, const struct sk_buff *skb);
@@ -469,6 +475,7 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
 #endif
 }
 
+void fib_flush(struct net *net);
 void free_fib_info(struct fib_info *fi);
 
 static inline void fib_info_hold(struct fib_info *fi)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index daf80863d3a5..21bb830e9679 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -154,6 +154,50 @@ struct frag_hdr {
 #define	IP6_MF		0x0001
 #define	IP6_OFFSET	0xFFF8
 
+struct ip6_fraglist_iter {
+	struct ipv6hdr	*tmp_hdr;
+	struct sk_buff	*frag_list;
+	struct sk_buff	*frag;
+	int		offset;
+	unsigned int	hlen;
+	__be32		frag_id;
+	u8		nexthdr;
+};
+
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
+		      u8 nexthdr, __be32 frag_id,
+		      struct ip6_fraglist_iter *iter);
+void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter);
+
+static inline struct sk_buff *ip6_fraglist_next(struct ip6_fraglist_iter *iter)
+{
+	struct sk_buff *skb = iter->frag;
+
+	iter->frag = skb->next;
+	skb_mark_not_on_list(skb);
+
+	return skb;
+}
+
+struct ip6_frag_state {
+	u8		*prevhdr;
+	unsigned int	hlen;
+	unsigned int	mtu;
+	unsigned int	left;
+	int		offset;
+	int		ptr;
+	int		hroom;
+	int		troom;
+	__be32		frag_id;
+	u8		nexthdr;
+};
+
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
+		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
+		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state);
+struct sk_buff *ip6_frag_next(struct sk_buff *skb,
+			      struct ip6_frag_state *state);
+
 #define IP6_REPLY_MARK(net, mark) \
 	((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
 
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 6c0c4fde16f8..5c93e942c50b 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -45,6 +45,11 @@ struct ipv6_stub {
 			    struct fib6_config *cfg, gfp_t gfp_flags,
 			    struct netlink_ext_ack *extack);
 	void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+	void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
+	int (*ip6_del_rt)(struct net *net, struct fib6_info *rt);
+	void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
+			       struct nl_info *info);
+
 	void (*udpv6_encap_enable)(void);
 	void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
 			      const struct in6_addr *solicited_addr,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 12689ddfc24c..abb4f92456e1 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -19,6 +19,7 @@
 #include <net/netns/packet.h>
 #include <net/netns/ipv4.h>
 #include <net/netns/ipv6.h>
+#include <net/netns/nexthop.h>
 #include <net/netns/ieee802154_6lowpan.h>
 #include <net/netns/sctp.h>
 #include <net/netns/dccp.h>
@@ -108,6 +109,7 @@ struct net {
 	struct netns_mib	mib;
 	struct netns_packet	packet;
 	struct netns_unix	unx;
+	struct netns_nexthop	nexthop;
 	struct netns_ipv4	ipv4;
 #if IS_ENABLED(CONFIG_IPV6)
 	struct netns_ipv6	ipv6;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d2bc733a2ef1..5cb19ce454d1 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -49,6 +49,7 @@ union nf_conntrack_expect_proto {
 struct nf_conntrack_net {
 	unsigned int users4;
 	unsigned int users6;
+	unsigned int users_bridge;
 };
 
 #include <linux/types.h>
diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h
new file mode 100644
index 000000000000..9a5514d5bc51
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_bridge.h
@@ -0,0 +1,20 @@
+#ifndef NF_CONNTRACK_BRIDGE_
+#define NF_CONNTRACK_BRIDGE_
+
+struct nf_ct_bridge_info {
+	struct nf_hook_ops	*ops;
+	unsigned int		ops_size;
+	struct module		*me;
+};
+
+void nf_ct_bridge_register(struct nf_ct_bridge_info *info);
+void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info);
+
+struct nf_ct_bridge_frag_data {
+	char	mac[ETH_HLEN];
+	bool	vlan_present;
+	u16	vlan_tci;
+	__be16	vlan_proto;
+};
+
+#endif
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index ae41e92251dd..de10faf2ce91 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -64,6 +64,9 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	return ret;
 }
 
+unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
+			struct nf_conn *ct, enum ip_conntrack_info ctinfo);
+
 void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_l4proto *proto);
 
diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h
index 736aeac52f56..95406e1342cb 100644
--- a/include/net/netns/ieee802154_6lowpan.h
+++ b/include/net/netns/ieee802154_6lowpan.h
@@ -16,7 +16,7 @@ struct netns_sysctl_lowpan {
 
 struct netns_ieee802154_lowpan {
 	struct netns_sysctl_lowpan sysctl;
-	struct netns_frags	frags;
+	struct fqdir		*fqdir;
 };
 
 #endif
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7698460a3dd1..c07cee1e0c9e 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -72,7 +72,7 @@ struct netns_ipv4 {
 
 	struct inet_peer_base	*peers;
 	struct sock  * __percpu	*tcp_sk;
-	struct netns_frags	frags;
+	struct fqdir		*fqdir;
 #ifdef CONFIG_NETFILTER
 	struct xt_table		*iptable_filter;
 	struct xt_table		*iptable_mangle;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5e61b5a8635d..022a0fd1a5a4 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -58,7 +58,7 @@ struct netns_ipv6 {
 	struct ipv6_devconf	*devconf_all;
 	struct ipv6_devconf	*devconf_dflt;
 	struct inet_peer_base	*peers;
-	struct netns_frags	frags;
+	struct fqdir		*fqdir;
 #ifdef CONFIG_NETFILTER
 	struct xt_table		*ip6table_filter;
 	struct xt_table		*ip6table_mangle;
@@ -116,7 +116,7 @@ struct netns_ipv6 {
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 struct netns_nf_frag {
-	struct netns_frags	frags;
+	struct fqdir	*fqdir;
 };
 #endif
 
diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h
new file mode 100644
index 000000000000..c712ee5eebd9
--- /dev/null
+++ b/include/net/netns/nexthop.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * nexthops in net namespaces
+ */
+
+#ifndef __NETNS_NEXTHOP_H__
+#define __NETNS_NEXTHOP_H__
+
+#include <linux/rbtree.h>
+
+struct netns_nexthop {
+	struct rb_root		rb_root;	/* tree of nexthops by id */
+	struct hlist_head	*devhash;	/* nexthops by device */
+
+	unsigned int		seq;		/* protected by rtnl_mutex */
+	u32			last_id_allocated;
+};
+#endif
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
new file mode 100644
index 000000000000..6e1b8f53624c
--- /dev/null
+++ b/include/net/nexthop.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic nexthop implementation
+ *
+ * Copyright (c) 2017-19 Cumulus Networks
+ * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
+ */
+
+#ifndef __LINUX_NEXTHOP_H
+#define __LINUX_NEXTHOP_H
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
+#include <net/netlink.h>
+
+#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK
+
+struct nexthop;
+
+struct nh_config {
+	u32		nh_id;
+
+	u8		nh_family;
+	u8		nh_protocol;
+	u8		nh_blackhole;
+	u32		nh_flags;
+
+	int		nh_ifindex;
+	struct net_device *dev;
+
+	union {
+		__be32		ipv4;
+		struct in6_addr	ipv6;
+	} gw;
+
+	struct nlattr	*nh_grp;
+	u16		nh_grp_type;
+
+	struct nlattr	*nh_encap;
+	u16		nh_encap_type;
+
+	u32		nlflags;
+	struct nl_info	nlinfo;
+};
+
+struct nh_info {
+	struct hlist_node	dev_hash;    /* entry on netns devhash */
+	struct nexthop		*nh_parent;
+
+	u8			family;
+	bool			reject_nh;
+
+	union {
+		struct fib_nh_common	fib_nhc;
+		struct fib_nh		fib_nh;
+		struct fib6_nh		fib6_nh;
+	};
+};
+
+struct nh_grp_entry {
+	struct nexthop	*nh;
+	u8		weight;
+	atomic_t	upper_bound;
+
+	struct list_head nh_list;
+	struct nexthop	*nh_parent;  /* nexthop of group with this entry */
+};
+
+struct nh_group {
+	u16			num_nh;
+	bool			mpath;
+	bool			has_v4;
+	struct nh_grp_entry	nh_entries[0];
+};
+
+struct nexthop {
+	struct rb_node		rb_node;    /* entry on netns rbtree */
+	struct list_head	grp_list;   /* nh group entries using this nh */
+	struct net		*net;
+
+	u32			id;
+
+	u8			protocol;   /* app managing this nh */
+	u8			nh_flags;
+	bool			is_group;
+
+	refcount_t		refcnt;
+	struct rcu_head		rcu;
+
+	union {
+		struct nh_info	__rcu *nh_info;
+		struct nh_group __rcu *nh_grp;
+	};
+};
+
+/* caller is holding rcu or rtnl; no reference taken to nexthop */
+struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
+void nexthop_free_rcu(struct rcu_head *head);
+
+static inline bool nexthop_get(struct nexthop *nh)
+{
+	return refcount_inc_not_zero(&nh->refcnt);
+}
+
+static inline void nexthop_put(struct nexthop *nh)
+{
+	if (refcount_dec_and_test(&nh->refcnt))
+		call_rcu(&nh->rcu, nexthop_free_rcu);
+}
+
+static inline bool nexthop_is_multipath(const struct nexthop *nh)
+{
+	if (nh->is_group) {
+		struct nh_group *nh_grp;
+
+		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+		return nh_grp->mpath;
+	}
+	return false;
+}
+
+struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
+
+static inline unsigned int nexthop_num_path(const struct nexthop *nh)
+{
+	unsigned int rc = 1;
+
+	if (nexthop_is_multipath(nh)) {
+		struct nh_group *nh_grp;
+
+		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+		rc = nh_grp->num_nh;
+	} else {
+		const struct nh_info *nhi;
+
+		nhi = rcu_dereference_rtnl(nh->nh_info);
+		if (nhi->reject_nh)
+			rc = 0;
+	}
+
+	return rc;
+}
+
+static inline
+struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel)
+{
+	const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
+
+	/* for_nexthops macros in fib_semantics.c grabs a pointer to
+	 * the nexthop before checking nhsel
+	 */
+	if (nhsel > nhg->num_nh)
+		return NULL;
+
+	return nhg->nh_entries[nhsel].nh;
+}
+
+static inline
+int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh)
+{
+	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+	int i;
+
+	for (i = 0; i < nhg->num_nh; i++) {
+		struct nexthop *nhe = nhg->nh_entries[i].nh;
+		struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
+		struct fib_nh_common *nhc = &nhi->fib_nhc;
+		int weight = nhg->nh_entries[i].weight;
+
+		if (fib_add_nexthop(skb, nhc, weight) < 0)
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
+/* called with rcu lock */
+static inline bool nexthop_is_blackhole(const struct nexthop *nh)
+{
+	const struct nh_info *nhi;
+
+	if (nexthop_is_multipath(nh)) {
+		if (nexthop_num_path(nh) > 1)
+			return false;
+		nh = nexthop_mpath_select(nh, 0);
+		if (!nh)
+			return false;
+	}
+
+	nhi = rcu_dereference_rtnl(nh->nh_info);
+	return nhi->reject_nh;
+}
+#endif
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index 314699333bec..5a9bb09f32b6 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -43,19 +43,21 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2,
 						   (__force __u32)csum2, len);
 }
 
+static const struct skb_checksum_ops sctp_csum_ops = {
+	.update  = sctp_csum_update,
+	.combine = sctp_csum_combine,
+};
+
 static inline __le32 sctp_compute_cksum(const struct sk_buff *skb,
 					unsigned int offset)
 {
 	struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
-	const struct skb_checksum_ops ops = {
-		.update  = sctp_csum_update,
-		.combine = sctp_csum_combine,
-	};
 	__le32 old = sh->checksum;
 	__wsum new;
 
 	sh->checksum = 0;
-	new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &ops);
+	new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0,
+			      &sctp_csum_ops);
 	sh->checksum = old;
 
 	return cpu_to_le32((__force __u32)new);
diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h
new file mode 100644
index 000000000000..d6a688571672
--- /dev/null
+++ b/include/net/tc_act/tc_ctinfo.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_CTINFO_H
+#define __NET_TC_CTINFO_H
+
+#include <net/act_api.h>
+
+struct tcf_ctinfo_params {
+	struct rcu_head rcu;
+	struct net *net;
+	u32 dscpmask;
+	u32 dscpstatemask;
+	u32 cpmarkmask;
+	u16 zone;
+	u8 mode;
+	u8 dscpmaskshift;
+};
+
+struct tcf_ctinfo {
+	struct tc_action common;
+	struct tcf_ctinfo_params __rcu *params;
+	u64 stats_dscp_set;
+	u64 stats_dscp_error;
+	u64 stats_cpmark_set;
+};
+
+#define to_ctinfo(a) ((struct tcf_ctinfo *)a)
+
+#endif /* __NET_TC_CTINFO_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 985aa5db570c..0083a14fb64f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1614,7 +1614,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp);
 void tcp_fastopen_destroy_cipher(struct sock *sk);
 void tcp_fastopen_ctx_destroy(struct net *net);
 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
-			      void *key, unsigned int len);
+			      void *primary_key, void *backup_key,
+			      unsigned int len);
 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct request_sock *req,
@@ -1625,11 +1626,14 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 			     struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
 #define TCP_FASTOPEN_KEY_LENGTH 16
+#define TCP_FASTOPEN_KEY_MAX 2
+#define TCP_FASTOPEN_KEY_BUF_LENGTH \
+	(TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)
 
 /* Fastopen key context */
 struct tcp_fastopen_context {
-	struct crypto_cipher	*tfm;
-	__u8			key[TCP_FASTOPEN_KEY_LENGTH];
+	struct crypto_cipher	*tfm[TCP_FASTOPEN_KEY_MAX];
+	__u8			key[TCP_FASTOPEN_KEY_BUF_LENGTH];
 	struct rcu_head		rcu;
 };
 
@@ -1639,6 +1643,37 @@ bool tcp_fastopen_active_should_disable(struct sock *sk);
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
 void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
 
+/* Caller needs to wrap with rcu_read_(un)lock() */
+static inline
+struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk)
+{
+	struct tcp_fastopen_context *ctx;
+
+	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+	if (!ctx)
+		ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+	return ctx;
+}
+
+static inline
+bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
+			       const struct tcp_fastopen_cookie *orig)
+{
+	if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    orig->len == foc->len &&
+	    !memcmp(orig->val, foc->val, foc->len))
+		return true;
+	return false;
+}
+
+static inline
+int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
+{
+	if (ctx->tfm[1])
+		return 2;
+	return 1;
+}
+
 /* Latencies incurred by various limits for a sender. They are
  * chronograph-like stats that are mutually exclusive.
  */
diff --git a/include/trace/events/neigh.h b/include/trace/events/neigh.h
index 0bdb08557763..62bb17516713 100644
--- a/include/trace/events/neigh.h
+++ b/include/trace/events/neigh.h
@@ -20,6 +20,55 @@
 		{ NUD_NOARP, "noarp" },			\
 		{ NUD_PERMANENT, "permanent"})
 
+TRACE_EVENT(neigh_create,
+
+	TP_PROTO(struct neigh_table *tbl, struct net_device *dev,
+		 const void *pkey, const struct neighbour *n,
+		 bool exempt_from_gc),
+
+	TP_ARGS(tbl, dev, pkey, n, exempt_from_gc),
+
+	TP_STRUCT__entry(
+		__field(u32, family)
+		__dynamic_array(char,  dev,   IFNAMSIZ )
+		__field(int, entries)
+		__field(u8, created)
+		__field(u8, gc_exempt)
+		__array(u8, primary_key4, 4)
+		__array(u8, primary_key6, 16)
+	),
+
+	TP_fast_assign(
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->family = tbl->family;
+		__assign_str(dev, (dev ? dev->name : "NULL"));
+		__entry->entries = atomic_read(&tbl->gc_entries);
+		__entry->created = n != NULL;
+		__entry->gc_exempt = exempt_from_gc;
+		pin6 = (struct in6_addr *)__entry->primary_key6;
+		p32 = (__be32 *)__entry->primary_key4;
+
+		if (tbl->family == AF_INET)
+			*p32 = *(__be32 *)pkey;
+		else
+			*p32 = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (tbl->family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->primary_key6;
+			*pin6 = *(struct in6_addr *)pkey;
+		}
+#endif
+	),
+
+	TP_printk("family %d dev %s entries %d primary_key4 %pI4 primary_key6 %pI6c created %d gc_exempt %d",
+		  __entry->family, __get_str(dev), __entry->entries,
+		  __entry->primary_key4, __entry->primary_key6,
+		  __entry->created, __entry->gc_exempt)
+);
+
 TRACE_EVENT(neigh_update,
 
 	TP_PROTO(struct neighbour *n, const u8 *lladdr, u8 new,
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 3534ce157ae9..dd06302aa93e 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1483,6 +1483,8 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64,
 	ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT	 = 65,
 	ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT	 = 66,
+	ETHTOOL_LINK_MODE_100baseT1_Full_BIT		 = 67,
+	ETHTOOL_LINK_MODE_1000baseT1_Full_BIT		 = 68,
 
 	/* must be last entry */
 	__ETHTOOL_LINK_MODE_MASK_NBITS
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
new file mode 100644
index 000000000000..7b61867e9848
--- /dev/null
+++ b/include/uapi/linux/nexthop.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_NEXTHOP_H
+#define _UAPI_LINUX_NEXTHOP_H
+
+#include <linux/types.h>
+
+struct nhmsg {
+	unsigned char	nh_family;
+	unsigned char	nh_scope;     /* return only */
+	unsigned char	nh_protocol;  /* Routing protocol that installed nh */
+	unsigned char	resvd;
+	unsigned int	nh_flags;     /* RTNH_F flags */
+};
+
+/* entry in a nexthop group */
+struct nexthop_grp {
+	__u32	id;	  /* nexthop id - must exist */
+	__u8	weight;   /* weight of this nexthop */
+	__u8	resvd1;
+	__u16	resvd2;
+};
+
+enum {
+	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */
+	__NEXTHOP_GRP_TYPE_MAX,
+};
+
+#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
+
+enum {
+	NHA_UNSPEC,
+	NHA_ID,		/* u32; id for nexthop. id == 0 means auto-assign */
+
+	NHA_GROUP,	/* array of nexthop_grp */
+	NHA_GROUP_TYPE,	/* u16 one of NEXTHOP_GRP_TYPE */
+	/* if NHA_GROUP attribute is added, no other attributes can be set */
+
+	NHA_BLACKHOLE,	/* flag; nexthop used to blackhole packets */
+	/* if NHA_BLACKHOLE is added, OIF, GATEWAY, ENCAP can not be set */
+
+	NHA_OIF,	/* u32; nexthop device */
+	NHA_GATEWAY,	/* be32 (IPv4) or in6_addr (IPv6) gw address */
+	NHA_ENCAP_TYPE, /* u16; lwt encap type */
+	NHA_ENCAP,	/* lwt encap data */
+
+	/* NHA_OIF can be appended to dump request to return only
+	 * nexthops using given device
+	 */
+	NHA_GROUPS,	/* flag; only return nexthop groups in dump */
+	NHA_MASTER,	/* u32;  only return nexthops with given master dev */
+
+	__NHA_MAX,
+};
+
+#define NHA_MAX	(__NHA_MAX - 1)
+#endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 51a0496f78ea..a93680fc4bfa 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -105,6 +105,7 @@ enum tca_id {
 	TCA_ID_IFE = TCA_ACT_IFE,
 	TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
 	/* other actions go here */
+	TCA_ID_CTINFO,
 	__TCA_ID_MAX = 255
 };
 
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 46399367627f..ce2a623abb75 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -157,6 +157,13 @@ enum {
 	RTM_GETCHAIN,
 #define RTM_GETCHAIN RTM_GETCHAIN
 
+	RTM_NEWNEXTHOP = 104,
+#define RTM_NEWNEXTHOP	RTM_NEWNEXTHOP
+	RTM_DELNEXTHOP,
+#define RTM_DELNEXTHOP	RTM_DELNEXTHOP
+	RTM_GETNEXTHOP,
+#define RTM_GETNEXTHOP	RTM_GETNEXTHOP
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
@@ -342,6 +349,7 @@ enum rtattr_type_t {
 	RTA_IP_PROTO,
 	RTA_SPORT,
 	RTA_DPORT,
+	RTA_NH_ID,
 	__RTA_MAX
 };
 
@@ -704,6 +712,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_IPV4_MROUTE_R	RTNLGRP_IPV4_MROUTE_R
 	RTNLGRP_IPV6_MROUTE_R,
 #define RTNLGRP_IPV6_MROUTE_R	RTNLGRP_IPV6_MROUTE_R
+	RTNLGRP_NEXTHOP,
+#define RTNLGRP_NEXTHOP		RTNLGRP_NEXTHOP
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 86dc24a96c90..74904e9d1b72 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -283,6 +283,7 @@ enum
 	LINUX_MIB_TCPACKCOMPRESSED,		/* TCPAckCompressed */
 	LINUX_MIB_TCPZEROWINDOWDROP,		/* TCPZeroWindowDrop */
 	LINUX_MIB_TCPRCVQDROP,			/* TCPRcvQDrop */
+	LINUX_MIB_TCPFASTOPENPASSIVEALTKEY,	/* TCPFastOpenPassiveAltKey */
 	__LINUX_MIB_MAX
 };
 
diff --git a/include/uapi/linux/tc_act/tc_ctinfo.h b/include/uapi/linux/tc_act/tc_ctinfo.h
new file mode 100644
index 000000000000..da803e05a89b
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_ctinfo.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __UAPI_TC_CTINFO_H
+#define __UAPI_TC_CTINFO_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+struct tc_ctinfo {
+	tc_gen;
+};
+
+enum {
+	TCA_CTINFO_UNSPEC,
+	TCA_CTINFO_PAD,
+	TCA_CTINFO_TM,
+	TCA_CTINFO_ACT,
+	TCA_CTINFO_ZONE,
+	TCA_CTINFO_PARMS_DSCP_MASK,
+	TCA_CTINFO_PARMS_DSCP_STATEMASK,
+	TCA_CTINFO_PARMS_CPMARK_MASK,
+	TCA_CTINFO_STATS_DSCP_SET,
+	TCA_CTINFO_STATS_DSCP_ERROR,
+	TCA_CTINFO_STATS_CPMARK_SET,
+	__TCA_CTINFO_MAX
+};
+
+#define TCA_CTINFO_MAX (__TCA_CTINFO_MAX - 1)
+
+enum {
+	CTINFO_MODE_DSCP	= BIT(0),
+	CTINFO_MODE_CPMARK	= BIT(1)
+};
+
+#endif
diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h
index 5c502fdf7a42..a1988576fa8a 100644
--- a/include/uapi/linux/unix_diag.h
+++ b/include/uapi/linux/unix_diag.h
@@ -20,6 +20,7 @@ struct unix_diag_req {
 #define UDIAG_SHOW_ICONS	0x00000008	/* show pending connections */
 #define UDIAG_SHOW_RQLEN	0x00000010	/* show skb receive queue len */
 #define UDIAG_SHOW_MEMINFO	0x00000020	/* show memory info of a socket */
+#define UDIAG_SHOW_UID		0x00000040	/* show socket's UID */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -40,6 +41,7 @@ enum {
 	UNIX_DIAG_RQLEN,
 	UNIX_DIAG_MEMINFO,
 	UNIX_DIAG_SHUTDOWN,
+	UNIX_DIAG_UID,
 
 	__UNIX_DIAG_MAX,
 };
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 013323b6dbe4..693aefad7f8a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -56,6 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	br_switchdev_frame_unmark(skb);
 	BR_INPUT_SKB_CB(skb)->brdev = dev;
+	BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
 
 	skb_reset_mac_header(skb);
 	eth = eth_hdr(skb);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 334a8c496b50..68561741e827 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -425,6 +425,7 @@ struct net_bridge {
 struct br_input_skb_cb {
 	struct net_device *brdev;
 
+	u16 frag_max_size;
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	u8 igmp;
 	u8 mrouters_only:1;
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index c3ad90c43801..f4fb0b9b927d 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -19,6 +19,20 @@ config NF_LOG_BRIDGE
 	tristate "Bridge packet logging"
 	select NF_LOG_COMMON
 
+config NF_CONNTRACK_BRIDGE
+	tristate "IPv4/IPV6 bridge connection tracking support"
+	depends on NF_CONNTRACK
+	default n
+	help
+	  Connection tracking keeps a record of what packets have passed
+	  through your machine, in order to figure out how they are related
+	  into connections. This is used to enhance packet filtering via
+	  stateful policies. Enable this if you want native tracking from
+	  the bridge. This provides a replacement for the `br_netfilter'
+	  infrastructure.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 endif # NF_TABLES_BRIDGE
 
 menuconfig BRIDGE_NF_EBTABLES
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 9b868861f21a..9d7767322a64 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -5,6 +5,9 @@
 
 obj-$(CONFIG_NFT_BRIDGE_REJECT)  += nft_reject_bridge.o
 
+# connection tracking
+obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
+
 # packet logging
 obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
 
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
new file mode 100644
index 000000000000..b675cd7c1a82
--- /dev/null
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -0,0 +1,433 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
+
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+
+#include "../br_private.h"
+
+/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
+ * has been linearized or cloned.
+ */
+static int nf_br_ip_fragment(struct net *net, struct sock *sk,
+			     struct sk_buff *skb,
+			     struct nf_ct_bridge_frag_data *data,
+			     int (*output)(struct net *, struct sock *sk,
+					   const struct nf_ct_bridge_frag_data *data,
+					   struct sk_buff *))
+{
+	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+	unsigned int hlen, ll_rs, mtu;
+	struct ip_frag_state state;
+	struct iphdr *iph;
+	int err;
+
+	/* for offloaded checksums cleanup checksum before fragmentation */
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_checksum_help(skb)))
+		goto blackhole;
+
+	iph = ip_hdr(skb);
+
+	/*
+	 *	Setup starting values
+	 */
+
+	hlen = iph->ihl * 4;
+	frag_max_size -= hlen;
+	ll_rs = LL_RESERVED_SPACE(skb->dev);
+	mtu = skb->dev->mtu;
+
+	if (skb_has_frag_list(skb)) {
+		unsigned int first_len = skb_pagelen(skb);
+		struct ip_fraglist_iter iter;
+		struct sk_buff *frag;
+
+		if (first_len - hlen > mtu ||
+		    skb_headroom(skb) < ll_rs)
+			goto blackhole;
+
+		if (skb_cloned(skb))
+			goto slow_path;
+
+		skb_walk_frags(skb, frag) {
+			if (frag->len > mtu ||
+			    skb_headroom(frag) < hlen + ll_rs)
+				goto blackhole;
+
+			if (skb_shared(frag))
+				goto slow_path;
+		}
+
+		ip_fraglist_init(skb, iph, hlen, &iter);
+
+		for (;;) {
+			if (iter.frag)
+				ip_fraglist_prepare(skb, &iter);
+
+			err = output(net, sk, data, skb);
+			if (err || !iter.frag)
+				break;
+
+			skb = ip_fraglist_next(&iter);
+		}
+		return err;
+	}
+slow_path:
+	/* This is a linearized skbuff, the original geometry is lost for us.
+	 * This may also be a clone skbuff, we could preserve the geometry for
+	 * the copies but probably not worth the effort.
+	 */
+	ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);
+
+	while (state.left > 0) {
+		struct sk_buff *skb2;
+
+		skb2 = ip_frag_next(skb, &state);
+		if (IS_ERR(skb2)) {
+			err = PTR_ERR(skb2);
+			goto blackhole;
+		}
+
+		err = output(net, sk, data, skb2);
+		if (err)
+			goto blackhole;
+	}
+	consume_skb(skb);
+	return err;
+
+blackhole:
+	kfree_skb(skb);
+	return 0;
+}
+
+/* ip_defrag() expects IPCB() in place. */
+static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
+			   size_t inet_skb_parm_size)
+{
+	memcpy(cb, skb->cb, sizeof(*cb));
+	memset(skb->cb, 0, inet_skb_parm_size);
+}
+
+static void br_skb_cb_restore(struct sk_buff *skb,
+			      const struct br_input_skb_cb *cb,
+			      u16 fragsz)
+{
+	memcpy(skb->cb, cb, sizeof(*cb));
+	BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
+}
+
+static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
+				     const struct nf_hook_state *state)
+{
+	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
+	enum ip_conntrack_info ctinfo;
+	struct br_input_skb_cb cb;
+	const struct nf_conn *ct;
+	int err;
+
+	if (!ip_is_fragment(ip_hdr(skb)))
+		return NF_ACCEPT;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct)
+		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+
+	br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
+	local_bh_disable();
+	err = ip_defrag(state->net, skb,
+			IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
+	local_bh_enable();
+	if (!err) {
+		br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
+		skb->ignore_df = 1;
+		return NF_ACCEPT;
+	}
+
+	return NF_STOLEN;
+}
+
+static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
+				     const struct nf_hook_state *state)
+{
+	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
+	enum ip_conntrack_info ctinfo;
+	struct br_input_skb_cb cb;
+	const struct nf_conn *ct;
+	int err;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct)
+		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+
+	br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
+
+	err = nf_ipv6_br_defrag(state->net, skb,
+				IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
+	/* queued */
+	if (err == -EINPROGRESS)
+		return NF_STOLEN;
+
+	br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
+	return err == 0 ? NF_ACCEPT : NF_DROP;
+}
+
+static int nf_ct_br_ip_check(const struct sk_buff *skb)
+{
+	const struct iphdr *iph;
+	int nhoff, len;
+
+	nhoff = skb_network_offset(skb);
+	iph = ip_hdr(skb);
+	if (iph->ihl < 5 ||
+	    iph->version != 4)
+		return -1;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < nhoff + len ||
+	    len < (iph->ihl * 4))
+                return -1;
+
+	return 0;
+}
+
+static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
+{
+	const struct ipv6hdr *hdr;
+	int nhoff, len;
+
+	nhoff = skb_network_offset(skb);
+	hdr = ipv6_hdr(skb);
+	if (hdr->version != 6)
+		return -1;
+
+	len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
+	if (skb->len < len)
+		return -1;
+
+	return 0;
+}
+
+static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
+				     const struct nf_hook_state *state)
+{
+	struct nf_hook_state bridge_state = *state;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	u32 len;
+	int ret;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if ((ct && !nf_ct_is_template(ct)) ||
+	    ctinfo == IP_CT_UNTRACKED)
+		return NF_ACCEPT;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+			return NF_ACCEPT;
+
+		len = ntohs(ip_hdr(skb)->tot_len);
+		if (pskb_trim_rcsum(skb, len))
+			return NF_ACCEPT;
+
+		if (nf_ct_br_ip_check(skb))
+			return NF_ACCEPT;
+
+		bridge_state.pf = NFPROTO_IPV4;
+		ret = nf_ct_br_defrag4(skb, &bridge_state);
+		break;
+	case htons(ETH_P_IPV6):
+		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+			return NF_ACCEPT;
+
+		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
+		if (pskb_trim_rcsum(skb, len))
+			return NF_ACCEPT;
+
+		if (nf_ct_br_ipv6_check(skb))
+			return NF_ACCEPT;
+
+		bridge_state.pf = NFPROTO_IPV6;
+		ret = nf_ct_br_defrag6(skb, &bridge_state);
+		break;
+	default:
+		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+		return NF_ACCEPT;
+	}
+
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	return nf_conntrack_in(skb, &bridge_state);
+}
+
+static void nf_ct_bridge_frag_save(struct sk_buff *skb,
+				   struct nf_ct_bridge_frag_data *data)
+{
+	if (skb_vlan_tag_present(skb)) {
+		data->vlan_present = true;
+		data->vlan_tci = skb->vlan_tci;
+		data->vlan_proto = skb->vlan_proto;
+	} else {
+		data->vlan_present = false;
+	}
+	skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
+}
+
+static unsigned int
+nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
+		    int (*output)(struct net *, struct sock *sk,
+				  const struct nf_ct_bridge_frag_data *data,
+				  struct sk_buff *))
+{
+	struct nf_ct_bridge_frag_data data;
+
+	if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
+		return NF_ACCEPT;
+
+	nf_ct_bridge_frag_save(skb, &data);
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
+		break;
+	case htons(ETH_P_IPV6):
+		nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return NF_DROP;
+	}
+
+	return NF_STOLEN;
+}
+
+/* Actually only slow path refragmentation needs this. */
+static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
+				     const struct nf_ct_bridge_frag_data *data)
+{
+	int err;
+
+	err = skb_cow_head(skb, ETH_HLEN);
+	if (err) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+	if (data->vlan_present)
+		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
+
+	skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
+	skb_reset_mac_header(skb);
+
+	return 0;
+}
+
+static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
+				    const struct nf_ct_bridge_frag_data *data,
+				    struct sk_buff *skb)
+{
+	int err;
+
+	err = nf_ct_bridge_frag_restore(skb, data);
+	if (err < 0)
+		return err;
+
+	return br_dev_queue_push_xmit(net, sk, skb);
+}
+
+static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	int protoff;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		return nf_conntrack_confirm(skb);
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		protoff = skb_network_offset(skb) + ip_hdrlen(skb);
+		break;
+	case htons(ETH_P_IPV6): {
+		 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+		__be16 frag_off;
+
+		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+					   &frag_off);
+		if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+			return nf_conntrack_confirm(skb);
+		}
+		break;
+	default:
+		return NF_ACCEPT;
+	}
+	return nf_confirm(skb, protoff, ct, ctinfo);
+}
+
+static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	int ret;
+
+	ret = nf_ct_bridge_confirm(skb);
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
+}
+
+static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
+	{
+		.hook		= nf_ct_bridge_pre,
+		.pf		= NFPROTO_BRIDGE,
+		.hooknum	= NF_BR_PRE_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK,
+	},
+	{
+		.hook		= nf_ct_bridge_post,
+		.pf		= NFPROTO_BRIDGE,
+		.hooknum	= NF_BR_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
+	},
+};
+
+static struct nf_ct_bridge_info bridge_info = {
+	.ops		= nf_ct_bridge_hook_ops,
+	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
+	.me		= THIS_MODULE,
+};
+
+static int __init nf_conntrack_l3proto_bridge_init(void)
+{
+	nf_ct_bridge_register(&bridge_info);
+
+	return 0;
+}
+
+static void __exit nf_conntrack_l3proto_bridge_fini(void)
+{
+	nf_ct_bridge_unregister(&bridge_info);
+}
+
+module_init(nf_conntrack_l3proto_bridge_init);
+module_exit(nf_conntrack_l3proto_bridge_fini);
+
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
+MODULE_LICENSE("GPL");
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d43bc52b8840..9716a7f382cb 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -21,6 +21,7 @@
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
 #include <linux/refcount.h>
+#include <linux/workqueue.h>
 #include <rdma/ib_verbs.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -5390,6 +5391,38 @@ void devlink_free(struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_free);
 
+static void devlink_port_type_warn(struct work_struct *work)
+{
+	WARN(true, "Type was not set for devlink port.");
+}
+
+static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+{
+	/* Ignore CPU and DSA flavours. */
+	return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
+	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA;
+}
+
+#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 30)
+
+static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
+{
+	if (!devlink_port_type_should_warn(devlink_port))
+		return;
+	/* Schedule a work to WARN in case driver does not set port
+	 * type within timeout.
+	 */
+	schedule_delayed_work(&devlink_port->type_warn_dw,
+			      DEVLINK_PORT_TYPE_WARN_TIMEOUT);
+}
+
+static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
+{
+	if (!devlink_port_type_should_warn(devlink_port))
+		return;
+	cancel_delayed_work_sync(&devlink_port->type_warn_dw);
+}
+
 /**
  *	devlink_port_register - Register devlink port
  *
@@ -5419,6 +5452,8 @@ int devlink_port_register(struct devlink *devlink,
 	list_add_tail(&devlink_port->list, &devlink->port_list);
 	INIT_LIST_HEAD(&devlink_port->param_list);
 	mutex_unlock(&devlink->lock);
+	INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
+	devlink_port_type_warn_schedule(devlink_port);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
 	return 0;
 }
@@ -5433,6 +5468,7 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
 {
 	struct devlink *devlink = devlink_port->devlink;
 
+	devlink_port_type_warn_cancel(devlink_port);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
 	mutex_lock(&devlink->lock);
 	list_del(&devlink_port->list);
@@ -5446,6 +5482,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
 {
 	if (WARN_ON(!devlink_port->registered))
 		return;
+	devlink_port_type_warn_cancel(devlink_port);
 	spin_lock(&devlink_port->type_lock);
 	devlink_port->type = type;
 	devlink_port->type_dev = type_dev;
@@ -5519,6 +5556,7 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
 void devlink_port_type_clear(struct devlink_port *devlink_port)
 {
 	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
+	devlink_port_type_warn_schedule(devlink_port);
 }
 EXPORT_SYMBOL_GPL(devlink_port_type_clear);
 
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 5ce7d47a960e..3d93e51b83e0 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -7,8 +7,7 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions)
 {
 	struct flow_rule *rule;
 
-	rule = kzalloc(sizeof(struct flow_rule) +
-		       sizeof(struct flow_action_entry) * num_actions,
+	rule = kzalloc(struct_size(rule, action.entries, num_actions),
 		       GFP_KERNEL);
 	if (!rule)
 		return NULL;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index dfa871061f14..a5556e4d3f96 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -587,6 +587,8 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
 	int error;
 	struct neigh_hash_table *nht;
 
+	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
+
 	if (!n) {
 		rc = ERR_PTR(-ENOBUFS);
 		goto out;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eaad23f9c7b5..4a712a00243a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -76,6 +76,7 @@
 #include <linux/highmem.h>
 #include <linux/capability.h>
 #include <linux/user_namespace.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include "datagram.h"
 
@@ -2511,7 +2512,8 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		csum = ops->update(skb->data + offset, copy, csum);
+		csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
+				       skb->data + offset, copy, csum);
 		if ((len -= copy) == 0)
 			return csum;
 		offset += copy;
@@ -2538,9 +2540,13 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 					      frag->page_offset + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
-				csum2 = ops->update(vaddr + p_off, p_len, 0);
+				csum2 = INDIRECT_CALL_1(ops->update,
+							csum_partial_ext,
+							vaddr + p_off, p_len, 0);
 				kunmap_atomic(vaddr);
-				csum = ops->combine(csum, csum2, pos, p_len);
+				csum = INDIRECT_CALL_1(ops->combine,
+						       csum_block_add_ext, csum,
+						       csum2, pos, p_len);
 				pos += p_len;
 			}
 
@@ -2563,7 +2569,8 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 				copy = len;
 			csum2 = __skb_checksum(frag_iter, offset - start,
 					       copy, 0, ops);
-			csum = ops->combine(csum, csum2, pos, copy);
+			csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
+					       csum, csum2, pos, copy);
 			if ((len -= copy) == 0)
 				return csum;
 			offset += copy;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 3b5f434cad3f..b70befe8a3c8 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -261,7 +261,7 @@ static int dsa_port_setup(struct dsa_port *dp)
 	enum devlink_port_flavour flavour;
 	struct dsa_switch *ds = dp->ds;
 	struct dsa_switch_tree *dst = ds->dst;
-	int err;
+	int err = 0;
 
 	if (dp->type == DSA_PORT_TYPE_UNUSED)
 		return 0;
@@ -299,19 +299,15 @@ static int dsa_port_setup(struct dsa_port *dp)
 		break;
 	case DSA_PORT_TYPE_CPU:
 		err = dsa_port_link_register_of(dp);
-		if (err) {
+		if (err)
 			dev_err(ds->dev, "failed to setup link for port %d.%d\n",
 				ds->index, dp->index);
-			return err;
-		}
 		break;
 	case DSA_PORT_TYPE_DSA:
 		err = dsa_port_link_register_of(dp);
-		if (err) {
+		if (err)
 			dev_err(ds->dev, "failed to setup link for port %d.%d\n",
 				ds->index, dp->index);
-			return err;
-		}
 		break;
 	case DSA_PORT_TYPE_USER:
 		err = dsa_slave_create(dp);
@@ -323,7 +319,10 @@ static int dsa_port_setup(struct dsa_port *dp)
 		break;
 	}
 
-	return 0;
+	if (err)
+		devlink_port_unregister(&dp->devlink_port);
+
+	return err;
 }
 
 static void dsa_port_teardown(struct dsa_port *dp)
@@ -351,7 +350,7 @@ static void dsa_port_teardown(struct dsa_port *dp)
 
 static int dsa_switch_setup(struct dsa_switch *ds)
 {
-	int err;
+	int err = 0;
 
 	/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
 	 * driver and before ops->setup() has run, since the switch drivers and
@@ -369,29 +368,41 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 
 	err = devlink_register(ds->devlink, ds->dev);
 	if (err)
-		return err;
+		goto free_devlink;
 
 	err = dsa_switch_register_notifier(ds);
 	if (err)
-		return err;
+		goto unregister_devlink;
 
 	err = ds->ops->setup(ds);
 	if (err < 0)
-		return err;
+		goto unregister_notifier;
 
 	if (!ds->slave_mii_bus && ds->ops->phy_read) {
 		ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
-		if (!ds->slave_mii_bus)
-			return -ENOMEM;
+		if (!ds->slave_mii_bus) {
+			err = -ENOMEM;
+			goto unregister_notifier;
+		}
 
 		dsa_slave_mii_bus_init(ds);
 
 		err = mdiobus_register(ds->slave_mii_bus);
 		if (err < 0)
-			return err;
+			goto unregister_notifier;
 	}
 
 	return 0;
+
+unregister_notifier:
+	dsa_switch_unregister_notifier(ds);
+unregister_devlink:
+	devlink_unregister(ds->devlink);
+free_devlink:
+	devlink_free(ds->devlink);
+	ds->devlink = NULL;
+
+	return err;
 }
 
 static void dsa_switch_teardown(struct dsa_switch *ds)
@@ -413,8 +424,8 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 {
 	struct dsa_switch *ds;
 	struct dsa_port *dp;
-	int device, port;
-	int err;
+	int device, port, i;
+	int err = 0;
 
 	for (device = 0; device < DSA_MAX_SWITCHES; device++) {
 		ds = dst->ds[device];
@@ -423,18 +434,41 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 
 		err = dsa_switch_setup(ds);
 		if (err)
-			return err;
+			goto switch_teardown;
 
 		for (port = 0; port < ds->num_ports; port++) {
 			dp = &ds->ports[port];
 
 			err = dsa_port_setup(dp);
 			if (err)
-				return err;
+				goto ports_teardown;
 		}
 	}
 
 	return 0;
+
+ports_teardown:
+	for (i = 0; i < port; i++)
+		dsa_port_teardown(&ds->ports[i]);
+
+	dsa_switch_teardown(ds);
+
+switch_teardown:
+	for (i = 0; i < device; i++) {
+		ds = dst->ds[i];
+		if (!ds)
+			continue;
+
+		for (port = 0; port < ds->num_ports; port++) {
+			dp = &ds->ports[port];
+
+			dsa_port_teardown(dp);
+		}
+
+		dsa_switch_teardown(ds);
+	}
+
+	return err;
 }
 
 static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
@@ -496,17 +530,24 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
 
 	err = dsa_tree_setup_switches(dst);
 	if (err)
-		return err;
+		goto teardown_default_cpu;
 
 	err = dsa_tree_setup_master(dst);
 	if (err)
-		return err;
+		goto teardown_switches;
 
 	dst->setup = true;
 
 	pr_info("DSA: tree %d setup\n", dst->index);
 
 	return 0;
+
+teardown_switches:
+	dsa_tree_teardown_switches(dst);
+teardown_default_cpu:
+	dsa_tree_teardown_default_cpu(dst);
+
+	return err;
 }
 
 static void dsa_tree_teardown(struct dsa_switch_tree *dst)
@@ -547,8 +588,10 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
 	dst->ds[index] = ds;
 
 	err = dsa_tree_setup(dst);
-	if (err)
-		dsa_tree_remove_switch(dst, index);
+	if (err) {
+		dst->ds[index] = NULL;
+		dsa_tree_put(dst);
+	}
 
 	return err;
 }
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 8f1222324646..418490bda3a4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -163,6 +163,23 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
 int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
 int dsa_port_link_register_of(struct dsa_port *dp);
 void dsa_port_link_unregister_of(struct dsa_port *dp);
+void dsa_port_phylink_validate(struct phylink_config *config,
+			       unsigned long *supported,
+			       struct phylink_link_state *state);
+int dsa_port_phylink_mac_link_state(struct phylink_config *config,
+				    struct phylink_link_state *state);
+void dsa_port_phylink_mac_config(struct phylink_config *config,
+				 unsigned int mode,
+				 const struct phylink_link_state *state);
+void dsa_port_phylink_mac_an_restart(struct phylink_config *config);
+void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+				    unsigned int mode,
+				    phy_interface_t interface);
+void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+				  unsigned int mode,
+				  phy_interface_t interface,
+				  struct phy_device *phydev);
+extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ed8ba9daa3ba..d74bc9df1359 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -422,6 +422,108 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
 	return phydev;
 }
 
+void dsa_port_phylink_validate(struct phylink_config *config,
+			       unsigned long *supported,
+			       struct phylink_link_state *state)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->phylink_validate)
+		return;
+
+	ds->ops->phylink_validate(ds, dp->index, supported, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_validate);
+
+int dsa_port_phylink_mac_link_state(struct phylink_config *config,
+				    struct phylink_link_state *state)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	/* Only called for SGMII and 802.3z */
+	if (!ds->ops->phylink_mac_link_state)
+		return -EOPNOTSUPP;
+
+	return ds->ops->phylink_mac_link_state(ds, dp->index, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_state);
+
+void dsa_port_phylink_mac_config(struct phylink_config *config,
+				 unsigned int mode,
+				 const struct phylink_link_state *state)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->phylink_mac_config)
+		return;
+
+	ds->ops->phylink_mac_config(ds, dp->index, mode, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_config);
+
+void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->phylink_mac_an_restart)
+		return;
+
+	ds->ops->phylink_mac_an_restart(ds, dp->index);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_an_restart);
+
+void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+				    unsigned int mode,
+				    phy_interface_t interface)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct phy_device *phydev = NULL;
+	struct dsa_switch *ds = dp->ds;
+
+	if (dsa_is_user_port(ds, dp->index))
+		phydev = dp->slave->phydev;
+
+	if (!ds->ops->phylink_mac_link_down) {
+		if (ds->ops->adjust_link && phydev)
+			ds->ops->adjust_link(ds, dp->index, phydev);
+		return;
+	}
+
+	ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_down);
+
+void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+				  unsigned int mode,
+				  phy_interface_t interface,
+				  struct phy_device *phydev)
+{
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->phylink_mac_link_up) {
+		if (ds->ops->adjust_link && phydev)
+			ds->ops->adjust_link(ds, dp->index, phydev);
+		return;
+	}
+
+	ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_up);
+
+const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
+	.validate = dsa_port_phylink_validate,
+	.mac_link_state = dsa_port_phylink_mac_link_state,
+	.mac_config = dsa_port_phylink_mac_config,
+	.mac_an_restart = dsa_port_phylink_mac_an_restart,
+	.mac_link_down = dsa_port_phylink_mac_link_down,
+	.mac_link_up = dsa_port_phylink_mac_link_up,
+};
+
 static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
 {
 	struct dsa_switch *ds = dp->ds;
@@ -499,8 +601,53 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
 	return 0;
 }
 
+static int dsa_port_phylink_register(struct dsa_port *dp)
+{
+	struct dsa_switch *ds = dp->ds;
+	struct device_node *port_dn = dp->dn;
+	int mode, err;
+
+	mode = of_get_phy_mode(port_dn);
+	if (mode < 0)
+		mode = PHY_INTERFACE_MODE_NA;
+
+	dp->pl_config.dev = ds->dev;
+	dp->pl_config.type = PHYLINK_DEV;
+
+	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn),
+				mode, &dsa_port_phylink_mac_ops);
+	if (IS_ERR(dp->pl)) {
+		pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
+		return PTR_ERR(dp->pl);
+	}
+
+	err = phylink_of_phy_connect(dp->pl, port_dn, 0);
+	if (err) {
+		pr_err("could not attach to PHY: %d\n", err);
+		goto err_phy_connect;
+	}
+
+	rtnl_lock();
+	phylink_start(dp->pl);
+	rtnl_unlock();
+
+	return 0;
+
+err_phy_connect:
+	phylink_destroy(dp->pl);
+	return err;
+}
+
 int dsa_port_link_register_of(struct dsa_port *dp)
 {
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->adjust_link)
+		return dsa_port_phylink_register(dp);
+
+	dev_warn(ds->dev,
+		 "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
+
 	if (of_phy_is_fixed_link(dp->dn))
 		return dsa_port_fixed_link_register_of(dp);
 	else
@@ -509,6 +656,16 @@ int dsa_port_link_register_of(struct dsa_port *dp)
 
 void dsa_port_link_unregister_of(struct dsa_port *dp)
 {
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->adjust_link) {
+		rtnl_lock();
+		phylink_disconnect_phy(dp->pl);
+		rtnl_unlock();
+		phylink_destroy(dp->pl);
+		return;
+	}
+
 	if (of_phy_is_fixed_link(dp->dn))
 		of_phy_deregister_fixed_link(dp->dn);
 	else
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9892ca1f6859..1e2ae9d59b88 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1164,98 +1164,6 @@ static struct device_type dsa_type = {
 	.name	= "dsa",
 };
 
-static void dsa_slave_phylink_validate(struct net_device *dev,
-				       unsigned long *supported,
-				       struct phylink_link_state *state)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_validate)
-		return;
-
-	ds->ops->phylink_validate(ds, dp->index, supported, state);
-}
-
-static int dsa_slave_phylink_mac_link_state(struct net_device *dev,
-					    struct phylink_link_state *state)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	/* Only called for SGMII and 802.3z */
-	if (!ds->ops->phylink_mac_link_state)
-		return -EOPNOTSUPP;
-
-	return ds->ops->phylink_mac_link_state(ds, dp->index, state);
-}
-
-static void dsa_slave_phylink_mac_config(struct net_device *dev,
-					 unsigned int mode,
-					 const struct phylink_link_state *state)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_mac_config)
-		return;
-
-	ds->ops->phylink_mac_config(ds, dp->index, mode, state);
-}
-
-static void dsa_slave_phylink_mac_an_restart(struct net_device *dev)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_mac_an_restart)
-		return;
-
-	ds->ops->phylink_mac_an_restart(ds, dp->index);
-}
-
-static void dsa_slave_phylink_mac_link_down(struct net_device *dev,
-					    unsigned int mode,
-					    phy_interface_t interface)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_mac_link_down) {
-		if (ds->ops->adjust_link && dev->phydev)
-			ds->ops->adjust_link(ds, dp->index, dev->phydev);
-		return;
-	}
-
-	ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
-}
-
-static void dsa_slave_phylink_mac_link_up(struct net_device *dev,
-					  unsigned int mode,
-					  phy_interface_t interface,
-					  struct phy_device *phydev)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	if (!ds->ops->phylink_mac_link_up) {
-		if (ds->ops->adjust_link && dev->phydev)
-			ds->ops->adjust_link(ds, dp->index, dev->phydev);
-		return;
-	}
-
-	ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
-}
-
-static const struct phylink_mac_ops dsa_slave_phylink_mac_ops = {
-	.validate = dsa_slave_phylink_validate,
-	.mac_link_state = dsa_slave_phylink_mac_link_state,
-	.mac_config = dsa_slave_phylink_mac_config,
-	.mac_an_restart = dsa_slave_phylink_mac_an_restart,
-	.mac_link_down = dsa_slave_phylink_mac_link_down,
-	.mac_link_up = dsa_slave_phylink_mac_link_up,
-};
-
 void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up)
 {
 	const struct dsa_port *dp = dsa_to_port(ds, port);
@@ -1303,8 +1211,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
 	if (mode < 0)
 		mode = PHY_INTERFACE_MODE_NA;
 
-	dp->pl = phylink_create(slave_dev, of_fwnode_handle(port_dn), mode,
-				&dsa_slave_phylink_mac_ops);
+	dp->pl_config.dev = &slave_dev->dev;
+	dp->pl_config.type = PHYLINK_NETDEV;
+
+	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), mode,
+				&dsa_port_phylink_mac_ops);
 	if (IS_ERR(dp->pl)) {
 		netdev_err(slave_dev,
 			   "error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 4196bcd4105a..5b56f16ed86b 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -79,7 +79,7 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
 	key.src = *src;
 	key.dst = *dst;
 
-	q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+	q = inet_frag_find(ieee802154_lowpan->fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -139,7 +139,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
 		fq->q.flags |= INET_FRAG_FIRST_IN;
 
 	fq->q.meat += skb->len;
-	add_frag_mem_limit(fq->q.net, skb->truesize);
+	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
 
 	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 	    fq->q.meat == fq->q.len) {
@@ -326,23 +326,18 @@ err:
 static struct ctl_table lowpan_frags_ns_ctl_table[] = {
 	{
 		.procname	= "6lowpanfrag_high_thresh",
-		.data		= &init_net.ieee802154_lowpan.frags.high_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &init_net.ieee802154_lowpan.frags.low_thresh
 	},
 	{
 		.procname	= "6lowpanfrag_low_thresh",
-		.data		= &init_net.ieee802154_lowpan.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra2		= &init_net.ieee802154_lowpan.frags.high_thresh
 	},
 	{
 		.procname	= "6lowpanfrag_time",
-		.data		= &init_net.ieee802154_lowpan.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -377,17 +372,17 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
 		if (table == NULL)
 			goto err_alloc;
 
-		table[0].data = &ieee802154_lowpan->frags.high_thresh;
-		table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
-		table[1].data = &ieee802154_lowpan->frags.low_thresh;
-		table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
-		table[2].data = &ieee802154_lowpan->frags.timeout;
-
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
 			table[0].procname = NULL;
 	}
 
+	table[0].data	= &ieee802154_lowpan->fqdir->high_thresh;
+	table[0].extra1	= &ieee802154_lowpan->fqdir->low_thresh;
+	table[1].data	= &ieee802154_lowpan->fqdir->low_thresh;
+	table[1].extra2	= &ieee802154_lowpan->fqdir->high_thresh;
+	table[2].data	= &ieee802154_lowpan->fqdir->timeout;
+
 	hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
 	if (hdr == NULL)
 		goto err_reg;
@@ -454,17 +449,18 @@ static int __net_init lowpan_frags_init_net(struct net *net)
 		net_ieee802154_lowpan(net);
 	int res;
 
-	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
-	ieee802154_lowpan->frags.f = &lowpan_frags;
 
-	res = inet_frags_init_net(&ieee802154_lowpan->frags);
+	res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net);
 	if (res < 0)
 		return res;
+
+	ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+	ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+	ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
 	res = lowpan_frags_ns_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&ieee802154_lowpan->frags);
+		fqdir_exit(ieee802154_lowpan->fqdir);
 	return res;
 }
 
@@ -474,7 +470,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
 		net_ieee802154_lowpan(net);
 
 	lowpan_frags_ns_sysctl_unregister(net);
-	inet_frags_exit_net(&ieee802154_lowpan->frags);
+	fqdir_exit(ieee802154_lowpan->fqdir);
 }
 
 static struct pernet_operations lowpan_frags_ops = {
@@ -544,7 +540,7 @@ err_sysctl:
 
 void lowpan_net_frag_exit(void)
 {
-	inet_frags_fini(&lowpan_frags);
 	lowpan_frags_sysctl_unregister();
 	unregister_pernet_subsys(&lowpan_frags_ops);
+	inet_frags_fini(&lowpan_frags);
 }
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 000a61994c8f..d57ecfaf89d4 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -14,7 +14,7 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
 	     inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
-	     metrics.o netlink.o
+	     metrics.o netlink.o nexthop.o
 
 obj-$(CONFIG_BPFILTER) += bpfilter/
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b298255f6fdb..76055c66326a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -192,7 +192,7 @@ int fib_unmerge(struct net *net)
 	return 0;
 }
 
-static void fib_flush(struct net *net)
+void fib_flush(struct net *net)
 {
 	int flushed = 0;
 	unsigned int h;
@@ -645,6 +645,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
 }
 
 const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
+	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
 	[RTA_DST]		= { .type = NLA_U32 },
 	[RTA_SRC]		= { .type = NLA_U32 },
 	[RTA_IIF]		= { .type = NLA_U32 },
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d3da6a10f86f..78648072783e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1092,15 +1092,13 @@ out:
 	return err;
 }
 
-static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
-			struct netlink_ext_ack *extack)
+int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
+		 struct netlink_ext_ack *extack)
 {
-	struct net *net = cfg->fc_nlinfo.nl_net;
-	u32 table = cfg->fc_table;
 	int err;
 
 	if (nh->fib_nh_gw_family == AF_INET)
-		err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack);
+		err = fib_check_nh_v4_gw(net, nh, table, scope, extack);
 	else if (nh->fib_nh_gw_family == AF_INET6)
 		err = fib_check_nh_v6_gw(net, nh, table, extack);
 	else
@@ -1191,11 +1189,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 	fib_info_hash_free(old_laddrhash, bytes);
 }
 
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
+__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
+				unsigned char scope)
 {
-	nh->nh_saddr = inet_select_addr(nh->fib_nh_dev,
-					nh->fib_nh_gw4,
-					nh->nh_parent->fib_scope);
+	nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
 	nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
 
 	return nh->nh_saddr;
@@ -1213,7 +1210,7 @@ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
 	if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
 		return nh->nh_saddr;
 
-	return fib_info_update_nh_saddr(net, nh);
+	return fib_info_update_nh_saddr(net, nh, res->fi->fib_scope);
 }
 
 static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
@@ -1377,7 +1374,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		int linkdown = 0;
 
 		change_nexthops(fi) {
-			err = fib_check_nh(cfg, nexthop_nh, extack);
+			err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh,
+					   cfg->fc_table, cfg->fc_scope,
+					   extack);
 			if (err != 0)
 				goto failure;
 			if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
@@ -1393,7 +1392,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	}
 
 	change_nexthops(fi) {
-		fib_info_update_nh_saddr(net, nexthop_nh);
+		fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
 		if (nexthop_nh->fib_nh_gw_family == AF_INET6)
 			fi->fib_nh_is_v6 = true;
 	} endfor_nexthops(fi)
@@ -1713,7 +1712,7 @@ static int call_fib_nh_notifiers(struct fib_nh *nh,
  * - if the new MTU is greater than the PMTU, don't make any change
  * - otherwise, unlock and set PMTU
  */
-static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
+void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
 {
 	struct fnhe_hash_bucket *bucket;
 	int i;
@@ -1749,7 +1748,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 
 	hlist_for_each_entry(nh, head, nh_hash) {
 		if (nh->fib_nh_dev == dev)
-			nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
+			fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
 	}
 }
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 334f723bdf80..b53ecef89d59 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1943,6 +1943,77 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
 	return found;
 }
 
+/* derived from fib_trie_free */
+static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
+				     struct nl_info *info)
+{
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct key_vector *pn = t->kv;
+	unsigned long cindex = 1;
+	struct fib_alias *fa;
+
+	for (;;) {
+		struct key_vector *n;
+
+		if (!(cindex--)) {
+			t_key pkey = pn->key;
+
+			if (IS_TRIE(pn))
+				break;
+
+			pn = node_parent(pn);
+			cindex = get_index(pkey, pn);
+			continue;
+		}
+
+		/* grab the next available node */
+		n = get_child(pn, cindex);
+		if (!n)
+			continue;
+
+		if (IS_TNODE(n)) {
+			/* record pn and cindex for leaf walking */
+			pn = n;
+			cindex = 1ul << n->bits;
+
+			continue;
+		}
+
+		hlist_for_each_entry(fa, &n->leaf, fa_list) {
+			struct fib_info *fi = fa->fa_info;
+
+			if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
+				continue;
+
+			rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
+				  KEYLENGTH - fa->fa_slen, tb->tb_id,
+				  info, NLM_F_REPLACE);
+
+			/* call_fib_entry_notifiers will be removed when
+			 * in-kernel notifier is implemented and supported
+			 * for nexthop objects
+			 */
+			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+						 n->key,
+						 KEYLENGTH - fa->fa_slen, fa,
+						 NULL);
+		}
+	}
+}
+
+void fib_info_notify_update(struct net *net, struct nl_info *info)
+{
+	unsigned int h;
+
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		struct fib_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb_hlist)
+			__fib_info_notify_update(net, tb, info);
+	}
+}
+
 static void fib_leaf_notify(struct net *net, struct key_vector *l,
 			    struct fib_table *tb, struct notifier_block *nb)
 {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 737808e27f8b..35e9784fab4e 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -110,48 +110,101 @@ int inet_frags_init(struct inet_frags *f)
 	if (!f->frags_cachep)
 		return -ENOMEM;
 
+	refcount_set(&f->refcnt, 1);
+	init_completion(&f->completion);
 	return 0;
 }
 EXPORT_SYMBOL(inet_frags_init);
 
 void inet_frags_fini(struct inet_frags *f)
 {
-	/* We must wait that all inet_frag_destroy_rcu() have completed. */
-	rcu_barrier();
+	if (refcount_dec_and_test(&f->refcnt))
+		complete(&f->completion);
+
+	wait_for_completion(&f->completion);
 
 	kmem_cache_destroy(f->frags_cachep);
 	f->frags_cachep = NULL;
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
+/* called from rhashtable_free_and_destroy() at netns_frags dismantle */
 static void inet_frags_free_cb(void *ptr, void *arg)
 {
 	struct inet_frag_queue *fq = ptr;
+	int count;
 
-	/* If we can not cancel the timer, it means this frag_queue
-	 * is already disappearing, we have nothing to do.
-	 * Otherwise, we own a refcount until the end of this function.
-	 */
-	if (!del_timer(&fq->timer))
-		return;
+	count = del_timer_sync(&fq->timer) ? 1 : 0;
 
 	spin_lock_bh(&fq->lock);
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
 		fq->flags |= INET_FRAG_COMPLETE;
-		refcount_dec(&fq->refcnt);
+		count++;
+	} else if (fq->flags & INET_FRAG_HASH_DEAD) {
+		count++;
 	}
 	spin_unlock_bh(&fq->lock);
 
-	inet_frag_put(fq);
+	if (refcount_sub_and_test(count, &fq->refcnt))
+		inet_frag_destroy(fq);
 }
 
-void inet_frags_exit_net(struct netns_frags *nf)
+static void fqdir_rwork_fn(struct work_struct *work)
 {
-	nf->high_thresh = 0; /* prevent creation of new frags */
+	struct fqdir *fqdir = container_of(to_rcu_work(work),
+					   struct fqdir, destroy_rwork);
+	struct inet_frags *f = fqdir->f;
+
+	rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
+
+	/* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu)
+	 * have completed, since they need to dereference fqdir.
+	 * Would it not be nice to have kfree_rcu_barrier() ? :)
+	 */
+	rcu_barrier();
+
+	if (refcount_dec_and_test(&f->refcnt))
+		complete(&f->completion);
 
-	rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
+	kfree(fqdir);
 }
-EXPORT_SYMBOL(inet_frags_exit_net);
+
+int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
+{
+	struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
+	int res;
+
+	if (!fqdir)
+		return -ENOMEM;
+	fqdir->f = f;
+	fqdir->net = net;
+	res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
+	if (res < 0) {
+		kfree(fqdir);
+		return res;
+	}
+	refcount_inc(&f->refcnt);
+	*fqdirp = fqdir;
+	return 0;
+}
+EXPORT_SYMBOL(fqdir_init);
+
+void fqdir_exit(struct fqdir *fqdir)
+{
+	fqdir->high_thresh = 0; /* prevent creation of new frags */
+
+	fqdir->dead = true;
+
+	/* call_rcu is supposed to provide memory barrier semantics,
+	 * separating the setting of fqdir->dead with the destruction
+	 * work.  This implicit barrier is paired with inet_frag_kill().
+	 */
+
+	INIT_RCU_WORK(&fqdir->destroy_rwork, fqdir_rwork_fn);
+	queue_rcu_work(system_wq, &fqdir->destroy_rwork);
+
+}
+EXPORT_SYMBOL(fqdir_exit);
 
 void inet_frag_kill(struct inet_frag_queue *fq)
 {
@@ -159,11 +212,23 @@ void inet_frag_kill(struct inet_frag_queue *fq)
 		refcount_dec(&fq->refcnt);
 
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
-		struct netns_frags *nf = fq->net;
+		struct fqdir *fqdir = fq->fqdir;
 
 		fq->flags |= INET_FRAG_COMPLETE;
-		rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
-		refcount_dec(&fq->refcnt);
+		rcu_read_lock();
+		/* The RCU read lock provides a memory barrier
+		 * guaranteeing that if fqdir->dead is false then
+		 * the hash table destruction will not start until
+		 * after we unlock.  Paired with inet_frags_exit_net().
+		 */
+		if (!fqdir->dead) {
+			rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
+					       fqdir->f->rhash_params);
+			refcount_dec(&fq->refcnt);
+		} else {
+			fq->flags |= INET_FRAG_HASH_DEAD;
+		}
+		rcu_read_unlock();
 	}
 }
 EXPORT_SYMBOL(inet_frag_kill);
@@ -172,7 +237,7 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
 {
 	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
 						 rcu);
-	struct inet_frags *f = q->net->f;
+	struct inet_frags *f = q->fqdir->f;
 
 	if (f->destructor)
 		f->destructor(q);
@@ -203,7 +268,7 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge);
 
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
-	struct netns_frags *nf;
+	struct fqdir *fqdir;
 	unsigned int sum, sum_truesize = 0;
 	struct inet_frags *f;
 
@@ -211,18 +276,18 @@ void inet_frag_destroy(struct inet_frag_queue *q)
 	WARN_ON(del_timer(&q->timer) != 0);
 
 	/* Release all fragment data. */
-	nf = q->net;
-	f = nf->f;
+	fqdir = q->fqdir;
+	f = fqdir->f;
 	sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
 	sum = sum_truesize + f->qsize;
 
 	call_rcu(&q->rcu, inet_frag_destroy_rcu);
 
-	sub_frag_mem_limit(nf, sum);
+	sub_frag_mem_limit(fqdir, sum);
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
 					       struct inet_frags *f,
 					       void *arg)
 {
@@ -232,9 +297,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	if (!q)
 		return NULL;
 
-	q->net = nf;
+	q->fqdir = fqdir;
 	f->constructor(q, arg);
-	add_frag_mem_limit(nf, f->qsize);
+	add_frag_mem_limit(fqdir, f->qsize);
 
 	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
@@ -243,21 +308,21 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	return q;
 }
 
-static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 						void *arg,
 						struct inet_frag_queue **prev)
 {
-	struct inet_frags *f = nf->f;
+	struct inet_frags *f = fqdir->f;
 	struct inet_frag_queue *q;
 
-	q = inet_frag_alloc(nf, f, arg);
+	q = inet_frag_alloc(fqdir, f, arg);
 	if (!q) {
 		*prev = ERR_PTR(-ENOMEM);
 		return NULL;
 	}
-	mod_timer(&q->timer, jiffies + nf->timeout);
+	mod_timer(&q->timer, jiffies + fqdir->timeout);
 
-	*prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+	*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
 						 &q->node, f->rhash_params);
 	if (*prev) {
 		q->flags |= INET_FRAG_COMPLETE;
@@ -269,18 +334,18 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 }
 
 /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
+struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
 {
 	struct inet_frag_queue *fq = NULL, *prev;
 
-	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
+	if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
 		return NULL;
 
 	rcu_read_lock();
 
-	prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+	prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
 	if (!prev)
-		fq = inet_frag_create(nf, key, &prev);
+		fq = inet_frag_create(fqdir, key, &prev);
 	if (prev && !IS_ERR(prev)) {
 		fq = prev;
 		if (!refcount_inc_not_zero(&fq->refcnt))
@@ -391,7 +456,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
 
 	delta += head->truesize;
 	if (delta)
-		add_frag_mem_limit(q->net, delta);
+		add_frag_mem_limit(q->fqdir, delta);
 
 	/* If the first fragment is fragmented itself, we split
 	 * it to two chunks: the first with data and paged part
@@ -413,7 +478,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
 		head->truesize += clone->truesize;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(q->net, clone->truesize);
+		add_frag_mem_limit(q->fqdir, clone->truesize);
 		skb_shinfo(head)->frag_list = clone;
 		nextp = &clone->next;
 	} else {
@@ -466,7 +531,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
 			rbn = rbnext;
 		}
 	}
-	sub_frag_mem_limit(q->net, head->truesize);
+	sub_frag_mem_limit(q->fqdir, head->truesize);
 
 	*nextp = NULL;
 	skb_mark_not_on_list(head);
@@ -494,7 +559,7 @@ struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
 	if (head == q->fragments_tail)
 		q->fragments_tail = NULL;
 
-	sub_frag_mem_limit(q->net, head->truesize);
+	sub_frag_mem_limit(q->fqdir, head->truesize);
 
 	return head;
 }
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cf2b0a6a3337..1ffaec056821 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -82,15 +82,13 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
-	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
-					       frags);
-	struct net *net = container_of(ipv4, struct net, ipv4);
+	struct net *net = q->fqdir->net;
 
 	const struct frag_v4_compare_key *key = a;
 
 	q->key.v4 = *key;
 	qp->ecn = 0;
-	qp->peer = q->net->max_dist ?
+	qp->peer = q->fqdir->max_dist ?
 		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
 		NULL;
 }
@@ -142,7 +140,7 @@ static void ip_expire(struct timer_list *t)
 	int err;
 
 	qp = container_of(frag, struct ipq, q);
-	net = container_of(qp->q.net, struct net, ipv4.frags);
+	net = qp->q.fqdir->net;
 
 	rcu_read_lock();
 	spin_lock(&qp->q.lock);
@@ -211,7 +209,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
 	};
 	struct inet_frag_queue *q;
 
-	q = inet_frag_find(&net->ipv4.frags, &key);
+	q = inet_frag_find(net->ipv4.fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -222,7 +220,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
 static int ip_frag_too_far(struct ipq *qp)
 {
 	struct inet_peer *peer = qp->peer;
-	unsigned int max = qp->q.net->max_dist;
+	unsigned int max = qp->q.fqdir->max_dist;
 	unsigned int start, end;
 
 	int rc;
@@ -236,12 +234,8 @@ static int ip_frag_too_far(struct ipq *qp)
 
 	rc = qp->q.fragments_tail && (end - start) > max;
 
-	if (rc) {
-		struct net *net;
-
-		net = container_of(qp->q.net, struct net, ipv4.frags);
-		__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
-	}
+	if (rc)
+		__IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);
 
 	return rc;
 }
@@ -250,13 +244,13 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	unsigned int sum_truesize = 0;
 
-	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
+	if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
 		refcount_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
 
 	sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
-	sub_frag_mem_limit(qp->q.net, sum_truesize);
+	sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
 
 	qp->q.flags = 0;
 	qp->q.len = 0;
@@ -273,7 +267,7 @@ static int ip_frag_reinit(struct ipq *qp)
 /* Add new segment to existing queue. */
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
-	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+	struct net *net = qp->q.fqdir->net;
 	int ihl, end, flags, offset;
 	struct sk_buff *prev_tail;
 	struct net_device *dev;
@@ -352,7 +346,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
-	add_frag_mem_limit(qp->q.net, skb->truesize);
+	add_frag_mem_limit(qp->q.fqdir, skb->truesize);
 	if (offset == 0)
 		qp->q.flags |= INET_FRAG_FIRST_IN;
 
@@ -399,7 +393,7 @@ err:
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 			 struct sk_buff *prev_tail, struct net_device *dev)
 {
-	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+	struct net *net = qp->q.fqdir->net;
 	struct iphdr *iph;
 	void *reasm_data;
 	int len, err;
@@ -544,30 +538,24 @@ static int dist_min;
 static struct ctl_table ip4_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ipfrag_high_thresh",
-		.data		= &init_net.ipv4.frags.high_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &init_net.ipv4.frags.low_thresh
 	},
 	{
 		.procname	= "ipfrag_low_thresh",
-		.data		= &init_net.ipv4.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra2		= &init_net.ipv4.frags.high_thresh
 	},
 	{
 		.procname	= "ipfrag_time",
-		.data		= &init_net.ipv4.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "ipfrag_max_dist",
-		.data		= &init_net.ipv4.frags.max_dist,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -600,13 +588,13 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
 		if (!table)
 			goto err_alloc;
 
-		table[0].data = &net->ipv4.frags.high_thresh;
-		table[0].extra1 = &net->ipv4.frags.low_thresh;
-		table[1].data = &net->ipv4.frags.low_thresh;
-		table[1].extra2 = &net->ipv4.frags.high_thresh;
-		table[2].data = &net->ipv4.frags.timeout;
-		table[3].data = &net->ipv4.frags.max_dist;
 	}
+	table[0].data	= &net->ipv4.fqdir->high_thresh;
+	table[0].extra1	= &net->ipv4.fqdir->low_thresh;
+	table[1].data	= &net->ipv4.fqdir->low_thresh;
+	table[1].extra2	= &net->ipv4.fqdir->high_thresh;
+	table[2].data	= &net->ipv4.fqdir->timeout;
+	table[3].data	= &net->ipv4.fqdir->max_dist;
 
 	hdr = register_net_sysctl(net, "net/ipv4", table);
 	if (!hdr)
@@ -654,6 +642,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
 {
 	int res;
 
+	res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
+	if (res < 0)
+		return res;
 	/* Fragment cache limits.
 	 *
 	 * The fragment memory accounting code, (tries to) account for
@@ -668,31 +659,27 @@ static int __net_init ipv4_frags_init_net(struct net *net)
 	 * we will prune down to 3MB, making room for approx 8 big 64K
 	 * fragments 8x128k.
 	 */
-	net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
-	net->ipv4.frags.low_thresh  = 3 * 1024 * 1024;
+	net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
+	net->ipv4.fqdir->low_thresh  = 3 * 1024 * 1024;
 	/*
 	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
 	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
 	 * by TTL.
 	 */
-	net->ipv4.frags.timeout = IP_FRAG_TIME;
+	net->ipv4.fqdir->timeout = IP_FRAG_TIME;
 
-	net->ipv4.frags.max_dist = 64;
-	net->ipv4.frags.f = &ip4_frags;
+	net->ipv4.fqdir->max_dist = 64;
 
-	res = inet_frags_init_net(&net->ipv4.frags);
-	if (res < 0)
-		return res;
 	res = ip4_frags_ns_ctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->ipv4.frags);
+		fqdir_exit(net->ipv4.fqdir);
 	return res;
 }
 
 static void __net_exit ipv4_frags_exit_net(struct net *net)
 {
 	ip4_frags_ns_ctl_unregister(net);
-	inet_frags_exit_net(&net->ipv4.frags);
+	fqdir_exit(net->ipv4.fqdir);
 }
 
 static struct pernet_operations ip4_frags_ops = {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8c9189a41b13..d8f2a22c6ff7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -525,9 +525,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 
 	skb_copy_hash(to, from);
 
-	/* Copy the flags to each fragment. */
-	IPCB(to)->flags = IPCB(from)->flags;
-
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
@@ -561,6 +558,176 @@ static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	return ip_do_fragment(net, sk, skb, output);
 }
 
+void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
+		      unsigned int hlen, struct ip_fraglist_iter *iter)
+{
+	unsigned int first_len = skb_pagelen(skb);
+
+	iter->frag_list = skb_shinfo(skb)->frag_list;
+	iter->frag = iter->frag_list;
+	skb_frag_list_init(skb);
+
+	iter->offset = 0;
+	iter->iph = iph;
+	iter->hlen = hlen;
+
+	skb->data_len = first_len - skb_headlen(skb);
+	skb->len = first_len;
+	iph->tot_len = htons(first_len);
+	iph->frag_off = htons(IP_MF);
+	ip_send_check(iph);
+}
+EXPORT_SYMBOL(ip_fraglist_init);
+
+static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
+				     struct ip_fraglist_iter *iter)
+{
+	struct sk_buff *to = iter->frag;
+
+	/* Copy the flags to each fragment. */
+	IPCB(to)->flags = IPCB(skb)->flags;
+
+	if (iter->offset == 0)
+		ip_options_fragment(to);
+}
+
+void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
+{
+	unsigned int hlen = iter->hlen;
+	struct iphdr *iph = iter->iph;
+	struct sk_buff *frag;
+
+	frag = iter->frag;
+	frag->ip_summed = CHECKSUM_NONE;
+	skb_reset_transport_header(frag);
+	__skb_push(frag, hlen);
+	skb_reset_network_header(frag);
+	memcpy(skb_network_header(frag), iph, hlen);
+	iter->iph = ip_hdr(frag);
+	iph = iter->iph;
+	iph->tot_len = htons(frag->len);
+	ip_copy_metadata(frag, skb);
+	iter->offset += skb->len - hlen;
+	iph->frag_off = htons(iter->offset >> 3);
+	if (frag->next)
+		iph->frag_off |= htons(IP_MF);
+	/* Ready, complete checksum */
+	ip_send_check(iph);
+}
+EXPORT_SYMBOL(ip_fraglist_prepare);
+
+void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
+		  unsigned int ll_rs, unsigned int mtu,
+		  struct ip_frag_state *state)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	state->hlen = hlen;
+	state->ll_rs = ll_rs;
+	state->mtu = mtu;
+
+	state->left = skb->len - hlen;	/* Space per frame */
+	state->ptr = hlen;		/* Where to start from */
+
+	state->offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
+	state->not_last_frag = iph->frag_off & htons(IP_MF);
+}
+EXPORT_SYMBOL(ip_frag_init);
+
+static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
+			 bool first_frag, struct ip_frag_state *state)
+{
+	/* Copy the flags to each fragment. */
+	IPCB(to)->flags = IPCB(from)->flags;
+
+	if (IPCB(from)->flags & IPSKB_FRAG_PMTU)
+		state->iph->frag_off |= htons(IP_DF);
+
+	/* ANK: dirty, but effective trick. Upgrade options only if
+	 * the segment to be fragmented was THE FIRST (otherwise,
+	 * options are already fixed) and make it ONCE
+	 * on the initial skb, so that all the following fragments
+	 * will inherit fixed options.
+	 */
+	if (first_frag)
+		ip_options_fragment(from);
+}
+
+struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state)
+{
+	unsigned int len = state->left;
+	struct sk_buff *skb2;
+	struct iphdr *iph;
+
+	len = state->left;
+	/* IF: it doesn't fit, use 'mtu' - the data space left */
+	if (len > state->mtu)
+		len = state->mtu;
+	/* IF: we are not sending up to and including the packet end
+	   then align the next start on an eight byte boundary */
+	if (len < state->left)	{
+		len &= ~7;
+	}
+
+	/* Allocate buffer */
+	skb2 = alloc_skb(len + state->hlen + state->ll_rs, GFP_ATOMIC);
+	if (!skb2)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 *	Set up data on packet
+	 */
+
+	ip_copy_metadata(skb2, skb);
+	skb_reserve(skb2, state->ll_rs);
+	skb_put(skb2, len + state->hlen);
+	skb_reset_network_header(skb2);
+	skb2->transport_header = skb2->network_header + state->hlen;
+
+	/*
+	 *	Charge the memory for the fragment to any owner
+	 *	it might possess
+	 */
+
+	if (skb->sk)
+		skb_set_owner_w(skb2, skb->sk);
+
+	/*
+	 *	Copy the packet header into the new buffer.
+	 */
+
+	skb_copy_from_linear_data(skb, skb_network_header(skb2), state->hlen);
+
+	/*
+	 *	Copy a block of the IP datagram.
+	 */
+	if (skb_copy_bits(skb, state->ptr, skb_transport_header(skb2), len))
+		BUG();
+	state->left -= len;
+
+	/*
+	 *	Fill in the new header fields.
+	 */
+	iph = ip_hdr(skb2);
+	iph->frag_off = htons((state->offset >> 3));
+
+	/*
+	 *	Added AC : If we are fragmenting a fragment that's not the
+	 *		   last fragment then keep MF on each bit
+	 */
+	if (state->left > 0 || state->not_last_frag)
+		iph->frag_off |= htons(IP_MF);
+	state->ptr += len;
+	state->offset += len;
+
+	iph->tot_len = htons(len + state->hlen);
+
+	ip_send_check(iph);
+
+	return skb2;
+}
+EXPORT_SYMBOL(ip_frag_next);
+
 /*
  *	This IP datagram is too large to be sent in one piece.  Break it up into
  *	smaller pieces (each of size equal to IP header plus
@@ -572,12 +739,11 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		   int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct iphdr *iph;
-	int ptr;
 	struct sk_buff *skb2;
-	unsigned int mtu, hlen, left, len, ll_rs;
-	int offset;
-	__be16 not_last_frag;
 	struct rtable *rt = skb_rtable(skb);
+	unsigned int mtu, hlen, ll_rs;
+	struct ip_fraglist_iter iter;
+	struct ip_frag_state state;
 	int err = 0;
 
 	/* for offloaded checksums cleanup checksum before fragmentation */
@@ -642,49 +808,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		}
 
 		/* Everything is OK. Generate! */
-
-		err = 0;
-		offset = 0;
-		frag = skb_shinfo(skb)->frag_list;
-		skb_frag_list_init(skb);
-		skb->data_len = first_len - skb_headlen(skb);
-		skb->len = first_len;
-		iph->tot_len = htons(first_len);
-		iph->frag_off = htons(IP_MF);
-		ip_send_check(iph);
+		ip_fraglist_init(skb, iph, hlen, &iter);
 
 		for (;;) {
 			/* Prepare header of the next frame,
 			 * before previous one went down. */
-			if (frag) {
-				frag->ip_summed = CHECKSUM_NONE;
-				skb_reset_transport_header(frag);
-				__skb_push(frag, hlen);
-				skb_reset_network_header(frag);
-				memcpy(skb_network_header(frag), iph, hlen);
-				iph = ip_hdr(frag);
-				iph->tot_len = htons(frag->len);
-				ip_copy_metadata(frag, skb);
-				if (offset == 0)
-					ip_options_fragment(frag);
-				offset += skb->len - hlen;
-				iph->frag_off = htons(offset>>3);
-				if (frag->next)
-					iph->frag_off |= htons(IP_MF);
-				/* Ready, complete checksum */
-				ip_send_check(iph);
+			if (iter.frag) {
+				ip_fraglist_ipcb_prepare(skb, &iter);
+				ip_fraglist_prepare(skb, &iter);
 			}
 
 			err = output(net, sk, skb);
 
 			if (!err)
 				IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
-			if (err || !frag)
+			if (err || !iter.frag)
 				break;
 
-			skb = frag;
-			frag = skb->next;
-			skb_mark_not_on_list(skb);
+			skb = ip_fraglist_next(&iter);
 		}
 
 		if (err == 0) {
@@ -692,7 +833,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 			return 0;
 		}
 
-		kfree_skb_list(frag);
+		kfree_skb_list(iter.frag_list);
 
 		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 		return err;
@@ -708,105 +849,29 @@ slow_path_clean:
 	}
 
 slow_path:
-	iph = ip_hdr(skb);
-
-	left = skb->len - hlen;		/* Space per frame */
-	ptr = hlen;		/* Where to start from */
-
 	/*
 	 *	Fragment the datagram.
 	 */
 
-	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
-	not_last_frag = iph->frag_off & htons(IP_MF);
+	ip_frag_init(skb, hlen, ll_rs, mtu, &state);
 
 	/*
 	 *	Keep copying data until we run out.
 	 */
 
-	while (left > 0) {
-		len = left;
-		/* IF: it doesn't fit, use 'mtu' - the data space left */
-		if (len > mtu)
-			len = mtu;
-		/* IF: we are not sending up to and including the packet end
-		   then align the next start on an eight byte boundary */
-		if (len < left)	{
-			len &= ~7;
-		}
+	while (state.left > 0) {
+		bool first_frag = (state.offset == 0);
 
-		/* Allocate buffer */
-		skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
-		if (!skb2) {
-			err = -ENOMEM;
+		skb2 = ip_frag_next(skb, &state);
+		if (IS_ERR(skb2)) {
+			err = PTR_ERR(skb2);
 			goto fail;
 		}
-
-		/*
-		 *	Set up data on packet
-		 */
-
-		ip_copy_metadata(skb2, skb);
-		skb_reserve(skb2, ll_rs);
-		skb_put(skb2, len + hlen);
-		skb_reset_network_header(skb2);
-		skb2->transport_header = skb2->network_header + hlen;
-
-		/*
-		 *	Charge the memory for the fragment to any owner
-		 *	it might possess
-		 */
-
-		if (skb->sk)
-			skb_set_owner_w(skb2, skb->sk);
-
-		/*
-		 *	Copy the packet header into the new buffer.
-		 */
-
-		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
-
-		/*
-		 *	Copy a block of the IP datagram.
-		 */
-		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
-			BUG();
-		left -= len;
-
-		/*
-		 *	Fill in the new header fields.
-		 */
-		iph = ip_hdr(skb2);
-		iph->frag_off = htons((offset >> 3));
-
-		if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
-			iph->frag_off |= htons(IP_DF);
-
-		/* ANK: dirty, but effective trick. Upgrade options only if
-		 * the segment to be fragmented was THE FIRST (otherwise,
-		 * options are already fixed) and make it ONCE
-		 * on the initial skb, so that all the following fragments
-		 * will inherit fixed options.
-		 */
-		if (offset == 0)
-			ip_options_fragment(skb);
-
-		/*
-		 *	Added AC : If we are fragmenting a fragment that's not the
-		 *		   last fragment then keep MF on each bit
-		 */
-		if (left > 0 || not_last_frag)
-			iph->frag_off |= htons(IP_MF);
-		ptr += len;
-		offset += len;
+		ip_frag_ipcb(skb, skb2, first_frag, &state);
 
 		/*
 		 *	Put this fragment into the sending queue.
 		 */
-		iph->tot_len = htons(len + hlen);
-
-		ip_send_check(iph);
-
 		err = output(net, sk, skb2);
 		if (err)
 			goto fail;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
new file mode 100644
index 000000000000..1af8a329dacb
--- /dev/null
+++ b/net/ipv4/nexthop.c
@@ -0,0 +1,1479 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Generic nexthop implementation
+ *
+ * Copyright (c) 2017-19 Cumulus Networks
+ * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
+ */
+
+#include <linux/nexthop.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <net/arp.h>
+#include <net/ipv6_stubs.h>
+#include <net/lwtunnel.h>
+#include <net/ndisc.h>
+#include <net/nexthop.h>
+#include <net/route.h>
+#include <net/sock.h>
+
+static void remove_nexthop(struct net *net, struct nexthop *nh,
+			   struct nl_info *nlinfo);
+
+#define NH_DEV_HASHBITS  8
+#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
+
+static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
+	[NHA_UNSPEC]		= { .strict_start_type = NHA_UNSPEC + 1 },
+	[NHA_ID]		= { .type = NLA_U32 },
+	[NHA_GROUP]		= { .type = NLA_BINARY },
+	[NHA_GROUP_TYPE]	= { .type = NLA_U16 },
+	[NHA_BLACKHOLE]		= { .type = NLA_FLAG },
+	[NHA_OIF]		= { .type = NLA_U32 },
+	[NHA_GATEWAY]		= { .type = NLA_BINARY },
+	[NHA_ENCAP_TYPE]	= { .type = NLA_U16 },
+	[NHA_ENCAP]		= { .type = NLA_NESTED },
+	[NHA_GROUPS]		= { .type = NLA_FLAG },
+	[NHA_MASTER]		= { .type = NLA_U32 },
+};
+
+static unsigned int nh_dev_hashfn(unsigned int val)
+{
+	unsigned int mask = NH_DEV_HASHSIZE - 1;
+
+	return (val ^
+		(val >> NH_DEV_HASHBITS) ^
+		(val >> (NH_DEV_HASHBITS * 2))) & mask;
+}
+
+static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
+{
+	struct net_device *dev = nhi->fib_nhc.nhc_dev;
+	struct hlist_head *head;
+	unsigned int hash;
+
+	WARN_ON(!dev);
+
+	hash = nh_dev_hashfn(dev->ifindex);
+	head = &net->nexthop.devhash[hash];
+	hlist_add_head(&nhi->dev_hash, head);
+}
+
+static void nexthop_free_mpath(struct nexthop *nh)
+{
+	struct nh_group *nhg;
+	int i;
+
+	nhg = rcu_dereference_raw(nh->nh_grp);
+	for (i = 0; i < nhg->num_nh; ++i)
+		WARN_ON(nhg->nh_entries[i].nh);
+
+	kfree(nhg);
+}
+
+static void nexthop_free_single(struct nexthop *nh)
+{
+	struct nh_info *nhi;
+
+	nhi = rcu_dereference_raw(nh->nh_info);
+	switch (nhi->family) {
+	case AF_INET:
+		fib_nh_release(nh->net, &nhi->fib_nh);
+		break;
+	case AF_INET6:
+		ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
+		break;
+	}
+	kfree(nhi);
+}
+
+void nexthop_free_rcu(struct rcu_head *head)
+{
+	struct nexthop *nh = container_of(head, struct nexthop, rcu);
+
+	if (nh->is_group)
+		nexthop_free_mpath(nh);
+	else
+		nexthop_free_single(nh);
+
+	kfree(nh);
+}
+EXPORT_SYMBOL_GPL(nexthop_free_rcu);
+
+static struct nexthop *nexthop_alloc(void)
+{
+	struct nexthop *nh;
+
+	nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
+	if (nh) {
+		INIT_LIST_HEAD(&nh->grp_list);
+	}
+	return nh;
+}
+
+static struct nh_group *nexthop_grp_alloc(u16 num_nh)
+{
+	size_t sz = offsetof(struct nexthop, nh_grp)
+		    + sizeof(struct nh_group)
+		    + sizeof(struct nh_grp_entry) * num_nh;
+	struct nh_group *nhg;
+
+	nhg = kzalloc(sz, GFP_KERNEL);
+	if (nhg)
+		nhg->num_nh = num_nh;
+
+	return nhg;
+}
+
+static void nh_base_seq_inc(struct net *net)
+{
+	while (++net->nexthop.seq == 0)
+		;
+}
+
+/* no reference taken; rcu lock or rtnl must be held */
+struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
+{
+	struct rb_node **pp, *parent = NULL, *next;
+
+	pp = &net->nexthop.rb_root.rb_node;
+	while (1) {
+		struct nexthop *nh;
+
+		next = rcu_dereference_raw(*pp);
+		if (!next)
+			break;
+		parent = next;
+
+		nh = rb_entry(parent, struct nexthop, rb_node);
+		if (id < nh->id)
+			pp = &next->rb_left;
+		else if (id > nh->id)
+			pp = &next->rb_right;
+		else
+			return nh;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nexthop_find_by_id);
+
+/* used for auto id allocation; called with rtnl held */
+static u32 nh_find_unused_id(struct net *net)
+{
+	u32 id_start = net->nexthop.last_id_allocated;
+
+	while (1) {
+		net->nexthop.last_id_allocated++;
+		if (net->nexthop.last_id_allocated == id_start)
+			break;
+
+		if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
+			return net->nexthop.last_id_allocated;
+	}
+	return 0;
+}
+
+static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
+{
+	struct nexthop_grp *p;
+	size_t len = nhg->num_nh * sizeof(*p);
+	struct nlattr *nla;
+	u16 group_type = 0;
+	int i;
+
+	if (nhg->mpath)
+		group_type = NEXTHOP_GRP_TYPE_MPATH;
+
+	if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
+		goto nla_put_failure;
+
+	nla = nla_reserve(skb, NHA_GROUP, len);
+	if (!nla)
+		goto nla_put_failure;
+
+	p = nla_data(nla);
+	for (i = 0; i < nhg->num_nh; ++i) {
+		p->id = nhg->nh_entries[i].nh->id;
+		p->weight = nhg->nh_entries[i].weight - 1;
+		p += 1;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
+			int event, u32 portid, u32 seq, unsigned int nlflags)
+{
+	struct fib6_nh *fib6_nh;
+	struct fib_nh *fib_nh;
+	struct nlmsghdr *nlh;
+	struct nh_info *nhi;
+	struct nhmsg *nhm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	nhm = nlmsg_data(nlh);
+	nhm->nh_family = AF_UNSPEC;
+	nhm->nh_flags = nh->nh_flags;
+	nhm->nh_protocol = nh->protocol;
+	nhm->nh_scope = 0;
+	nhm->resvd = 0;
+
+	if (nla_put_u32(skb, NHA_ID, nh->id))
+		goto nla_put_failure;
+
+	if (nh->is_group) {
+		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+
+		if (nla_put_nh_group(skb, nhg))
+			goto nla_put_failure;
+		goto out;
+	}
+
+	nhi = rtnl_dereference(nh->nh_info);
+	nhm->nh_family = nhi->family;
+	if (nhi->reject_nh) {
+		if (nla_put_flag(skb, NHA_BLACKHOLE))
+			goto nla_put_failure;
+		goto out;
+	} else {
+		const struct net_device *dev;
+
+		dev = nhi->fib_nhc.nhc_dev;
+		if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
+			goto nla_put_failure;
+	}
+
+	nhm->nh_scope = nhi->fib_nhc.nhc_scope;
+	switch (nhi->family) {
+	case AF_INET:
+		fib_nh = &nhi->fib_nh;
+		if (fib_nh->fib_nh_gw_family &&
+		    nla_put_u32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
+			goto nla_put_failure;
+		break;
+
+	case AF_INET6:
+		fib6_nh = &nhi->fib6_nh;
+		if (fib6_nh->fib_nh_gw_family &&
+		    nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
+			goto nla_put_failure;
+		break;
+	}
+
+	if (nhi->fib_nhc.nhc_lwtstate &&
+	    lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
+				NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
+		goto nla_put_failure;
+
+out:
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static size_t nh_nlmsg_size_grp(struct nexthop *nh)
+{
+	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+	size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
+
+	return nla_total_size(sz) +
+	       nla_total_size(2);  /* NHA_GROUP_TYPE */
+}
+
+static size_t nh_nlmsg_size_single(struct nexthop *nh)
+{
+	struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+	size_t sz;
+
+	/* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
+	 * are mutually exclusive
+	 */
+	sz = nla_total_size(4);  /* NHA_OIF */
+
+	switch (nhi->family) {
+	case AF_INET:
+		if (nhi->fib_nh.fib_nh_gw_family)
+			sz += nla_total_size(4);  /* NHA_GATEWAY */
+		break;
+
+	case AF_INET6:
+		/* NHA_GATEWAY */
+		if (nhi->fib6_nh.fib_nh_gw_family)
+			sz += nla_total_size(sizeof(const struct in6_addr));
+		break;
+	}
+
+	if (nhi->fib_nhc.nhc_lwtstate) {
+		sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
+		sz += nla_total_size(2);  /* NHA_ENCAP_TYPE */
+	}
+
+	return sz;
+}
+
+static size_t nh_nlmsg_size(struct nexthop *nh)
+{
+	size_t sz = nla_total_size(4);    /* NHA_ID */
+
+	if (nh->is_group)
+		sz += nh_nlmsg_size_grp(nh);
+	else
+		sz += nh_nlmsg_size_single(nh);
+
+	return sz;
+}
+
+static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
+{
+	unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
+	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
+	if (!skb)
+		goto errout;
+
+	err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in nh_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
+		    info->nlh, gfp_any());
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
+}
+
+static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
+			   struct netlink_ext_ack *extack)
+{
+	if (nh->is_group) {
+		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+
+		/* nested multipath (group within a group) is not
+		 * supported
+		 */
+		if (nhg->mpath) {
+			NL_SET_ERR_MSG(extack,
+				       "Multipath group can not be a nexthop within a group");
+			return false;
+		}
+	} else {
+		struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+
+		if (nhi->reject_nh && npaths > 1) {
+			NL_SET_ERR_MSG(extack,
+				       "Blackhole nexthop can not be used in a group with more than 1 path");
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
+			       struct netlink_ext_ack *extack)
+{
+	unsigned int len = nla_len(tb[NHA_GROUP]);
+	struct nexthop_grp *nhg;
+	unsigned int i, j;
+
+	if (len & (sizeof(struct nexthop_grp) - 1)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid length for nexthop group attribute");
+		return -EINVAL;
+	}
+
+	/* convert len to number of nexthop ids */
+	len /= sizeof(*nhg);
+
+	nhg = nla_data(tb[NHA_GROUP]);
+	for (i = 0; i < len; ++i) {
+		if (nhg[i].resvd1 || nhg[i].resvd2) {
+			NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
+			return -EINVAL;
+		}
+		if (nhg[i].weight > 254) {
+			NL_SET_ERR_MSG(extack, "Invalid value for weight");
+			return -EINVAL;
+		}
+		for (j = i + 1; j < len; ++j) {
+			if (nhg[i].id == nhg[j].id) {
+				NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
+				return -EINVAL;
+			}
+		}
+	}
+
+	nhg = nla_data(tb[NHA_GROUP]);
+	for (i = 0; i < len; ++i) {
+		struct nexthop *nh;
+
+		nh = nexthop_find_by_id(net, nhg[i].id);
+		if (!nh) {
+			NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+			return -EINVAL;
+		}
+		if (!valid_group_nh(nh, len, extack))
+			return -EINVAL;
+	}
+	for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		NL_SET_ERR_MSG(extack,
+			       "No other attributes can be set in nexthop groups");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool ipv6_good_nh(const struct fib6_nh *nh)
+{
+	int state = NUD_REACHABLE;
+	struct neighbour *n;
+
+	rcu_read_lock_bh();
+
+	n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
+	if (n)
+		state = n->nud_state;
+
+	rcu_read_unlock_bh();
+
+	return !!(state & NUD_VALID);
+}
+
+static bool ipv4_good_nh(const struct fib_nh *nh)
+{
+	int state = NUD_REACHABLE;
+	struct neighbour *n;
+
+	rcu_read_lock_bh();
+
+	n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
+				      (__force u32)nh->fib_nh_gw4);
+	if (n)
+		state = n->nud_state;
+
+	rcu_read_unlock_bh();
+
+	return !!(state & NUD_VALID);
+}
+
+struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
+{
+	struct nexthop *rc = NULL;
+	struct nh_group *nhg;
+	int i;
+
+	if (!nh->is_group)
+		return nh;
+
+	nhg = rcu_dereference(nh->nh_grp);
+	for (i = 0; i < nhg->num_nh; ++i) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		struct nh_info *nhi;
+
+		if (hash > atomic_read(&nhge->upper_bound))
+			continue;
+
+		/* nexthops always check if it is good and does
+		 * not rely on a sysctl for this behavior
+		 */
+		nhi = rcu_dereference(nhge->nh->nh_info);
+		switch (nhi->family) {
+		case AF_INET:
+			if (ipv4_good_nh(&nhi->fib_nh))
+				return nhge->nh;
+			break;
+		case AF_INET6:
+			if (ipv6_good_nh(&nhi->fib6_nh))
+				return nhge->nh;
+			break;
+		}
+
+		if (!rc)
+			rc = nhge->nh;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(nexthop_select_path);
+
+static void nh_group_rebalance(struct nh_group *nhg)
+{
+	int total = 0;
+	int w = 0;
+	int i;
+
+	for (i = 0; i < nhg->num_nh; ++i)
+		total += nhg->nh_entries[i].weight;
+
+	for (i = 0; i < nhg->num_nh; ++i) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		int upper_bound;
+
+		w += nhge->weight;
+		upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
+		atomic_set(&nhge->upper_bound, upper_bound);
+	}
+}
+
+static void remove_nh_grp_entry(struct nh_grp_entry *nhge,
+				struct nh_group *nhg,
+				struct nl_info *nlinfo)
+{
+	struct nexthop *nh = nhge->nh;
+	struct nh_grp_entry *nhges;
+	bool found = false;
+	int i;
+
+	WARN_ON(!nh);
+
+	nhges = nhg->nh_entries;
+	for (i = 0; i < nhg->num_nh; ++i) {
+		if (found) {
+			nhges[i-1].nh = nhges[i].nh;
+			nhges[i-1].weight = nhges[i].weight;
+			list_del(&nhges[i].nh_list);
+			list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list);
+		} else if (nhg->nh_entries[i].nh == nh) {
+			found = true;
+		}
+	}
+
+	if (WARN_ON(!found))
+		return;
+
+	nhg->num_nh--;
+	nhg->nh_entries[nhg->num_nh].nh = NULL;
+
+	nh_group_rebalance(nhg);
+
+	nexthop_put(nh);
+
+	if (nlinfo)
+		nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo);
+}
+
+static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
+				       struct nl_info *nlinfo)
+{
+	struct nh_grp_entry *nhge, *tmp;
+
+	list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) {
+		struct nh_group *nhg;
+
+		list_del(&nhge->nh_list);
+		nhg = rtnl_dereference(nhge->nh_parent->nh_grp);
+		remove_nh_grp_entry(nhge, nhg, nlinfo);
+
+		/* if this group has no more entries then remove it */
+		if (!nhg->num_nh)
+			remove_nexthop(net, nhge->nh_parent, nlinfo);
+	}
+}
+
+static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
+{
+	struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
+	int i, num_nh = nhg->num_nh;
+
+	for (i = 0; i < num_nh; ++i) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+		if (WARN_ON(!nhge->nh))
+			continue;
+
+		list_del(&nhge->nh_list);
+		nexthop_put(nhge->nh);
+		nhge->nh = NULL;
+		nhg->num_nh--;
+	}
+}
+
+static void __remove_nexthop(struct net *net, struct nexthop *nh,
+			     struct nl_info *nlinfo)
+{
+	if (nh->is_group) {
+		remove_nexthop_group(nh, nlinfo);
+	} else {
+		struct nh_info *nhi;
+
+		nhi = rtnl_dereference(nh->nh_info);
+		if (nhi->fib_nhc.nhc_dev)
+			hlist_del(&nhi->dev_hash);
+
+		remove_nexthop_from_groups(net, nh, nlinfo);
+	}
+}
+
+static void remove_nexthop(struct net *net, struct nexthop *nh,
+			   struct nl_info *nlinfo)
+{
+	/* remove from the tree */
+	rb_erase(&nh->rb_node, &net->nexthop.rb_root);
+
+	if (nlinfo)
+		nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
+
+	__remove_nexthop(net, nh, nlinfo);
+	nh_base_seq_inc(net);
+
+	nexthop_put(nh);
+}
+
+static int replace_nexthop(struct net *net, struct nexthop *old,
+			   struct nexthop *new, struct netlink_ext_ack *extack)
+{
+	return -EEXIST;
+}
+
+/* called with rtnl_lock held */
+static int insert_nexthop(struct net *net, struct nexthop *new_nh,
+			  struct nh_config *cfg, struct netlink_ext_ack *extack)
+{
+	struct rb_node **pp, *parent = NULL, *next;
+	struct rb_root *root = &net->nexthop.rb_root;
+	bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
+	bool create = !!(cfg->nlflags & NLM_F_CREATE);
+	u32 new_id = new_nh->id;
+	int rc = -EEXIST;
+
+	pp = &root->rb_node;
+	while (1) {
+		struct nexthop *nh;
+
+		next = rtnl_dereference(*pp);
+		if (!next)
+			break;
+
+		parent = next;
+
+		nh = rb_entry(parent, struct nexthop, rb_node);
+		if (new_id < nh->id) {
+			pp = &next->rb_left;
+		} else if (new_id > nh->id) {
+			pp = &next->rb_right;
+		} else if (replace) {
+			rc = replace_nexthop(net, nh, new_nh, extack);
+			if (!rc)
+				new_nh = nh; /* send notification with old nh */
+			goto out;
+		} else {
+			/* id already exists and not a replace */
+			goto out;
+		}
+	}
+
+	if (replace && !create) {
+		NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
+		rc = -ENOENT;
+		goto out;
+	}
+
+	rb_link_node_rcu(&new_nh->rb_node, parent, pp);
+	rb_insert_color(&new_nh->rb_node, root);
+	rc = 0;
+out:
+	if (!rc) {
+		nh_base_seq_inc(net);
+		nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
+	}
+
+	return rc;
+}
+
+/* rtnl */
+/* remove all nexthops tied to a device being deleted */
+static void nexthop_flush_dev(struct net_device *dev)
+{
+	unsigned int hash = nh_dev_hashfn(dev->ifindex);
+	struct net *net = dev_net(dev);
+	struct hlist_head *head = &net->nexthop.devhash[hash];
+	struct hlist_node *n;
+	struct nh_info *nhi;
+
+	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
+		if (nhi->fib_nhc.nhc_dev != dev)
+			continue;
+
+		remove_nexthop(net, nhi->nh_parent, NULL);
+	}
+}
+
+/* rtnl; called when net namespace is deleted */
+static void flush_all_nexthops(struct net *net)
+{
+	struct rb_root *root = &net->nexthop.rb_root;
+	struct rb_node *node;
+	struct nexthop *nh;
+
+	while ((node = rb_first(root))) {
+		nh = rb_entry(node, struct nexthop, rb_node);
+		remove_nexthop(net, nh, NULL);
+		cond_resched();
+	}
+}
+
+static struct nexthop *nexthop_create_group(struct net *net,
+					    struct nh_config *cfg)
+{
+	struct nlattr *grps_attr = cfg->nh_grp;
+	struct nexthop_grp *entry = nla_data(grps_attr);
+	struct nh_group *nhg;
+	struct nexthop *nh;
+	int i;
+
+	nh = nexthop_alloc();
+	if (!nh)
+		return ERR_PTR(-ENOMEM);
+
+	nh->is_group = 1;
+
+	nhg = nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry));
+	if (!nhg) {
+		kfree(nh);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < nhg->num_nh; ++i) {
+		struct nexthop *nhe;
+		struct nh_info *nhi;
+
+		nhe = nexthop_find_by_id(net, entry[i].id);
+		if (!nexthop_get(nhe))
+			goto out_no_nh;
+
+		nhi = rtnl_dereference(nhe->nh_info);
+		if (nhi->family == AF_INET)
+			nhg->has_v4 = true;
+
+		nhg->nh_entries[i].nh = nhe;
+		nhg->nh_entries[i].weight = entry[i].weight + 1;
+		list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
+		nhg->nh_entries[i].nh_parent = nh;
+	}
+
+	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
+		nhg->mpath = 1;
+		nh_group_rebalance(nhg);
+	}
+
+	rcu_assign_pointer(nh->nh_grp, nhg);
+
+	return nh;
+
+out_no_nh:
+	for (; i >= 0; --i)
+		nexthop_put(nhg->nh_entries[i].nh);
+
+	kfree(nhg);
+	kfree(nh);
+
+	return ERR_PTR(-ENOENT);
+}
+
+static int nh_create_ipv4(struct net *net, struct nexthop *nh,
+			  struct nh_info *nhi, struct nh_config *cfg,
+			  struct netlink_ext_ack *extack)
+{
+	struct fib_nh *fib_nh = &nhi->fib_nh;
+	struct fib_config fib_cfg = {
+		.fc_oif   = cfg->nh_ifindex,
+		.fc_gw4   = cfg->gw.ipv4,
+		.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
+		.fc_flags = cfg->nh_flags,
+		.fc_encap = cfg->nh_encap,
+		.fc_encap_type = cfg->nh_encap_type,
+	};
+	u32 tb_id = l3mdev_fib_table(cfg->dev);
+	int err = -EINVAL;
+
+	err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
+	if (err) {
+		fib_nh_release(net, fib_nh);
+		goto out;
+	}
+
+	/* sets nh_dev if successful */
+	err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
+	if (!err) {
+		nh->nh_flags = fib_nh->fib_nh_flags;
+		fib_info_update_nh_saddr(net, fib_nh, fib_nh->fib_nh_scope);
+	} else {
+		fib_nh_release(net, fib_nh);
+	}
+out:
+	return err;
+}
+
+static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
+			  struct nh_info *nhi, struct nh_config *cfg,
+			  struct netlink_ext_ack *extack)
+{
+	struct fib6_nh *fib6_nh = &nhi->fib6_nh;
+	struct fib6_config fib6_cfg = {
+		.fc_table = l3mdev_fib_table(cfg->dev),
+		.fc_ifindex = cfg->nh_ifindex,
+		.fc_gateway = cfg->gw.ipv6,
+		.fc_flags = cfg->nh_flags,
+		.fc_encap = cfg->nh_encap,
+		.fc_encap_type = cfg->nh_encap_type,
+	};
+	int err = -EINVAL;
+
+	if (!ipv6_addr_any(&cfg->gw.ipv6))
+		fib6_cfg.fc_flags |= RTF_GATEWAY;
+
+	/* sets nh_dev if successful */
+	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
+				      extack);
+	if (err)
+		ipv6_stub->fib6_nh_release(fib6_nh);
+	else
+		nh->nh_flags = fib6_nh->fib_nh_flags;
+
+	return err;
+}
+
+static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
+				      struct netlink_ext_ack *extack)
+{
+	struct nh_info *nhi;
+	struct nexthop *nh;
+	int err = 0;
+
+	nh = nexthop_alloc();
+	if (!nh)
+		return ERR_PTR(-ENOMEM);
+
+	nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
+	if (!nhi) {
+		kfree(nh);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	nh->nh_flags = cfg->nh_flags;
+	nh->net = net;
+
+	nhi->nh_parent = nh;
+	nhi->family = cfg->nh_family;
+	nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
+
+	if (cfg->nh_blackhole) {
+		nhi->reject_nh = 1;
+		cfg->nh_ifindex = net->loopback_dev->ifindex;
+	}
+
+	switch (cfg->nh_family) {
+	case AF_INET:
+		err = nh_create_ipv4(net, nh, nhi, cfg, extack);
+		break;
+	case AF_INET6:
+		err = nh_create_ipv6(net, nh, nhi, cfg, extack);
+		break;
+	}
+
+	if (err) {
+		kfree(nhi);
+		kfree(nh);
+		return ERR_PTR(err);
+	}
+
+	/* add the entry to the device based hash */
+	nexthop_devhash_add(net, nhi);
+
+	rcu_assign_pointer(nh->nh_info, nhi);
+
+	return nh;
+}
+
+/* called with rtnl lock held */
+static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
+				   struct netlink_ext_ack *extack)
+{
+	struct nexthop *nh;
+	int err;
+
+	if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
+		NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!cfg->nh_id) {
+		cfg->nh_id = nh_find_unused_id(net);
+		if (!cfg->nh_id) {
+			NL_SET_ERR_MSG(extack, "No unused id");
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	if (cfg->nh_grp)
+		nh = nexthop_create_group(net, cfg);
+	else
+		nh = nexthop_create(net, cfg, extack);
+
+	if (IS_ERR(nh))
+		return nh;
+
+	refcount_set(&nh->refcnt, 1);
+	nh->id = cfg->nh_id;
+	nh->protocol = cfg->nh_protocol;
+	nh->net = net;
+
+	err = insert_nexthop(net, nh, cfg, extack);
+	if (err) {
+		__remove_nexthop(net, nh, NULL);
+		nexthop_put(nh);
+		nh = ERR_PTR(err);
+	}
+
+	return nh;
+}
+
+static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
+			    struct nlmsghdr *nlh, struct nh_config *cfg,
+			    struct netlink_ext_ack *extack)
+{
+	struct nhmsg *nhm = nlmsg_data(nlh);
+	struct nlattr *tb[NHA_MAX + 1];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+			  extack);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (nhm->resvd || nhm->nh_scope) {
+		NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
+		goto out;
+	}
+	if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
+		NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
+		goto out;
+	}
+
+	switch (nhm->nh_family) {
+	case AF_INET:
+	case AF_INET6:
+		break;
+	case AF_UNSPEC:
+		if (tb[NHA_GROUP])
+			break;
+		/* fallthrough */
+	default:
+		NL_SET_ERR_MSG(extack, "Invalid address family");
+		goto out;
+	}
+
+	if (tb[NHA_GROUPS] || tb[NHA_MASTER]) {
+		NL_SET_ERR_MSG(extack, "Invalid attributes in request");
+		goto out;
+	}
+
+	memset(cfg, 0, sizeof(*cfg));
+	cfg->nlflags = nlh->nlmsg_flags;
+	cfg->nlinfo.portid = NETLINK_CB(skb).portid;
+	cfg->nlinfo.nlh = nlh;
+	cfg->nlinfo.nl_net = net;
+
+	cfg->nh_family = nhm->nh_family;
+	cfg->nh_protocol = nhm->nh_protocol;
+	cfg->nh_flags = nhm->nh_flags;
+
+	if (tb[NHA_ID])
+		cfg->nh_id = nla_get_u32(tb[NHA_ID]);
+
+	if (tb[NHA_GROUP]) {
+		if (nhm->nh_family != AF_UNSPEC) {
+			NL_SET_ERR_MSG(extack, "Invalid family for group");
+			goto out;
+		}
+		cfg->nh_grp = tb[NHA_GROUP];
+
+		cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
+		if (tb[NHA_GROUP_TYPE])
+			cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
+
+		if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
+			NL_SET_ERR_MSG(extack, "Invalid group type");
+			goto out;
+		}
+		err = nh_check_attr_group(net, tb, extack);
+
+		/* no other attributes should be set */
+		goto out;
+	}
+
+	if (tb[NHA_BLACKHOLE]) {
+		if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
+		    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE]) {
+			NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif");
+			goto out;
+		}
+
+		cfg->nh_blackhole = 1;
+		err = 0;
+		goto out;
+	}
+
+	if (!tb[NHA_OIF]) {
+		NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops");
+		goto out;
+	}
+
+	cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
+	if (cfg->nh_ifindex)
+		cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
+
+	if (!cfg->dev) {
+		NL_SET_ERR_MSG(extack, "Invalid device index");
+		goto out;
+	} else if (!(cfg->dev->flags & IFF_UP)) {
+		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+		err = -ENETDOWN;
+		goto out;
+	} else if (!netif_carrier_ok(cfg->dev)) {
+		NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	err = -EINVAL;
+	if (tb[NHA_GATEWAY]) {
+		struct nlattr *gwa = tb[NHA_GATEWAY];
+
+		switch (cfg->nh_family) {
+		case AF_INET:
+			if (nla_len(gwa) != sizeof(u32)) {
+				NL_SET_ERR_MSG(extack, "Invalid gateway");
+				goto out;
+			}
+			cfg->gw.ipv4 = nla_get_be32(gwa);
+			break;
+		case AF_INET6:
+			if (nla_len(gwa) != sizeof(struct in6_addr)) {
+				NL_SET_ERR_MSG(extack, "Invalid gateway");
+				goto out;
+			}
+			cfg->gw.ipv6 = nla_get_in6_addr(gwa);
+			break;
+		default:
+			NL_SET_ERR_MSG(extack,
+				       "Unknown address family for gateway");
+			goto out;
+		}
+	} else {
+		/* device only nexthop (no gateway) */
+		if (cfg->nh_flags & RTNH_F_ONLINK) {
+			NL_SET_ERR_MSG(extack,
+				       "ONLINK flag can not be set for nexthop without a gateway");
+			goto out;
+		}
+	}
+
+	if (tb[NHA_ENCAP]) {
+		cfg->nh_encap = tb[NHA_ENCAP];
+
+		if (!tb[NHA_ENCAP_TYPE]) {
+			NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
+			goto out;
+		}
+
+		cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
+		err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
+		if (err < 0)
+			goto out;
+
+	} else if (tb[NHA_ENCAP_TYPE]) {
+		NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
+		goto out;
+	}
+
+
+	err = 0;
+out:
+	return err;
+}
+
+/* rtnl */
+static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
+			   struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nh_config cfg;
+	struct nexthop *nh;
+	int err;
+
+	err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
+	if (!err) {
+		nh = nexthop_add(net, &cfg, extack);
+		if (IS_ERR(nh))
+			err = PTR_ERR(nh);
+	}
+
+	return err;
+}
+
+static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id,
+				struct netlink_ext_ack *extack)
+{
+	struct nhmsg *nhm = nlmsg_data(nlh);
+	struct nlattr *tb[NHA_MAX + 1];
+	int err, i;
+
+	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+			  extack);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	for (i = 0; i < __NHA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NHA_ID:
+			break;
+		default:
+			NL_SET_ERR_MSG_ATTR(extack, tb[i],
+					    "Unexpected attribute in request");
+			goto out;
+		}
+	}
+	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header");
+		goto out;
+	}
+
+	if (!tb[NHA_ID]) {
+		NL_SET_ERR_MSG(extack, "Nexthop id is missing");
+		goto out;
+	}
+
+	*id = nla_get_u32(tb[NHA_ID]);
+	if (!(*id))
+		NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+	else
+		err = 0;
+out:
+	return err;
+}
+
+/* rtnl */
+static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
+			   struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nl_info nlinfo = {
+		.nlh = nlh,
+		.nl_net = net,
+		.portid = NETLINK_CB(skb).portid,
+	};
+	struct nexthop *nh;
+	int err;
+	u32 id;
+
+	err = nh_valid_get_del_req(nlh, &id, extack);
+	if (err)
+		return err;
+
+	nh = nexthop_find_by_id(net, id);
+	if (!nh)
+		return -ENOENT;
+
+	remove_nexthop(net, nh, &nlinfo);
+
+	return 0;
+}
+
+/* rtnl */
+static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+			   struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *skb = NULL;
+	struct nexthop *nh;
+	int err;
+	u32 id;
+
+	err = nh_valid_get_del_req(nlh, &id, extack);
+	if (err)
+		return err;
+
+	err = -ENOBUFS;
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		goto out;
+
+	err = -ENOENT;
+	nh = nexthop_find_by_id(net, id);
+	if (!nh)
+		goto errout_free;
+
+	err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
+			   nlh->nlmsg_seq, 0);
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		goto errout_free;
+	}
+
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+out:
+	return err;
+errout_free:
+	kfree_skb(skb);
+	goto out;
+}
+
+static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
+			     bool group_filter, u8 family)
+{
+	const struct net_device *dev;
+	const struct nh_info *nhi;
+
+	if (group_filter && !nh->is_group)
+		return true;
+
+	if (!dev_idx && !master_idx && !family)
+		return false;
+
+	if (nh->is_group)
+		return true;
+
+	nhi = rtnl_dereference(nh->nh_info);
+	if (family && nhi->family != family)
+		return true;
+
+	dev = nhi->fib_nhc.nhc_dev;
+	if (dev_idx && (!dev || dev->ifindex != dev_idx))
+		return true;
+
+	if (master_idx) {
+		struct net_device *master;
+
+		if (!dev)
+			return true;
+
+		master = netdev_master_upper_dev_get((struct net_device *)dev);
+		if (!master || master->ifindex != master_idx)
+			return true;
+	}
+
+	return false;
+}
+
+static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
+			     int *master_idx, bool *group_filter,
+			     struct netlink_callback *cb)
+{
+	struct netlink_ext_ack *extack = cb->extack;
+	struct nlattr *tb[NHA_MAX + 1];
+	struct nhmsg *nhm;
+	int err, i;
+	u32 idx;
+
+	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+			  NULL);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= NHA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NHA_OIF:
+			idx = nla_get_u32(tb[i]);
+			if (idx > INT_MAX) {
+				NL_SET_ERR_MSG(extack, "Invalid device index");
+				return -EINVAL;
+			}
+			*dev_idx = idx;
+			break;
+		case NHA_MASTER:
+			idx = nla_get_u32(tb[i]);
+			if (idx > INT_MAX) {
+				NL_SET_ERR_MSG(extack, "Invalid master device index");
+				return -EINVAL;
+			}
+			*master_idx = idx;
+			break;
+		case NHA_GROUPS:
+			*group_filter = true;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
+			return -EINVAL;
+		}
+	}
+
+	nhm = nlmsg_data(nlh);
+	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* rtnl */
+static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nhmsg *nhm = nlmsg_data(cb->nlh);
+	int dev_filter_idx = 0, master_idx = 0;
+	struct net *net = sock_net(skb->sk);
+	struct rb_root *root = &net->nexthop.rb_root;
+	bool group_filter = false;
+	struct rb_node *node;
+	int idx = 0, s_idx;
+	int err;
+
+	err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
+				&group_filter, cb);
+	if (err < 0)
+		return err;
+
+	s_idx = cb->args[0];
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		struct nexthop *nh;
+
+		if (idx < s_idx)
+			goto cont;
+
+		nh = rb_entry(node, struct nexthop, rb_node);
+		if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
+				     group_filter, nhm->nh_family))
+			goto cont;
+
+		err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
+				   NETLINK_CB(cb->skb).portid,
+				   cb->nlh->nlmsg_seq, NLM_F_MULTI);
+		if (err < 0) {
+			if (likely(skb->len))
+				goto out;
+
+			goto out_err;
+		}
+cont:
+		idx++;
+	}
+
+out:
+	err = skb->len;
+out_err:
+	cb->args[0] = idx;
+	cb->seq = net->nexthop.seq;
+	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+
+	return err;
+}
+
+static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
+{
+	unsigned int hash = nh_dev_hashfn(dev->ifindex);
+	struct net *net = dev_net(dev);
+	struct hlist_head *head = &net->nexthop.devhash[hash];
+	struct hlist_node *n;
+	struct nh_info *nhi;
+
+	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
+		if (nhi->fib_nhc.nhc_dev == dev) {
+			if (nhi->family == AF_INET)
+				fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
+						   orig_mtu);
+		}
+	}
+}
+
+/* rtnl */
+static int nh_netdev_event(struct notifier_block *this,
+			   unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_info_ext *info_ext;
+
+	switch (event) {
+	case NETDEV_DOWN:
+	case NETDEV_UNREGISTER:
+		nexthop_flush_dev(dev);
+		break;
+	case NETDEV_CHANGE:
+		if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
+			nexthop_flush_dev(dev);
+		break;
+	case NETDEV_CHANGEMTU:
+		info_ext = ptr;
+		nexthop_sync_mtu(dev, info_ext->ext.mtu);
+		rt_cache_flush(dev_net(dev));
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nh_netdev_notifier = {
+	.notifier_call = nh_netdev_event,
+};
+
+static void __net_exit nexthop_net_exit(struct net *net)
+{
+	rtnl_lock();
+	flush_all_nexthops(net);
+	rtnl_unlock();
+	kfree(net->nexthop.devhash);
+}
+
+static int __net_init nexthop_net_init(struct net *net)
+{
+	size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;
+
+	net->nexthop.rb_root = RB_ROOT;
+	net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
+	if (!net->nexthop.devhash)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static struct pernet_operations nexthop_net_ops = {
+	.init = nexthop_net_init,
+	.exit = nexthop_net_exit,
+};
+
+static int __init nexthop_init(void)
+{
+	register_pernet_subsys(&nexthop_net_ops);
+
+	register_netdevice_notifier(&nh_netdev_notifier);
+
+	rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
+		      rtm_dump_nexthop, 0);
+
+	rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+	rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
+
+	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
+
+	return 0;
+}
+subsys_initcall(nexthop_init);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index c3610b37bb4c..4746f963c439 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -72,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "RAW: inuse %d\n",
 		   sock_prot_inuse_get(net, &raw_prot));
 	seq_printf(seq,  "FRAG: inuse %u memory %lu\n",
-		   atomic_read(&net->ipv4.frags.rhashtable.nelems),
-		   frag_mem_limit(&net->ipv4.frags));
+		   atomic_read(&net->ipv4.fqdir->rhashtable.nelems),
+		   frag_mem_limit(net->ipv4.fqdir));
 	return 0;
 }
 
@@ -291,6 +291,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
 	SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
 	SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
+	SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 875867b64d6a..90f09e47198b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -277,55 +277,97 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
 	return ret;
 }
 
+static int sscanf_key(char *buf, __le32 *key)
+{
+	u32 user_key[4];
+	int i, ret = 0;
+
+	if (sscanf(buf, "%x-%x-%x-%x", user_key, user_key + 1,
+		   user_key + 2, user_key + 3) != 4) {
+		ret = -EINVAL;
+	} else {
+		for (i = 0; i < ARRAY_SIZE(user_key); i++)
+			key[i] = cpu_to_le32(user_key[i]);
+	}
+	pr_debug("proc TFO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
+		 user_key[0], user_key[1], user_key[2], user_key[3], buf, ret);
+
+	return ret;
+}
+
 static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos)
 {
 	struct net *net = container_of(table->data, struct net,
 	    ipv4.sysctl_tcp_fastopen);
-	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
-	struct tcp_fastopen_context *ctxt;
-	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
-	__le32 key[4];
-	int ret, i;
+	/* maxlen to print the list of keys in hex (*2), with dashes
+	 * separating doublewords and a comma in between keys.
+	 */
+	struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
+					    2 * TCP_FASTOPEN_KEY_MAX) +
+					    (TCP_FASTOPEN_KEY_MAX * 5)) };
+	struct tcp_fastopen_context *ctx;
+	u32 user_key[TCP_FASTOPEN_KEY_MAX * 4];
+	__le32 key[TCP_FASTOPEN_KEY_MAX * 4];
+	char *backup_data;
+	int ret, i = 0, off = 0, n_keys = 0;
 
 	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
 	if (!tbl.data)
 		return -ENOMEM;
 
 	rcu_read_lock();
-	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
-	if (ctxt)
-		memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
-	else
-		memset(key, 0, sizeof(key));
+	ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+	if (ctx) {
+		n_keys = tcp_fastopen_context_len(ctx);
+		memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys);
+	}
 	rcu_read_unlock();
 
-	for (i = 0; i < ARRAY_SIZE(key); i++)
+	if (!n_keys) {
+		memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH);
+		n_keys = 1;
+	}
+
+	for (i = 0; i < n_keys * 4; i++)
 		user_key[i] = le32_to_cpu(key[i]);
 
-	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
-		user_key[0], user_key[1], user_key[2], user_key[3]);
+	for (i = 0; i < n_keys; i++) {
+		off += snprintf(tbl.data + off, tbl.maxlen - off,
+				"%08x-%08x-%08x-%08x",
+				user_key[i * 4],
+				user_key[i * 4 + 1],
+				user_key[i * 4 + 2],
+				user_key[i * 4 + 3]);
+		if (i + 1 < n_keys)
+			off += snprintf(tbl.data + off, tbl.maxlen - off, ",");
+	}
+
 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 
 	if (write && ret == 0) {
-		if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
-			   user_key + 2, user_key + 3) != 4) {
+		backup_data = strchr(tbl.data, ',');
+		if (backup_data) {
+			*backup_data = '\0';
+			backup_data++;
+		}
+		if (sscanf_key(tbl.data, key)) {
 			ret = -EINVAL;
 			goto bad_key;
 		}
-
-		for (i = 0; i < ARRAY_SIZE(user_key); i++)
-			key[i] = cpu_to_le32(user_key[i]);
-
+		if (backup_data) {
+			if (sscanf_key(backup_data, key + 4)) {
+				ret = -EINVAL;
+				goto bad_key;
+			}
+		}
 		tcp_fastopen_reset_cipher(net, NULL, key,
+					  backup_data ? key + 4 : NULL,
 					  TCP_FASTOPEN_KEY_LENGTH);
 	}
 
 bad_key:
-	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
-		user_key[0], user_key[1], user_key[2], user_key[3],
-	       (char *)tbl.data, ret);
 	kfree(tbl.data);
 	return ret;
 }
@@ -933,7 +975,12 @@ static struct ctl_table ipv4_net_table[] = {
 		.procname	= "tcp_fastopen_key",
 		.mode		= 0600,
 		.data		= &init_net.ipv4.sysctl_tcp_fastopen,
-		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+		/* maxlen to print the list of keys in hex (*2), with dashes
+		 * separating doublewords and a comma in between keys.
+		 */
+		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH *
+				   2 * TCP_FASTOPEN_KEY_MAX) +
+				   (TCP_FASTOPEN_KEY_MAX * 5)),
 		.proc_handler	= proc_tcp_fastopen_key,
 	},
 	{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 53d61ca3ac4b..27ce13ece510 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2790,15 +2790,24 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		return err;
 	}
 	case TCP_FASTOPEN_KEY: {
-		__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+		__u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
+		__u8 *backup_key = NULL;
 
-		if (optlen != sizeof(key))
+		/* Allow a backup key as well to facilitate key rotation
+		 * First key is the active one.
+		 */
+		if (optlen != TCP_FASTOPEN_KEY_LENGTH &&
+		    optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
 			return -EINVAL;
 
 		if (copy_from_user(key, optval, optlen))
 			return -EFAULT;
 
-		return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+		if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
+			backup_key = key + TCP_FASTOPEN_KEY_LENGTH;
+
+		return tcp_fastopen_reset_cipher(net, sk, key, backup_key,
+						 TCP_FASTOPEN_KEY_LENGTH);
 	}
 	default:
 		/* fallthru */
@@ -3452,21 +3461,23 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		return 0;
 
 	case TCP_FASTOPEN_KEY: {
-		__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+		__u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
 		struct tcp_fastopen_context *ctx;
+		unsigned int key_len = 0;
 
 		if (get_user(len, optlen))
 			return -EFAULT;
 
 		rcu_read_lock();
 		ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
-		if (ctx)
-			memcpy(key, ctx->key, sizeof(key));
-		else
-			len = 0;
+		if (ctx) {
+			key_len = tcp_fastopen_context_len(ctx) *
+					TCP_FASTOPEN_KEY_LENGTH;
+			memcpy(&key[0], &ctx->key[0], key_len);
+		}
 		rcu_read_unlock();
 
-		len = min_t(unsigned int, len, sizeof(key));
+		len = min_t(unsigned int, len, key_len);
 		if (put_user(len, optlen))
 			return -EFAULT;
 		if (copy_to_user(optval, key, len))
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 018a48477355..8e1580485c9e 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -30,14 +30,20 @@ void tcp_fastopen_init_key_once(struct net *net)
 	 * for a valid cookie, so this is an acceptable risk.
 	 */
 	get_random_bytes(key, sizeof(key));
-	tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
+	tcp_fastopen_reset_cipher(net, NULL, key, NULL, sizeof(key));
 }
 
 static void tcp_fastopen_ctx_free(struct rcu_head *head)
 {
 	struct tcp_fastopen_context *ctx =
 	    container_of(head, struct tcp_fastopen_context, rcu);
-	crypto_free_cipher(ctx->tfm);
+	int i;
+
+	/* We own ctx, thus no need to hold the Fastopen-lock */
+	for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) {
+		if (ctx->tfm[i])
+			crypto_free_cipher(ctx->tfm[i]);
+	}
 	kfree(ctx);
 }
 
@@ -66,33 +72,54 @@ void tcp_fastopen_ctx_destroy(struct net *net)
 		call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
 }
 
+struct tcp_fastopen_context *tcp_fastopen_alloc_ctx(void *primary_key,
+						    void *backup_key,
+						    unsigned int len)
+{
+	struct tcp_fastopen_context *new_ctx;
+	void *key = primary_key;
+	int err, i;
+
+	new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL);
+	if (!new_ctx)
+		return ERR_PTR(-ENOMEM);
+	for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++)
+		new_ctx->tfm[i] = NULL;
+	for (i = 0; i < (backup_key ? 2 : 1); i++) {
+		new_ctx->tfm[i] = crypto_alloc_cipher("aes", 0, 0);
+		if (IS_ERR(new_ctx->tfm[i])) {
+			err = PTR_ERR(new_ctx->tfm[i]);
+			new_ctx->tfm[i] = NULL;
+			pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
+			goto out;
+		}
+		err = crypto_cipher_setkey(new_ctx->tfm[i], key, len);
+		if (err) {
+			pr_err("TCP: TFO cipher key error: %d\n", err);
+			goto out;
+		}
+		memcpy(&new_ctx->key[i * TCP_FASTOPEN_KEY_LENGTH], key, len);
+		key = backup_key;
+	}
+	return new_ctx;
+out:
+	tcp_fastopen_ctx_free(&new_ctx->rcu);
+	return ERR_PTR(err);
+}
+
 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
-			      void *key, unsigned int len)
+			      void *primary_key, void *backup_key,
+			      unsigned int len)
 {
 	struct tcp_fastopen_context *ctx, *octx;
 	struct fastopen_queue *q;
-	int err;
+	int err = 0;
 
-	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
-	ctx->tfm = crypto_alloc_cipher("aes", 0, 0);
-
-	if (IS_ERR(ctx->tfm)) {
-		err = PTR_ERR(ctx->tfm);
-error:		kfree(ctx);
-		pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
-		return err;
+	ctx = tcp_fastopen_alloc_ctx(primary_key, backup_key, len);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
 	}
-	err = crypto_cipher_setkey(ctx->tfm, key, len);
-	if (err) {
-		pr_err("TCP: TFO cipher key error: %d\n", err);
-		crypto_free_cipher(ctx->tfm);
-		goto error;
-	}
-	memcpy(ctx->key, key, len);
-
-
 	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
 	if (sk) {
 		q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
@@ -108,28 +135,42 @@ error:		kfree(ctx);
 
 	if (octx)
 		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
+out:
 	return err;
 }
 
-static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
-				      struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
+					     struct sk_buff *syn,
+					     struct crypto_cipher *tfm,
+					     struct tcp_fastopen_cookie *foc)
 {
-	struct tcp_fastopen_context *ctx;
-	bool ok = false;
-
-	rcu_read_lock();
+	if (req->rsk_ops->family == AF_INET) {
+		const struct iphdr *iph = ip_hdr(syn);
+		__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
 
-	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
-	if (!ctx)
-		ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+		crypto_cipher_encrypt_one(tfm, foc->val, (void *)path);
+		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+		return true;
+	}
 
-	if (ctx) {
-		crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
+#if IS_ENABLED(CONFIG_IPV6)
+	if (req->rsk_ops->family == AF_INET6) {
+		const struct ipv6hdr *ip6h = ipv6_hdr(syn);
+		struct tcp_fastopen_cookie tmp;
+		struct in6_addr *buf;
+		int i;
+
+		crypto_cipher_encrypt_one(tfm, tmp.val,
+					  (void *)&ip6h->saddr);
+		buf = &tmp.addr;
+		for (i = 0; i < 4; i++)
+			buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
+		crypto_cipher_encrypt_one(tfm, foc->val, (void *)buf);
 		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
-		ok = true;
+		return true;
 	}
-	rcu_read_unlock();
-	return ok;
+#endif
+	return false;
 }
 
 /* Generate the fastopen cookie by doing aes128 encryption on both
@@ -138,37 +179,20 @@ static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
  *
  * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
  */
-static bool tcp_fastopen_cookie_gen(struct sock *sk,
+static void tcp_fastopen_cookie_gen(struct sock *sk,
 				    struct request_sock *req,
 				    struct sk_buff *syn,
 				    struct tcp_fastopen_cookie *foc)
 {
-	if (req->rsk_ops->family == AF_INET) {
-		const struct iphdr *iph = ip_hdr(syn);
-
-		__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
-		return __tcp_fastopen_cookie_gen(sk, path, foc);
-	}
-
-#if IS_ENABLED(CONFIG_IPV6)
-	if (req->rsk_ops->family == AF_INET6) {
-		const struct ipv6hdr *ip6h = ipv6_hdr(syn);
-		struct tcp_fastopen_cookie tmp;
-
-		if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
-			struct in6_addr *buf = &tmp.addr;
-			int i;
+	struct tcp_fastopen_context *ctx;
 
-			for (i = 0; i < 4; i++)
-				buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
-			return __tcp_fastopen_cookie_gen(sk, buf, foc);
-		}
-	}
-#endif
-	return false;
+	rcu_read_lock();
+	ctx = tcp_fastopen_get_ctx(sk);
+	if (ctx)
+		__tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[0], foc);
+	rcu_read_unlock();
 }
 
-
 /* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
  * queue this additional data / FIN.
  */
@@ -212,6 +236,35 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
 		tcp_fin(sk);
 }
 
+/* returns 0 - no key match, 1 for primary, 2 for backup */
+static int tcp_fastopen_cookie_gen_check(struct sock *sk,
+					 struct request_sock *req,
+					 struct sk_buff *syn,
+					 struct tcp_fastopen_cookie *orig,
+					 struct tcp_fastopen_cookie *valid_foc)
+{
+	struct tcp_fastopen_cookie search_foc = { .len = -1 };
+	struct tcp_fastopen_cookie *foc = valid_foc;
+	struct tcp_fastopen_context *ctx;
+	int i, ret = 0;
+
+	rcu_read_lock();
+	ctx = tcp_fastopen_get_ctx(sk);
+	if (!ctx)
+		goto out;
+	for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
+		__tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[i], foc);
+		if (tcp_fastopen_cookie_match(foc, orig)) {
+			ret = i + 1;
+			goto out;
+		}
+		foc = &search_foc;
+	}
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 static struct sock *tcp_fastopen_create_child(struct sock *sk,
 					      struct sk_buff *skb,
 					      struct request_sock *req)
@@ -331,6 +384,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sock *child;
+	int ret = 0;
 
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
@@ -346,31 +400,44 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 	    tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
 		goto fastopen;
 
-	if (foc->len >= 0 &&  /* Client presents or requests a cookie */
-	    tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
-	    foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
-	    foc->len == valid_foc.len &&
-	    !memcmp(foc->val, valid_foc.val, foc->len)) {
-		/* Cookie is valid. Create a (full) child socket to accept
-		 * the data in SYN before returning a SYN-ACK to ack the
-		 * data. If we fail to create the socket, fall back and
-		 * ack the ISN only but includes the same cookie.
-		 *
-		 * Note: Data-less SYN with valid cookie is allowed to send
-		 * data in SYN_RECV state.
-		 */
+	if (foc->len == 0) {
+		/* Client requests a cookie. */
+		tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+	} else if (foc->len > 0) {
+		ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
+						    &valid_foc);
+		if (!ret) {
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+		} else {
+			/* Cookie is valid. Create a (full) child socket to
+			 * accept the data in SYN before returning a SYN-ACK to
+			 * ack the data. If we fail to create the socket, fall
+			 * back and ack the ISN only but includes the same
+			 * cookie.
+			 *
+			 * Note: Data-less SYN with valid cookie is allowed to
+			 * send data in SYN_RECV state.
+			 */
 fastopen:
-		child = tcp_fastopen_create_child(sk, skb, req);
-		if (child) {
-			foc->len = -1;
+			child = tcp_fastopen_create_child(sk, skb, req);
+			if (child) {
+				if (ret == 2) {
+					valid_foc.exp = foc->exp;
+					*foc = valid_foc;
+					NET_INC_STATS(sock_net(sk),
+						      LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
+				} else {
+					foc->len = -1;
+				}
+				NET_INC_STATS(sock_net(sk),
+					      LINUX_MIB_TCPFASTOPENPASSIVE);
+				return child;
+			}
 			NET_INC_STATS(sock_net(sk),
-				      LINUX_MIB_TCPFASTOPENPASSIVE);
-			return child;
+				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
 		}
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (foc->len > 0) /* Client presents an invalid cookie */
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-
+	}
 	valid_foc.exp = foc->exp;
 	*foc = valid_foc;
 	return NULL;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b51630ddb728..6b673d4f5ca9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2421,9 +2421,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 		goto out;
 
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
+		if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
 			continue;
-		if (no_gw && rt->fib6_nh.fib_nh_gw_family)
+		if (no_gw && rt->fib6_nh->fib_nh_gw_family)
 			continue;
 		if ((rt->fib6_flags & flags) != flags)
 			continue;
@@ -6354,16 +6354,16 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
 		spin_lock(&ifa->lock);
 		if (ifa->rt) {
-			struct fib6_info *rt = ifa->rt;
+			struct fib6_nh *nh = ifa->rt->fib6_nh;
 			int cpu;
 
 			rcu_read_lock();
 			ifa->rt->dst_nopolicy = val ? true : false;
-			if (rt->rt6i_pcpu) {
+			if (nh->rt6i_pcpu) {
 				for_each_possible_cpu(cpu) {
 					struct rt6_info **rtp;
 
-					rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+					rtp = per_cpu_ptr(nh->rt6i_pcpu, cpu);
 					addrconf_set_nopolicy(*rtp, val);
 				}
 			}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 5b1246635e02..783f3c1466da 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -183,6 +183,11 @@ static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 	return -EAFNOSUPPORT;
 }
 
+static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt)
+{
+	return -EAFNOSUPPORT;
+}
+
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
 	.ipv6_route_input  = eafnosupport_ipv6_route_input,
@@ -192,6 +197,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.fib6_select_path  = eafnosupport_fib6_select_path,
 	.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
 	.fib6_nh_init	   = eafnosupport_fib6_nh_init,
+	.ip6_del_rt	   = eafnosupport_ip6_del_rt,
 };
 EXPORT_SYMBOL_GPL(ipv6_stub);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c04ae282f4e4..cc6f8d0c625a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -926,6 +926,9 @@ static const struct ipv6_stub ipv6_stub_impl = {
 	.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
 	.fib6_nh_init	   = fib6_nh_init,
 	.fib6_nh_release   = fib6_nh_release,
+	.fib6_update_sernum = fib6_update_sernum_stub,
+	.fib6_rt_update	   = fib6_rt_update,
+	.ip6_del_rt	   = ip6_del_rt,
 	.udpv6_encap_enable = udpv6_encap_enable,
 	.ndisc_send_na = ndisc_send_na,
 	.nd_tbl	= &nd_tbl,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 008421b550c6..cdfb8500ccae 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -147,20 +147,18 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
 	       addr[fn_bit >> 5];
 }
 
-struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
 {
 	struct fib6_info *f6i;
+	size_t sz = sizeof(*f6i);
 
-	f6i = kzalloc(sizeof(*f6i), gfp_flags);
+	if (with_fib6_nh)
+		sz += sizeof(struct fib6_nh);
+
+	f6i = kzalloc(sz, gfp_flags);
 	if (!f6i)
 		return NULL;
 
-	f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
-	if (!f6i->rt6i_pcpu) {
-		kfree(f6i);
-		return NULL;
-	}
-
 	INIT_LIST_HEAD(&f6i->fib6_siblings);
 	refcount_set(&f6i->fib6_ref, 1);
 
@@ -170,36 +168,11 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
 void fib6_info_destroy_rcu(struct rcu_head *head)
 {
 	struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
-	struct rt6_exception_bucket *bucket;
 
 	WARN_ON(f6i->fib6_node);
 
-	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
-	kfree(bucket);
-
-	if (f6i->rt6i_pcpu) {
-		int cpu;
-
-		for_each_possible_cpu(cpu) {
-			struct rt6_info **ppcpu_rt;
-			struct rt6_info *pcpu_rt;
-
-			ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
-			pcpu_rt = *ppcpu_rt;
-			if (pcpu_rt) {
-				dst_dev_put(&pcpu_rt->dst);
-				dst_release(&pcpu_rt->dst);
-				*ppcpu_rt = NULL;
-			}
-		}
-
-		free_percpu(f6i->rt6i_pcpu);
-	}
-
-	fib6_nh_release(&f6i->fib6_nh);
-
+	fib6_nh_release(f6i->fib6_nh);
 	ip_fib_metrics_put(f6i->fib6_metrics);
-
 	kfree(f6i);
 }
 EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
@@ -393,10 +366,10 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
 	return call_fib6_notifier(nb, net, event_type, &info.info);
 }
 
-static int call_fib6_entry_notifiers(struct net *net,
-				     enum fib_event_type event_type,
-				     struct fib6_info *rt,
-				     struct netlink_ext_ack *extack)
+int call_fib6_entry_notifiers(struct net *net,
+			      enum fib_event_type event_type,
+			      struct fib6_info *rt,
+			      struct netlink_ext_ack *extack)
 {
 	struct fib6_entry_notifier_info info = {
 		.info.extack = extack,
@@ -899,16 +872,14 @@ insert_above:
 	return ln;
 }
 
-static void fib6_drop_pcpu_from(struct fib6_info *f6i,
-				const struct fib6_table *table)
+static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+				  const struct fib6_info *match,
+				  const struct fib6_table *table)
 {
 	int cpu;
 
-	/* Make sure rt6_make_pcpu_route() wont add other percpu routes
-	 * while we are cleaning them here.
-	 */
-	f6i->fib6_destroying = 1;
-	mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+	if (!fib6_nh->rt6i_pcpu)
+		return;
 
 	/* release the reference to this fib entry from
 	 * all of its cached pcpu routes
@@ -917,9 +888,15 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
 		struct rt6_info **ppcpu_rt;
 		struct rt6_info *pcpu_rt;
 
-		ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
 		pcpu_rt = *ppcpu_rt;
-		if (pcpu_rt) {
+
+		/* only dropping the 'from' reference if the cached route
+		 * is using 'match'. The cached pcpu_rt->from only changes
+		 * from a fib6_info to NULL (ip6_dst_destroy); it can never
+		 * change from one fib6_info reference to another
+		 */
+		if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
 			struct fib6_info *from;
 
 			from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
@@ -928,13 +905,27 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
 	}
 }
 
+static void fib6_drop_pcpu_from(struct fib6_info *f6i,
+				const struct fib6_table *table)
+{
+	struct fib6_nh *fib6_nh;
+
+	/* Make sure rt6_make_pcpu_route() wont add other percpu routes
+	 * while we are cleaning them here.
+	 */
+	f6i->fib6_destroying = 1;
+	mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+
+	fib6_nh = f6i->fib6_nh;
+	__fib6_drop_pcpu_from(fib6_nh, f6i, table);
+}
+
 static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 			  struct net *net)
 {
 	struct fib6_table *table = rt->fib6_table;
 
-	if (rt->rt6i_pcpu)
-		fib6_drop_pcpu_from(rt, table);
+	fib6_drop_pcpu_from(rt, table);
 
 	if (refcount_read(&rt->fib6_ref) != 1) {
 		/* This route is used as dummy address holder in some split
@@ -1222,6 +1213,14 @@ void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
 	__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
 }
 
+/* allow ipv4 to update sernum via ipv6_stub */
+void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
+{
+	spin_lock_bh(&f6i->fib6_table->tb6_lock);
+	fib6_update_sernum_upto_root(net, f6i);
+	spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
 /*
  *	Add routing information to the routing tree.
  *	<destination addr>/<source addr>
@@ -2306,14 +2305,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 #else
 	seq_puts(seq, "00000000000000000000000000000000 00 ");
 #endif
-	if (rt->fib6_nh.fib_nh_gw_family) {
+	if (rt->fib6_nh->fib_nh_gw_family) {
 		flags |= RTF_GATEWAY;
-		seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
+		seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6);
 	} else {
 		seq_puts(seq, "00000000000000000000000000000000");
 	}
 
-	dev = rt->fib6_nh.fib_nh_dev;
+	dev = rt->fib6_nh->fib_nh_dev;
 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
 		   rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
 		   flags, dev ? dev->name : "");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f9e43323e667..0bb6b6de7962 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -592,6 +592,170 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	skb_copy_secmark(to, from);
 }
 
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
+		      u8 nexthdr, __be32 frag_id,
+		      struct ip6_fraglist_iter *iter)
+{
+	unsigned int first_len;
+	struct frag_hdr *fh;
+
+	/* BUILD HEADER */
+	*prevhdr = NEXTHDR_FRAGMENT;
+	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+	if (!iter->tmp_hdr)
+		return -ENOMEM;
+
+	iter->frag_list = skb_shinfo(skb)->frag_list;
+	iter->frag = iter->frag_list;
+	skb_frag_list_init(skb);
+
+	iter->offset = 0;
+	iter->hlen = hlen;
+	iter->frag_id = frag_id;
+	iter->nexthdr = nexthdr;
+
+	__skb_pull(skb, hlen);
+	fh = __skb_push(skb, sizeof(struct frag_hdr));
+	__skb_push(skb, hlen);
+	skb_reset_network_header(skb);
+	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
+
+	fh->nexthdr = nexthdr;
+	fh->reserved = 0;
+	fh->frag_off = htons(IP6_MF);
+	fh->identification = frag_id;
+
+	first_len = skb_pagelen(skb);
+	skb->data_len = first_len - skb_headlen(skb);
+	skb->len = first_len;
+	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+
+	return 0;
+}
+EXPORT_SYMBOL(ip6_fraglist_init);
+
+void ip6_fraglist_prepare(struct sk_buff *skb,
+			  struct ip6_fraglist_iter *iter)
+{
+	struct sk_buff *frag = iter->frag;
+	unsigned int hlen = iter->hlen;
+	struct frag_hdr *fh;
+
+	frag->ip_summed = CHECKSUM_NONE;
+	skb_reset_transport_header(frag);
+	fh = __skb_push(frag, sizeof(struct frag_hdr));
+	__skb_push(frag, hlen);
+	skb_reset_network_header(frag);
+	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
+	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
+	fh->nexthdr = iter->nexthdr;
+	fh->reserved = 0;
+	fh->frag_off = htons(iter->offset);
+	if (frag->next)
+		fh->frag_off |= htons(IP6_MF);
+	fh->identification = iter->frag_id;
+	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+	ip6_copy_metadata(frag, skb);
+}
+EXPORT_SYMBOL(ip6_fraglist_prepare);
+
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
+		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
+		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
+{
+	state->prevhdr = prevhdr;
+	state->nexthdr = nexthdr;
+	state->frag_id = frag_id;
+
+	state->hlen = hlen;
+	state->mtu = mtu;
+
+	state->left = skb->len - hlen;	/* Space per frame */
+	state->ptr = hlen;		/* Where to start from */
+
+	state->hroom = hdr_room;
+	state->troom = needed_tailroom;
+
+	state->offset = 0;
+}
+EXPORT_SYMBOL(ip6_frag_init);
+
+struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
+{
+	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
+	struct sk_buff *frag;
+	struct frag_hdr *fh;
+	unsigned int len;
+
+	len = state->left;
+	/* IF: it doesn't fit, use 'mtu' - the data space left */
+	if (len > state->mtu)
+		len = state->mtu;
+	/* IF: we are not sending up to and including the packet end
+	   then align the next start on an eight byte boundary */
+	if (len < state->left)
+		len &= ~7;
+
+	/* Allocate buffer */
+	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
+			 state->hroom + state->troom, GFP_ATOMIC);
+	if (!frag)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 *	Set up data on packet
+	 */
+
+	ip6_copy_metadata(frag, skb);
+	skb_reserve(frag, state->hroom);
+	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
+	skb_reset_network_header(frag);
+	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
+	frag->transport_header = (frag->network_header + state->hlen +
+				  sizeof(struct frag_hdr));
+
+	/*
+	 *	Charge the memory for the fragment to any owner
+	 *	it might possess
+	 */
+	if (skb->sk)
+		skb_set_owner_w(frag, skb->sk);
+
+	/*
+	 *	Copy the packet header into the new buffer.
+	 */
+	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
+
+	fragnexthdr_offset = skb_network_header(frag);
+	fragnexthdr_offset += prevhdr - skb_network_header(skb);
+	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
+	/*
+	 *	Build fragment header.
+	 */
+	fh->nexthdr = state->nexthdr;
+	fh->reserved = 0;
+	fh->identification = state->frag_id;
+
+	/*
+	 *	Copy a block of the IP datagram.
+	 */
+	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
+			     len));
+	state->left -= len;
+
+	fh->frag_off = htons(state->offset);
+	if (state->left > 0)
+		fh->frag_off |= htons(IP6_MF);
+	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+	state->ptr += len;
+	state->offset += len;
+
+	return frag;
+}
+EXPORT_SYMBOL(ip6_frag_next);
+
 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
@@ -599,12 +763,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 				inet6_sk(skb->sk) : NULL;
-	struct ipv6hdr *tmp_hdr;
-	struct frag_hdr *fh;
-	unsigned int mtu, hlen, left, len, nexthdr_offset;
-	int hroom, troom;
+	struct ip6_frag_state state;
+	unsigned int mtu, hlen, nexthdr_offset;
+	int hroom, err = 0;
 	__be32 frag_id;
-	int ptr, offset = 0, err = 0;
 	u8 *prevhdr, nexthdr = 0;
 
 	err = ip6_find_1stfragopt(skb, &prevhdr);
@@ -651,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
 	if (skb_has_frag_list(skb)) {
 		unsigned int first_len = skb_pagelen(skb);
+		struct ip6_fraglist_iter iter;
 		struct sk_buff *frag2;
 
 		if (first_len - hlen > mtu ||
@@ -678,74 +841,29 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 			skb->truesize -= frag->truesize;
 		}
 
-		err = 0;
-		offset = 0;
-		/* BUILD HEADER */
-
-		*prevhdr = NEXTHDR_FRAGMENT;
-		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
-		if (!tmp_hdr) {
-			err = -ENOMEM;
+		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+					&iter);
+		if (err < 0)
 			goto fail;
-		}
-		frag = skb_shinfo(skb)->frag_list;
-		skb_frag_list_init(skb);
-
-		__skb_pull(skb, hlen);
-		fh = __skb_push(skb, sizeof(struct frag_hdr));
-		__skb_push(skb, hlen);
-		skb_reset_network_header(skb);
-		memcpy(skb_network_header(skb), tmp_hdr, hlen);
-
-		fh->nexthdr = nexthdr;
-		fh->reserved = 0;
-		fh->frag_off = htons(IP6_MF);
-		fh->identification = frag_id;
-
-		first_len = skb_pagelen(skb);
-		skb->data_len = first_len - skb_headlen(skb);
-		skb->len = first_len;
-		ipv6_hdr(skb)->payload_len = htons(first_len -
-						   sizeof(struct ipv6hdr));
 
 		for (;;) {
 			/* Prepare header of the next frame,
 			 * before previous one went down. */
-			if (frag) {
-				frag->ip_summed = CHECKSUM_NONE;
-				skb_reset_transport_header(frag);
-				fh = __skb_push(frag, sizeof(struct frag_hdr));
-				__skb_push(frag, hlen);
-				skb_reset_network_header(frag);
-				memcpy(skb_network_header(frag), tmp_hdr,
-				       hlen);
-				offset += skb->len - hlen - sizeof(struct frag_hdr);
-				fh->nexthdr = nexthdr;
-				fh->reserved = 0;
-				fh->frag_off = htons(offset);
-				if (frag->next)
-					fh->frag_off |= htons(IP6_MF);
-				fh->identification = frag_id;
-				ipv6_hdr(frag)->payload_len =
-						htons(frag->len -
-						      sizeof(struct ipv6hdr));
-				ip6_copy_metadata(frag, skb);
-			}
+			if (iter.frag)
+				ip6_fraglist_prepare(skb, &iter);
 
 			err = output(net, sk, skb);
 			if (!err)
 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 					      IPSTATS_MIB_FRAGCREATES);
 
-			if (err || !frag)
+			if (err || !iter.frag)
 				break;
 
-			skb = frag;
-			frag = skb->next;
-			skb_mark_not_on_list(skb);
+			skb = ip6_fraglist_next(&iter);
 		}
 
-		kfree(tmp_hdr);
+		kfree(iter.tmp_hdr);
 
 		if (err == 0) {
 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -753,7 +871,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 			return 0;
 		}
 
-		kfree_skb_list(frag);
+		kfree_skb_list(iter.frag_list);
 
 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 			      IPSTATS_MIB_FRAGFAILS);
@@ -770,91 +888,26 @@ slow_path_clean:
 	}
 
 slow_path:
-	left = skb->len - hlen;		/* Space per frame */
-	ptr = hlen;			/* Where to start from */
-
 	/*
 	 *	Fragment the datagram.
 	 */
 
-	troom = rt->dst.dev->needed_tailroom;
+	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
+		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
+		      &state);
 
 	/*
 	 *	Keep copying data until we run out.
 	 */
-	while (left > 0)	{
-		u8 *fragnexthdr_offset;
-
-		len = left;
-		/* IF: it doesn't fit, use 'mtu' - the data space left */
-		if (len > mtu)
-			len = mtu;
-		/* IF: we are not sending up to and including the packet end
-		   then align the next start on an eight byte boundary */
-		if (len < left)	{
-			len &= ~7;
-		}
 
-		/* Allocate buffer */
-		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
-				 hroom + troom, GFP_ATOMIC);
-		if (!frag) {
-			err = -ENOMEM;
+	while (state.left > 0) {
+		frag = ip6_frag_next(skb, &state);
+		if (IS_ERR(frag)) {
+			err = PTR_ERR(frag);
 			goto fail;
 		}
 
 		/*
-		 *	Set up data on packet
-		 */
-
-		ip6_copy_metadata(frag, skb);
-		skb_reserve(frag, hroom);
-		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
-		skb_reset_network_header(frag);
-		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
-		frag->transport_header = (frag->network_header + hlen +
-					  sizeof(struct frag_hdr));
-
-		/*
-		 *	Charge the memory for the fragment to any owner
-		 *	it might possess
-		 */
-		if (skb->sk)
-			skb_set_owner_w(frag, skb->sk);
-
-		/*
-		 *	Copy the packet header into the new buffer.
-		 */
-		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
-
-		fragnexthdr_offset = skb_network_header(frag);
-		fragnexthdr_offset += prevhdr - skb_network_header(skb);
-		*fragnexthdr_offset = NEXTHDR_FRAGMENT;
-
-		/*
-		 *	Build fragment header.
-		 */
-		fh->nexthdr = nexthdr;
-		fh->reserved = 0;
-		fh->identification = frag_id;
-
-		/*
-		 *	Copy a block of the IP datagram.
-		 */
-		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
-				     len));
-		left -= len;
-
-		fh->frag_off = htons(offset);
-		if (left > 0)
-			fh->frag_off |= htons(IP6_MF);
-		ipv6_hdr(frag)->payload_len = htons(frag->len -
-						    sizeof(struct ipv6hdr));
-
-		ptr += len;
-		offset += len;
-
-		/*
 		 *	Put this fragment into the sending queue.
 		 */
 		err = output(net, sk, frag);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 4c8e2ea8bf19..f874dde1ee85 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1293,8 +1293,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
 
 	if (rt) {
-		neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
-					 rt->fib6_nh.fib_nh_dev, NULL,
+		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
+					 rt->fib6_nh->fib_nh_dev, NULL,
 					  &ipv6_hdr(skb)->saddr);
 		if (!neigh) {
 			ND_PRINTK(0, err,
@@ -1323,8 +1323,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			return;
 		}
 
-		neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
-					 rt->fib6_nh.fib_nh_dev, NULL,
+		neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
+					 rt->fib6_nh->fib_nh_dev, NULL,
 					  &ipv6_hdr(skb)->saddr);
 		if (!neigh) {
 			ND_PRINTK(0, err,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1240ccd57f39..9530cc280953 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -16,6 +16,9 @@
 #include <net/ip6_route.h>
 #include <net/xfrm.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include "../bridge/br_private.h"
 
 int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 {
@@ -109,6 +112,122 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst,
 }
 EXPORT_SYMBOL_GPL(__nf_ip6_route);
 
+int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		    struct nf_ct_bridge_frag_data *data,
+		    int (*output)(struct net *, struct sock *sk,
+				  const struct nf_ct_bridge_frag_data *data,
+				  struct sk_buff *))
+{
+	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+	struct ip6_frag_state state;
+	u8 *prevhdr, nexthdr = 0;
+	unsigned int mtu, hlen;
+	int hroom, err = 0;
+	__be32 frag_id;
+
+	err = ip6_find_1stfragopt(skb, &prevhdr);
+	if (err < 0)
+		goto blackhole;
+	hlen = err;
+	nexthdr = *prevhdr;
+
+	mtu = skb->dev->mtu;
+	if (frag_max_size > mtu ||
+	    frag_max_size < IPV6_MIN_MTU)
+		goto blackhole;
+
+	mtu = frag_max_size;
+	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
+		goto blackhole;
+	mtu -= hlen + sizeof(struct frag_hdr);
+
+	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
+				    &ipv6_hdr(skb)->saddr);
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_checksum_help(skb)))
+		goto blackhole;
+
+	hroom = LL_RESERVED_SPACE(skb->dev);
+	if (skb_has_frag_list(skb)) {
+		unsigned int first_len = skb_pagelen(skb);
+		struct ip6_fraglist_iter iter;
+		struct sk_buff *frag2;
+
+		if (first_len - hlen > mtu ||
+		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
+			goto blackhole;
+
+		if (skb_cloned(skb))
+			goto slow_path;
+
+		skb_walk_frags(skb, frag2) {
+			if (frag2->len > mtu ||
+			    skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
+				goto blackhole;
+
+			/* Partially cloned skb? */
+			if (skb_shared(frag2))
+				goto slow_path;
+		}
+
+		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+					&iter);
+		if (err < 0)
+			goto blackhole;
+
+		for (;;) {
+			/* Prepare header of the next frame,
+			 * before previous one went down.
+			 */
+			if (iter.frag)
+				ip6_fraglist_prepare(skb, &iter);
+
+			err = output(net, sk, data, skb);
+			if (err || !iter.frag)
+				break;
+
+			skb = ip6_fraglist_next(&iter);
+		}
+
+		kfree(iter.tmp_hdr);
+		if (!err)
+			return 0;
+
+		kfree_skb_list(iter.frag_list);
+		return err;
+	}
+slow_path:
+	/* This is a linearized skbuff, the original geometry is lost for us.
+	 * This may also be a clone skbuff, we could preserve the geometry for
+	 * the copies but probably not worth the effort.
+	 */
+	ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom,
+		      LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id,
+		      &state);
+
+	while (state.left > 0) {
+		struct sk_buff *skb2;
+
+		skb2 = ip6_frag_next(skb, &state);
+		if (IS_ERR(skb2)) {
+			err = PTR_ERR(skb2);
+			goto blackhole;
+		}
+
+		err = output(net, sk, data, skb2);
+		if (err)
+			goto blackhole;
+	}
+	consume_skb(skb);
+	return err;
+
+blackhole:
+	kfree_skb(skb);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(br_ip6_fragment);
+
 static const struct nf_ipv6_ops ipv6ops = {
 #if IS_MODULE(CONFIG_IPV6)
 	.chk_addr		= ipv6_chk_addr,
@@ -119,6 +238,10 @@ static const struct nf_ipv6_ops ipv6ops = {
 	.route_input		= ip6_route_input,
 	.fragment		= ip6_fragment,
 	.reroute		= nf_ip6_reroute,
+#if IS_MODULE(CONFIG_IPV6)
+	.br_defrag		= nf_ct_frag6_gather,
+	.br_fragment		= br_ip6_fragment,
+#endif
 };
 
 int __init ipv6_netfilter_init(void)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3de0e9b0a482..c5d59fa568d6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -58,26 +58,21 @@ static struct inet_frags nf_frags;
 static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &init_net.nf_frag.frags.timeout,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &init_net.nf_frag.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra2		= &init_net.nf_frag.frags.high_thresh
 	},
 	{
 		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &init_net.nf_frag.frags.high_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &init_net.nf_frag.frags.low_thresh
 	},
 	{ }
 };
@@ -93,15 +88,15 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
 				GFP_KERNEL);
 		if (table == NULL)
 			goto err_alloc;
-
-		table[0].data = &net->nf_frag.frags.timeout;
-		table[1].data = &net->nf_frag.frags.low_thresh;
-		table[1].extra2 = &net->nf_frag.frags.high_thresh;
-		table[2].data = &net->nf_frag.frags.high_thresh;
-		table[2].extra1 = &net->nf_frag.frags.low_thresh;
-		table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
 	}
 
+	table[0].data	= &net->nf_frag.fqdir->timeout;
+	table[1].data	= &net->nf_frag.fqdir->low_thresh;
+	table[1].extra2	= &net->nf_frag.fqdir->high_thresh;
+	table[2].data	= &net->nf_frag.fqdir->high_thresh;
+	table[2].extra1	= &net->nf_frag.fqdir->low_thresh;
+	table[2].extra2	= &init_net.nf_frag.fqdir->high_thresh;
+
 	hdr = register_net_sysctl(net, "net/netfilter", table);
 	if (hdr == NULL)
 		goto err_reg;
@@ -148,12 +143,10 @@ static void nf_ct_frag6_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
-	struct net *net;
 
 	fq = container_of(frag, struct frag_queue, q);
-	net = container_of(fq->q.net, struct net, nf_frag.frags);
 
-	ip6frag_expire_frag_queue(net, fq);
+	ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
 }
 
 /* Creation primitives. */
@@ -169,7 +162,7 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
 	};
 	struct inet_frag_queue *q;
 
-	q = inet_frag_find(&net->nf_frag.frags, &key);
+	q = inet_frag_find(net->nf_frag.fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -276,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 	fq->ecn |= ecn;
 	if (payload_len > fq->q.max_size)
 		fq->q.max_size = payload_len;
-	add_frag_mem_limit(fq->q.net, skb->truesize);
+	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -496,24 +489,24 @@ static int nf_ct_net_init(struct net *net)
 {
 	int res;
 
-	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
-	net->nf_frag.frags.f = &nf_frags;
-
-	res = inet_frags_init_net(&net->nf_frag.frags);
+	res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
 	if (res < 0)
 		return res;
+
+	net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+	net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+	net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
 	res = nf_ct_frag6_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->nf_frag.frags);
+		fqdir_exit(net->nf_frag.fqdir);
 	return res;
 }
 
 static void nf_ct_net_exit(struct net *net)
 {
 	nf_ct_frags6_sysctl_unregister(net);
-	inet_frags_exit_net(&net->nf_frag.frags);
+	fqdir_exit(net->nf_frag.fqdir);
 }
 
 static struct pernet_operations nf_ct_net_ops = {
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 2356b4af7309..0bbefc440bcd 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -48,8 +48,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       sock_prot_inuse_get(net, &rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
-		   atomic_read(&net->ipv6.frags.rhashtable.nelems),
-		   frag_mem_limit(&net->ipv6.frags));
+		   atomic_read(&net->ipv6.fqdir->rhashtable.nelems),
+		   frag_mem_limit(net->ipv6.fqdir));
 	return 0;
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1a832f5e190b..ff5b6d8de2c6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -76,12 +76,10 @@ static void ip6_frag_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
-	struct net *net;
 
 	fq = container_of(frag, struct frag_queue, q);
-	net = container_of(fq->q.net, struct net, ipv6.frags);
 
-	ip6frag_expire_frag_queue(net, fq);
+	ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
 }
 
 static struct frag_queue *
@@ -100,7 +98,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
 					    IPV6_ADDR_LINKLOCAL)))
 		key.iif = 0;
 
-	q = inet_frag_find(&net->ipv6.frags, &key);
+	q = inet_frag_find(net->ipv6.fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -200,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
 	fq->ecn |= ecn;
-	add_frag_mem_limit(fq->q.net, skb->truesize);
+	add_frag_mem_limit(fq->q.fqdir, skb->truesize);
 
 	fragsize = -skb_network_offset(skb) + skb->len;
 	if (fragsize > fq->q.max_size)
@@ -254,7 +252,7 @@ err:
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 			  struct sk_buff *prev_tail, struct net_device *dev)
 {
-	struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
+	struct net *net = fq->q.fqdir->net;
 	unsigned int nhoff;
 	void *reasm_data;
 	int payload_len;
@@ -401,23 +399,18 @@ static const struct inet6_protocol frag_protocol = {
 static struct ctl_table ip6_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ip6frag_high_thresh",
-		.data		= &init_net.ipv6.frags.high_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &init_net.ipv6.frags.low_thresh
 	},
 	{
 		.procname	= "ip6frag_low_thresh",
-		.data		= &init_net.ipv6.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
-		.extra2		= &init_net.ipv6.frags.high_thresh
 	},
 	{
 		.procname	= "ip6frag_time",
-		.data		= &init_net.ipv6.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -449,12 +442,12 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 		if (!table)
 			goto err_alloc;
 
-		table[0].data = &net->ipv6.frags.high_thresh;
-		table[0].extra1 = &net->ipv6.frags.low_thresh;
-		table[1].data = &net->ipv6.frags.low_thresh;
-		table[1].extra2 = &net->ipv6.frags.high_thresh;
-		table[2].data = &net->ipv6.frags.timeout;
 	}
+	table[0].data	= &net->ipv6.fqdir->high_thresh;
+	table[0].extra1	= &net->ipv6.fqdir->low_thresh;
+	table[1].data	= &net->ipv6.fqdir->low_thresh;
+	table[1].extra2	= &net->ipv6.fqdir->high_thresh;
+	table[2].data	= &net->ipv6.fqdir->timeout;
 
 	hdr = register_net_sysctl(net, "net/ipv6", table);
 	if (!hdr)
@@ -517,25 +510,24 @@ static int __net_init ipv6_frags_init_net(struct net *net)
 {
 	int res;
 
-	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
-	net->ipv6.frags.f = &ip6_frags;
-
-	res = inet_frags_init_net(&net->ipv6.frags);
+	res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
 	if (res < 0)
 		return res;
 
+	net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+	net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+	net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
 	res = ip6_frags_ns_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->ipv6.frags);
+		fqdir_exit(net->ipv6.fqdir);
 	return res;
 }
 
 static void __net_exit ipv6_frags_exit_net(struct net *net)
 {
 	ip6_frags_ns_sysctl_unregister(net);
-	inet_frags_exit_net(&net->ipv6.frags);
+	fqdir_exit(net->ipv6.fqdir);
 }
 
 static struct pernet_operations ip6_frags_ops = {
@@ -591,8 +583,8 @@ err_protocol:
 
 void ipv6_frag_exit(void)
 {
-	inet_frags_fini(&ip6_frags);
 	ip6_frags_sysctl_unregister();
 	unregister_pernet_subsys(&ip6_frags_ops);
 	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+	inet_frags_fini(&ip6_frags);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 848e944f07df..fada5a13bcb2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -441,12 +441,12 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
 	if (!fl6->mp_hash)
 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
-	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
+	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
 		goto out;
 
 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 				 fib6_siblings) {
-		const struct fib6_nh *nh = &sibling->fib6_nh;
+		const struct fib6_nh *nh = sibling->fib6_nh;
 		int nh_upper_bound;
 
 		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
@@ -460,7 +460,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
 
 out:
 	res->f6i = match;
-	res->nh = &match->fib6_nh;
+	res->nh = match->fib6_nh;
 }
 
 /*
@@ -496,13 +496,13 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
 	struct fib6_nh *nh;
 
 	if (!oif && ipv6_addr_any(saddr)) {
-		nh = &f6i->fib6_nh;
+		nh = f6i->fib6_nh;
 		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 			goto out;
 	}
 
 	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
-		nh = &spf6i->fib6_nh;
+		nh = spf6i->fib6_nh;
 		if (__rt6_device_match(net, nh, saddr, oif, flags)) {
 			res->f6i = spf6i;
 			goto out;
@@ -511,14 +511,14 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
 
 	if (oif && flags & RT6_LOOKUP_F_IFACE) {
 		res->f6i = net->ipv6.fib6_null_entry;
-		nh = &res->f6i->fib6_nh;
+		nh = res->f6i->fib6_nh;
 		goto out;
 	}
 
-	nh = &f6i->fib6_nh;
+	nh = f6i->fib6_nh;
 	if (nh->fib_nh_flags & RTNH_F_DEAD) {
 		res->f6i = net->ipv6.fib6_null_entry;
-		nh = &res->f6i->fib6_nh;
+		nh = res->f6i->fib6_nh;
 	}
 out:
 	res->nh = nh;
@@ -714,7 +714,7 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
 		if (fib6_check_expired(f6i))
 			continue;
 
-		nh = &f6i->fib6_nh;
+		nh = f6i->fib6_nh;
 		if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
 			res->f6i = f6i;
 			res->nh = nh;
@@ -796,7 +796,7 @@ static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 out:
 	if (!res->f6i) {
 		res->f6i = net->ipv6.fib6_null_entry;
-		res->nh = &res->f6i->fib6_nh;
+		res->nh = res->f6i->fib6_nh;
 		res->fib6_flags = res->f6i->fib6_flags;
 		res->fib6_type = res->f6i->fib6_type;
 	}
@@ -1270,7 +1270,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
 {
 	struct rt6_info *pcpu_rt, **p;
 
-	p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+	p = this_cpu_ptr(res->nh->rt6i_pcpu);
 	pcpu_rt = *p;
 
 	if (pcpu_rt)
@@ -1291,7 +1291,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
 	}
 
 	dst_hold(&pcpu_rt->dst);
-	p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+	p = this_cpu_ptr(res->nh->rt6i_pcpu);
 	prev = cmpxchg(p, NULL, pcpu_rt);
 	BUG_ON(prev);
 
@@ -1461,25 +1461,74 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
 	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
 }
 
+#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
+
+/* used when the flushed bit is not relevant, only access to the bucket
+ * (ie., all bucket users except rt6_insert_exception);
+ *
+ * called under rcu lock; sometimes called with rt6_exception_lock held
+ */
+static
+struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
+						       spinlock_t *lock)
+{
+	struct rt6_exception_bucket *bucket;
+
+	if (lock)
+		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+						   lockdep_is_held(lock));
+	else
+		bucket = rcu_dereference(nh->rt6i_exception_bucket);
+
+	/* remove bucket flushed bit if set */
+	if (bucket) {
+		unsigned long p = (unsigned long)bucket;
+
+		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
+		bucket = (struct rt6_exception_bucket *)p;
+	}
+
+	return bucket;
+}
+
+static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
+{
+	unsigned long p = (unsigned long)bucket;
+
+	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
+}
+
+/* called with rt6_exception_lock held */
+static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
+					      spinlock_t *lock)
+{
+	struct rt6_exception_bucket *bucket;
+	unsigned long p;
+
+	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+					   lockdep_is_held(lock));
+
+	p = (unsigned long)bucket;
+	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
+	bucket = (struct rt6_exception_bucket *)p;
+	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+}
+
 static int rt6_insert_exception(struct rt6_info *nrt,
 				const struct fib6_result *res)
 {
 	struct net *net = dev_net(nrt->dst.dev);
 	struct rt6_exception_bucket *bucket;
+	struct fib6_info *f6i = res->f6i;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
-	struct fib6_info *f6i = res->f6i;
+	struct fib6_nh *nh = res->nh;
 	int err = 0;
 
 	spin_lock_bh(&rt6_exception_lock);
 
-	if (f6i->exception_bucket_flushed) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
-					lockdep_is_held(&rt6_exception_lock));
+	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+					  lockdep_is_held(&rt6_exception_lock));
 	if (!bucket) {
 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
 				 GFP_ATOMIC);
@@ -1487,7 +1536,10 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 			err = -ENOMEM;
 			goto out;
 		}
-		rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
+		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
+		err = -EINVAL;
+		goto out;
 	}
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1542,7 +1594,7 @@ out:
 	return err;
 }
 
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1550,25 +1602,33 @@ void rt6_flush_exceptions(struct fib6_info *rt)
 	int i;
 
 	spin_lock_bh(&rt6_exception_lock);
-	/* Prevent rt6_insert_exception() to recreate the bucket list */
-	rt->exception_bucket_flushed = 1;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (!bucket)
 		goto out;
 
+	/* Prevent rt6_insert_exception() to recreate the bucket list */
+	if (!from)
+		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
+
 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
-		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
-			rt6_remove_exception(bucket, rt6_ex);
-		WARN_ON_ONCE(bucket->depth);
+		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+			if (!from ||
+			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
+				rt6_remove_exception(bucket, rt6_ex);
+		}
+		WARN_ON_ONCE(!from && bucket->depth);
 		bucket++;
 	}
-
 out:
 	spin_unlock_bh(&rt6_exception_lock);
 }
 
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+	fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+}
+
 /* Find cached rt in the hash table inside passed in rt
  * Caller has to hold rcu_read_lock()
  */
@@ -1597,7 +1657,7 @@ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 		src_key = saddr;
 find_ex:
 #endif
-	bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
+	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
 
 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
@@ -1615,25 +1675,20 @@ find_ex:
 }
 
 /* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
+				    const struct rt6_info *rt)
 {
+	const struct in6_addr *src_key = NULL;
 	struct rt6_exception_bucket *bucket;
-	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
-	struct fib6_info *from;
 	int err;
 
-	from = rcu_dereference(rt->from);
-	if (!from ||
-	    !(rt->rt6i_flags & RTF_CACHE))
-		return -EINVAL;
-
-	if (!rcu_access_pointer(from->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return -ENOENT;
 
 	spin_lock_bh(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
+
 #ifdef CONFIG_IPV6_SUBTREES
 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
 	 * and exception table is indexed by a hash of
@@ -1641,7 +1696,7 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->fib6_src.plen)
+	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
@@ -1658,23 +1713,29 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 	return err;
 }
 
-/* Find rt6_ex which contains the passed in rt cache and
- * refresh its stamp
- */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+static int rt6_remove_exception_rt(struct rt6_info *rt)
 {
-	struct rt6_exception_bucket *bucket;
-	struct in6_addr *src_key = NULL;
-	struct rt6_exception *rt6_ex;
 	struct fib6_info *from;
 
-	rcu_read_lock();
 	from = rcu_dereference(rt->from);
 	if (!from || !(rt->rt6i_flags & RTF_CACHE))
-		goto unlock;
+		return -EINVAL;
 
-	bucket = rcu_dereference(from->rt6i_exception_bucket);
+	return fib6_nh_remove_exception(from->fib6_nh,
+					from->fib6_src.plen, rt);
+}
 
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
+				     const struct rt6_info *rt)
+{
+	const struct in6_addr *src_key = NULL;
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+
+	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
 #ifdef CONFIG_IPV6_SUBTREES
 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
 	 * and exception table is indexed by a hash of
@@ -1682,15 +1743,25 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->fib6_src.plen)
+	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
-	rt6_ex = __rt6_find_exception_rcu(&bucket,
-					  &rt->rt6i_dst.addr,
-					  src_key);
+	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
 	if (rt6_ex)
 		rt6_ex->stamp = jiffies;
+}
 
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+	struct fib6_info *from;
+
+	rcu_read_lock();
+
+	from = rcu_dereference(rt->from);
+	if (!from || !(rt->rt6i_flags & RTF_CACHE))
+		goto unlock;
+
+	fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt);
 unlock:
 	rcu_read_unlock();
 }
@@ -1718,15 +1789,13 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
 }
 
 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
-				       struct fib6_info *rt, int mtu)
+				       const struct fib6_nh *nh, int mtu)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	int i;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-					lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (!bucket)
 		return;
 
@@ -1748,21 +1817,19 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 
 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
 
-static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
-					struct in6_addr *gateway)
+static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
+					    const struct in6_addr *gateway)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	struct hlist_node *tmp;
 	int i;
 
-	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return;
 
 	spin_lock_bh(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-				     lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (bucket) {
 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
 			hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1827,23 +1894,21 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 	gc_args->more++;
 }
 
-void rt6_age_exceptions(struct fib6_info *rt,
-			struct fib6_gc_args *gc_args,
-			unsigned long now)
+static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
+				   struct fib6_gc_args *gc_args,
+				   unsigned long now)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	struct hlist_node *tmp;
 	int i;
 
-	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return;
 
 	rcu_read_lock_bh();
 	spin_lock(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (bucket) {
 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
 			hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1858,6 +1923,13 @@ void rt6_age_exceptions(struct fib6_info *rt,
 	rcu_read_unlock_bh();
 }
 
+void rt6_age_exceptions(struct fib6_info *f6i,
+			struct fib6_gc_args *gc_args,
+			unsigned long now)
+{
+	fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+}
+
 /* must be called with rcu lock held */
 int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
 		      struct flowi6 *fl6, struct fib6_result *res, int strict)
@@ -2384,7 +2456,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 			rcu_read_unlock();
 			return;
 		}
-		res.nh = &res.f6i->fib6_nh;
+		res.nh = res.f6i->fib6_nh;
 		res.fib6_flags = res.f6i->fib6_flags;
 		res.fib6_type = res.f6i->fib6_type;
 
@@ -2533,7 +2605,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 restart:
 	for_each_fib6_node_rt_rcu(fn) {
 		res.f6i = rt;
-		res.nh = &rt->fib6_nh;
+		res.nh = rt->fib6_nh;
 
 		if (fib6_check_expired(rt))
 			continue;
@@ -2557,7 +2629,7 @@ restart:
 	}
 
 	res.f6i = rt;
-	res.nh = &rt->fib6_nh;
+	res.nh = rt->fib6_nh;
 out:
 	if (ret) {
 		ip6_hold_safe(net, &ret);
@@ -3074,6 +3146,12 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 	    !netif_carrier_ok(dev))
 		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
 
+	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+	if (!fib6_nh->rt6i_pcpu) {
+		err = -ENOMEM;
+		goto out;
+	}
+
 	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
 				 cfg->fc_encap_type, cfg, gfp_flags, extack);
 	if (err)
@@ -3098,6 +3176,38 @@ out:
 
 void fib6_nh_release(struct fib6_nh *fib6_nh)
 {
+	struct rt6_exception_bucket *bucket;
+
+	rcu_read_lock();
+
+	fib6_nh_flush_exceptions(fib6_nh, NULL);
+	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
+	if (bucket) {
+		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
+		kfree(bucket);
+	}
+
+	rcu_read_unlock();
+
+	if (fib6_nh->rt6i_pcpu) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct rt6_info **ppcpu_rt;
+			struct rt6_info *pcpu_rt;
+
+			ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+			pcpu_rt = *ppcpu_rt;
+			if (pcpu_rt) {
+				dst_dev_put(&pcpu_rt->dst);
+				dst_release(&pcpu_rt->dst);
+				*ppcpu_rt = NULL;
+			}
+		}
+
+		free_percpu(fib6_nh->rt6i_pcpu);
+	}
+
 	fib_nh_common_release(&fib6_nh->nh_common);
 }
 
@@ -3160,7 +3270,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 		goto out;
 
 	err = -ENOMEM;
-	rt = fib6_info_alloc(gfp_flags);
+	rt = fib6_info_alloc(gfp_flags, true);
 	if (!rt)
 		goto out;
 
@@ -3200,7 +3310,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
 	rt->fib6_src.plen = cfg->fc_src_len;
 #endif
-	err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
+	err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
 	if (err)
 		goto out;
 
@@ -3208,7 +3318,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 	 * they would result in kernel looping; promote them to reject routes
 	 */
 	addr_type = ipv6_addr_type(&cfg->fc_dst);
-	if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
+	if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type))
 		rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
 
 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
@@ -3326,7 +3436,7 @@ out_put:
 	return err;
 }
 
-static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
 {
 	int rc = -ESRCH;
 
@@ -3342,10 +3452,25 @@ out:
 	return rc;
 }
 
+static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
+			     struct fib6_nh *nh)
+{
+	struct fib6_result res = {
+		.f6i = rt,
+		.nh = nh,
+	};
+	struct rt6_info *rt_cache;
+
+	rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
+	if (rt_cache)
+		return __ip6_del_cached_rt(rt_cache, cfg);
+
+	return 0;
+}
+
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt_cache;
 	struct fib6_table *table;
 	struct fib6_info *rt;
 	struct fib6_node *fn;
@@ -3368,26 +3493,18 @@ static int ip6_route_del(struct fib6_config *cfg,
 		for_each_fib6_node_rt_rcu(fn) {
 			struct fib6_nh *nh;
 
+			nh = rt->fib6_nh;
 			if (cfg->fc_flags & RTF_CACHE) {
-				struct fib6_result res = {
-					.f6i = rt,
-				};
 				int rc;
 
-				rt_cache = rt6_find_cached_rt(&res,
-							      &cfg->fc_dst,
-							      &cfg->fc_src);
-				if (rt_cache) {
-					rc = ip6_del_cached_rt(rt_cache, cfg);
-					if (rc != -ESRCH) {
-						rcu_read_unlock();
-						return rc;
-					}
+				rc = ip6_del_cached_rt(cfg, rt, nh);
+				if (rc != -ESRCH) {
+					rcu_read_unlock();
+					return rc;
 				}
 				continue;
 			}
 
-			nh = &rt->fib6_nh;
 			if (cfg->fc_ifindex &&
 			    (!nh->fib_nh_dev ||
 			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
@@ -3509,7 +3626,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	if (!res.f6i)
 		goto out;
 
-	res.nh = &res.f6i->fib6_nh;
+	res.nh = res.f6i->fib6_nh;
 	res.fib6_flags = res.f6i->fib6_flags;
 	res.fib6_type = res.f6i->fib6_type;
 	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
@@ -3561,12 +3678,12 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
 		goto out;
 
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
+		if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
 			continue;
 		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
-		    !rt->fib6_nh.fib_nh_gw_family)
+		    !rt->fib6_nh->fib_nh_gw_family)
 			continue;
-		if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
+		if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
 			continue;
 		if (!fib6_info_hold_safe(rt))
 			continue;
@@ -3624,7 +3741,7 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
 
 	rcu_read_lock();
 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
-		struct fib6_nh *nh = &rt->fib6_nh;
+		struct fib6_nh *nh = rt->fib6_nh;
 
 		if (dev == nh->fib_nh_dev &&
 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
@@ -3876,7 +3993,7 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
 
-	if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
+	if (((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
 	    rt != net->ipv6.fib6_null_entry &&
 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
 		spin_lock_bh(&rt6_exception_lock);
@@ -3904,18 +4021,17 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
 {
 	struct in6_addr *gateway = (struct in6_addr *)arg;
+	struct fib6_nh *nh = rt->fib6_nh;
 
 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
-	    rt->fib6_nh.fib_nh_gw_family &&
-	    ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
+	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
 		return -1;
-	}
 
 	/* Further clean up cached routes in exception table.
 	 * This is needed because cached route may have a different
 	 * gateway than its 'parent' in the case of an ip redirect.
 	 */
-	rt6_exceptions_clean_tohost(rt, gateway);
+	fib6_nh_exceptions_clean_tohost(nh, gateway);
 
 	return 0;
 }
@@ -3955,9 +4071,9 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
 
 static bool rt6_is_dead(const struct fib6_info *rt)
 {
-	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
-	    (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
-	     ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
+	if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
+	    (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
+	     ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
 		return true;
 
 	return false;
@@ -3969,11 +4085,11 @@ static int rt6_multipath_total_weight(const struct fib6_info *rt)
 	int total = 0;
 
 	if (!rt6_is_dead(rt))
-		total += rt->fib6_nh.fib_nh_weight;
+		total += rt->fib6_nh->fib_nh_weight;
 
 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
 		if (!rt6_is_dead(iter))
-			total += iter->fib6_nh.fib_nh_weight;
+			total += iter->fib6_nh->fib_nh_weight;
 	}
 
 	return total;
@@ -3984,11 +4100,11 @@ static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
 	int upper_bound = -1;
 
 	if (!rt6_is_dead(rt)) {
-		*weight += rt->fib6_nh.fib_nh_weight;
+		*weight += rt->fib6_nh->fib_nh_weight;
 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
 						    total) - 1;
 	}
-	atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
+	atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
 }
 
 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
@@ -4032,8 +4148,8 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
 	struct net *net = dev_net(arg->dev);
 
 	if (rt != net->ipv6.fib6_null_entry &&
-	    rt->fib6_nh.fib_nh_dev == arg->dev) {
-		rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
+	    rt->fib6_nh->fib_nh_dev == arg->dev) {
+		rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
 		fib6_update_sernum_upto_root(net, rt);
 		rt6_multipath_rebalance(rt);
 	}
@@ -4061,10 +4177,10 @@ static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
 {
 	struct fib6_info *iter;
 
-	if (rt->fib6_nh.fib_nh_dev == dev)
+	if (rt->fib6_nh->fib_nh_dev == dev)
 		return true;
 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
-		if (iter->fib6_nh.fib_nh_dev == dev)
+		if (iter->fib6_nh->fib_nh_dev == dev)
 			return true;
 
 	return false;
@@ -4085,12 +4201,12 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
 	struct fib6_info *iter;
 	unsigned int dead = 0;
 
-	if (rt->fib6_nh.fib_nh_dev == down_dev ||
-	    rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+	if (rt->fib6_nh->fib_nh_dev == down_dev ||
+	    rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
 		dead++;
 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
-		if (iter->fib6_nh.fib_nh_dev == down_dev ||
-		    iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+		if (iter->fib6_nh->fib_nh_dev == down_dev ||
+		    iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
 			dead++;
 
 	return dead;
@@ -4102,11 +4218,11 @@ static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
 {
 	struct fib6_info *iter;
 
-	if (rt->fib6_nh.fib_nh_dev == dev)
-		rt->fib6_nh.fib_nh_flags |= nh_flags;
+	if (rt->fib6_nh->fib_nh_dev == dev)
+		rt->fib6_nh->fib_nh_flags |= nh_flags;
 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
-		if (iter->fib6_nh.fib_nh_dev == dev)
-			iter->fib6_nh.fib_nh_flags |= nh_flags;
+		if (iter->fib6_nh->fib_nh_dev == dev)
+			iter->fib6_nh->fib_nh_flags |= nh_flags;
 }
 
 /* called with write lock held for table with rt */
@@ -4121,12 +4237,12 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
 
 	switch (arg->event) {
 	case NETDEV_UNREGISTER:
-		return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
+		return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
 	case NETDEV_DOWN:
 		if (rt->should_flush)
 			return -1;
 		if (!rt->fib6_nsiblings)
-			return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
+			return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
 		if (rt6_multipath_uses_dev(rt, dev)) {
 			unsigned int count;
 
@@ -4142,10 +4258,10 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
 		}
 		return -2;
 	case NETDEV_CHANGE:
-		if (rt->fib6_nh.fib_nh_dev != dev ||
+		if (rt->fib6_nh->fib_nh_dev != dev ||
 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
 			break;
-		rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
+		rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
 		rt6_multipath_rebalance(rt);
 		break;
 	}
@@ -4179,9 +4295,36 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event)
 struct rt6_mtu_change_arg {
 	struct net_device *dev;
 	unsigned int mtu;
+	struct fib6_info *f6i;
 };
 
-static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
+static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
+{
+	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
+	struct fib6_info *f6i = arg->f6i;
+
+	/* For administrative MTU increase, there is no way to discover
+	 * IPv6 PMTU increase, so PMTU increase should be updated here.
+	 * Since RFC 1981 doesn't include administrative MTU increase
+	 * update PMTU increase is a MUST. (i.e. jumbo frame)
+	 */
+	if (nh->fib_nh_dev == arg->dev) {
+		struct inet6_dev *idev = __in6_dev_get(arg->dev);
+		u32 mtu = f6i->fib6_pmtu;
+
+		if (mtu >= arg->mtu ||
+		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
+
+		spin_lock_bh(&rt6_exception_lock);
+		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
+		spin_unlock_bh(&rt6_exception_lock);
+	}
+
+	return 0;
+}
+
+static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
 {
 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
 	struct inet6_dev *idev;
@@ -4196,24 +4339,11 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
 	if (!idev)
 		return 0;
 
-	/* For administrative MTU increase, there is no way to discover
-	   IPv6 PMTU increase, so PMTU increase should be updated here.
-	   Since RFC 1981 doesn't include administrative MTU increase
-	   update PMTU increase is a MUST. (i.e. jumbo frame)
-	 */
-	if (rt->fib6_nh.fib_nh_dev == arg->dev &&
-	    !fib6_metric_locked(rt, RTAX_MTU)) {
-		u32 mtu = rt->fib6_pmtu;
-
-		if (mtu >= arg->mtu ||
-		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
-			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+	if (fib6_metric_locked(f6i, RTAX_MTU))
+		return 0;
 
-		spin_lock_bh(&rt6_exception_lock);
-		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
-		spin_unlock_bh(&rt6_exception_lock);
-	}
-	return 0;
+	arg->f6i = f6i;
+	return fib6_nh_mtu_change(f6i->fib6_nh, arg);
 }
 
 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
@@ -4227,6 +4357,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
 }
 
 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
+	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
 	[RTA_OIF]               = { .type = NLA_U32 },
@@ -4492,7 +4623,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 			goto cleanup;
 		}
 
-		rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
+		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
 
 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
 					    rt, &r_cfg);
@@ -4659,7 +4790,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
 			    + NLA_ALIGN(sizeof(struct rtnexthop))
 			    + nla_total_size(16) /* RTA_GATEWAY */
-			    + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
+			    + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws);
 
 		nexthop_len *= rt->fib6_nsiblings;
 	}
@@ -4677,7 +4808,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
 	       + nla_total_size(sizeof(struct rta_cacheinfo))
 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
 	       + nla_total_size(1) /* RTA_PREF */
-	       + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
+	       + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws)
 	       + nexthop_len;
 }
 
@@ -4797,14 +4928,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 		if (!mp)
 			goto nla_put_failure;
 
-		if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
-				    rt->fib6_nh.fib_nh_weight) < 0)
+		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
+				    rt->fib6_nh->fib_nh_weight) < 0)
 			goto nla_put_failure;
 
 		list_for_each_entry_safe(sibling, next_sibling,
 					 &rt->fib6_siblings, fib6_siblings) {
-			if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
-					    sibling->fib6_nh.fib_nh_weight) < 0)
+			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
+					    sibling->fib6_nh->fib_nh_weight) < 0)
 				goto nla_put_failure;
 		}
 
@@ -4812,7 +4943,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 	} else {
 		unsigned char nh_flags = 0;
 
-		if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
+		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
 				     &nh_flags, false) < 0)
 			goto nla_put_failure;
 
@@ -4842,7 +4973,7 @@ nla_put_failure:
 static bool fib6_info_uses_dev(const struct fib6_info *f6i,
 			       const struct net_device *dev)
 {
-	if (f6i->fib6_nh.fib_nh_dev == dev)
+	if (f6i->fib6_nh->fib_nh_dev == dev)
 		return true;
 
 	if (f6i->fib6_nsiblings) {
@@ -4850,7 +4981,7 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
 
 		list_for_each_entry_safe(sibling, next_sibling,
 					 &f6i->fib6_siblings, fib6_siblings) {
-			if (sibling->fib6_nh.fib_nh_dev == dev)
+			if (sibling->fib6_nh->fib_nh_dev == dev)
 				return true;
 		}
 	}
@@ -5129,6 +5260,38 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
 }
 
+void fib6_rt_update(struct net *net, struct fib6_info *rt,
+		    struct nl_info *info)
+{
+	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	/* call_fib6_entry_notifiers will be removed when in-kernel notifier
+	 * is implemented and supported for nexthop objects
+	 */
+	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
+
+	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+	if (!skb)
+		goto errout;
+
+	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
+		    info->nlh, gfp_any());
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+
 static int ip6_route_dev_notify(struct notifier_block *this,
 				unsigned long event, void *ptr)
 {
@@ -5139,7 +5302,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		return NOTIFY_OK;
 
 	if (event == NETDEV_REGISTER) {
-		net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
+		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
 		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -5333,11 +5496,11 @@ static int __net_init ip6_route_net_init(struct net *net)
 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
 		goto out_ip6_dst_ops;
 
-	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
-					    sizeof(*net->ipv6.fib6_null_entry),
-					    GFP_KERNEL);
+	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
 	if (!net->ipv6.fib6_null_entry)
 		goto out_ip6_dst_entries;
+	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
+	       sizeof(*net->ipv6.fib6_null_entry));
 
 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
 					   sizeof(*net->ipv6.ip6_null_entry),
@@ -5474,7 +5637,7 @@ void __init ip6_route_init_special_entries(void)
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
-	init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
+	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 37bb530d848f..a0560d175a7f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -16,6 +16,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
 #include <net/netfilter/nf_log.h>
 
 #include <linux/ip.h>
@@ -120,10 +121,8 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
 };
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
 
-static unsigned int nf_confirm(struct sk_buff *skb,
-			       unsigned int protoff,
-			       struct nf_conn *ct,
-			       enum ip_conntrack_info ctinfo)
+unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
+			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	const struct nf_conn_help *help;
 
@@ -154,6 +153,7 @@ static unsigned int nf_confirm(struct sk_buff *skb,
 	/* We've seen it coming out the other side: confirm it */
 	return nf_conntrack_confirm(skb);
 }
+EXPORT_SYMBOL_GPL(nf_confirm);
 
 static unsigned int ipv4_confirm(void *priv,
 				 struct sk_buff *skb,
@@ -442,12 +442,14 @@ static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
 	return 0;
 }
 
+static struct nf_ct_bridge_info *nf_ct_bridge_info;
+
 static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 {
 	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
-	bool fixup_needed = false;
+	bool fixup_needed = false, retry = true;
 	int err = 0;
-
+retry:
 	mutex_lock(&nf_ct_proto_mutex);
 
 	switch (nfproto) {
@@ -487,6 +489,32 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 			fixup_needed = true;
 		break;
 #endif
+	case NFPROTO_BRIDGE:
+		if (!nf_ct_bridge_info) {
+			if (!retry) {
+				err = -EPROTO;
+				goto out_unlock;
+			}
+			mutex_unlock(&nf_ct_proto_mutex);
+			request_module("nf_conntrack_bridge");
+			retry = false;
+			goto retry;
+		}
+		if (!try_module_get(nf_ct_bridge_info->me)) {
+			err = -EPROTO;
+			goto out_unlock;
+		}
+		cnet->users_bridge++;
+		if (cnet->users_bridge > 1)
+			goto out_unlock;
+
+		err = nf_register_net_hooks(net, nf_ct_bridge_info->ops,
+					    nf_ct_bridge_info->ops_size);
+		if (err)
+			cnet->users_bridge = 0;
+		else
+			fixup_needed = true;
+		break;
 	default:
 		err = -EPROTO;
 		break;
@@ -519,47 +547,99 @@ static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
 						ARRAY_SIZE(ipv6_conntrack_ops));
 		break;
 #endif
+	case NFPROTO_BRIDGE:
+		if (!nf_ct_bridge_info)
+			break;
+		if (cnet->users_bridge && (--cnet->users_bridge == 0))
+			nf_unregister_net_hooks(net, nf_ct_bridge_info->ops,
+						nf_ct_bridge_info->ops_size);
+
+		module_put(nf_ct_bridge_info->me);
+		break;
 	}
-
 	mutex_unlock(&nf_ct_proto_mutex);
 }
 
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_inet_get(struct net *net)
 {
 	int err;
 
-	if (nfproto == NFPROTO_INET) {
-		err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
-		if (err < 0)
-			goto err1;
-		err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
-		if (err < 0)
-			goto err2;
-	} else {
-		err = nf_ct_netns_do_get(net, nfproto);
-		if (err < 0)
-			goto err1;
-	}
-	return 0;
+	err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+	if (err < 0)
+		goto err1;
+	err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+	if (err < 0)
+		goto err2;
 
+	return err;
 err2:
 	nf_ct_netns_put(net, NFPROTO_IPV4);
 err1:
 	return err;
 }
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+	int err;
+
+	switch (nfproto) {
+	case NFPROTO_INET:
+		err = nf_ct_netns_inet_get(net);
+		break;
+	case NFPROTO_BRIDGE:
+		err = nf_ct_netns_do_get(net, NFPROTO_BRIDGE);
+		if (err < 0)
+			return err;
+
+		err = nf_ct_netns_inet_get(net);
+		if (err < 0) {
+			nf_ct_netns_put(net, NFPROTO_BRIDGE);
+			return err;
+		}
+		break;
+	default:
+		err = nf_ct_netns_do_get(net, nfproto);
+		break;
+	}
+	return err;
+}
 EXPORT_SYMBOL_GPL(nf_ct_netns_get);
 
 void nf_ct_netns_put(struct net *net, uint8_t nfproto)
 {
-	if (nfproto == NFPROTO_INET) {
+	switch (nfproto) {
+	case NFPROTO_BRIDGE:
+		nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
+		/* fall through */
+	case NFPROTO_INET:
 		nf_ct_netns_do_put(net, NFPROTO_IPV4);
 		nf_ct_netns_do_put(net, NFPROTO_IPV6);
-	} else {
+		break;
+	default:
 		nf_ct_netns_do_put(net, nfproto);
+		break;
 	}
 }
 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
 
+void nf_ct_bridge_register(struct nf_ct_bridge_info *info)
+{
+	WARN_ON(nf_ct_bridge_info);
+	mutex_lock(&nf_ct_proto_mutex);
+	nf_ct_bridge_info = info;
+	mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_register);
+
+void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info)
+{
+	WARN_ON(!nf_ct_bridge_info);
+	mutex_lock(&nf_ct_proto_mutex);
+	nf_ct_bridge_info = NULL;
+	mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister);
+
 int nf_conntrack_proto_init(void)
 {
 	int ret;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2c72d95c3050..d104f7ee26c7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -877,6 +877,23 @@ config NET_ACT_CONNMARK
 	  To compile this code as a module, choose M here: the
 	  module will be called act_connmark.
 
+config NET_ACT_CTINFO
+        tristate "Netfilter Connection Mark Actions"
+        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+        depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+        help
+	  Say Y here to allow transfer of a connmark stored information.
+	  Current actions transfer connmark stored DSCP into
+	  ipv4/v6 diffserv and/or to transfer connmark to packet
+	  mark.  Both are useful for restoring egress based marks
+	  back onto ingress connections for qdisc priority mapping
+	  purposes.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_ctinfo.
+
 config NET_ACT_SKBMOD
         tristate "skb data modification action"
         depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8a40431d7b5c..d54bfcbd7981 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
+obj-$(CONFIG_NET_ACT_CTINFO)	+= act_ctinfo.o
 obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
new file mode 100644
index 000000000000..e78b60e47c0f
--- /dev/null
+++ b/net/sched/act_ctinfo.c
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* net/sched/act_ctinfo.c  netfilter ctinfo connmark actions
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+#include <uapi/linux/tc_act/tc_ctinfo.h>
+#include <net/tc_act/tc_ctinfo.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static struct tc_action_ops act_ctinfo_ops;
+static unsigned int ctinfo_net_id;
+
+static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+				struct tcf_ctinfo_params *cp,
+				struct sk_buff *skb, int wlen, int proto)
+{
+	u8 dscp, newdscp;
+
+	newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
+		     ~INET_ECN_MASK;
+
+	switch (proto) {
+	case NFPROTO_IPV4:
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK;
+		if (dscp != newdscp) {
+			if (likely(!skb_try_make_writable(skb, wlen))) {
+				ipv4_change_dsfield(ip_hdr(skb),
+						    INET_ECN_MASK,
+						    newdscp);
+				ca->stats_dscp_set++;
+			} else {
+				ca->stats_dscp_error++;
+			}
+		}
+		break;
+	case NFPROTO_IPV6:
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK;
+		if (dscp != newdscp) {
+			if (likely(!skb_try_make_writable(skb, wlen))) {
+				ipv6_change_dsfield(ipv6_hdr(skb),
+						    INET_ECN_MASK,
+						    newdscp);
+				ca->stats_dscp_set++;
+			} else {
+				ca->stats_dscp_error++;
+			}
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+				  struct tcf_ctinfo_params *cp,
+				  struct sk_buff *skb)
+{
+	ca->stats_cpmark_set++;
+	skb->mark = ct->mark & cp->cpmarkmask;
+}
+
+static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	const struct nf_conntrack_tuple_hash *thash = NULL;
+	struct tcf_ctinfo *ca = to_ctinfo(a);
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_zone zone;
+	enum ip_conntrack_info ctinfo;
+	struct tcf_ctinfo_params *cp;
+	struct nf_conn *ct;
+	int proto, wlen;
+	int action;
+
+	cp = rcu_dereference_bh(ca->params);
+
+	tcf_lastuse_update(&ca->tcf_tm);
+	bstats_update(&ca->tcf_bstats, skb);
+	action = READ_ONCE(ca->tcf_action);
+
+	wlen = skb_network_offset(skb);
+	if (tc_skb_protocol(skb) == htons(ETH_P_IP)) {
+		wlen += sizeof(struct iphdr);
+		if (!pskb_may_pull(skb, wlen))
+			goto out;
+
+		proto = NFPROTO_IPV4;
+	} else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) {
+		wlen += sizeof(struct ipv6hdr);
+		if (!pskb_may_pull(skb, wlen))
+			goto out;
+
+		proto = NFPROTO_IPV6;
+	} else {
+		goto out;
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct) { /* look harder, usually ingress */
+		if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				       proto, cp->net, &tuple))
+			goto out;
+		zone.id = cp->zone;
+		zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+		thash = nf_conntrack_find_get(cp->net, &zone, &tuple);
+		if (!thash)
+			goto out;
+
+		ct = nf_ct_tuplehash_to_ctrack(thash);
+	}
+
+	if (cp->mode & CTINFO_MODE_DSCP)
+		if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask))
+			tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto);
+
+	if (cp->mode & CTINFO_MODE_CPMARK)
+		tcf_ctinfo_cpmark_set(ct, ca, cp, skb);
+
+	if (thash)
+		nf_ct_put(ct);
+out:
+	return action;
+}
+
+static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
+	[TCA_CTINFO_ACT]		  = { .len = sizeof(struct
+							    tc_ctinfo) },
+	[TCA_CTINFO_ZONE]		  = { .type = NLA_U16 },
+	[TCA_CTINFO_PARMS_DSCP_MASK]	  = { .type = NLA_U32 },
+	[TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 },
+	[TCA_CTINFO_PARMS_CPMARK_MASK]	  = { .type = NLA_U32 },
+};
+
+static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
+			   struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+	struct nlattr *tb[TCA_CTINFO_MAX + 1];
+	struct tcf_ctinfo_params *cp_new;
+	struct tcf_chain *goto_ch = NULL;
+	u32 dscpmask = 0, dscpstatemask;
+	struct tc_ctinfo *actparm;
+	struct tcf_ctinfo *ci;
+	u8 dscpmaskshift;
+	int ret = 0, err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CTINFO_MAX, nla, ctinfo_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_CTINFO_ACT])
+		return -EINVAL;
+	actparm = nla_data(tb[TCA_CTINFO_ACT]);
+
+	/* do some basic validation here before dynamically allocating things */
+	/* that we would otherwise have to clean up.			      */
+	if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
+		dscpmask = nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_MASK]);
+		/* need contiguous 6 bit mask */
+		dscpmaskshift = dscpmask ? __ffs(dscpmask) : 0;
+		if ((~0 & (dscpmask >> dscpmaskshift)) != 0x3f)
+			return -EINVAL;
+		dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ?
+			nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0;
+		/* mask & statemask must not overlap */
+		if (dscpmask & dscpstatemask)
+			return -EINVAL;
+	}
+
+	/* done the validation:now to the actual action allocation */
+	err = tcf_idr_check_alloc(tn, &actparm->index, a, bind);
+	if (!err) {
+		ret = tcf_idr_create(tn, actparm->index, est, a,
+				     &act_ctinfo_ops, bind, false);
+		if (ret) {
+			tcf_idr_cleanup(tn, actparm->index);
+			return ret;
+		}
+	} else if (err > 0) {
+		if (bind) /* don't override defaults */
+			return 0;
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
+			return -EEXIST;
+		}
+	} else {
+		return err;
+	}
+
+	err = tcf_action_check_ctrlact(actparm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
+	ci = to_ctinfo(*a);
+
+	cp_new = kzalloc(sizeof(*cp_new), GFP_KERNEL);
+	if (unlikely(!cp_new)) {
+		err = -ENOMEM;
+		goto put_chain;
+	}
+
+	cp_new->net = net;
+	cp_new->zone = tb[TCA_CTINFO_ZONE] ?
+			nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0;
+	if (dscpmask) {
+		cp_new->dscpmask = dscpmask;
+		cp_new->dscpmaskshift = dscpmaskshift;
+		cp_new->dscpstatemask = dscpstatemask;
+		cp_new->mode |= CTINFO_MODE_DSCP;
+	}
+
+	if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) {
+		cp_new->cpmarkmask =
+				nla_get_u32(tb[TCA_CTINFO_PARMS_CPMARK_MASK]);
+		cp_new->mode |= CTINFO_MODE_CPMARK;
+	}
+
+	spin_lock_bh(&ci->tcf_lock);
+	goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch);
+	rcu_swap_protected(ci->params, cp_new,
+			   lockdep_is_held(&ci->tcf_lock));
+	spin_unlock_bh(&ci->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+	if (cp_new)
+		kfree_rcu(cp_new, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_idr_insert(tn, *a);
+
+	return ret;
+
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
+}
+
+static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	struct tcf_ctinfo *ci = to_ctinfo(a);
+	struct tc_ctinfo opt = {
+		.index   = ci->tcf_index,
+		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
+	};
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_ctinfo_params *cp;
+	struct tcf_t t;
+
+	spin_lock_bh(&ci->tcf_lock);
+	cp = rcu_dereference_protected(ci->params,
+				       lockdep_is_held(&ci->tcf_lock));
+
+	tcf_tm_dump(&t, &ci->tcf_tm);
+	if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	opt.action = ci->tcf_action;
+	if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, TCA_CTINFO_ZONE, cp->zone))
+		goto nla_put_failure;
+
+	if (cp->mode & CTINFO_MODE_DSCP) {
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_MASK,
+				cp->dscpmask))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_STATEMASK,
+				cp->dscpstatemask))
+			goto nla_put_failure;
+	}
+
+	if (cp->mode & CTINFO_MODE_CPMARK) {
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_CPMARK_MASK,
+				cp->cpmarkmask))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET,
+			      ci->stats_dscp_set, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR,
+			      ci->stats_dscp_error, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET,
+			      ci->stats_cpmark_set, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	spin_unlock_bh(&ci->tcf_lock);
+	return skb->len;
+
+nla_put_failure:
+	spin_unlock_bh(&ci->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tcf_idr_search(tn, a, index);
+}
+
+static struct tc_action_ops act_ctinfo_ops = {
+	.kind	= "ctinfo",
+	.id	= TCA_ID_CTINFO,
+	.owner	= THIS_MODULE,
+	.act	= tcf_ctinfo_act,
+	.dump	= tcf_ctinfo_dump,
+	.init	= tcf_ctinfo_init,
+	.walk	= tcf_ctinfo_walker,
+	.lookup	= tcf_ctinfo_search,
+	.size	= sizeof(struct tcf_ctinfo),
+};
+
+static __net_init int ctinfo_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tc_action_net_init(tn, &act_ctinfo_ops);
+}
+
+static void __net_exit ctinfo_exit_net(struct list_head *net_list)
+{
+	tc_action_net_exit(net_list, ctinfo_net_id);
+}
+
+static struct pernet_operations ctinfo_net_ops = {
+	.init		= ctinfo_init_net,
+	.exit_batch	= ctinfo_exit_net,
+	.id		= &ctinfo_net_id,
+	.size		= sizeof(struct tc_action_net),
+};
+
+static int __init ctinfo_init_module(void)
+{
+	return tcf_register_action(&act_ctinfo_ops, &ctinfo_net_ops);
+}
+
+static void __exit ctinfo_cleanup_module(void)
+{
+	tcf_unregister_action(&act_ctinfo_ops, &ctinfo_net_ops);
+}
+
+module_init(ctinfo_init_module);
+module_exit(ctinfo_cleanup_module);
+MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>");
+MODULE_DESCRIPTION("Connection tracking mark actions");
+MODULE_LICENSE("GPL");
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index edfcf16e704c..dac46dfadab5 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -103,11 +103,6 @@ static const struct net_offload sctp6_offload = {
 	},
 };
 
-static const struct skb_checksum_ops crc32c_csum_ops = {
-	.update  = sctp_csum_update,
-	.combine = sctp_csum_combine,
-};
-
 int __init sctp_offload_init(void)
 {
 	int ret;
@@ -120,7 +115,7 @@ int __init sctp_offload_init(void)
 	if (ret)
 		goto ipv4;
 
-	crc32c_csum_stub = &crc32c_csum_ops;
+	crc32c_csum_stub = &sctp_csum_ops;
 	return ret;
 
 ipv4:
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c51a707260fa..9ff64f9df1f3 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -5,9 +5,11 @@
 #include <linux/unix_diag.h>
 #include <linux/skbuff.h>
 #include <linux/module.h>
+#include <linux/uidgid.h>
 #include <net/netlink.h>
 #include <net/af_unix.h>
 #include <net/tcp_states.h>
+#include <net/sock.h>
 
 static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
 {
@@ -111,6 +113,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
 	return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
 }
 
+static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
+{
+	uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
+	return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 portid, u32 seq, u32 flags, int sk_ino)
 {
@@ -157,6 +165,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
 		goto out_nlmsg_trim;
 
+	if ((req->udiag_show & UDIAG_SHOW_UID) &&
+	    sk_diag_dump_uid(sk, skb))
+		goto out_nlmsg_trim;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 982a8dc49e03..063073fd4952 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -23,14 +23,14 @@
 #include <net/sock.h>
 #include <net/af_vsock.h>
 
-/* The host side's design of the feature requires 6 exact 4KB pages for
- * recv/send rings respectively -- this is suboptimal considering memory
- * consumption, however unluckily we have to live with it, before the
- * host comes up with a better design in the future.
+/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
+ * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
+ * hosts don't have this limitation; but, keep the defaults the same for compat.
  */
 #define PAGE_SIZE_4K		4096
 #define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
 #define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
+#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
 
 /* The MTU is 16KB per the host side's design */
 #define HVS_MTU_SIZE		(1024 * 16)
@@ -55,8 +55,9 @@ struct hvs_recv_buf {
 };
 
 /* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
- * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
- * buffer, because tests show there is no significant performance difference.
+ * a smaller size, i.e. HVS_SEND_BUF_SIZE, to maximize concurrency between the
+ * guest and the host processing as one VMBUS packet is the smallest processing
+ * unit.
  *
  * Note: the buffer can be eliminated in the future when we add new VMBus
  * ringbuffer APIs that allow us to directly copy data from userspace buffer
@@ -344,9 +345,12 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 
 	struct sockaddr_vm addr;
 	struct sock *sk, *new = NULL;
-	struct vsock_sock *vnew;
-	struct hvsock *hvs, *hvs_new;
+	struct vsock_sock *vnew = NULL;
+	struct hvsock *hvs = NULL;
+	struct hvsock *hvs_new = NULL;
+	int rcvbuf;
 	int ret;
+	int sndbuf;
 
 	if_type = &chan->offermsg.offer.if_type;
 	if_instance = &chan->offermsg.offer.if_instance;
@@ -388,9 +392,34 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 	}
 
 	set_channel_read_mode(chan, HV_CALL_DIRECT);
-	ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
-			 RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
-			 hvs_channel_cb, conn_from_host ? new : sk);
+
+	/* Use the socket buffer sizes as hints for the VMBUS ring size. For
+	 * server side sockets, 'sk' is the parent socket and thus, this will
+	 * allow the child sockets to inherit the size from the parent. Keep
+	 * the mins to the default value and align to page size as per VMBUS
+	 * requirements.
+	 * For the max, the socket core library will limit the socket buffer
+	 * size that can be set by the user, but, since currently, the hv_sock
+	 * VMBUS ring buffer is physically contiguous allocation, restrict it
+	 * further.
+	 * Older versions of hv_sock host side code cannot handle bigger VMBUS
+	 * ring buffer size. Use the version number to limit the change to newer
+	 * versions.
+	 */
+	if (vmbus_proto_version < VERSION_WIN10_V5) {
+		sndbuf = RINGBUFFER_HVS_SND_SIZE;
+		rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
+	} else {
+		sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
+		sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
+		sndbuf = ALIGN(sndbuf, PAGE_SIZE);
+		rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
+		rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
+		rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
+	}
+
+	ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
+			 conn_from_host ? new : sk);
 	if (ret != 0) {
 		if (conn_from_host) {
 			hvs_new->chan = NULL;
@@ -441,6 +470,7 @@ static u32 hvs_get_local_cid(void)
 static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
 {
 	struct hvsock *hvs;
+	struct sock *sk = sk_vsock(vsk);
 
 	hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
 	if (!hvs)
@@ -448,7 +478,8 @@ static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
 
 	vsk->trans = hvs;
 	hvs->vsk = vsk;
-
+	sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
+	sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
 	return 0;
 }
 
@@ -644,7 +675,9 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
 	struct hvsock *hvs = vsk->trans;
 	struct vmbus_channel *chan = hvs->chan;
 	struct hvs_send_buf *send_buf;
-	ssize_t to_write, max_writable, ret;
+	ssize_t to_write, max_writable;
+	ssize_t ret = 0;
+	ssize_t bytes_written = 0;
 
 	BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
 
@@ -652,20 +685,34 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
 	if (!send_buf)
 		return -ENOMEM;
 
-	max_writable = hvs_channel_writable_bytes(chan);
-	to_write = min_t(ssize_t, len, max_writable);
-	to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
-
-	ret = memcpy_from_msg(send_buf->data, msg, to_write);
-	if (ret < 0)
-		goto out;
+	/* Reader(s) could be draining data from the channel as we write.
+	 * Maximize bandwidth, by iterating until the channel is found to be
+	 * full.
+	 */
+	while (len) {
+		max_writable = hvs_channel_writable_bytes(chan);
+		if (!max_writable)
+			break;
+		to_write = min_t(ssize_t, len, max_writable);
+		to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
+		/* memcpy_from_msg is safe for loop as it advances the offsets
+		 * within the message iterator.
+		 */
+		ret = memcpy_from_msg(send_buf->data, msg, to_write);
+		if (ret < 0)
+			goto out;
 
-	ret = hvs_send_data(hvs->chan, send_buf, to_write);
-	if (ret < 0)
-		goto out;
+		ret = hvs_send_data(hvs->chan, send_buf, to_write);
+		if (ret < 0)
+			goto out;
 
-	ret = to_write;
+		bytes_written += to_write;
+		len -= to_write;
+	}
 out:
+	/* If any data has been sent, return that */
+	if (bytes_written)
+		ret = bytes_written;
 	kfree(send_buf);
 	return ret;
 }
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 9cec81209617..2c75d823d8e2 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -83,6 +83,9 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_NEWCHAIN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_DELCHAIN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_GETCHAIN,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+	{ RTM_NEWNEXTHOP,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_DELNEXTHOP,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_GETNEXTHOP,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -166,7 +169,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 		 * structures at the top of this file with the new mappings
 		 * before updating the BUILD_BUG_ON() macro!
 		 */
-		BUILD_BUG_ON(RTM_MAX != (RTM_NEWCHAIN + 3));
+		BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOP + 3));
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 				 sizeof(nlmsg_route_perms));
 		break;
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 6f81130605d7..4ce0bc1612f5 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -17,3 +17,7 @@ tcp_inq
 tls
 txring_overwrite
 ip_defrag
+so_txtime
+flowlabel
+flowlabel_mgr
+tcp_fastopen_backup_key
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 1e6d14d2825c..9a275d932fd5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -9,12 +9,15 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh
+TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
+TEST_PROGS += tcp_fastopen_backup_key.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += tcp_fastopen_backup_key
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 474040448601..89f84b5118bf 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -25,3 +25,5 @@ CONFIG_NF_TABLES_IPV6=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_ETF=m
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index 864f865eee55..c287b90b8af8 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -4,6 +4,7 @@
 # IPv4 and IPv6 onlink tests
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+VERBOSE=0
 
 # Network interfaces
 # - odd in current namespace; even in peer ns
@@ -91,10 +92,10 @@ log_test()
 
 	if [ ${rc} -eq ${expected} ]; then
 		nsuccess=$((nsuccess+1))
-		printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+		printf "    TEST: %-50s  [ OK ]\n" "${msg}"
 	else
 		nfail=$((nfail+1))
-		printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+		printf "    TEST: %-50s  [FAIL]\n" "${msg}"
 		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
 			echo
 			echo "hit enter to continue, 'q' to quit"
@@ -121,9 +122,23 @@ log_subsection()
 
 run_cmd()
 {
-	echo
-	echo "COMMAND: $*"
-	eval $*
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
 }
 
 get_linklocal()
@@ -451,11 +466,34 @@ run_onlink_tests()
 }
 
 ################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -p          Pause on fail
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
 # main
 
 nsuccess=0
 nfail=0
 
+while getopts :t:pPhv o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
 cleanup
 setup
 run_onlink_tests
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
new file mode 100755
index 000000000000..76a7c4472dc3
--- /dev/null
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -0,0 +1,485 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# redirect test
+#
+#                     .253 +----+
+#                     +----| r1 |
+#                     |    +----+
+# +----+              |       |.1
+# | h1 |--------------+       |   10.1.1.0/30 2001:db8:1::0/126
+# +----+ .1           |       |.2
+#         172.16.1/24 |    +----+                   +----+
+#    2001:db8:16:1/64 +----| r2 |-------------------| h2 |
+#                     .254 +----+ .254           .2 +----+
+#                                    172.16.2/24
+#                                  2001:db8:16:2/64
+#
+# Route from h1 to h2 goes through r1, eth1 - connection between r1 and r2.
+# Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent
+# from r1 to h1 telling h1 to use r2 when talking to h2.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+R1_R2_N1_IP=10.1.1.1
+R2_R1_N1_IP=10.1.1.2
+
+R1_R2_N1_IP6=2001:db8:1::1
+R2_R1_N1_IP6=2001:db8:1::2
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+H2_N2_IP=172.16.2.2
+R2_N2_IP=172.16.2.254
+H2_N2_IP6=2001:db8:16:2::2
+R2_N2_IP6=2001:db8:16:2::254
+
+VRF=red
+VRF_TABLE=1111
+
+################################################################################
+# helpers
+
+log_section()
+{
+	echo
+	echo "###########################################################################"
+	echo "$*"
+	echo "###########################################################################"
+	echo
+}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+log_debug()
+{
+	if [ "$VERBOSE" = "1" ]; then
+		echo "$*"
+	fi
+}
+
+run_cmd()
+{
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: $cmd"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "$out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+get_linklocal()
+{
+	local ns=$1
+	local dev=$2
+	local addr
+
+	addr=$(ip -netns $ns -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+	local ns
+
+	for ns in h1 h2 r1 r2; do
+		ip netns del $ns 2>/dev/null
+	done
+}
+
+create_vrf()
+{
+	local ns=$1
+
+	ip -netns ${ns} link add ${VRF} type vrf table ${VRF_TABLE}
+	ip -netns ${ns} link set ${VRF} up
+	ip -netns ${ns} route add vrf ${VRF} unreachable default metric 8192
+	ip -netns ${ns} -6 route add vrf ${VRF} unreachable default metric 8192
+
+	ip -netns ${ns} addr add 127.0.0.1/8 dev ${VRF}
+	ip -netns ${ns} -6 addr add ::1 dev ${VRF} nodad
+
+	ip -netns ${ns} ru del pref 0
+	ip -netns ${ns} ru add pref 32765 from all lookup local
+	ip -netns ${ns} -6 ru del pref 0
+	ip -netns ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+setup()
+{
+	local ns
+
+	#
+	# create nodes as namespaces
+	#
+	for ns in h1 h2 r1 r2; do
+		ip netns add $ns
+		ip -netns $ns li set lo up
+
+		case "${ns}" in
+		h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+			;;
+		r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+		       ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
+		esac
+	done
+
+	#
+	# create interconnects
+	#
+	ip -netns h1 li add eth0 type veth peer name r1h1
+	ip -netns h1 li set r1h1 netns r1 name eth0 up
+
+	ip -netns h1 li add eth1 type veth peer name r2h1
+	ip -netns h1 li set r2h1 netns r2 name eth0 up
+
+	ip -netns h2 li add eth0 type veth peer name r2h2
+	ip -netns h2 li set eth0 up
+	ip -netns h2 li set r2h2 netns r2 name eth2 up
+
+	ip -netns r1 li add eth1 type veth peer name r2r1
+	ip -netns r1 li set eth1 up
+	ip -netns r1 li set r2r1 netns r2 name eth1 up
+
+	#
+	# h1
+	#
+	if [ "${WITH_VRF}" = "yes" ]; then
+		create_vrf "h1"
+		H1_VRF_ARG="vrf ${VRF}"
+		H1_PING_ARG="-I ${VRF}"
+	else
+		H1_VRF_ARG=
+		H1_PING_ARG=
+	fi
+	ip -netns h1 li add br0 type bridge
+	if [ "${WITH_VRF}" = "yes" ]; then
+		ip -netns h1 li set br0 vrf ${VRF} up
+	else
+		ip -netns h1 li set br0 up
+	fi
+	ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+	ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+	ip -netns h1 li set eth0 master br0 up
+	ip -netns h1 li set eth1 master br0 up
+
+	#
+	# h2
+	#
+	ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+	ip -netns h2 ro add default via ${R2_N2_IP} dev eth0
+	ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+	ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0
+
+	#
+	# r1
+	#
+	ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+	ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+	ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30
+	ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
+
+	#
+	# r2
+	#
+	ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+	ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+	ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30
+	ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
+	ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24
+	ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
+
+	sleep 2
+
+	R1_LLADDR=$(get_linklocal r1 eth0)
+	if [ $? -ne 0 ]; then
+		echo "Error: Failed to get link-local address of r1's eth0"
+		exit 1
+	fi
+	log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}"
+
+	R2_LLADDR=$(get_linklocal r2 eth0)
+	if [ $? -ne 0 ]; then
+		echo "Error: Failed to get link-local address of r2's eth0"
+		exit 1
+	fi
+	log_debug "initial gateway is R2's lladdr = ${R2_LLADDR}"
+}
+
+change_h2_mtu()
+{
+	local mtu=$1
+
+	run_cmd ip -netns h2 li set eth0 mtu ${mtu}
+	run_cmd ip -netns r2 li set eth2 mtu ${mtu}
+}
+
+check_exception()
+{
+	local mtu="$1"
+	local with_redirect="$2"
+	local desc="$3"
+
+	# From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102)
+	if [ "$VERBOSE" = "1" ]; then
+		echo "Commands to check for exception:"
+		run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
+		run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
+	fi
+
+	if [ -n "${mtu}" ]; then
+		mtu=" mtu ${mtu}"
+	fi
+	if [ "$with_redirect" = "yes" ]; then
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -q "cache <redirected> expires [0-9]*sec${mtu}"
+	elif [ -n "${mtu}" ]; then
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -q "cache expires [0-9]*sec${mtu}"
+	else
+		# want to verify that neither mtu nor redirected appears in
+		# the route get output. The -v will wipe out the cache line
+		# if either are set so the last grep -q will not find a match
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -E -v 'mtu|redirected' | grep -q "cache"
+	fi
+	log_test $? 0 "IPv4: ${desc}"
+
+	if [ "$with_redirect" = "yes" ]; then
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+	elif [ -n "${mtu}" ]; then
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -q "${mtu}"
+	else
+		# IPv6 is a bit harder. First strip out the match if it
+		# contains an mtu exception and then look for the first
+		# gateway - R1's lladdr
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -v "mtu" | grep -q "${R1_LLADDR}"
+	fi
+	log_test $? 0 "IPv6: ${desc}"
+}
+
+run_ping()
+{
+	local sz=$1
+
+	run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
+	run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
+}
+
+replace_route_legacy()
+{
+	# r1 to h2 via r2 and eth0
+	run_cmd ip -netns r1    ro replace ${H2_N2}   via ${R2_N1_IP}  dev eth0
+	run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
+}
+
+reset_route_legacy()
+{
+	run_cmd ip -netns r1    ro del ${H2_N2}
+	run_cmd ip -netns r1 -6 ro del ${H2_N2_6}
+
+	run_cmd ip -netns h1    ro del ${H1_VRF_ARG} ${H2_N2}
+	run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
+
+	initial_route_legacy
+}
+
+initial_route_legacy()
+{
+	# r1 to h2 via r2 and eth1
+	run_cmd ip -netns r1    ro add ${H2_N2}   via ${R2_R1_N1_IP}  dev eth1
+	run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
+
+	# h1 to h2 via r1
+	# - IPv6 redirect only works if gateway is the LLA
+	run_cmd ip -netns h1    ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
+	run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
+}
+
+check_connectivity()
+{
+	local rc
+
+	run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
+	rc=$?
+	run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
+	[ $? -ne 0 ] && rc=$?
+
+	return $rc
+}
+
+do_test()
+{
+	local ttype="$1"
+
+	eval initial_route_${ttype}
+
+	# verify connectivity
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken"
+		ret=1
+		return
+	fi
+
+	# redirect exception followed by mtu
+	eval replace_route_${ttype}
+	run_ping 64
+	check_exception "" "yes" "redirect exception"
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after redirect"
+		ret=1
+		return
+	fi
+
+	change_h2_mtu 1300
+	run_ping 1350
+	check_exception "1300" "yes" "redirect exception plus mtu"
+
+	# remove exceptions and restore routing
+	change_h2_mtu 1500
+	eval reset_route_${ttype}
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after reset"
+		ret=1
+		return
+	fi
+	check_exception "" "no" "routing reset"
+
+	# MTU exception followed by redirect
+	change_h2_mtu 1300
+	run_ping 1350
+	check_exception "1300" "no" "mtu exception"
+
+	eval replace_route_${ttype}
+	run_ping 64
+	check_exception "1300" "yes" "mtu exception plus redirect"
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after redirect"
+		ret=1
+		return
+	fi
+}
+
+################################################################################
+# usage
+
+usage()
+{
+        cat <<EOF
+usage: ${0##*/} OPTS
+
+	-p          Pause on fail
+	-v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :pv o
+do
+	case $o in
+                p) PAUSE_ON_FAIL=yes;;
+                v) VERBOSE=$(($VERBOSE + 1));;
+                *) usage; exit 1;;
+	esac
+done
+
+trap cleanup EXIT
+
+cleanup
+WITH_VRF=no
+setup
+
+log_section "Legacy routing"
+do_test "legacy"
+
+cleanup
+log_section "Legacy routing with VRF"
+WITH_VRF=yes
+setup
+do_test "legacy"
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c
new file mode 100644
index 000000000000..a7c41375374f
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test IPV6_FLOWINFO cmsg on send and recv */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/in6.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* uapi/glibc weirdness may leave this undefined */
+#ifndef IPV6_FLOWINFO
+#define IPV6_FLOWINFO 11
+#endif
+
+#ifndef IPV6_FLOWLABEL_MGR
+#define IPV6_FLOWLABEL_MGR 32
+#endif
+
+#define FLOWLABEL_WILDCARD	((uint32_t) -1)
+
+static const char cfg_data[]	= "a";
+static uint32_t cfg_label	= 1;
+
+static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel)
+{
+	char control[CMSG_SPACE(sizeof(flowlabel))] = {0};
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	int ret;
+
+	iov.iov_base = (char *)cfg_data;
+	iov.iov_len = sizeof(cfg_data);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	if (with_flowlabel) {
+		struct cmsghdr *cm;
+
+		cm = (void *)control;
+		cm->cmsg_len = CMSG_LEN(sizeof(flowlabel));
+		cm->cmsg_level = SOL_IPV6;
+		cm->cmsg_type = IPV6_FLOWINFO;
+		*(uint32_t *)CMSG_DATA(cm) = htonl(flowlabel);
+
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+	}
+
+	ret = sendmsg(fd, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "send");
+
+	if (with_flowlabel)
+		fprintf(stderr, "sent with label %u\n", flowlabel);
+	else
+		fprintf(stderr, "sent without label\n");
+}
+
+static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
+{
+	char control[CMSG_SPACE(sizeof(expect))];
+	char data[sizeof(cfg_data)];
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cm;
+	uint32_t flowlabel;
+	int ret;
+
+	iov.iov_base = data;
+	iov.iov_len = sizeof(data);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	memset(control, 0, sizeof(control));
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	ret = recvmsg(fd, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+		error(1, 0, "recv: truncated");
+	if (ret != sizeof(cfg_data))
+		error(1, 0, "recv: length mismatch");
+	if (memcmp(data, cfg_data, sizeof(data)))
+		error(1, 0, "recv: data mismatch");
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (with_flowlabel) {
+		if (!cm)
+			error(1, 0, "recv: missing cmsg");
+		if (CMSG_NXTHDR(&msg, cm))
+			error(1, 0, "recv: too many cmsg");
+		if (cm->cmsg_level != SOL_IPV6 ||
+		    cm->cmsg_type != IPV6_FLOWINFO)
+			error(1, 0, "recv: unexpected cmsg level or type");
+
+		flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm));
+		fprintf(stderr, "recv with label %u\n", flowlabel);
+
+		if (expect != FLOWLABEL_WILDCARD && expect != flowlabel)
+			fprintf(stderr, "recv: incorrect flowlabel %u != %u\n",
+					flowlabel, expect);
+
+	} else {
+		fprintf(stderr, "recv without label\n");
+	}
+}
+
+static bool get_autoflowlabel_enabled(void)
+{
+	int fd, ret;
+	char val;
+
+	fd = open("/proc/sys/net/ipv6/auto_flowlabels", O_RDONLY);
+	if (fd == -1)
+		error(1, errno, "open sysctl");
+
+	ret = read(fd, &val, 1);
+	if (ret == -1)
+		error(1, errno, "read sysctl");
+	if (ret == 0)
+		error(1, 0, "read sysctl: 0");
+
+	if (close(fd))
+		error(1, errno, "close sysctl");
+
+	return val == '1';
+}
+
+static void flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_GET,
+		.flr_label = htonl(label),
+		.flr_flags = flags,
+		.flr_share = share,
+	};
+
+	/* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */
+	req.flr_dst.s6_addr[0] = 0xfd;
+	req.flr_dst.s6_addr[15] = 0x1;
+
+	if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req)))
+		error(1, errno, "setsockopt flowlabel get");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "l:")) != -1) {
+		switch (c) {
+		case 'l':
+			cfg_label = strtoul(optarg, NULL, 0);
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct sockaddr_in6 addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(8000),
+		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
+	};
+	const int one = 1;
+	int fdt, fdr;
+
+	parse_opts(argc, argv);
+
+	fdt = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fdt == -1)
+		error(1, errno, "socket t");
+
+	fdr = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fdr == -1)
+		error(1, errno, "socket r");
+
+	if (connect(fdt, (void *)&addr, sizeof(addr)))
+		error(1, errno, "connect");
+	if (bind(fdr, (void *)&addr, sizeof(addr)))
+		error(1, errno, "bind");
+
+	flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE);
+
+	if (setsockopt(fdr, SOL_IPV6, IPV6_FLOWINFO, &one, sizeof(one)))
+		error(1, errno, "setsockopt flowinfo");
+
+	if (get_autoflowlabel_enabled()) {
+		fprintf(stderr, "send no label: recv auto flowlabel\n");
+		do_send(fdt, false, 0);
+		do_recv(fdr, true, FLOWLABEL_WILDCARD);
+	} else {
+		fprintf(stderr, "send no label: recv no label (auto off)\n");
+		do_send(fdt, false, 0);
+		do_recv(fdr, false, 0);
+	}
+
+	fprintf(stderr, "send label\n");
+	do_send(fdt, true, cfg_label);
+	do_recv(fdr, true, cfg_label);
+
+	if (close(fdr))
+		error(1, errno, "close r");
+	if (close(fdt))
+		error(1, errno, "close t");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh
new file mode 100755
index 000000000000..d3bc6442704e
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression tests for IPv6 flowlabels
+#
+# run in separate namespaces to avoid mgmt db conflicts betweent tests
+
+set -e
+
+echo "TEST management"
+./in_netns.sh ./ipv6_flowlabel_mgr
+
+echo "TEST datapath"
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=0 && ./ipv6_flowlabel -l 1'
+
+echo "TEST datapath (with auto-flowlabels)"
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1'
+
+echo OK. All tests passed
diff --git a/tools/testing/selftests/net/ipv6_flowlabel_mgr.c b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c
new file mode 100644
index 000000000000..af95b48acea9
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test IPV6_FLOWINFO_MGR */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/in6.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* uapi/glibc weirdness may leave this undefined */
+#ifndef IPV6_FLOWLABEL_MGR
+#define IPV6_FLOWLABEL_MGR	32
+#endif
+
+/* from net/ipv6/ip6_flowlabel.c */
+#define FL_MIN_LINGER		6
+
+#define explain(x)							\
+	do { if (cfg_verbose) fprintf(stderr, "       " x "\n"); } while (0)
+
+#define __expect(x)							\
+	do {								\
+		if (!(x))						\
+			fprintf(stderr, "[OK]   " #x "\n");		\
+		else							\
+			error(1, 0, "[ERR]  " #x " (line %d)", __LINE__); \
+	} while (0)
+
+#define expect_pass(x)	__expect(x)
+#define expect_fail(x)	__expect(!(x))
+
+static bool cfg_long_running;
+static bool cfg_verbose;
+
+static int flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_GET,
+		.flr_label = htonl(label),
+		.flr_flags = flags,
+		.flr_share = share,
+	};
+
+	/* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */
+	req.flr_dst.s6_addr[0] = 0xfd;
+	req.flr_dst.s6_addr[15] = 0x1;
+
+	return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));
+}
+
+static int flowlabel_put(int fd, uint32_t label)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_PUT,
+		.flr_label = htonl(label),
+	};
+
+	return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));
+}
+
+static void run_tests(int fd)
+{
+	int wstatus;
+	pid_t pid;
+
+	explain("cannot get non-existent label");
+	expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0));
+
+	explain("cannot put non-existent label");
+	expect_fail(flowlabel_put(fd, 1));
+
+	explain("cannot create label greater than 20 bits");
+	expect_fail(flowlabel_get(fd, 0x1FFFFF, IPV6_FL_S_ANY,
+				  IPV6_FL_F_CREATE));
+
+	explain("create a new label (FL_F_CREATE)");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+	explain("can get the label (without FL_F_CREATE)");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0));
+	explain("can get it again with create flag set, too");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+	explain("cannot get it again with the exclusive (FL_FL_EXCL) flag");
+	expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY,
+					 IPV6_FL_F_CREATE | IPV6_FL_F_EXCL));
+	explain("can now put exactly three references");
+	expect_pass(flowlabel_put(fd, 1));
+	expect_pass(flowlabel_put(fd, 1));
+	expect_pass(flowlabel_put(fd, 1));
+	expect_fail(flowlabel_put(fd, 1));
+
+	explain("create a new exclusive label (FL_S_EXCL)");
+	expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE));
+	explain("cannot get it again in non-exclusive mode");
+	expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY,  IPV6_FL_F_CREATE));
+	explain("cannot get it again in exclusive mode either");
+	expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE));
+	expect_pass(flowlabel_put(fd, 2));
+
+	if (cfg_long_running) {
+		explain("cannot reuse the label, due to linger");
+		expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY,
+					  IPV6_FL_F_CREATE));
+		explain("after sleep, can reuse");
+		sleep(FL_MIN_LINGER * 2 + 1);
+		expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_ANY,
+					  IPV6_FL_F_CREATE));
+	}
+
+	explain("create a new user-private label (FL_S_USER)");
+	expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, IPV6_FL_F_CREATE));
+	explain("cannot get it again in non-exclusive mode");
+	expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_ANY, 0));
+	explain("cannot get it again in exclusive mode");
+	expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_EXCL, 0));
+	explain("can get it again in user mode");
+	expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+	explain("child process can get it too, but not after setuid(nobody)");
+	pid = fork();
+	if (pid == -1)
+		error(1, errno, "fork");
+	if (!pid) {
+		expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+		if (setuid(USHRT_MAX))
+			fprintf(stderr, "[INFO] skip setuid child test\n");
+		else
+			expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+		exit(0);
+	}
+	if (wait(&wstatus) == -1)
+		error(1, errno, "wait");
+	if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0)
+		error(1, errno, "wait: unexpected child result");
+
+	explain("create a new process-private label (FL_S_PROCESS)");
+	expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, IPV6_FL_F_CREATE));
+	explain("can get it again");
+	expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0));
+	explain("child process cannot can get it");
+	pid = fork();
+	if (pid == -1)
+		error(1, errno, "fork");
+	if (!pid) {
+		expect_fail(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0));
+		exit(0);
+	}
+	if (wait(&wstatus) == -1)
+		error(1, errno, "wait");
+	if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0)
+		error(1, errno, "wait: unexpected child result");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "lv")) != -1) {
+		switch (c) {
+		case 'l':
+			cfg_long_running = true;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	parse_opts(argc, argv);
+
+	fd = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	run_tests(fd);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 317dafcd605d..4a1275990d7e 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -152,10 +152,10 @@ tests="
 	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions
 	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions"
 
-NS_A="ns-$(mktemp -u XXXXXX)"
-NS_B="ns-$(mktemp -u XXXXXX)"
-NS_R1="ns-$(mktemp -u XXXXXX)"
-NS_R2="ns-$(mktemp -u XXXXXX)"
+NS_A="ns-A"
+NS_B="ns-B"
+NS_R1="ns-R1"
+NS_R2="ns-R2"
 ns_a="ip netns exec ${NS_A}"
 ns_b="ip netns exec ${NS_B}"
 ns_r1="ip netns exec ${NS_R1}"
@@ -212,7 +212,6 @@ dummy6_0_prefix="fc00:1000::"
 dummy6_1_prefix="fc00:1001::"
 dummy6_mask="64"
 
-cleanup_done=1
 err_buf=
 tcpdump_pids=
 
@@ -495,7 +494,7 @@ setup_routing() {
 setup() {
 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
-	cleanup_done=0
+	cleanup
 	for arg do
 		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
 	done
@@ -519,11 +518,9 @@ cleanup() {
 	done
 	tcpdump_pids=
 
-	[ ${cleanup_done} -eq 1 ] && return
 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
 		ip netns del ${n} 2> /dev/null
 	done
-	cleanup_done=1
 }
 
 mtu() {
@@ -1136,6 +1133,9 @@ done
 
 trap cleanup EXIT
 
+# start clean
+cleanup
+
 for t in ${tests}; do
 	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
 
@@ -1156,7 +1156,6 @@ for t in ${tests}; do
 
 		eval test_${name}
 		ret=$?
-		cleanup
 
 		if [ $ret -eq 0 ]; then
 			printf "TEST: %-60s  [ OK ]\n" "${t}"
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
new file mode 100644
index 000000000000..53f598f06647
--- /dev/null
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test the SO_TXTIME API
+ *
+ * Takes two streams of { payload, delivery time }[], one input and one output.
+ * Sends the input stream and verifies arrival matches the output stream.
+ * The two streams can differ due to out-of-order delivery and drops.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/net_tstamp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+static int	cfg_clockid	= CLOCK_TAI;
+static bool	cfg_do_ipv4;
+static bool	cfg_do_ipv6;
+static uint16_t	cfg_port	= 8000;
+static int	cfg_variance_us	= 2000;
+
+static uint64_t glob_tstart;
+
+/* encode one timed transmission (of a 1B payload) */
+struct timed_send {
+	char	data;
+	int64_t	delay_us;
+};
+
+#define MAX_NUM_PKT	8
+static struct timed_send cfg_in[MAX_NUM_PKT];
+static struct timed_send cfg_out[MAX_NUM_PKT];
+static int cfg_num_pkt;
+
+static uint64_t gettime_ns(void)
+{
+	struct timespec ts;
+
+	if (clock_gettime(cfg_clockid, &ts))
+		error(1, errno, "gettime");
+
+	return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
+}
+
+static void do_send_one(int fdt, struct timed_send *ts)
+{
+	char control[CMSG_SPACE(sizeof(uint64_t))];
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cm;
+	uint64_t tdeliver;
+	int ret;
+
+	iov.iov_base = &ts->data;
+	iov.iov_len = 1;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	if (ts->delay_us >= 0) {
+		memset(control, 0, sizeof(control));
+		msg.msg_control = &control;
+		msg.msg_controllen = sizeof(control);
+
+		tdeliver = glob_tstart + ts->delay_us * 1000;
+
+		cm = CMSG_FIRSTHDR(&msg);
+		cm->cmsg_level = SOL_SOCKET;
+		cm->cmsg_type = SCM_TXTIME;
+		cm->cmsg_len = CMSG_LEN(sizeof(tdeliver));
+		memcpy(CMSG_DATA(cm), &tdeliver, sizeof(tdeliver));
+	}
+
+	ret = sendmsg(fdt, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "write");
+	if (ret == 0)
+		error(1, 0, "write: 0B");
+
+}
+
+static void do_recv_one(int fdr, struct timed_send *ts)
+{
+	int64_t tstop, texpect;
+	char rbuf[2];
+	int ret;
+
+	ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+	if (ret == -1)
+		error(1, errno, "read");
+	if (ret != 1)
+		error(1, 0, "read: %dB", ret);
+
+	tstop = (gettime_ns() - glob_tstart) / 1000;
+	texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
+
+	fprintf(stderr, "payload:%c delay:%ld expected:%ld (us)\n",
+			rbuf[0], tstop, texpect);
+
+	if (rbuf[0] != ts->data)
+		error(1, 0, "payload mismatch. expected %c", ts->data);
+
+	if (labs(tstop - texpect) > cfg_variance_us)
+		error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+}
+
+static void do_recv_verify_empty(int fdr)
+{
+	char rbuf[1];
+	int ret;
+
+	ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+	if (ret != -1 || errno != EAGAIN)
+		error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
+}
+
+static void setsockopt_txtime(int fd)
+{
+	struct sock_txtime so_txtime_val = { .clockid = cfg_clockid };
+	struct sock_txtime so_txtime_val_read = { 0 };
+	socklen_t vallen = sizeof(so_txtime_val);
+
+	if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+		       &so_txtime_val, sizeof(so_txtime_val)))
+		error(1, errno, "setsockopt txtime");
+
+	if (getsockopt(fd, SOL_SOCKET, SO_TXTIME,
+		       &so_txtime_val_read, &vallen))
+		error(1, errno, "getsockopt txtime");
+
+	if (vallen != sizeof(so_txtime_val) ||
+	    memcmp(&so_txtime_val, &so_txtime_val_read, vallen))
+		error(1, 0, "getsockopt txtime: mismatch");
+}
+
+static int setup_tx(struct sockaddr *addr, socklen_t alen)
+{
+	int fd;
+
+	fd = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	if (connect(fd, addr, alen))
+		error(1, errno, "connect");
+
+	setsockopt_txtime(fd);
+
+	return fd;
+}
+
+static int setup_rx(struct sockaddr *addr, socklen_t alen)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fd;
+
+	fd = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	if (bind(fd, addr, alen))
+		error(1, errno, "bind");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	return fd;
+}
+
+static void do_test(struct sockaddr *addr, socklen_t alen)
+{
+	int fdt, fdr, i;
+
+	fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
+			addr->sa_family == PF_INET ? '4' : '6',
+			cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
+
+	fdt = setup_tx(addr, alen);
+	fdr = setup_rx(addr, alen);
+
+	glob_tstart = gettime_ns();
+
+	for (i = 0; i < cfg_num_pkt; i++)
+		do_send_one(fdt, &cfg_in[i]);
+	for (i = 0; i < cfg_num_pkt; i++)
+		do_recv_one(fdr, &cfg_out[i]);
+
+	do_recv_verify_empty(fdr);
+
+	if (close(fdr))
+		error(1, errno, "close r");
+	if (close(fdt))
+		error(1, errno, "close t");
+}
+
+static int parse_io(const char *optarg, struct timed_send *array)
+{
+	char *arg, *tok;
+	int aoff = 0;
+
+	arg = strdup(optarg);
+	if (!arg)
+		error(1, errno, "strdup");
+
+	while ((tok = strtok(arg, ","))) {
+		arg = NULL;	/* only pass non-zero on first call */
+
+		if (aoff / 2 == MAX_NUM_PKT)
+			error(1, 0, "exceeds max pkt count (%d)", MAX_NUM_PKT);
+
+		if (aoff & 1) {	/* parse delay */
+			array->delay_us = strtol(tok, NULL, 0) * 1000;
+			array++;
+		} else {	/* parse character */
+			array->data = tok[0];
+		}
+
+		aoff++;
+	}
+
+	free(arg);
+
+	return aoff / 2;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c, ilen, olen;
+
+	while ((c = getopt(argc, argv, "46c:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_do_ipv4 = true;
+			break;
+		case '6':
+			cfg_do_ipv6 = true;
+			break;
+		case 'c':
+			if (!strcmp(optarg, "tai"))
+				cfg_clockid = CLOCK_TAI;
+			else if (!strcmp(optarg, "monotonic") ||
+				 !strcmp(optarg, "mono"))
+				cfg_clockid = CLOCK_MONOTONIC;
+			else
+				error(1, 0, "unknown clock id %s", optarg);
+			break;
+		default:
+			error(1, 0, "parse error at %d", optind);
+		}
+	}
+
+	if (argc - optind != 2)
+		error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+
+	ilen = parse_io(argv[optind], cfg_in);
+	olen = parse_io(argv[optind + 1], cfg_out);
+	if (ilen != olen)
+		error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
+	cfg_num_pkt = ilen;
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_do_ipv6) {
+		struct sockaddr_in6 addr6 = {0};
+
+		addr6.sin6_family = AF_INET6;
+		addr6.sin6_port = htons(cfg_port);
+		addr6.sin6_addr = in6addr_loopback;
+		do_test((void *)&addr6, sizeof(addr6));
+	}
+
+	if (cfg_do_ipv4) {
+		struct sockaddr_in addr4 = {0};
+
+		addr4.sin_family = AF_INET;
+		addr4.sin_port = htons(cfg_port);
+		addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		do_test((void *)&addr4, sizeof(addr4));
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
new file mode 100755
index 000000000000..5aa519328a5b
--- /dev/null
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression tests for the SO_TXTIME interface
+
+# Run in network namespace
+if [[ $# -eq 0 ]]; then
+	./in_netns.sh $0 __subprocess
+	exit $?
+fi
+
+set -e
+
+tc qdisc add dev lo root fq
+./so_txtime -4 -6 -c mono a,-1 a,-1
+./so_txtime -4 -6 -c mono a,0 a,0
+./so_txtime -4 -6 -c mono a,10 a,10
+./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
+./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
+
+if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then
+	! ./so_txtime -4 -6 -c tai a,-1 a,-1
+	! ./so_txtime -4 -6 -c tai a,0 a,0
+	./so_txtime -4 -6 -c tai a,10 a,10
+	./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
+	./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+else
+	echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+fi
+
+echo OK. All tests passed
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
new file mode 100644
index 000000000000..58bb77d9e7e1
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test key rotation for TFO.
+ * New keys are 'rotated' in two steps:
+ * 1) Add new key as the 'backup' key 'behind' the primary key
+ * 2) Make new key the primary by swapping the backup and primary keys
+ *
+ * The rotation is done in stages using multiple sockets bound
+ * to the same port via SO_REUSEPORT. This simulates key rotation
+ * behind say a load balancer. We verify that across the rotation
+ * there are no cases in which a cookie is not accepted by verifying
+ * that TcpExtTCPFastOpenPassiveFail remains 0.
+ */
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <netinet/tcp.h>
+#include <fcntl.h>
+#include <time.h>
+
+#ifndef TCP_FASTOPEN_KEY
+#define TCP_FASTOPEN_KEY 33
+#endif
+
+#define N_LISTEN 10
+#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
+#define KEY_LENGTH 16
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+static bool do_ipv6;
+static bool do_sockopt;
+static bool do_rotate;
+static int key_len = KEY_LENGTH;
+static int rcv_fds[N_LISTEN];
+static int proc_fd;
+static const char *IP4_ADDR = "127.0.0.1";
+static const char *IP6_ADDR = "::1";
+static const int PORT = 8891;
+
+static void get_keys(int fd, uint32_t *keys)
+{
+	char buf[128];
+	int len = KEY_LENGTH * 2;
+
+	if (do_sockopt) {
+		if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len))
+			error(1, errno, "Unable to get key");
+		return;
+	}
+	lseek(proc_fd, 0, SEEK_SET);
+	if (read(proc_fd, buf, sizeof(buf)) <= 0)
+		error(1, errno, "Unable to read %s", PROC_FASTOPEN_KEY);
+	if (sscanf(buf, "%x-%x-%x-%x,%x-%x-%x-%x", keys, keys + 1, keys + 2,
+	    keys + 3, keys + 4, keys + 5, keys + 6, keys + 7) != 8)
+		error(1, 0, "Unable to parse %s", PROC_FASTOPEN_KEY);
+}
+
+static void set_keys(int fd, uint32_t *keys)
+{
+	char buf[128];
+
+	if (do_sockopt) {
+		if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys,
+		    key_len))
+			error(1, errno, "Unable to set key");
+		return;
+	}
+	if (do_rotate)
+		snprintf(buf, 128, "%08x-%08x-%08x-%08x,%08x-%08x-%08x-%08x",
+			 keys[0], keys[1], keys[2], keys[3], keys[4], keys[5],
+			 keys[6], keys[7]);
+	else
+		snprintf(buf, 128, "%08x-%08x-%08x-%08x",
+			 keys[0], keys[1], keys[2], keys[3]);
+	lseek(proc_fd, 0, SEEK_SET);
+	if (write(proc_fd, buf, sizeof(buf)) <= 0)
+		error(1, errno, "Unable to write %s", PROC_FASTOPEN_KEY);
+}
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds)
+{
+	struct sockaddr_in  addr4 = {0};
+	struct sockaddr_in6 addr6 = {0};
+	struct sockaddr *addr;
+	int opt = 1, i, sz;
+	int qlen = 100;
+	uint32_t keys[8];
+
+	switch (family) {
+	case AF_INET:
+		addr4.sin_family = family;
+		addr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		addr4.sin_port = htons(PORT);
+		sz = sizeof(addr4);
+		addr = (struct sockaddr *)&addr4;
+		break;
+	case AF_INET6:
+		addr6.sin6_family = AF_INET6;
+		addr6.sin6_addr = in6addr_any;
+		addr6.sin6_port = htons(PORT);
+		sz = sizeof(addr6);
+		addr = (struct sockaddr *)&addr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+		/* clang does not recognize error() above as terminating
+		 * the program, so it complains that saddr, sz are
+		 * not initialized when this code path is taken. Silence it.
+		 */
+		return;
+	}
+	for (i = 0; i < ARRAY_SIZE(keys); i++)
+		keys[i] = rand();
+	for (i = 0; i < N_LISTEN; i++) {
+		rcv_fds[i] = socket(family, proto, 0);
+		if (rcv_fds[i] < 0)
+			error(1, errno, "failed to create receive socket");
+		if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT");
+		if (bind(rcv_fds[i], addr, sz))
+			error(1, errno, "failed to bind receive socket");
+		if (setsockopt(rcv_fds[i], SOL_TCP, TCP_FASTOPEN, &qlen,
+			       sizeof(qlen)))
+			error(1, errno, "failed to set TCP_FASTOPEN");
+		set_keys(rcv_fds[i], keys);
+		if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+			error(1, errno, "failed to listen on receive port");
+	}
+}
+
+static int connect_and_send(int family, int proto)
+{
+	struct sockaddr_in  saddr4 = {0};
+	struct sockaddr_in  daddr4 = {0};
+	struct sockaddr_in6 saddr6 = {0};
+	struct sockaddr_in6 daddr6 = {0};
+	struct sockaddr *saddr, *daddr;
+	int fd, sz, ret;
+	char data[1];
+
+	switch (family) {
+	case AF_INET:
+		saddr4.sin_family = AF_INET;
+		saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		saddr4.sin_port = 0;
+
+		daddr4.sin_family = AF_INET;
+		if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
+			error(1, errno, "inet_pton failed: %s", IP4_ADDR);
+		daddr4.sin_port = htons(PORT);
+
+		sz = sizeof(saddr4);
+		saddr = (struct sockaddr *)&saddr4;
+		daddr = (struct sockaddr *)&daddr4;
+		break;
+	case AF_INET6:
+		saddr6.sin6_family = AF_INET6;
+		saddr6.sin6_addr = in6addr_any;
+
+		daddr6.sin6_family = AF_INET6;
+		if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
+			error(1, errno, "inet_pton failed: %s", IP6_ADDR);
+		daddr6.sin6_port = htons(PORT);
+
+		sz = sizeof(saddr6);
+		saddr = (struct sockaddr *)&saddr6;
+		daddr = (struct sockaddr *)&daddr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+		/* clang does not recognize error() above as terminating
+		 * the program, so it complains that saddr, daddr, sz are
+		 * not initialized when this code path is taken. Silence it.
+		 */
+		return -1;
+	}
+	fd = socket(family, proto, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+	if (bind(fd, saddr, sz))
+		error(1, errno, "failed to bind send socket");
+	data[0] = 'a';
+	ret = sendto(fd, data, 1, MSG_FASTOPEN, daddr, sz);
+	if (ret != 1)
+		error(1, errno, "failed to sendto");
+
+	return fd;
+}
+
+static bool is_listen_fd(int fd)
+{
+	int i;
+
+	for (i = 0; i < N_LISTEN; i++) {
+		if (rcv_fds[i] == fd)
+			return true;
+	}
+	return false;
+}
+
+static int rotate_key(int fd)
+{
+	static int iter;
+	static uint32_t new_key[4];
+	uint32_t keys[8];
+	uint32_t tmp_key[4];
+	int i;
+	int len = KEY_LENGTH * 2;
+
+	if (iter < N_LISTEN) {
+		/* first set new key as backups */
+		if (iter == 0) {
+			for (i = 0; i < ARRAY_SIZE(new_key); i++)
+				new_key[i] = rand();
+		}
+		get_keys(fd, keys);
+		memcpy(keys + 4, new_key, KEY_LENGTH);
+		set_keys(fd, keys);
+	} else {
+		/* swap the keys */
+		get_keys(fd, keys);
+		memcpy(tmp_key, keys + 4, KEY_LENGTH);
+		memcpy(keys + 4, keys, KEY_LENGTH);
+		memcpy(keys, tmp_key, KEY_LENGTH);
+		set_keys(fd, keys);
+	}
+	if (++iter >= (N_LISTEN * 2))
+		iter = 0;
+}
+
+static void run_one_test(int family)
+{
+	struct epoll_event ev;
+	int i, send_fd;
+	int n_loops = 10000;
+	int rotate_key_fd = 0;
+	int key_rotate_interval = 50;
+	int fd, epfd;
+	char buf[1];
+
+	build_rcv_fd(family, SOCK_STREAM, rcv_fds);
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+	ev.events = EPOLLIN;
+	for (i = 0; i < N_LISTEN; i++) {
+		ev.data.fd = rcv_fds[i];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+			error(1, errno, "failed to register sock epoll");
+	}
+	while (n_loops--) {
+		send_fd = connect_and_send(family, SOCK_STREAM);
+		if (do_rotate && ((n_loops % key_rotate_interval) == 0)) {
+			rotate_key(rcv_fds[rotate_key_fd]);
+			if (++rotate_key_fd >= N_LISTEN)
+				rotate_key_fd = 0;
+		}
+		while (1) {
+			i = epoll_wait(epfd, &ev, 1, -1);
+			if (i < 0)
+				error(1, errno, "epoll_wait failed");
+			if (is_listen_fd(ev.data.fd)) {
+				fd = accept(ev.data.fd, NULL, NULL);
+				if (fd < 0)
+					error(1, errno, "failed to accept");
+				ev.data.fd = fd;
+				if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
+					error(1, errno, "failed epoll add");
+				continue;
+			}
+			i = recv(ev.data.fd, buf, sizeof(buf), 0);
+			if (i != 1)
+				error(1, errno, "failed recv data");
+			if (epoll_ctl(epfd, EPOLL_CTL_DEL, ev.data.fd, NULL))
+				error(1, errno, "failed epoll del");
+			close(ev.data.fd);
+			break;
+		}
+		close(send_fd);
+	}
+	for (i = 0; i < N_LISTEN; i++)
+		close(rcv_fds[i]);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "46sr")) != -1) {
+		switch (c) {
+		case '4':
+			do_ipv6 = false;
+			break;
+		case '6':
+			do_ipv6 = true;
+			break;
+		case 's':
+			do_sockopt = true;
+			break;
+		case 'r':
+			do_rotate = true;
+			key_len = KEY_LENGTH * 2;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	proc_fd = open(PROC_FASTOPEN_KEY, O_RDWR);
+	if (proc_fd < 0)
+		error(1, errno, "Unable to open %s", PROC_FASTOPEN_KEY);
+	srand(time(NULL));
+	if (do_ipv6)
+		run_one_test(AF_INET6);
+	else
+		run_one_test(AF_INET);
+	close(proc_fd);
+	fprintf(stderr, "PASS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
new file mode 100755
index 000000000000..41476399e184
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# rotate TFO keys for ipv4/ipv6 and verify that the client does
+# not present an invalid cookie.
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+	ip netns exec "${NETNS}" sysctl -w net.ipv4.tcp_fastopen=3 \
+		>/dev/null 2>&1
+}
+
+cleanup() {
+	ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+	# flush routes before each run, otherwise successive runs can
+	# initially present an old TFO cookie
+	ip netns exec "${NETNS}" ip tcp_metrics flush
+	ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1"
+	val=$(ip netns exec "${NETNS}" nstat -az | \
+		grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}')
+	if [ $val -ne 0 ]; then
+		echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero"
+		return 1
+	fi
+}
+
+do_test "-4"
+do_test "-6"
+do_test "-4"
+do_test "-6"
+do_test "-4s"
+do_test "-6s"
+do_test "-4s"
+do_test "-6s"
+do_test "-4r"
+do_test "-6r"
+do_test "-4r"
+do_test "-6r"
+do_test "-4sr"
+do_test "-6sr"
+do_test "-4sr"
+do_test "-6sr"
+echo "all tests done"
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 203302065458..b235efd55367 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -38,6 +38,7 @@ CONFIG_NET_ACT_CSUM=m
 CONFIG_NET_ACT_VLAN=m
 CONFIG_NET_ACT_BPF=m
 CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_CTINFO=m
 CONFIG_NET_ACT_SKBMOD=m
 CONFIG_NET_ACT_IFE=m
 CONFIG_NET_ACT_TUNNEL_KEY=m