Diffstat (limited to 'drivers/net/ethernet/chelsio')
98 files changed, 14844 insertions, 2284 deletions
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index 9909bfda167e..c931ec8cac40 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -7,7 +7,7 @@ config NET_VENDOR_CHELSIO bool "Chelsio devices" default y depends on PCI - ---help--- + help If you have a network (Ethernet) card belonging to this class, say Y. Note that the answer to this question doesn't directly affect the @@ -22,11 +22,11 @@ config CHELSIO_T1 depends on PCI select CRC32 select MDIO - ---help--- + help This driver supports Chelsio gigabit and 10-gigabit Ethernet cards. More information about adapter features and performance tuning is in - <file:Documentation/networking/device_drivers/chelsio/cxgb.txt>. + <file:Documentation/networking/device_drivers/ethernet/chelsio/cxgb.rst>. For general information about Chelsio and our products, visit our website at <http://www.chelsio.com>. @@ -42,7 +42,7 @@ config CHELSIO_T1 config CHELSIO_T1_1G bool "Chelsio gigabit Ethernet support" depends on CHELSIO_T1 - ---help--- + help Enables support for Chelsio's gigabit Ethernet PCI cards. If you are using only 10G cards say 'N' here. @@ -51,7 +51,7 @@ config CHELSIO_T3 depends on PCI && INET select FW_LOADER select MDIO - ---help--- + help This driver supports Chelsio T3-based gigabit and 10Gb Ethernet adapters. @@ -68,11 +68,12 @@ config CHELSIO_T3 config CHELSIO_T4 tristate "Chelsio Communications T4/T5/T6 Ethernet support" - depends on PCI && (IPV6 || IPV6=n) + depends on PCI && (IPV6 || IPV6=n) && (TLS || TLS=n) + depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select MDIO select ZLIB_DEFLATE - ---help--- + help This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet adapter and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb Ethernet adapters. @@ -92,7 +93,7 @@ config CHELSIO_T4_DCB bool "Data Center Bridging (DCB) Support for Chelsio T4/T5/T6 cards" default n depends on CHELSIO_T4 && DCB - ---help--- + help Enable DCB support through rtNetlink interface. Say Y here if you want to enable Data Center Bridging (DCB) support in the driver. @@ -103,7 +104,7 @@ config CHELSIO_T4_FCOE bool "Fibre Channel over Ethernet (FCoE) Support for Chelsio T5 cards" default n depends on CHELSIO_T4 && CHELSIO_T4_DCB && FCOE - ---help--- + help Enable FCoE offload features. Say Y here if you want to enable Fibre Channel over Ethernet (FCoE) support in the driver. @@ -113,7 +114,7 @@ config CHELSIO_T4_FCOE config CHELSIO_T4VF tristate "Chelsio Communications T4/T5/T6 Virtual Function Ethernet support" depends on PCI - ---help--- + help This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet adapters and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb Ethernet adapters with PCI-E SR-IOV Virtual Functions. @@ -131,7 +132,9 @@ config CHELSIO_T4VF config CHELSIO_LIB tristate - ---help--- + help Common library for Chelsio drivers. 
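The new dependency line above, "depends on PCI && (IPV6 || IPV6=n) && (TLS || TLS=n)", uses a common Kconfig idiom: it forbids exactly the one broken combination, CHELSIO_T4=y with TLS=m, where built-in driver code could not link against module-only TLS symbols ("depends on PTP_1588_CLOCK_OPTIONAL" serves the same purpose for the PTP core). A minimal C sketch of how the optional path is then compiled; chelsio_maybe_init_tls() is a hypothetical hook, not a function from this patch:

#include <linux/kconfig.h>

/*
 * IS_ENABLED(CONFIG_TLS) is true for TLS=y and TLS=m alike; the Kconfig
 * dependency guarantees the driver itself is =m whenever TLS is =m, so
 * any TLS symbols referenced below always resolve at link time.
 */
static int chelsio_maybe_init_tls(void)	/* hypothetical hook */
{
	if (!IS_ENABLED(CONFIG_TLS))
		return 0;	/* TLS support compiled out */

	/* ... set up the inline-TLS offload path here ... */
	return 0;
}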
+source "drivers/net/ethernet/chelsio/inline_crypto/Kconfig" + endif # NET_VENDOR_CHELSIO diff --git a/drivers/net/ethernet/chelsio/Makefile b/drivers/net/ethernet/chelsio/Makefile index c0f978d2e8a7..1a6fd8b2bb7d 100644 --- a/drivers/net/ethernet/chelsio/Makefile +++ b/drivers/net/ethernet/chelsio/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_CHELSIO_T3) += cxgb3/ obj-$(CONFIG_CHELSIO_T4) += cxgb4/ obj-$(CONFIG_CHELSIO_T4VF) += cxgb4vf/ obj-$(CONFIG_CHELSIO_LIB) += libcxgb/ +obj-$(CONFIG_CHELSIO_INLINE_CRYPTO) += inline_crypto/ diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h index 94b9482f14a5..e56eff701395 100644 --- a/drivers/net/ethernet/chelsio/cxgb/common.h +++ b/drivers/net/ethernet/chelsio/cxgb/common.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: common.h * @@ -6,16 +7,6 @@ * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * @@ -55,7 +46,6 @@ #define DRV_DESCRIPTION "Chelsio 10Gb Ethernet Driver" #define DRV_NAME "cxgb" -#define DRV_VERSION "2.2" #define CH_DEVICE(devid, ssid, idx) \ { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, ssid, 0, 0, idx } @@ -239,7 +229,6 @@ struct adapter { int msg_enable; u32 mmio_len; - struct work_struct ext_intr_handler_task; struct adapter_params params; /* Terminator modules. 
*/ @@ -258,6 +247,7 @@ struct adapter { /* guards async operations */ spinlock_t async_lock ____cacheline_aligned; + u32 pending_thread_intr; u32 slow_intr_mask; int t1powersave; }; @@ -335,8 +325,7 @@ void t1_interrupts_enable(adapter_t *adapter); void t1_interrupts_disable(adapter_t *adapter); void t1_interrupts_clear(adapter_t *adapter); int t1_elmer0_ext_intr_handler(adapter_t *adapter); -void t1_elmer0_ext_intr(adapter_t *adapter); -int t1_slow_intr_handler(adapter_t *adapter); +irqreturn_t t1_slow_intr_handler(adapter_t *adapter); int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); const struct board_info *t1_get_board_info(unsigned int board_id); @@ -348,7 +337,6 @@ int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, int t1_init_hw_modules(adapter_t *adapter); int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi); void t1_free_sw_modules(adapter_t *adapter); -void t1_fatal_err(adapter_t *adapter); void t1_link_changed(adapter_t *adapter, int port_id); void t1_link_negotiated(adapter_t *adapter, int port_id, int link_stat, int speed, int duplex, int pause); diff --git a/drivers/net/ethernet/chelsio/cxgb/cphy.h b/drivers/net/ethernet/chelsio/cxgb/cphy.h index bf43da6c6a63..12639b688ddc 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cphy.h +++ b/drivers/net/ethernet/chelsio/cxgb/cphy.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: cphy.h * @@ -6,16 +7,6 @@ * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * diff --git a/drivers/net/ethernet/chelsio/cxgb/cpl5_cmd.h b/drivers/net/ethernet/chelsio/cxgb/cpl5_cmd.h index 5249686afe71..a30fb407115d 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cpl5_cmd.h +++ b/drivers/net/ethernet/chelsio/cxgb/cpl5_cmd.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: cpl5_cmd.h * @@ -6,16 +7,6 @@ * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
* * * * http://www.chelsio.com * * * @@ -635,4 +626,3 @@ struct cpl_mss_change { }; #endif /* _CXGB_CPL5_CMD_H_ */ - diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 0ccdde366ae1..d2286adf09fe 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -211,9 +211,10 @@ static int cxgb_up(struct adapter *adapter) t1_interrupts_clear(adapter); adapter->params.has_msi = !disable_msi && !pci_enable_msi(adapter->pdev); - err = request_irq(adapter->pdev->irq, t1_interrupt, - adapter->params.has_msi ? 0 : IRQF_SHARED, - adapter->name, adapter); + err = request_threaded_irq(adapter->pdev->irq, t1_interrupt, + t1_interrupt_thread, + adapter->params.has_msi ? 0 : IRQF_SHARED, + adapter->name, adapter); if (err) { if (adapter->params.has_msi) pci_disable_msi(adapter->pdev); @@ -428,9 +429,8 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct adapter *adapter = dev->ml_priv; - strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); - strlcpy(info->bus_info, pci_name(adapter->pdev), + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(adapter->pdev), sizeof(info->bus_info)); } @@ -710,7 +710,9 @@ static int set_pauseparam(struct net_device *dev, return 0; } -static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e) +static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e, + struct kernel_ethtool_ringparam *kernel_e, + struct netlink_ext_ack *extack) { struct adapter *adapter = dev->ml_priv; int jumbo_fl = t1_is_T1B(adapter) ? 1 : 0; @@ -724,7 +726,9 @@ static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e) e->tx_pending = adapter->params.sge.cmdQ_size[0]; } -static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e) +static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e, + struct kernel_ethtool_ringparam *kernel_e, + struct netlink_ext_ack *extack) { struct adapter *adapter = dev->ml_priv; int jumbo_fl = t1_is_T1B(adapter) ? 
1 : 0; @@ -748,7 +752,9 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e) return 0; } -static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct adapter *adapter = dev->ml_priv; @@ -759,7 +765,9 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) return 0; } -static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct adapter *adapter = dev->ml_priv; @@ -794,6 +802,9 @@ static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e, } static const struct ethtool_ops t1_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX | + ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL, .get_drvinfo = get_drvinfo, .get_msglevel = get_msglevel, .set_msglevel = set_msglevel, @@ -846,7 +857,7 @@ static int t1_set_mac_addr(struct net_device *dev, void *p) if (!mac->ops->macaddress_set) return -EOPNOTSUPP; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + eth_hw_addr_set(dev, addr->sa_data); mac->ops->macaddress_set(mac, dev->dev_addr); return 0; } @@ -914,51 +925,6 @@ static void mac_stats_task(struct work_struct *work) spin_unlock(&adapter->work_lock); } -/* - * Processes elmer0 external interrupts in process context. - */ -static void ext_intr_task(struct work_struct *work) -{ - struct adapter *adapter = - container_of(work, struct adapter, ext_intr_handler_task); - - t1_elmer0_ext_intr_handler(adapter); - - /* Now reenable external interrupts */ - spin_lock_irq(&adapter->async_lock); - adapter->slow_intr_mask |= F_PL_INTR_EXT; - writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); - writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, - adapter->regs + A_PL_ENABLE); - spin_unlock_irq(&adapter->async_lock); -} - -/* - * Interrupt-context handler for elmer0 external interrupts. - */ -void t1_elmer0_ext_intr(struct adapter *adapter) -{ - /* - * Schedule a task to handle external interrupts as we require - * a process context. We disable EXT interrupts in the interim - * and let the task reenable them when it's done. 
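The cxgb2.c hunks above replace the old deferral scheme (an ext_intr_handler_task work item scheduled from hard-IRQ context) with a threaded interrupt: request_threaded_irq() registers a hard handler plus a thread handler, and the hard handler defers slow, possibly sleeping work by returning IRQ_WAKE_THREAD. A minimal sketch of the pattern; every demo_* name is hypothetical:

#include <linux/interrupt.h>

struct demo_dev {
	u32 pending;	/* causes recorded for the thread */
};

/* Trivial stand-ins so the sketch is self-contained. */
static bool demo_cause_pending(struct demo_dev *dd) { return dd->pending; }
static void demo_mask_slow_sources(struct demo_dev *dd) { }
static void demo_unmask_slow_sources(struct demo_dev *dd) { }
static void demo_handle_slow_sources(struct demo_dev *dd) { dd->pending = 0; }

/* Hard handler: IRQ context, must not sleep. */
static irqreturn_t demo_hardirq(int irq, void *data)
{
	struct demo_dev *dd = data;

	if (!demo_cause_pending(dd))
		return IRQ_NONE;
	demo_mask_slow_sources(dd);	/* quiet the source until the thread runs */
	return IRQ_WAKE_THREAD;		/* core schedules demo_irq_thread() */
}

/* Thread handler: process context, may sleep (e.g. for MDIO access). */
static irqreturn_t demo_irq_thread(int irq, void *data)
{
	struct demo_dev *dd = data;

	demo_handle_slow_sources(dd);
	demo_unmask_slow_sources(dd);	/* reenable what the hard handler masked */
	return IRQ_HANDLED;
}

/* Registration mirrors the cxgb_up() hunk above:
 *	err = request_threaded_irq(irq, demo_hardirq, demo_irq_thread,
 *				   IRQF_SHARED, "demo", dd);
 */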
- */ - adapter->slow_intr_mask &= ~F_PL_INTR_EXT; - writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, - adapter->regs + A_PL_ENABLE); - schedule_work(&adapter->ext_intr_handler_task); -} - -void t1_fatal_err(struct adapter *adapter) -{ - if (adapter->flags & FULL_INIT_DONE) { - t1_sge_stop(adapter->sge); - t1_interrupts_disable(adapter); - } - pr_alert("%s: encountered fatal error, operation suspended\n", - adapter->name); -} - static const struct net_device_ops cxgb_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@ -966,7 +932,7 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_get_stats = t1_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = t1_set_rxmode, - .ndo_do_ioctl = t1_ioctl, + .ndo_eth_ioctl = t1_ioctl, .ndo_change_mtu = t1_change_mtu, .ndo_set_mac_address = t1_set_mac_addr, .ndo_fix_features = t1_fix_features, @@ -978,13 +944,11 @@ static const struct net_device_ops cxgb_netdev_ops = { static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - int i, err, pci_using_dac = 0; unsigned long mmio_start, mmio_len; const struct board_info *bi; struct adapter *adapter = NULL; struct port_info *pi; - - pr_info_once("%s - version %s\n", DRV_DESCRIPTION, DRV_VERSION); + int i, err; err = pci_enable_device(pdev); if (err) @@ -997,17 +961,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_disable_pdev; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - pci_using_dac = 1; - - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { - pr_err("%s: unable to obtain 64-bit DMA for " - "consistent allocations\n", pci_name(pdev)); - err = -ENODEV; - goto out_disable_pdev; - } - - } else if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) != 0) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { pr_err("%s: no usable DMA configuration\n", pci_name(pdev)); goto out_disable_pdev; } @@ -1062,8 +1017,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->async_lock); spin_lock_init(&adapter->mac_lock); - INIT_WORK(&adapter->ext_intr_handler_task, - ext_intr_task); INIT_DELAYED_WORK(&adapter->stats_update_task, mac_stats_task); @@ -1081,10 +1034,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM; netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_RXCSUM | NETIF_F_LLTX; + NETIF_F_RXCSUM | NETIF_F_LLTX | NETIF_F_HIGHDMA; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; if (vlan_tso_capable(adapter)) { netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | @@ -1102,7 +1053,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hard_header_len += (netdev->hw_features & NETIF_F_TSO) ? 
sizeof(struct cpl_tx_pkt_lso) : sizeof(struct cpl_tx_pkt); - netif_napi_add(netdev, &adapter->napi, t1_poll, 64); + netif_napi_add(netdev, &adapter->napi, t1_poll); netdev->ethtool_ops = &t1_ethtool_ops; @@ -1153,6 +1104,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (!adapter->registered_device_map) { pr_err("%s: could not register any net devices\n", pci_name(pdev)); + err = -EINVAL; goto out_release_adapter_res; } diff --git a/drivers/net/ethernet/chelsio/cxgb/elmer0.h b/drivers/net/ethernet/chelsio/cxgb/elmer0.h index 81526ad36339..0427e894c277 100644 --- a/drivers/net/ethernet/chelsio/cxgb/elmer0.h +++ b/drivers/net/ethernet/chelsio/cxgb/elmer0.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: elmer0.h * @@ -6,16 +7,6 @@ * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * @@ -154,4 +145,3 @@ enum { #define MI1_OP_INDIRECT_READ 3 #endif /* _CXGB_ELMER0_H_ */ - diff --git a/drivers/net/ethernet/chelsio/cxgb/espi.c b/drivers/net/ethernet/chelsio/cxgb/espi.c index 3e182eee799e..ef70569435be 100644 --- a/drivers/net/ethernet/chelsio/cxgb/espi.c +++ b/drivers/net/ethernet/chelsio/cxgb/espi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /***************************************************************************** * * * File: espi.c * @@ -7,16 +8,6 @@ * Ethernet SPI functionality. * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * diff --git a/drivers/net/ethernet/chelsio/cxgb/espi.h b/drivers/net/ethernet/chelsio/cxgb/espi.h index 162de5259df9..f588e9f3b37a 100644 --- a/drivers/net/ethernet/chelsio/cxgb/espi.h +++ b/drivers/net/ethernet/chelsio/cxgb/espi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: espi.h * @@ -6,16 +7,6 @@ * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. 
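The netif_napi_add() hunk above drops the trailing weight argument; this assumes a kernel where the three-argument form applies the default NAPI weight (64, the value the driver passed explicitly before). The poll callback itself is unchanged and still honours its budget; a sketch with a hypothetical demo_process_responses():

#include <linux/netdevice.h>

static int demo_process_responses(struct napi_struct *napi, int budget)
{
	return 0;	/* stand-in: a real driver drains up to 'budget' responses */
}

static int demo_poll(struct napi_struct *napi, int budget)
{
	int work_done = demo_process_responses(napi, budget);

	if (work_done < budget)		/* queue drained: reenable interrupts */
		napi_complete_done(napi, work_done);
	return work_done;
}

/* Old: netif_napi_add(netdev, &napi, demo_poll, 64);
 * New: netif_napi_add(netdev, &napi, demo_poll);
 */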
* - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * diff --git a/drivers/net/ethernet/chelsio/cxgb/gmac.h b/drivers/net/ethernet/chelsio/cxgb/gmac.h index dfa77491a910..96077da1ed5e 100644 --- a/drivers/net/ethernet/chelsio/cxgb/gmac.h +++ b/drivers/net/ethernet/chelsio/cxgb/gmac.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: gmac.h * @@ -7,16 +8,6 @@ * Generic MAC functionality. * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * @@ -117,7 +108,7 @@ struct cmac_ops { const struct cmac_statistics *(*statistics_update)(struct cmac *, int); int (*macaddress_get)(struct cmac *, u8 mac_addr[6]); - int (*macaddress_set)(struct cmac *, u8 mac_addr[6]); + int (*macaddress_set)(struct cmac *, const u8 mac_addr[6]); }; typedef struct _cmac_instance cmac_instance; diff --git a/drivers/net/ethernet/chelsio/cxgb/mv88x201x.c b/drivers/net/ethernet/chelsio/cxgb/mv88x201x.c index 7ddb301bcba0..556c8ad68fa8 100644 --- a/drivers/net/ethernet/chelsio/cxgb/mv88x201x.c +++ b/drivers/net/ethernet/chelsio/cxgb/mv88x201x.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /***************************************************************************** * * * File: mv88x201x.c * @@ -7,16 +8,6 @@ * Marvell PHY (mv88x201x) functionality. * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * diff --git a/drivers/net/ethernet/chelsio/cxgb/pm3393.c b/drivers/net/ethernet/chelsio/cxgb/pm3393.c index c27908e66f5e..cbfa03d5663a 100644 --- a/drivers/net/ethernet/chelsio/cxgb/pm3393.c +++ b/drivers/net/ethernet/chelsio/cxgb/pm3393.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /***************************************************************************** * * * File: pm3393.c * @@ -7,16 +8,6 @@ * PMC/SIERRA (pm3393) MAC-PHY functionality. * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. 
* - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * @@ -496,7 +487,7 @@ static int pm3393_macaddress_get(struct cmac *cmac, u8 mac_addr[6]) return 0; } -static int pm3393_macaddress_set(struct cmac *cmac, u8 ma[6]) +static int pm3393_macaddress_set(struct cmac *cmac, const u8 ma[6]) { u32 val, lo, mid, hi, enabled = cmac->instance->enabled; diff --git a/drivers/net/ethernet/chelsio/cxgb/regs.h b/drivers/net/ethernet/chelsio/cxgb/regs.h index 964ce59ee169..f751e680cf7d 100644 --- a/drivers/net/ethernet/chelsio/cxgb/regs.h +++ b/drivers/net/ethernet/chelsio/cxgb/regs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: regs.h * @@ -6,16 +7,6 @@ * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 47b5c8e2104b..861edff5ed89 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /***************************************************************************** * * * File: sge.c * @@ -7,16 +8,6 @@ * DMA engine. * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
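The sge.c hunks that follow (and the init_one() hunks elsewhere in this patch) migrate from the legacy pci_* DMA wrappers to the generic DMA API: pci_set_dma_mask()/pci_set_consistent_dma_mask() collapse into one dma_set_mask_and_coherent() call, pci_map_single(pdev, ..., PCI_DMA_FROMDEVICE) becomes dma_map_single(&pdev->dev, ..., DMA_FROM_DEVICE), and pci_alloc_consistent() becomes dma_alloc_coherent() with an explicit GFP flag. A short sketch of the converted forms:

#include <linux/dma-mapping.h>
#include <linux/pci.h>

/* Probe-time mask setup; the init_one() hunks treat failure as fatal. */
static int demo_set_dma_masks(struct pci_dev *pdev)
{
	return dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
}

/* Streaming RX mapping; callers check with dma_mapping_error(). */
static dma_addr_t demo_map_rx_buf(struct pci_dev *pdev, void *buf, size_t len)
{
	return dma_map_single(&pdev->dev, buf, len, DMA_FROM_DEVICE);
}

/* Coherent ring allocation; GFP_KERNEL is now spelled out. */
static void *demo_alloc_ring(struct pci_dev *pdev, size_t size,
			     dma_addr_t *phys)
{
	return dma_alloc_coherent(&pdev->dev, size, phys, GFP_KERNEL);
}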
* * * * http://www.chelsio.com * * * @@ -239,8 +230,10 @@ struct sched { unsigned int num; /* num skbs in per port queues */ struct sched_port p[MAX_NPORTS]; struct tasklet_struct sched_tsk;/* tasklet used to run scheduler */ + struct sge *sge; }; -static void restart_sched(unsigned long); + +static void restart_sched(struct tasklet_struct *t); /* @@ -378,7 +371,8 @@ static int tx_sched_init(struct sge *sge) return -ENOMEM; pr_debug("tx_sched_init\n"); - tasklet_init(&s->sched_tsk, restart_sched, (unsigned long) sge); + tasklet_setup(&s->sched_tsk, restart_sched); + s->sge = sge; sge->tx_sched = s; for (i = 0; i < MAX_NPORTS; i++) { @@ -509,9 +503,8 @@ static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *q) while (q->credits--) { struct freelQ_ce *ce = &q->centries[cidx]; - pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE); dev_kfree_skb(ce->skb); ce->skb = NULL; if (++cidx == q->size) @@ -529,8 +522,8 @@ static void free_rx_resources(struct sge *sge) if (sge->respQ.entries) { size = sizeof(struct respQ_e) * sge->respQ.size; - pci_free_consistent(pdev, size, sge->respQ.entries, - sge->respQ.dma_addr); + dma_free_coherent(&pdev->dev, size, sge->respQ.entries, + sge->respQ.dma_addr); } for (i = 0; i < SGE_FREELQ_N; i++) { @@ -542,8 +535,8 @@ static void free_rx_resources(struct sge *sge) } if (q->entries) { size = sizeof(struct freelQ_e) * q->size; - pci_free_consistent(pdev, size, q->entries, - q->dma_addr); + dma_free_coherent(&pdev->dev, size, q->entries, + q->dma_addr); } } } @@ -564,7 +557,8 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) q->size = p->freelQ_size[i]; q->dma_offset = sge->rx_pkt_pad ? 
0 : NET_IP_ALIGN; size = sizeof(struct freelQ_e) * q->size; - q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr); + q->entries = dma_alloc_coherent(&pdev->dev, size, + &q->dma_addr, GFP_KERNEL); if (!q->entries) goto err_no_mem; @@ -601,7 +595,8 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) sge->respQ.credits = 0; size = sizeof(struct respQ_e) * sge->respQ.size; sge->respQ.entries = - pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr); + dma_alloc_coherent(&pdev->dev, size, &sge->respQ.dma_addr, + GFP_KERNEL); if (!sge->respQ.entries) goto err_no_mem; return 0; @@ -624,9 +619,10 @@ static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *q, unsigned int n) ce = &q->centries[cidx]; while (n--) { if (likely(dma_unmap_len(ce, dma_len))) { - pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr), + dma_unmap_single(&pdev->dev, + dma_unmap_addr(ce, dma_addr), dma_unmap_len(ce, dma_len), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); if (q->sop) q->sop = 0; } @@ -663,8 +659,8 @@ static void free_tx_resources(struct sge *sge) } if (q->entries) { size = sizeof(struct cmdQ_e) * q->size; - pci_free_consistent(pdev, size, q->entries, - q->dma_addr); + dma_free_coherent(&pdev->dev, size, q->entries, + q->dma_addr); } } } @@ -689,7 +685,8 @@ static int alloc_tx_resources(struct sge *sge, struct sge_params *p) q->stop_thres = 0; spin_lock_init(&q->lock); size = sizeof(struct cmdQ_e) * q->size; - q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr); + q->entries = dma_alloc_coherent(&pdev->dev, size, + &q->dma_addr, GFP_KERNEL); if (!q->entries) goto err_no_mem; @@ -837,8 +834,8 @@ static void refill_free_list(struct sge *sge, struct freelQ *q) break; skb_reserve(skb, q->dma_offset); - mapping = pci_map_single(pdev, skb->data, dma_len, - PCI_DMA_FROMDEVICE); + mapping = dma_map_single(&pdev->dev, skb->data, dma_len, + DMA_FROM_DEVICE); skb_reserve(skb, sge->rx_pkt_pad); ce->skb = skb; @@ -934,10 +931,11 @@ void t1_sge_intr_clear(struct sge *sge) /* * SGE 'Error' interrupt handler */ -int t1_sge_intr_error_handler(struct sge *sge) +bool t1_sge_intr_error_handler(struct sge *sge) { struct adapter *adapter = sge->adapter; u32 cause = readl(adapter->regs + A_SG_INT_CAUSE); + bool wake = false; if (adapter->port[0].dev->hw_features & NETIF_F_TSO) cause &= ~F_PACKET_TOO_BIG; @@ -961,11 +959,14 @@ int t1_sge_intr_error_handler(struct sge *sge) sge->stats.pkt_mismatch++; pr_alert("%s: SGE packet mismatch\n", adapter->name); } - if (cause & SGE_INT_FATAL) - t1_fatal_err(adapter); + if (cause & SGE_INT_FATAL) { + t1_interrupts_disable(adapter); + adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR; + wake = true; + } writel(cause, adapter->regs + A_SG_INT_CAUSE); - return 0; + return wake; } const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge) @@ -1049,15 +1050,15 @@ static inline struct sk_buff *get_packet(struct adapter *adapter, goto use_orig_buf; skb_put(skb, len); - pci_dma_sync_single_for_cpu(pdev, - dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&pdev->dev, + dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), + DMA_FROM_DEVICE); skb_copy_from_linear_data(ce->skb, skb->data, len); - pci_dma_sync_single_for_device(pdev, - dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&pdev->dev, + dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), + DMA_FROM_DEVICE); recycle_fl_buf(fl, fl->cidx); return skb; } @@ -1068,8 
+1069,8 @@ use_orig_buf: return NULL; } - pci_unmap_single(pdev, dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE); skb = ce->skb; prefetch(skb->data); @@ -1091,8 +1092,9 @@ static void unexpected_offload(struct adapter *adapter, struct freelQ *fl) struct freelQ_ce *ce = &fl->centries[fl->cidx]; struct sk_buff *skb = ce->skb; - pci_dma_sync_single_for_cpu(adapter->pdev, dma_unmap_addr(ce, dma_addr), - dma_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&adapter->pdev->dev, + dma_unmap_addr(ce, dma_addr), + dma_unmap_len(ce, dma_len), DMA_FROM_DEVICE); pr_err("%s: unexpected offload packet, cmd %u\n", adapter->name, *skb->data); recycle_fl_buf(fl, fl->cidx); @@ -1209,8 +1211,8 @@ static inline void write_tx_descs(struct adapter *adapter, struct sk_buff *skb, e = e1 = &q->entries[pidx]; ce = &q->centries[pidx]; - mapping = pci_map_single(adapter->pdev, skb->data, - skb_headlen(skb), PCI_DMA_TODEVICE); + mapping = dma_map_single(&adapter->pdev->dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); desc_mapping = mapping; desc_len = skb_headlen(skb); @@ -1301,9 +1303,10 @@ static inline void reclaim_completed_tx(struct sge *sge, struct cmdQ *q) * Called from tasklet. Checks the scheduler for any * pending skbs that can be sent. */ -static void restart_sched(unsigned long arg) +static void restart_sched(struct tasklet_struct *t) { - struct sge *sge = (struct sge *) arg; + struct sched *s = from_tasklet(s, t, sched_tsk); + struct sge *sge = s->sge; struct adapter *adapter = sge->adapter; struct cmdQ *q = &sge->cmdQ[0]; struct sk_buff *skb; @@ -1347,7 +1350,7 @@ static void restart_sched(unsigned long arg) * @fl: the free list that contains the packet buffer * @len: the packet length * - * Process an ingress ethernet pakcet and deliver it to the stack. + * Process an ingress ethernet packet and deliver it to the stack. 
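The restart_sched() hunk above is the standard tasklet API conversion: tasklet_setup() replaces tasklet_init() plus its unsigned long cast, and the callback recovers its container with from_tasklet(), a container_of() wrapper keyed by the member name. A minimal sketch with hypothetical names:

#include <linux/interrupt.h>

struct demo_sched {
	int pending;
	struct tasklet_struct tsk;
};

static void demo_restart(struct tasklet_struct *t)
{
	struct demo_sched *s = from_tasklet(s, t, tsk);	/* container_of() */

	s->pending = 0;	/* stand-in for the real restart work */
}

static void demo_sched_init(struct demo_sched *s)
{
	/* old: tasklet_init(&s->tsk, demo_restart, (unsigned long)s); */
	tasklet_setup(&s->tsk, demo_restart);
}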
*/ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) { @@ -1611,11 +1614,46 @@ int t1_poll(struct napi_struct *napi, int budget) return work_done; } +irqreturn_t t1_interrupt_thread(int irq, void *data) +{ + struct adapter *adapter = data; + u32 pending_thread_intr; + + spin_lock_irq(&adapter->async_lock); + pending_thread_intr = adapter->pending_thread_intr; + adapter->pending_thread_intr = 0; + spin_unlock_irq(&adapter->async_lock); + + if (!pending_thread_intr) + return IRQ_NONE; + + if (pending_thread_intr & F_PL_INTR_EXT) + t1_elmer0_ext_intr_handler(adapter); + + /* This error is fatal, interrupts remain off */ + if (pending_thread_intr & F_PL_INTR_SGE_ERR) { + pr_alert("%s: encountered fatal error, operation suspended\n", + adapter->name); + t1_sge_stop(adapter->sge); + return IRQ_HANDLED; + } + + spin_lock_irq(&adapter->async_lock); + adapter->slow_intr_mask |= F_PL_INTR_EXT; + + writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); + writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, + adapter->regs + A_PL_ENABLE); + spin_unlock_irq(&adapter->async_lock); + + return IRQ_HANDLED; +} + irqreturn_t t1_interrupt(int irq, void *data) { struct adapter *adapter = data; struct sge *sge = adapter->sge; - int handled; + irqreturn_t handled; if (likely(responses_pending(adapter))) { writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); @@ -1637,10 +1675,10 @@ irqreturn_t t1_interrupt(int irq, void *data) handled = t1_slow_intr_handler(adapter); spin_unlock(&adapter->async_lock); - if (!handled) + if (handled == IRQ_NONE) sge->stats.unhandled_irqs++; - return IRQ_RETVAL(handled != 0); + return handled; } /* diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.h b/drivers/net/ethernet/chelsio/cxgb/sge.h index a1ba591b3431..f7e6f64040ea 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.h +++ b/drivers/net/ethernet/chelsio/cxgb/sge.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: sge.h * @@ -6,16 +7,6 @@ * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
* * * * http://www.chelsio.com * * * @@ -74,6 +65,7 @@ struct sge *t1_sge_create(struct adapter *, struct sge_params *); int t1_sge_configure(struct sge *, struct sge_params *); int t1_sge_set_coalesce_params(struct sge *, struct sge_params *); void t1_sge_destroy(struct sge *); +irqreturn_t t1_interrupt_thread(int irq, void *data); irqreturn_t t1_interrupt(int irq, void *cookie); int t1_poll(struct napi_struct *, int); @@ -81,7 +73,7 @@ netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev); void t1_vlan_mode(struct adapter *adapter, netdev_features_t features); void t1_sge_start(struct sge *); void t1_sge_stop(struct sge *); -int t1_sge_intr_error_handler(struct sge *); +bool t1_sge_intr_error_handler(struct sge *sge); void t1_sge_intr_enable(struct sge *); void t1_sge_intr_disable(struct sge *); void t1_sge_intr_clear(struct sge *); diff --git a/drivers/net/ethernet/chelsio/cxgb/subr.c b/drivers/net/ethernet/chelsio/cxgb/subr.c index ea0f8741d7cf..367a9e4581d5 100644 --- a/drivers/net/ethernet/chelsio/cxgb/subr.c +++ b/drivers/net/ethernet/chelsio/cxgb/subr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /***************************************************************************** * * * File: subr.c * @@ -7,16 +8,6 @@ * Various subroutines (intr,pio,etc.) used by Chelsio 10G Ethernet driver. * * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * @@ -170,7 +161,7 @@ void t1_link_changed(adapter_t *adapter, int port_id) t1_link_negotiated(adapter, port_id, link_ok, speed, duplex, fc); } -static int t1_pci_intr_handler(adapter_t *adapter) +static bool t1_pci_intr_handler(adapter_t *adapter) { u32 pcix_cause; @@ -179,9 +170,13 @@ static int t1_pci_intr_handler(adapter_t *adapter) if (pcix_cause) { pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, pcix_cause); - t1_fatal_err(adapter); /* PCI errors are fatal */ + /* PCI errors are fatal */ + t1_interrupts_disable(adapter); + adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR; + pr_alert("%s: PCI error encountered.\n", adapter->name); + return true; } - return 0; + return false; } #ifdef CONFIG_CHELSIO_T1_1G @@ -210,13 +205,16 @@ static int fpga_phy_intr_handler(adapter_t *adapter) /* * Slow path interrupt handler for FPGAs. 
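Several hunks in this series (t1_set_mac_addr above, t1_init_sw_modules and cxgb_set_mac_addr below) replace direct memcpy() writes into dev->dev_addr with eth_hw_addr_set(), reflecting that the core now treats the stored address as read-only outside its own helpers. A sketch of the converted .ndo_set_mac_address shape:

#include <linux/etherdevice.h>

static int demo_set_mac(struct net_device *dev, void *p)
{
	struct sockaddr *addr = p;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	/* old: memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); */
	eth_hw_addr_set(dev, addr->sa_data);
	return 0;
}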
*/ -static int fpga_slow_intr(adapter_t *adapter) +static irqreturn_t fpga_slow_intr(adapter_t *adapter) { u32 cause = readl(adapter->regs + A_PL_CAUSE); + irqreturn_t ret = IRQ_NONE; cause &= ~F_PL_INTR_SGE_DATA; - if (cause & F_PL_INTR_SGE_ERR) - t1_sge_intr_error_handler(adapter->sge); + if (cause & F_PL_INTR_SGE_ERR) { + if (t1_sge_intr_error_handler(adapter->sge)) + ret = IRQ_WAKE_THREAD; + } if (cause & FPGA_PCIX_INTERRUPT_GMAC) fpga_phy_intr_handler(adapter); @@ -231,14 +229,19 @@ static int fpga_slow_intr(adapter_t *adapter) /* Clear TP interrupt */ writel(tp_cause, adapter->regs + FPGA_TP_ADDR_INTERRUPT_CAUSE); } - if (cause & FPGA_PCIX_INTERRUPT_PCIX) - t1_pci_intr_handler(adapter); + if (cause & FPGA_PCIX_INTERRUPT_PCIX) { + if (t1_pci_intr_handler(adapter)) + ret = IRQ_WAKE_THREAD; + } /* Clear the interrupts just processed. */ if (cause) writel(cause, adapter->regs + A_PL_CAUSE); - return cause != 0; + if (ret != IRQ_NONE) + return ret; + + return cause == 0 ? IRQ_NONE : IRQ_HANDLED; } #endif @@ -842,31 +845,45 @@ void t1_interrupts_clear(adapter_t* adapter) /* * Slow path interrupt handler for ASICs. */ -static int asic_slow_intr(adapter_t *adapter) +static irqreturn_t asic_slow_intr(adapter_t *adapter) { u32 cause = readl(adapter->regs + A_PL_CAUSE); + irqreturn_t ret = IRQ_HANDLED; cause &= adapter->slow_intr_mask; if (!cause) - return 0; - if (cause & F_PL_INTR_SGE_ERR) - t1_sge_intr_error_handler(adapter->sge); + return IRQ_NONE; + if (cause & F_PL_INTR_SGE_ERR) { + if (t1_sge_intr_error_handler(adapter->sge)) + ret = IRQ_WAKE_THREAD; + } if (cause & F_PL_INTR_TP) t1_tp_intr_handler(adapter->tp); if (cause & F_PL_INTR_ESPI) t1_espi_intr_handler(adapter->espi); - if (cause & F_PL_INTR_PCIX) - t1_pci_intr_handler(adapter); - if (cause & F_PL_INTR_EXT) - t1_elmer0_ext_intr(adapter); + if (cause & F_PL_INTR_PCIX) { + if (t1_pci_intr_handler(adapter)) + ret = IRQ_WAKE_THREAD; + } + if (cause & F_PL_INTR_EXT) { + /* Wake the threaded interrupt to handle external interrupts as + * we require a process context. We disable EXT interrupts in + * the interim and let the thread reenable them when it's done. + */ + adapter->pending_thread_intr |= F_PL_INTR_EXT; + adapter->slow_intr_mask &= ~F_PL_INTR_EXT; + writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, + adapter->regs + A_PL_ENABLE); + ret = IRQ_WAKE_THREAD; + } /* Clear the interrupts just processed. */ writel(cause, adapter->regs + A_PL_CAUSE); readl(adapter->regs + A_PL_CAUSE); /* flush writes */ - return 1; + return ret; } -int t1_slow_intr_handler(adapter_t *adapter) +irqreturn_t t1_slow_intr_handler(adapter_t *adapter) { #ifdef CONFIG_CHELSIO_T1_1G if (!t1_is_asic(adapter)) @@ -1114,7 +1131,7 @@ int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi) adapter->port[i].dev->name); goto error; } - memcpy(adapter->port[i].dev->dev_addr, hw_addr, ETH_ALEN); + eth_hw_addr_set(adapter->port[i].dev, hw_addr); init_link_config(&adapter->port[i].link_config, bi); } diff --git a/drivers/net/ethernet/chelsio/cxgb/suni1x10gexp_regs.h b/drivers/net/ethernet/chelsio/cxgb/suni1x10gexp_regs.h index 7f79cc7ceb75..4c883170683b 100644 --- a/drivers/net/ethernet/chelsio/cxgb/suni1x10gexp_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb/suni1x10gexp_regs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /***************************************************************************** * * * File: suni1x10gexp_regs.h * @@ -7,16 +8,6 @@ * PMC/SIERRA (pm3393) MAC-PHY functionality. 
* * part of the Chelsio 10Gb Ethernet Driver. * * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, see <http://www.gnu.org/licenses/>. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * * * http://www.chelsio.com * * * @@ -1639,4 +1630,3 @@ #define SUNI1x10GEXP_BITMSK_PL4IDU_DIP4I 0x0002 #endif /* _CXGB_SUNI1x10GEXP_REGS_H_ */ - diff --git a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c index 873c1c7b4ca0..2ad3efb550c2 100644 --- a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c +++ b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c @@ -379,7 +379,7 @@ static int mac_intr_clear(struct cmac *mac) } /* Expect MAC address to be in network byte order. */ -static int mac_set_address(struct cmac* mac, u8 addr[6]) +static int mac_set_address(struct cmac* mac, const u8 addr[6]) { u32 val; int port = mac->instance->index; @@ -591,7 +591,7 @@ static void port_stats_update(struct cmac *mac) } hw_stats[] = { #define HW_STAT(reg, stat_name) \ - { reg, (&((struct cmac_statistics *)NULL)->stat_name) - (u64 *)NULL } + { reg, offsetof(struct cmac_statistics, stat_name) / sizeof(u64) } /* Rx stats */ HW_STAT(RxUnicast, RxUnicastFramesOK), diff --git a/drivers/net/ethernet/chelsio/cxgb3/adapter.h b/drivers/net/ethernet/chelsio/cxgb3/adapter.h index 087ff0ffb597..6d682b7c7aac 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb3/adapter.h @@ -178,7 +178,7 @@ struct sge_txq { /* state for an SGE Tx queue */ unsigned int token; /* WR token */ dma_addr_t phys_addr; /* physical address of the ring */ struct sk_buff_head sendq; /* List of backpressured offload packets */ - struct tasklet_struct qresume_tsk; /* restarts the queue */ + struct work_struct qresume_task; /* restarts the queue */ unsigned int cntxt_id; /* SGE context id for the Tx q */ unsigned long stops; /* # of times q has been stopped */ unsigned long restarts; /* # of queue restarts */ @@ -313,6 +313,7 @@ void t3_os_link_fault(struct adapter *adapter, int port_id, int state); void t3_os_link_fault_handler(struct adapter *adapter, int port_id); void t3_sge_start(struct adapter *adap); +void t3_sge_stop_dma(struct adapter *adap); void t3_sge_stop(struct adapter *adap); void t3_start_sge_timers(struct adapter *adap); void t3_stop_sge_timers(struct adapter *adap); diff --git a/drivers/net/ethernet/chelsio/cxgb3/ael1002.c b/drivers/net/ethernet/chelsio/cxgb3/ael1002.c index dadf11e3dddb..9d591f0ddfc5 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/ael1002.c +++ b/drivers/net/ethernet/chelsio/cxgb3/ael1002.c @@ -815,17 +815,12 @@ static const struct cphy_ops ael2020_ops = { int t3_ael2020_phy_prep(struct cphy *phy, struct adapter *adapter, int phy_addr, const struct mdio_ops *mdio_ops) { - int err; - cphy_init(phy, adapter, phy_addr, &ael2020_ops, mdio_ops, SUPPORTED_10000baseT_Full | SUPPORTED_AUI | SUPPORTED_FIBRE | SUPPORTED_IRQ, "10GBASE-R"); msleep(125); - err = set_phy_regs(phy, ael2020_reset_regs); - if (err) - return err; - return 0; + return set_phy_regs(phy, ael2020_reset_regs); } /* diff --git 
a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h index 1bd7d89666c4..ecd025dda8d6 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/common.h +++ b/drivers/net/ethernet/chelsio/cxgb3/common.h @@ -676,8 +676,6 @@ void t3_link_changed(struct adapter *adapter, int port_id); void t3_link_fault(struct adapter *adapter, int port_id); int t3_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); const struct adapter_info *t3_get_adapter_info(unsigned int board_id); -int t3_seeprom_read(struct adapter *adapter, u32 addr, __le32 *data); -int t3_seeprom_write(struct adapter *adapter, u32 addr, __le32 data); int t3_seeprom_wp(struct adapter *adapter, int enable); int t3_get_tp_version(struct adapter *adapter, u32 *vers); int t3_check_tpsram_version(struct adapter *adapter); @@ -710,7 +708,7 @@ int t3_mac_enable(struct cmac *mac, int which); int t3_mac_disable(struct cmac *mac, int which); int t3_mac_set_mtu(struct cmac *mac, unsigned int mtu); int t3_mac_set_rx_mode(struct cmac *mac, struct net_device *dev); -int t3_mac_set_address(struct cmac *mac, unsigned int idx, u8 addr[6]); +int t3_mac_set_address(struct cmac *mac, unsigned int idx, const u8 addr[6]); int t3_mac_set_num_ucast(struct cmac *mac, int n); const struct mac_stats *t3_mac_update_stats(struct cmac *mac); int t3_mac_set_speed_duplex_fc(struct cmac *mac, int speed, int duplex, int fc); @@ -770,4 +768,6 @@ int t3_xaui_direct_phy_prep(struct cphy *phy, struct adapter *adapter, int phy_addr, const struct mdio_ops *mdio_ops); int t3_aq100x_phy_prep(struct cphy *phy, struct adapter *adapter, int phy_addr, const struct mdio_ops *mdio_ops); + +extern struct workqueue_struct *cxgb3_wq; #endif /* __CHELSIO_COMMON_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_ioctl.h b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_ioctl.h index b19e4376ba76..401827b82aa1 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_ioctl.h +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_ioctl.h @@ -79,7 +79,7 @@ struct ch_mem_range { uint32_t addr; uint32_t len; uint32_t version; - uint8_t buf[0]; + uint8_t buf[]; }; struct ch_qset_params { diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 883cfa9c4b6d..9b84c8d8d309 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -105,7 +105,6 @@ static const struct pci_device_id cxgb3_pci_tbl[] = { MODULE_DESCRIPTION(DRV_DESC); MODULE_AUTHOR("Chelsio Communications"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, cxgb3_pci_tbl); static int dflt_msg_enable = DFLT_MSG_ENABLE; @@ -149,7 +148,7 @@ struct workqueue_struct *cxgb3_wq; /** * link_report - show link status and link speed/duplex - * @p: the port whose settings are to be reported + * @dev: the port whose settings are to be reported * * Shows the link status, speed, and duplex of a port. */ @@ -305,8 +304,8 @@ void t3_os_link_changed(struct adapter *adapter, int port_id, int link_stat, /** * t3_os_phymod_changed - handle PHY module changes - * @phy: the PHY reporting the module change - * @mod_type: new module type + * @adap: the adapter associated with the link change + * @port_id: the port index whose limk status has changed * * This is the OS-dependent handler for PHY module changes. 
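The ch_mem_range hunk above converts the zero-length array "uint8_t buf[0]" to a C99 flexible array member, "uint8_t buf[]", which lets the compiler and fortified string helpers reason about the trailing buffer. A minimal sketch of how such a structure is typically allocated, using struct_size() from <linux/overflow.h> to keep the size arithmetic overflow-safe:

#include <linux/overflow.h>
#include <linux/slab.h>

struct demo_range {
	u32 len;
	u8 buf[];	/* flexible array member, not buf[0] */
};

static struct demo_range *demo_range_alloc(u32 len)
{
	struct demo_range *r;

	r = kzalloc(struct_size(r, buf, len), GFP_KERNEL);
	if (r)
		r->len = len;
	return r;
}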
It is * invoked when a PHY module is removed or inserted for any OS-specific @@ -610,8 +609,7 @@ static void init_napi(struct adapter *adap) struct sge_qset *qs = &adap->sge.qs[i]; if (qs->adap) - netif_napi_add(qs->netdev, &qs->napi, qs->napi.poll, - 64); + netif_napi_add(qs->netdev, &qs->napi, qs->napi.poll); } /* @@ -1201,7 +1199,7 @@ static void cxgb_vlan_mode(struct net_device *dev, netdev_features_t features) /** * cxgb_up - enable the adapter - * @adapter: adapter being enabled + * @adap: adapter being enabled * * Called when the first port is enabled, this function performs the * actions necessary to make an adapter operational, such as completing @@ -1274,14 +1272,14 @@ static int cxgb_up(struct adapter *adap) free_irq(adap->msix_info[0].vec, adap); goto irq_err; } - } else if ((err = request_irq(adap->pdev->irq, - t3_intr_handler(adap, - adap->sge.qs[0].rspq. - polling), - (adap->flags & USING_MSI) ? - 0 : IRQF_SHARED, - adap->name, adap))) - goto irq_err; + } else { + err = request_irq(adap->pdev->irq, + t3_intr_handler(adap, adap->sge.qs[0].rspq.polling), + (adap->flags & USING_MSI) ? 0 : IRQF_SHARED, + adap->name, adap); + if (err) + goto irq_err; + } enable_all_napi(adap); t3_sge_start(adap); @@ -1303,6 +1301,7 @@ static int cxgb_up(struct adapter *adap) if (ret < 0) { CH_ERR(adap, "failed to bind qsets, err %d\n", ret); t3_intr_disable(adap); + quiesce_rx(adap); free_irq_resources(adap); err = ret; goto out; @@ -1628,9 +1627,8 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) t3_get_tp_version(adapter, &tp_vers); spin_unlock(&adapter->stats_lock); - strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); - strlcpy(info->bus_info, pci_name(adapter->pdev), + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(adapter->pdev), sizeof(info->bus_info)); if (fw_vers) snprintf(info->fw_version, sizeof(info->fw_version), @@ -1950,7 +1948,9 @@ static int set_pauseparam(struct net_device *dev, return 0; } -static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e) +static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e, + struct kernel_ethtool_ringparam *kernel_e, + struct netlink_ext_ack *extack) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -1966,7 +1966,9 @@ static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e) e->tx_pending = q->txq_size[0]; } -static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e) +static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e, + struct kernel_ethtool_ringparam *kernel_e, + struct netlink_ext_ack *extack) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -1998,7 +2000,9 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e) return 0; } -static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -2019,7 +2023,9 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) return 0; } -static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c, + struct 
kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -2034,20 +2040,16 @@ static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e, { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; - int i, err = 0; - - u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; + int cnt; e->magic = EEPROM_MAGIC; - for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4) - err = t3_seeprom_read(adapter, i, (__le32 *) & buf[i]); + cnt = pci_read_vpd(adapter->pdev, e->offset, e->len, data); + if (cnt < 0) + return cnt; - if (!err) - memcpy(data, buf + e->offset, e->len); - kfree(buf); - return err; + e->len = cnt; + + return 0; } static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, @@ -2056,7 +2058,6 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; u32 aligned_offset, aligned_len; - __le32 *p; u8 *buf; int err; @@ -2070,12 +2071,9 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, buf = kmalloc(aligned_len, GFP_KERNEL); if (!buf) return -ENOMEM; - err = t3_seeprom_read(adapter, aligned_offset, (__le32 *) buf); - if (!err && aligned_len > 4) - err = t3_seeprom_read(adapter, - aligned_offset + aligned_len - 4, - (__le32 *) & buf[aligned_len - 4]); - if (err) + err = pci_read_vpd(adapter->pdev, aligned_offset, aligned_len, + buf); + if (err < 0) goto out; memcpy(buf + (eeprom->offset & 3), data, eeprom->len); } else @@ -2085,17 +2083,13 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, if (err) goto out; - for (p = (__le32 *) buf; !err && aligned_len; aligned_len -= 4, p++) { - err = t3_seeprom_write(adapter, aligned_offset, *p); - aligned_offset += 4; - } - - if (!err) + err = pci_write_vpd(adapter->pdev, aligned_offset, aligned_len, buf); + if (err >= 0) err = t3_seeprom_wp(adapter, 1); out: if (buf != data) kfree(buf); - return err; + return err < 0 ? 
err : 0; } static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) @@ -2106,6 +2100,7 @@ static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) } static const struct ethtool_ops cxgb_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = get_drvinfo, .get_msglevel = get_msglevel, .set_msglevel = set_msglevel, @@ -2136,13 +2131,18 @@ static int in_range(int val, int lo, int hi) return val < 0 || (val <= hi && val >= lo); } -static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) +static int cxgb_siocdevprivate(struct net_device *dev, + struct ifreq *ifreq, + void __user *useraddr, + int cmd) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; - u32 cmd; int ret; + if (cmd != SIOCCHIOCTL) + return -EOPNOTSUPP; + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; @@ -2544,11 +2544,9 @@ static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd) !(data->phy_id & 0xe0e0)) data->phy_id = mdio_phy_id_c45(data->phy_id >> 8, data->phy_id & 0x1f); - /* FALLTHRU */ + fallthrough; case SIOCGMIIPHY: return mdio_mii_ioctl(&pi->phy.mdio, data, cmd); - case SIOCCHIOCTL: - return cxgb_extension_ioctl(dev, req->ifr_data); default: return -EOPNOTSUPP; } @@ -2580,7 +2578,7 @@ static int cxgb_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + eth_hw_addr_set(dev, addr->sa_data); t3_mac_set_address(&pi->mac, LAN_MAC_IDX, dev->dev_addr); if (offload_running(adapter)) write_smt_entry(adapter, pi->port_id); @@ -2997,7 +2995,7 @@ void t3_fatal_err(struct adapter *adapter) unsigned int fw_status[4]; if (adapter->flags & FULL_INIT_DONE) { - t3_sge_stop(adapter); + t3_sge_stop_dma(adapter); t3_write_reg(adapter, A_XGM_TX_CTRL, 0); t3_write_reg(adapter, A_XGM_RX_CTRL, 0); t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0); @@ -3099,8 +3097,9 @@ static void set_nqsets(struct adapter *adap) nqsets = num_cpus; if (nqsets < 1 || hwports == 4) nqsets = 1; - } else + } else { nqsets = 1; + } for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); @@ -3181,7 +3180,8 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_get_stats = cxgb_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = cxgb_set_rxmode, - .ndo_do_ioctl = cxgb_ioctl, + .ndo_eth_ioctl = cxgb_ioctl, + .ndo_siocdevprivate = cxgb_siocdevprivate, .ndo_change_mtu = cxgb_change_mtu, .ndo_set_mac_address = cxgb_set_mac_addr, .ndo_fix_features = cxgb_fix_features, @@ -3204,14 +3204,12 @@ static void cxgb3_init_iscsi_mac(struct net_device *dev) NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA) static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - int i, err, pci_using_dac = 0; + int i, err; resource_size_t mmio_start, mmio_len; const struct adapter_info *ai; struct adapter *adapter = NULL; struct port_info *pi; - pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION); - if (!cxgb3_wq) { cxgb3_wq = create_singlethread_workqueue(DRV_NAME); if (!cxgb3_wq) { @@ -3233,15 +3231,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_disable_device; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - pci_using_dac = 1; - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - dev_err(&pdev->dev, "unable to obtain 64-bit DMA for " - "coherent allocations\n"); - goto out_release_regions; - } - } else if ((err = 
pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) != 0) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { dev_err(&pdev->dev, "no usable DMA configuration\n"); goto out_release_regions; } @@ -3317,8 +3308,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features |= netdev->hw_features | NETIF_F_HW_VLAN_CTAG_TX; netdev->vlan_features |= netdev->features & VLAN_FEAT; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + + netdev->features |= NETIF_F_HIGHDMA; netdev->netdev_ops = &cxgb_netdev_ops; netdev->ethtool_ops = &cxgb_ethtool_ops; @@ -3358,6 +3349,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } if (!adapter->registered_device_map) { dev_err(&pdev->dev, "could not register any net devices\n"); + err = -ENODEV; goto out_free_dev; } diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 84604aff53ce..89256b866840 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -243,7 +243,7 @@ static int cxgb_ulp_iscsi_ctl(struct adapter *adapter, unsigned int req, /* * on rx, the iscsi pdu has to be < rx page size and the - * the max rx data length programmed in TP + * max rx data length programmed in TP */ val = min(adapter->params.tp.rx_pg_size, ((t3_read_reg(adapter, A_TP_PARA_REG2)) >> diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c index b3e4118a15e7..9749d1239f58 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c @@ -136,7 +136,7 @@ again: if (e->state == L2T_STATE_STALE) e->state = L2T_STATE_VALID; spin_unlock_bh(&e->lock); - /* fall through */ + fallthrough; case L2T_STATE_VALID: /* fast-path, send the packet on */ return cxgb3_ofld_send(dev, skb); case L2T_STATE_RESOLVING: diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 6dabbf1502c7..62dfbdd33365 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -126,8 +126,10 @@ struct rsp_desc { /* response queue descriptor */ struct rss_header rss_hdr; __be32 flags; __be32 len_cq; - u8 imm_data[47]; - u8 intr_gen; + struct_group(immediate, + u8 imm_data[47]; + u8 intr_gen; + ); }; /* @@ -244,8 +246,8 @@ static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q, frag_idx = d->fragidx; if (frag_idx == 0 && skb_headlen(skb)) { - pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]), - skb_headlen(skb), PCI_DMA_TODEVICE); + dma_unmap_single(&pdev->dev, be64_to_cpu(sgp->addr[0]), + skb_headlen(skb), DMA_TO_DEVICE); j = 1; } @@ -253,9 +255,9 @@ static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q, nfrags = skb_shinfo(skb)->nr_frags; while (frag_idx < nfrags && curflit < WR_FLITS) { - pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]), + dma_unmap_page(&pdev->dev, be64_to_cpu(sgp->addr[j]), skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); j ^= 1; if (j == 0) { sgp++; @@ -355,15 +357,14 @@ static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q, if (q->use_pages && d->pg_chunk.page) { (*d->pg_chunk.p_cnt)--; if (!*d->pg_chunk.p_cnt) - pci_unmap_page(pdev, - d->pg_chunk.mapping, - q->alloc_size, PCI_DMA_FROMDEVICE); + dma_unmap_page(&pdev->dev, d->pg_chunk.mapping, + q->alloc_size, DMA_FROM_DEVICE); put_page(d->pg_chunk.page); d->pg_chunk.page = NULL; 
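[Editor's note] The init_one() hunk above is part of the kernel-wide retirement of the legacy pci_dma_* wrappers: the two-step pci_set_dma_mask()/pci_set_consistent_dma_mask() dance collapses into one dma_set_mask_and_coherent() call, and the pci_map_*/pci_unmap_* calls in sge.c become dma_* calls on &pdev->dev. A minimal probe-time sketch of the same pattern, with a hypothetical foo_probe() and the 32-bit fallback that cxgb3 itself no longer needs:

	static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	{
		int err;

		err = pci_enable_device(pdev);
		if (err)
			return err;

		/* one call sets both the streaming and the coherent mask */
		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		if (err)
			err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "no usable DMA configuration\n");
			pci_disable_device(pdev);
			return err;
		}
		return 0;
	}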
} else { - pci_unmap_single(pdev, dma_unmap_addr(d, dma_addr), - q->buf_size, PCI_DMA_FROMDEVICE); + dma_unmap_single(&pdev->dev, dma_unmap_addr(d, dma_addr), + q->buf_size, DMA_FROM_DEVICE); kfree_skb(d->skb); d->skb = NULL; } @@ -372,7 +373,7 @@ static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q, /** * free_rx_bufs - free the Rx buffers on an SGE free list * @pdev: the PCI device associated with the adapter - * @rxq: the SGE free list to clean up + * @q: the SGE free list to clean up * * Release the buffers on an SGE free-buffer Rx queue. HW fetching from * this queue should be stopped before calling this function. @@ -414,8 +415,8 @@ static inline int add_one_rx_buf(void *va, unsigned int len, { dma_addr_t mapping; - mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(pdev, mapping))) + mapping = dma_map_single(&pdev->dev, va, len, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&pdev->dev, mapping))) return -ENOMEM; dma_unmap_addr_set(sd, dma_addr, mapping); @@ -453,9 +454,9 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q, q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) - SGE_PG_RSVD; q->pg_chunk.offset = 0; - mapping = pci_map_page(adapter->pdev, q->pg_chunk.page, - 0, q->alloc_size, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) { + mapping = dma_map_page(&adapter->pdev->dev, q->pg_chunk.page, + 0, q->alloc_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&adapter->pdev->dev, mapping))) { __free_pages(q->pg_chunk.page, order); q->pg_chunk.page = NULL; return -EIO; @@ -493,7 +494,7 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) /** * refill_fl - refill an SGE free-buffer list - * @adapter: the adapter + * @adap: the adapter * @q: the free-list to refill * @n: the number of new buffers to allocate * @gfp: the gfp flags for allocating new buffers @@ -522,9 +523,9 @@ nomem: q->alloc_failed++; dma_unmap_addr_set(sd, dma_addr, mapping); add_one_rx_chunk(mapping, d, q->gen); - pci_dma_sync_single_for_device(adap->pdev, mapping, - q->buf_size - SGE_PG_RSVD, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&adap->pdev->dev, mapping, + q->buf_size - SGE_PG_RSVD, + DMA_FROM_DEVICE); } else { void *buf_start; @@ -568,7 +569,7 @@ static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl) /** * recycle_rx_buf - recycle a receive buffer - * @adapter: the adapter + * @adap: the adapter * @q: the SGE free list * @idx: index of buffer to recycle * @@ -665,7 +666,7 @@ static void t3_reset_qset(struct sge_qset *q) /** - * free_qset - free the resources of an SGE queue set + * t3_free_qset - free the resources of an SGE queue set * @adapter: the adapter owning the queue set * @q: the queue set * @@ -793,13 +794,13 @@ static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl, skb = alloc_skb(len, GFP_ATOMIC); if (likely(skb != NULL)) { __skb_put(skb, len); - pci_dma_sync_single_for_cpu(adap->pdev, - dma_unmap_addr(sd, dma_addr), len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&adap->pdev->dev, + dma_unmap_addr(sd, dma_addr), + len, DMA_FROM_DEVICE); memcpy(skb->data, sd->skb->data, len); - pci_dma_sync_single_for_device(adap->pdev, - dma_unmap_addr(sd, dma_addr), len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&adap->pdev->dev, + dma_unmap_addr(sd, dma_addr), + len, DMA_FROM_DEVICE); } else if (!drop_thres) goto use_orig_buf; recycle: @@ -813,8 +814,8 @@ recycle: goto recycle; 
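[Editor's note] The get_packet() changes here touch the driver's copy-break path: packets shorter than a threshold are copied into a freshly allocated skb so the still-mapped Rx buffer can be recycled without an unmap/remap cycle; the buffer only needs a sync to the CPU for the copy and a sync back to the device afterwards. A reduced sketch of that idiom (helper name hypothetical):

	static struct sk_buff *copy_break(struct device *dev, dma_addr_t mapping,
					  const void *va, unsigned int len)
	{
		struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);

		if (!skb)
			return NULL;
		__skb_put(skb, len);
		dma_sync_single_for_cpu(dev, mapping, len, DMA_FROM_DEVICE);
		memcpy(skb->data, va, len);
		/* hand the buffer back to the device for reuse */
		dma_sync_single_for_device(dev, mapping, len, DMA_FROM_DEVICE);
		return skb;
	}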
use_orig_buf: - pci_unmap_single(adap->pdev, dma_unmap_addr(sd, dma_addr), - fl->buf_size, PCI_DMA_FROMDEVICE); + dma_unmap_single(&adap->pdev->dev, dma_unmap_addr(sd, dma_addr), + fl->buf_size, DMA_FROM_DEVICE); skb = sd->skb; skb_put(skb, len); __refill_fl(adap, fl); @@ -825,6 +826,7 @@ use_orig_buf: * get_packet_pg - return the next ingress packet buffer from a free list * @adap: the adapter that received the packet * @fl: the SGE free list holding the packet + * @q: the queue * @len: the packet length including any SGE padding * @drop_thres: # of remaining buffers before we start dropping packets * @@ -853,12 +855,11 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl, newskb = alloc_skb(len, GFP_ATOMIC); if (likely(newskb != NULL)) { __skb_put(newskb, len); - pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, + len, DMA_FROM_DEVICE); memcpy(newskb->data, sd->pg_chunk.va, len); - pci_dma_sync_single_for_device(adap->pdev, dma_addr, - len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&adap->pdev->dev, dma_addr, + len, DMA_FROM_DEVICE); } else if (!drop_thres) return NULL; recycle: @@ -882,14 +883,12 @@ recycle: goto recycle; } - pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, len, + DMA_FROM_DEVICE); (*sd->pg_chunk.p_cnt)--; if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page) - pci_unmap_page(adap->pdev, - sd->pg_chunk.mapping, - fl->alloc_size, - PCI_DMA_FROMDEVICE); + dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping, + fl->alloc_size, DMA_FROM_DEVICE); if (!skb) { __skb_put(newskb, SGE_RX_PULL_LEN); memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN); @@ -928,7 +927,8 @@ static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp) if (skb) { __skb_put(skb, IMMED_PKT_SIZE); - skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE); + BUILD_BUG_ON(IMMED_PKT_SIZE != sizeof(resp->immediate)); + skb_copy_to_linear_data(skb, &resp->immediate, IMMED_PKT_SIZE); } return skb; } @@ -967,9 +967,9 @@ static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb, const struct skb_shared_info *si; if (skb_headlen(skb)) { - *addr = pci_map_single(pdev, skb->data, skb_headlen(skb), - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(pdev, *addr)) + *addr = dma_map_single(&pdev->dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (dma_mapping_error(&pdev->dev, *addr)) goto out_err; addr++; } @@ -980,7 +980,7 @@ static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb, for (fp = si->frags; fp < end; fp++) { *addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp), DMA_TO_DEVICE); - if (pci_dma_mapping_error(pdev, *addr)) + if (dma_mapping_error(&pdev->dev, *addr)) goto unwind; addr++; } @@ -991,7 +991,8 @@ unwind: dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE); - pci_unmap_single(pdev, addr[-1], skb_headlen(skb), PCI_DMA_TODEVICE); + dma_unmap_single(&pdev->dev, addr[-1], skb_headlen(skb), + DMA_TO_DEVICE); out_err: return -ENOMEM; } @@ -1173,6 +1174,7 @@ static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb, * @q: the Tx queue * @ndesc: number of descriptors the packet will occupy * @compl: the value of the COMPL bit to use + * @addr: address * * Generate a TX_PKT work request to send the supplied packet. 
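[Editor's note] The struct_group() added to struct rsp_desc earlier in this patch and the BUILD_BUG_ON() in get_imm_packet() above work together: the copy deliberately spans imm_data[] and intr_gen, and grouping the two members gives that memcpy() a named region whose size FORTIFY_SOURCE can verify, instead of flagging a read past imm_data[47]. A reduced sketch of the idiom with a cut-down struct:

	#include <linux/stddef.h>	/* struct_group() */

	struct resp_example {
		__be32 len_cq;
		struct_group(immediate,
			u8 imm_data[47];
			u8 intr_gen;
		);
	};

	static void copy_immediate(void *dst, const struct resp_example *r)
	{
		/* compile-time check that the grouped region is 48 bytes */
		BUILD_BUG_ON(sizeof(r->immediate) != 48);
		memcpy(dst, &r->immediate, sizeof(r->immediate));
	}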
*/ @@ -1254,7 +1256,7 @@ static inline void t3_stop_tx_queue(struct netdev_queue *txq, } /** - * eth_xmit - add a packet to the Ethernet Tx queue + * t3_eth_xmit - add a packet to the Ethernet Tx queue * @skb: the packet * @dev: the egress net device * @@ -1516,14 +1518,15 @@ static int ctrl_xmit(struct adapter *adap, struct sge_txq *q, /** * restart_ctrlq - restart a suspended control queue - * @qs: the queue set cotaining the control queue + * @w: pointer to the work associated with this handler * * Resumes transmission on a suspended Tx control queue. */ -static void restart_ctrlq(unsigned long data) +static void restart_ctrlq(struct work_struct *w) { struct sk_buff *skb; - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = container_of(w, struct sge_qset, + txq[TXQ_CTRL].qresume_task); struct sge_txq *q = &qs->txq[TXQ_CTRL]; spin_lock(&q->lock); @@ -1589,13 +1592,14 @@ static void deferred_unmap_destructor(struct sk_buff *skb) p = dui->addr; if (skb_tail_pointer(skb) - skb_transport_header(skb)) - pci_unmap_single(dui->pdev, *p++, skb_tail_pointer(skb) - - skb_transport_header(skb), PCI_DMA_TODEVICE); + dma_unmap_single(&dui->pdev->dev, *p++, + skb_tail_pointer(skb) - skb_transport_header(skb), + DMA_TO_DEVICE); si = skb_shinfo(skb); for (i = 0; i < si->nr_frags; i++) - pci_unmap_page(dui->pdev, *p++, skb_frag_size(&si->frags[i]), - PCI_DMA_TODEVICE); + dma_unmap_page(&dui->pdev->dev, *p++, + skb_frag_size(&si->frags[i]), DMA_TO_DEVICE); } static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev, @@ -1622,6 +1626,7 @@ static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev, * @pidx: index of the first Tx descriptor to write * @gen: the generation value to use * @ndesc: number of descriptors the packet will occupy + * @addr: the address * * Write an offload work request to send the supplied packet. The packet * data already carry the work request with most fields populated. @@ -1733,14 +1738,15 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); /** * restart_offloadq - restart a suspended offload queue - * @qs: the queue set cotaining the offload queue + * @w: pointer to the work associated with this handler * * Resumes transmission on a suspended Tx offload queue. */ -static void restart_offloadq(unsigned long data) +static void restart_offloadq(struct work_struct *w) { struct sk_buff *skb; - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = container_of(w, struct sge_qset, + txq[TXQ_OFLD].qresume_task); struct sge_txq *q = &qs->txq[TXQ_OFLD]; const struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; @@ -1883,7 +1889,7 @@ static inline void deliver_partial_bundle(struct t3cdev *tdev, /** * ofld_poll - NAPI handler for offload packets in interrupt mode - * @dev: the network device doing the polling + * @napi: the network device doing the polling * @budget: polling budget * * The NAPI handler for offload packets when a response queue is serviced @@ -1950,7 +1956,7 @@ static int ofld_poll(struct napi_struct *napi, int budget) * @rx_gather: a gather list of packets if we are building a bundle * @gather_idx: index of the next available slot in the bundle * - * Process an ingress offload pakcet and add it to the offload ingress + * Process an ingress offload packet and add it to the offload ingress * queue. Returns the index of the next available slot in the bundle. 
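[Editor's note] restart_ctrlq() above (and restart_offloadq() below) switch from tasklets to work items, which is why the handler prototype changes from an unsigned long cookie to a struct work_struct pointer that is mapped back to its queue set with container_of(). The general shape of such a conversion, sketched with hypothetical names:

	struct my_txq {
		struct work_struct qresume_task;	/* was a struct tasklet_struct */
		/* ... */
	};

	static void my_qresume(struct work_struct *w)
	{
		struct my_txq *q = container_of(w, struct my_txq, qresume_task);

		/* runs in process context and, unlike a tasklet, may sleep */
	}

	/*
	 * setup:    INIT_WORK(&q->qresume_task, my_qresume);
	 * kick:     queue_work(wq, &q->qresume_task);   replaces tasklet_schedule()
	 * teardown: cancel_work_sync(&q->qresume_task); replaces tasklet_kill()
	 */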
*/ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, @@ -1995,19 +2001,23 @@ static void restart_tx(struct sge_qset *qs) should_restart_tx(&qs->txq[TXQ_OFLD]) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { qs->txq[TXQ_OFLD].restarts++; - tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk); + + /* The work can be quite lengthy so we use driver's own queue */ + queue_work(cxgb3_wq, &qs->txq[TXQ_OFLD].qresume_task); } if (test_bit(TXQ_CTRL, &qs->txq_stopped) && should_restart_tx(&qs->txq[TXQ_CTRL]) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { qs->txq[TXQ_CTRL].restarts++; - tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk); + + /* The work can be quite lengthy so we use driver's own queue */ + queue_work(cxgb3_wq, &qs->txq[TXQ_CTRL].qresume_task); } } /** * cxgb3_arp_process - process an ARP request probing a private IP address - * @adapter: the adapter + * @pi: the port info * @skb: the skbuff containing the ARP request * * Check if the ARP request is probing the private IP address @@ -2069,9 +2079,10 @@ static void cxgb3_process_iscsi_prov_pack(struct port_info *pi, * @adap: the adapter * @rq: the response queue that received the packet * @skb: the packet - * @pad: amount of padding at the start of the buffer + * @pad: padding + * @lro: large receive offload * - * Process an ingress ethernet pakcet and deliver it to the stack. + * Process an ingress ethernet packet and deliver it to the stack. * The padding is 2 if the packet was delivered in an Rx buffer and 0 * if it was immediate data in a response. */ @@ -2143,17 +2154,14 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, fl->credits--; - pci_dma_sync_single_for_cpu(adap->pdev, - dma_unmap_addr(sd, dma_addr), - fl->buf_size - SGE_PG_RSVD, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&adap->pdev->dev, + dma_unmap_addr(sd, dma_addr), + fl->buf_size - SGE_PG_RSVD, DMA_FROM_DEVICE); (*sd->pg_chunk.p_cnt)--; if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page) - pci_unmap_page(adap->pdev, - sd->pg_chunk.mapping, - fl->alloc_size, - PCI_DMA_FROMDEVICE); + dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping, + fl->alloc_size, DMA_FROM_DEVICE); if (!skb) { put_page(sd->pg_chunk.page); @@ -2239,7 +2247,7 @@ static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags) /** * check_ring_db - check if we need to ring any doorbells - * @adapter: the adapter + * @adap: the adapter * @qs: the queue set whose Tx queues are to be examined * @sleeping: indicates which Tx queue sent GTS * @@ -2372,10 +2380,7 @@ no_mem: if (fl->use_pages) { void *addr = fl->sdesc[fl->cidx].pg_chunk.va; - prefetch(addr); -#if L1_CACHE_BYTES < 128 - prefetch(addr + L1_CACHE_BYTES); -#endif + net_prefetch(addr); __refill_fl(adap, fl); if (lro > 0) { lro_add_page(adap, qs, fl, @@ -2902,7 +2907,7 @@ void t3_sge_err_intr_handler(struct adapter *adapter) /** * sge_timer_tx - perform periodic maintenance of an SGE qset - * @data: the SGE queue set to maintain + * @t: a timer list containing the SGE queue set to maintain * * Runs periodically from a timer to perform maintenance of an SGE queue * set. It performs two tasks: @@ -2946,7 +2951,7 @@ static void sge_timer_tx(struct timer_list *t) /** * sge_timer_rx - perform periodic maintenance of an SGE qset - * @data: the SGE queue set to maintain + * @t: the timer list containing the SGE queue set to maintain * * a) Replenishes Rx queues that have run out due to memory shortage. 
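[Editor's note] The hunk above in process_responses() replaces the open-coded pair of prefetch() calls with net_prefetch(), a helper introduced for exactly this pattern: networking fast paths want the first 128 bytes of a packet warm regardless of the CPU's cache-line size. Its definition in <linux/netdevice.h> is essentially what the driver used to spell out by hand:

	static inline void net_prefetch(void *p)
	{
		prefetch(p);
	#if L1_CACHE_BYTES < 128
		prefetch((u8 *)p + L1_CACHE_BYTES);
	#endif
	}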
* Normally new Rx buffers are added when existing ones are consumed but @@ -3024,7 +3029,7 @@ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) * @irq_vec_idx: the IRQ vector index for response queue interrupts * @p: configuration parameters for this queue set * @ntxq: number of Tx queues for the queue set - * @netdev: net device associated with this queue set + * @dev: net device associated with this queue set * @netdevq: net device TX queue associated with this queue set * * Allocate resources and initialize an SGE queue set. A queue set @@ -3084,10 +3089,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, skb_queue_head_init(&q->txq[i].sendq); } - tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq, - (unsigned long)q); - tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq, - (unsigned long)q); + INIT_WORK(&q->txq[TXQ_OFLD].qresume_task, restart_offloadq); + INIT_WORK(&q->txq[TXQ_CTRL].qresume_task, restart_ctrlq); q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; @@ -3176,6 +3179,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, GFP_KERNEL | __GFP_COMP); if (!avail) { CH_ALERT(adapter, "free list queue 0 initialization failed\n"); + ret = -ENOMEM; goto err; } if (avail < q->fl[0].size) @@ -3271,30 +3275,43 @@ void t3_sge_start(struct adapter *adap) } /** - * t3_sge_stop - disable SGE operation + * t3_sge_stop_dma - Disable SGE DMA engine operation * @adap: the adapter * - * Disables the DMA engine. This can be called in emeregencies (e.g., - * from error interrupts) or from normal process context. In the latter - * case it also disables any pending queue restart tasklets. Note that - * if it is called in interrupt context it cannot disable the restart - * tasklets as it cannot wait, however the tasklets will have no effect - * since the doorbells are disabled and the driver will call this again - * later from process context, at which time the tasklets will be stopped - * if they are still running. + * Can be invoked from interrupt context e.g. error handler. + * + * Note that this function cannot disable the restart of works as + * it cannot wait if called from interrupt context, however the + * works will have no effect since the doorbells are disabled. The + * driver will call t3_sge_stop() later from process context, at + * which time the works will be stopped if they are still running. */ -void t3_sge_stop(struct adapter *adap) +void t3_sge_stop_dma(struct adapter *adap) { t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0); - if (!in_interrupt()) { - int i; +} - for (i = 0; i < SGE_QSETS; ++i) { - struct sge_qset *qs = &adap->sge.qs[i]; +/** + * t3_sge_stop - disable SGE operation completely + * @adap: the adapter + * + * Called from process context. Disables the DMA engine and any + * pending queue restart works. 
+ */ +void t3_sge_stop(struct adapter *adap) +{ + int i; - tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk); - tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk); - } + t3_sge_stop_dma(adap); + + /* workqueues aren't initialized otherwise */ + if (!(adap->flags & FULL_INIT_DONE)) + return; + for (i = 0; i < SGE_QSETS; ++i) { + struct sge_qset *qs = &adap->sge.qs[i]; + + cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task); + cancel_work_sync(&qs->txq[TXQ_CTRL].qresume_task); } } @@ -3361,7 +3378,7 @@ void t3_sge_prep(struct adapter *adap, struct sge_params *p) q->coalesce_usecs = 5; q->rspq_size = 1024; q->fl_size = 1024; - q->jumbo_size = 512; + q->jumbo_size = 512; q->txq_size[TXQ_ETH] = 1024; q->txq_size[TXQ_OFLD] = 1024; q->txq_size[TXQ_CTRL] = 256; diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_cpl.h b/drivers/net/ethernet/chelsio/cxgb3/t3_cpl.h index 852c399a8b0a..68bb5f39f3f1 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_cpl.h +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_cpl.h @@ -1448,7 +1448,7 @@ struct cpl_rdma_terminate { #endif __be32 msn; __be32 mo; - __u8 data[0]; + __u8 data[]; }; /* cpl_rdma_terminate.tid_len fields */ diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index 0a9f2c596624..a06003bfa04b 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -29,6 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include <linux/etherdevice.h> #include "common.h" #include "regs.h" #include "sge_defs.h" @@ -595,81 +596,10 @@ struct t3_vpd { u32 pad; /* for multiple-of-4 sizing and alignment */ }; -#define EEPROM_MAX_POLL 40 #define EEPROM_STAT_ADDR 0x4000 #define VPD_BASE 0xc00 /** - * t3_seeprom_read - read a VPD EEPROM location - * @adapter: adapter to read - * @addr: EEPROM address - * @data: where to store the read data - * - * Read a 32-bit word from a location in VPD EEPROM using the card's PCI - * VPD ROM capability. A zero is written to the flag bit when the - * address is written to the control register. The hardware device will - * set the flag to 1 when 4 bytes have been read into the data register. - */ -int t3_seeprom_read(struct adapter *adapter, u32 addr, __le32 *data) -{ - u16 val; - int attempts = EEPROM_MAX_POLL; - u32 v; - unsigned int base = adapter->params.pci.vpd_cap_addr; - - if ((addr >= EEPROMSIZE && addr != EEPROM_STAT_ADDR) || (addr & 3)) - return -EINVAL; - - pci_write_config_word(adapter->pdev, base + PCI_VPD_ADDR, addr); - do { - udelay(10); - pci_read_config_word(adapter->pdev, base + PCI_VPD_ADDR, &val); - } while (!(val & PCI_VPD_ADDR_F) && --attempts); - - if (!(val & PCI_VPD_ADDR_F)) { - CH_ERR(adapter, "reading EEPROM address 0x%x failed\n", addr); - return -EIO; - } - pci_read_config_dword(adapter->pdev, base + PCI_VPD_DATA, &v); - *data = cpu_to_le32(v); - return 0; -} - -/** - * t3_seeprom_write - write a VPD EEPROM location - * @adapter: adapter to write - * @addr: EEPROM address - * @data: value to write - * - * Write a 32-bit word to a location in VPD EEPROM using the card's PCI - * VPD ROM capability. 
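[Editor's note] Splitting the old t3_sge_stop() into t3_sge_stop_dma() and t3_sge_stop() removes the in_interrupt() test, which is a deprecated way of guessing the calling context, and instead makes the context explicit at each call site: t3_fatal_err() in interrupt context performs only the register write, while process-context teardown also cancels the resume works. A reduced sketch of the resulting split (the real code additionally checks FULL_INIT_DONE and cancels the offload queue's work as well):

	/* safe in any context: a single register write */
	void hw_stop_dma(struct adapter *adap)
	{
		t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
	}

	/* process context only: cancel_work_sync() may sleep */
	void hw_stop(struct adapter *adap)
	{
		int i;

		hw_stop_dma(adap);
		for (i = 0; i < SGE_QSETS; ++i)
			cancel_work_sync(&adap->sge.qs[i].txq[TXQ_CTRL].qresume_task);
	}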
- */ -int t3_seeprom_write(struct adapter *adapter, u32 addr, __le32 data) -{ - u16 val; - int attempts = EEPROM_MAX_POLL; - unsigned int base = adapter->params.pci.vpd_cap_addr; - - if ((addr >= EEPROMSIZE && addr != EEPROM_STAT_ADDR) || (addr & 3)) - return -EINVAL; - - pci_write_config_dword(adapter->pdev, base + PCI_VPD_DATA, - le32_to_cpu(data)); - pci_write_config_word(adapter->pdev,base + PCI_VPD_ADDR, - addr | PCI_VPD_ADDR_F); - do { - msleep(1); - pci_read_config_word(adapter->pdev, base + PCI_VPD_ADDR, &val); - } while ((val & PCI_VPD_ADDR_F) && --attempts); - - if (val & PCI_VPD_ADDR_F) { - CH_ERR(adapter, "write to EEPROM address 0x%x failed\n", addr); - return -EIO; - } - return 0; -} - -/** * t3_seeprom_wp - enable/disable EEPROM write protection * @adapter: the adapter * @enable: 1 to enable write protection, 0 to disable it @@ -678,7 +608,14 @@ int t3_seeprom_write(struct adapter *adapter, u32 addr, __le32 data) */ int t3_seeprom_wp(struct adapter *adapter, int enable) { - return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0); + u32 data = enable ? 0xc : 0; + int ret; + + /* EEPROM_STAT_ADDR is outside VPD area, use pci_write_vpd_any() */ + ret = pci_write_vpd_any(adapter->pdev, EEPROM_STAT_ADDR, sizeof(u32), + &data); + + return ret < 0 ? ret : 0; } static int vpdstrtouint(char *s, u8 len, unsigned int base, unsigned int *val) @@ -708,24 +645,22 @@ static int vpdstrtou16(char *s, u8 len, unsigned int base, u16 *val) */ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) { - int i, addr, ret; struct t3_vpd vpd; + u8 base_val = 0; + int addr, ret; /* * Card information is normally at VPD_BASE but some early cards had * it at 0. */ - ret = t3_seeprom_read(adapter, VPD_BASE, (__le32 *)&vpd); - if (ret) + ret = pci_read_vpd(adapter->pdev, VPD_BASE, 1, &base_val); + if (ret < 0) return ret; - addr = vpd.id_tag == 0x82 ? VPD_BASE : 0; + addr = base_val == PCI_VPD_LRDT_ID_STRING ? VPD_BASE : 0; - for (i = 0; i < sizeof(vpd); i += 4) { - ret = t3_seeprom_read(adapter, addr + i, - (__le32 *)((u8 *)&vpd + i)); - if (ret) - return ret; - } + ret = pci_read_vpd(adapter->pdev, addr, sizeof(vpd), &vpd); + if (ret < 0) + return ret; ret = vpdstrtouint(vpd.cclk_data, vpd.cclk_len, 10, &p->cclk); if (ret) @@ -2195,7 +2130,7 @@ static int t3_sge_write_context(struct adapter *adapter, unsigned int id, /** * clear_sge_ctxt - completely clear an SGE context - * @adapter: the adapter + * @adap: the adapter * @id: the context id * @type: the context type * @@ -2484,6 +2419,7 @@ int t3_sge_disable_cqcntxt(struct adapter *adapter, unsigned int id) * @adapter: the adapter * @id: the context id * @op: the operation to perform + * @credits: credit value to write * * Perform the selected operation on an SGE completion queue context. 
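[Editor's note] Dropping the hand-rolled t3_seeprom_read()/t3_seeprom_write() in favor of pci_read_vpd() and pci_write_vpd_any() moves the PCI_VPD_ADDR/PCI_VPD_DATA polling loop into the PCI core, which already serializes VPD access and handles the timeouts. Reading the whole VPD structure then shrinks to a single call; a sketch assuming the driver's struct t3_vpd layout:

	struct t3_vpd vpd;
	ssize_t ret;

	/* the PCI core polls PCI_VPD_ADDR_F and times out internally */
	ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(vpd), &vpd);
	if (ret < 0)
		return ret;	/* ret is bytes read on success */

pci_write_vpd_any() is used for the write-protect word because EEPROM_STAT_ADDR sits outside the VPD area the core normally exposes, as the new comment in t3_seeprom_wp() notes.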
* The caller is responsible for ensuring only one context operation @@ -2885,7 +2821,7 @@ static void init_cong_ctrl(unsigned short *a, unsigned short *b) * t3_load_mtus - write the MTU and congestion control HW tables * @adap: the adapter * @mtus: the unrestricted values for the MTU table - * @alphs: the values for the congestion control alpha parameter + * @alpha: the values for the congestion control alpha parameter * @beta: the values for the congestion control beta parameter * @mtu_cap: the maximum permitted effective MTU * @@ -2966,7 +2902,7 @@ static void ulp_config(struct adapter *adap, const struct tp_params *p) /** * t3_set_proto_sram - set the contents of the protocol sram - * @adapter: the adapter + * @adap: the adapter * @data: the protocol image * * Write the contents of the protocol SRAM. @@ -3483,7 +3419,7 @@ static void get_pci_mode(struct adapter *adapter, struct pci_params *p) /** * init_link_config - initialize a link's SW state * @lc: structure holding the link state - * @ai: information about the current card + * @caps: information about the current card * * Initializes the SW state maintained for each link, including the link's * capabilities and default speed/duplex/flow-control/autonegotiation @@ -3677,6 +3613,8 @@ int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, MAC_STATS_ACCUM_SECS : (MAC_STATS_ACCUM_SECS * 10); adapter->params.pci.vpd_cap_addr = pci_find_capability(adapter->pdev, PCI_CAP_ID_VPD); + if (!adapter->params.pci.vpd_cap_addr) + return -ENODEV; ret = get_vpd_params(adapter, &adapter->params.vpd); if (ret < 0) return ret; @@ -3757,8 +3695,7 @@ int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, memcpy(hw_addr, adapter->params.vpd.eth_base, 5); hw_addr[5] = adapter->params.vpd.eth_base[5] + i; - memcpy(adapter->port[i]->dev_addr, hw_addr, - ETH_ALEN); + eth_hw_addr_set(adapter->port[i], hw_addr); init_link_config(&p->link_config, p->phy.caps); p->phy.ops->power_down(&p->phy, 1); diff --git a/drivers/net/ethernet/chelsio/cxgb3/version.h b/drivers/net/ethernet/chelsio/cxgb3/version.h index 165bfb91487a..b4b2547efc86 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/version.h +++ b/drivers/net/ethernet/chelsio/cxgb3/version.h @@ -34,8 +34,6 @@ #define __CHELSIO_VERSION_H #define DRV_DESC "Chelsio T3 Network Driver" #define DRV_NAME "cxgb3" -/* Driver version */ -#define DRV_VERSION "1.1.5-ko" /* Firmware version */ #define FW_VERSION_MAJOR 7 diff --git a/drivers/net/ethernet/chelsio/cxgb3/xgmac.c b/drivers/net/ethernet/chelsio/cxgb3/xgmac.c index 3af19a550372..1bdc6cad1e49 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/xgmac.c +++ b/drivers/net/ethernet/chelsio/cxgb3/xgmac.c @@ -240,7 +240,7 @@ static void set_addr_filter(struct cmac *mac, int idx, const u8 * addr) } /* Set one of the station's unicast MAC addresses. 
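[Editor's note] The eth_hw_addr_set() conversion in t3_prep_adapter() above (and in cxgb_set_mac_addr() earlier) is needed because netdev->dev_addr is becoming const: drivers may no longer memcpy() into it and must go through a helper so the core can track address changes. The per-port derivation keeps working on a local buffer, roughly:

	u8 hw_addr[ETH_ALEN];

	/* derive each port's MAC from the adapter's base address */
	memcpy(hw_addr, adapter->params.vpd.eth_base, 5);
	hw_addr[5] = adapter->params.vpd.eth_base[5] + i;
	eth_hw_addr_set(adapter->port[i], hw_addr);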
*/ -int t3_mac_set_address(struct cmac *mac, unsigned int idx, u8 addr[6]) +int t3_mac_set_address(struct cmac *mac, unsigned int idx, const u8 addr[6]) { if (idx >= mac->nucast) return -EINVAL; diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index ce28820c57c9..163efab27e9b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -106,8 +106,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) if (!list_empty(&ctbl->ce_free_head)) { ce = list_first_entry(&ctbl->ce_free_head, struct clip_entry, list); - list_del(&ce->list); - INIT_LIST_HEAD(&ce->list); + list_del_init(&ce->list); spin_lock_init(&ce->lock); refcount_set(&ce->refcnt, 0); atomic_dec(&ctbl->nfree); @@ -179,8 +178,7 @@ found: write_lock_bh(&ctbl->lock); spin_lock_bh(&ce->lock); if (refcount_dec_and_test(&ce->refcnt)) { - list_del(&ce->list); - INIT_LIST_HEAD(&ce->list); + list_del_init(&ce->list); list_add_tail(&ce->list, &ctbl->ce_free_head); atomic_inc(&ctbl->nfree); if (v6) @@ -323,8 +321,7 @@ void t4_cleanup_clip_tbl(struct adapter *adap) struct clip_tbl *ctbl = adap->clipt; if (ctbl) { - if (ctbl->cl_list) - kvfree(ctbl->cl_list); + kvfree(ctbl->cl_list); kvfree(ctbl); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h index a0e0ae19649f..290c1058069a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h @@ -29,7 +29,7 @@ struct clip_tbl { atomic_t nfree; struct list_head ce_free_head; void *cl_list; - struct list_head hash_list[0]; + struct list_head hash_list[]; }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index f5be3ee1bdb4..d5218e74284c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -82,7 +82,7 @@ struct cudbg_ulprx_la { struct cudbg_tp_la { u32 size; u32 mode; - u8 data[0]; + u8 data[]; }; static const char * const cudbg_region[] = { @@ -134,7 +134,7 @@ struct cudbg_meminfo { struct cudbg_cim_pif_la { int size; - u8 data[0]; + u8 data[]; }; struct cudbg_clk_info { @@ -220,9 +220,6 @@ struct cudbg_mps_tcam { u8 reserved[2]; }; -#define CUDBG_VPD_PF_SIZE 0x800 -#define CUDBG_SCFG_VER_ADDR 0x06 -#define CUDBG_SCFG_VER_LEN 4 #define CUDBG_VPD_VER_ADDR 0x18c7 #define CUDBG_VPD_VER_LEN 2 @@ -339,178 +336,17 @@ struct cudbg_qdesc_entry { u32 qid; u32 desc_size; u32 num_desc; - u8 data[0]; /* Must be last */ + u8 data[]; /* Must be last */ }; struct cudbg_qdesc_info { u32 qdesc_entry_size; u32 num_queues; - u8 data[0]; /* Must be last */ + u8 data[]; /* Must be last */ }; #define IREG_NUM_ELEM 4 -static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = { - {0x7e40, 0x7e44, 0x020, 28}, /* t6_tp_pio_regs_20_to_3b */ - {0x7e40, 0x7e44, 0x040, 10}, /* t6_tp_pio_regs_40_to_49 */ - {0x7e40, 0x7e44, 0x050, 10}, /* t6_tp_pio_regs_50_to_59 */ - {0x7e40, 0x7e44, 0x060, 14}, /* t6_tp_pio_regs_60_to_6d */ - {0x7e40, 0x7e44, 0x06F, 1}, /* t6_tp_pio_regs_6f */ - {0x7e40, 0x7e44, 0x070, 6}, /* t6_tp_pio_regs_70_to_75 */ - {0x7e40, 0x7e44, 0x130, 18}, /* t6_tp_pio_regs_130_to_141 */ - {0x7e40, 0x7e44, 0x145, 19}, /* t6_tp_pio_regs_145_to_157 */ - {0x7e40, 0x7e44, 0x160, 1}, /* t6_tp_pio_regs_160 */ - {0x7e40, 0x7e44, 0x230, 25}, /* t6_tp_pio_regs_230_to_248 */ - {0x7e40, 0x7e44, 0x24a, 3}, /* t6_tp_pio_regs_24c */ - {0x7e40, 0x7e44, 0x8C0, 1} 
/* t6_tp_pio_regs_8c0 */ -}; - -static const u32 t5_tp_pio_array[][IREG_NUM_ELEM] = { - {0x7e40, 0x7e44, 0x020, 28}, /* t5_tp_pio_regs_20_to_3b */ - {0x7e40, 0x7e44, 0x040, 19}, /* t5_tp_pio_regs_40_to_52 */ - {0x7e40, 0x7e44, 0x054, 2}, /* t5_tp_pio_regs_54_to_55 */ - {0x7e40, 0x7e44, 0x060, 13}, /* t5_tp_pio_regs_60_to_6c */ - {0x7e40, 0x7e44, 0x06F, 1}, /* t5_tp_pio_regs_6f */ - {0x7e40, 0x7e44, 0x120, 4}, /* t5_tp_pio_regs_120_to_123 */ - {0x7e40, 0x7e44, 0x12b, 2}, /* t5_tp_pio_regs_12b_to_12c */ - {0x7e40, 0x7e44, 0x12f, 21}, /* t5_tp_pio_regs_12f_to_143 */ - {0x7e40, 0x7e44, 0x145, 19}, /* t5_tp_pio_regs_145_to_157 */ - {0x7e40, 0x7e44, 0x230, 25}, /* t5_tp_pio_regs_230_to_248 */ - {0x7e40, 0x7e44, 0x8C0, 1} /* t5_tp_pio_regs_8c0 */ -}; - -static const u32 t6_tp_tm_pio_array[][IREG_NUM_ELEM] = { - {0x7e18, 0x7e1c, 0x0, 12} -}; - -static const u32 t5_tp_tm_pio_array[][IREG_NUM_ELEM] = { - {0x7e18, 0x7e1c, 0x0, 12} -}; - -static const u32 t6_tp_mib_index_array[6][IREG_NUM_ELEM] = { - {0x7e50, 0x7e54, 0x0, 13}, - {0x7e50, 0x7e54, 0x10, 6}, - {0x7e50, 0x7e54, 0x18, 21}, - {0x7e50, 0x7e54, 0x30, 32}, - {0x7e50, 0x7e54, 0x50, 22}, - {0x7e50, 0x7e54, 0x68, 12} -}; - -static const u32 t5_tp_mib_index_array[9][IREG_NUM_ELEM] = { - {0x7e50, 0x7e54, 0x0, 13}, - {0x7e50, 0x7e54, 0x10, 6}, - {0x7e50, 0x7e54, 0x18, 8}, - {0x7e50, 0x7e54, 0x20, 13}, - {0x7e50, 0x7e54, 0x30, 16}, - {0x7e50, 0x7e54, 0x40, 16}, - {0x7e50, 0x7e54, 0x50, 16}, - {0x7e50, 0x7e54, 0x60, 6}, - {0x7e50, 0x7e54, 0x68, 4} -}; - -static const u32 t5_sge_dbg_index_array[2][IREG_NUM_ELEM] = { - {0x10cc, 0x10d0, 0x0, 16}, - {0x10cc, 0x10d4, 0x0, 16}, -}; - -static const u32 t6_sge_qbase_index_array[] = { - /* 1 addr reg SGE_QBASE_INDEX and 4 data reg SGE_QBASE_MAP[0-3] */ - 0x1250, 0x1240, 0x1244, 0x1248, 0x124c, -}; - -static const u32 t5_pcie_pdbg_array[][IREG_NUM_ELEM] = { - {0x5a04, 0x5a0c, 0x00, 0x20}, /* t5_pcie_pdbg_regs_00_to_20 */ - {0x5a04, 0x5a0c, 0x21, 0x20}, /* t5_pcie_pdbg_regs_21_to_40 */ - {0x5a04, 0x5a0c, 0x41, 0x10}, /* t5_pcie_pdbg_regs_41_to_50 */ -}; - -static const u32 t5_pcie_cdbg_array[][IREG_NUM_ELEM] = { - {0x5a10, 0x5a18, 0x00, 0x20}, /* t5_pcie_cdbg_regs_00_to_20 */ - {0x5a10, 0x5a18, 0x21, 0x18}, /* t5_pcie_cdbg_regs_21_to_37 */ -}; - -static const u32 t5_pm_rx_array[][IREG_NUM_ELEM] = { - {0x8FD0, 0x8FD4, 0x10000, 0x20}, /* t5_pm_rx_regs_10000_to_10020 */ - {0x8FD0, 0x8FD4, 0x10021, 0x0D}, /* t5_pm_rx_regs_10021_to_1002c */ -}; - -static const u32 t5_pm_tx_array[][IREG_NUM_ELEM] = { - {0x8FF0, 0x8FF4, 0x10000, 0x20}, /* t5_pm_tx_regs_10000_to_10020 */ - {0x8FF0, 0x8FF4, 0x10021, 0x1D}, /* t5_pm_tx_regs_10021_to_1003c */ -}; - #define CUDBG_NUM_PCIE_CONFIG_REGS 0x61 -static const u32 t5_pcie_config_array[][2] = { - {0x0, 0x34}, - {0x3c, 0x40}, - {0x50, 0x64}, - {0x70, 0x80}, - {0x94, 0xa0}, - {0xb0, 0xb8}, - {0xd0, 0xd4}, - {0x100, 0x128}, - {0x140, 0x148}, - {0x150, 0x164}, - {0x170, 0x178}, - {0x180, 0x194}, - {0x1a0, 0x1b8}, - {0x1c0, 0x208}, -}; - -static const u32 t6_ma_ireg_array[][IREG_NUM_ELEM] = { - {0x78f8, 0x78fc, 0xa000, 23}, /* t6_ma_regs_a000_to_a016 */ - {0x78f8, 0x78fc, 0xa400, 30}, /* t6_ma_regs_a400_to_a41e */ - {0x78f8, 0x78fc, 0xa800, 20} /* t6_ma_regs_a800_to_a813 */ -}; - -static const u32 t6_ma_ireg_array2[][IREG_NUM_ELEM] = { - {0x78f8, 0x78fc, 0xe400, 17}, /* t6_ma_regs_e400_to_e600 */ - {0x78f8, 0x78fc, 0xe640, 13} /* t6_ma_regs_e640_to_e7c0 */ -}; - -static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { - {0x7b50, 0x7b54, 0x2000, 0x20, 0}, /* up_cim_2000_to_207c */ - 
{0x7b50, 0x7b54, 0x2080, 0x1d, 0}, /* up_cim_2080_to_20fc */ - {0x7b50, 0x7b54, 0x00, 0x20, 0}, /* up_cim_00_to_7c */ - {0x7b50, 0x7b54, 0x80, 0x20, 0}, /* up_cim_80_to_fc */ - {0x7b50, 0x7b54, 0x100, 0x11, 0}, /* up_cim_100_to_14c */ - {0x7b50, 0x7b54, 0x200, 0x10, 0}, /* up_cim_200_to_23c */ - {0x7b50, 0x7b54, 0x240, 0x2, 0}, /* up_cim_240_to_244 */ - {0x7b50, 0x7b54, 0x250, 0x2, 0}, /* up_cim_250_to_254 */ - {0x7b50, 0x7b54, 0x260, 0x2, 0}, /* up_cim_260_to_264 */ - {0x7b50, 0x7b54, 0x270, 0x2, 0}, /* up_cim_270_to_274 */ - {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ - {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ - {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ - {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */ - {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */ - {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */ - {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */ - {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */ - {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */ - {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */ - {0x7b50, 0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */ - {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */ -}; - -static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { - {0x7b50, 0x7b54, 0x2000, 0x20, 0}, /* up_cim_2000_to_207c */ - {0x7b50, 0x7b54, 0x2080, 0x19, 0}, /* up_cim_2080_to_20ec */ - {0x7b50, 0x7b54, 0x00, 0x20, 0}, /* up_cim_00_to_7c */ - {0x7b50, 0x7b54, 0x80, 0x20, 0}, /* up_cim_80_to_fc */ - {0x7b50, 0x7b54, 0x100, 0x11, 0}, /* up_cim_100_to_14c */ - {0x7b50, 0x7b54, 0x200, 0x10, 0}, /* up_cim_200_to_23c */ - {0x7b50, 0x7b54, 0x240, 0x2, 0}, /* up_cim_240_to_244 */ - {0x7b50, 0x7b54, 0x250, 0x2, 0}, /* up_cim_250_to_254 */ - {0x7b50, 0x7b54, 0x260, 0x2, 0}, /* up_cim_260_to_264 */ - {0x7b50, 0x7b54, 0x270, 0x2, 0}, /* up_cim_270_to_274 */ - {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ - {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ - {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ -}; - -static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = { - {0x51320, 0x51324, 0xa000, 32} /* t6_hma_regs_a000_to_a01f */ -}; #endif /* __CUDBG_ENTITY_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index fc3813050f0d..c84719e3ca08 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -70,7 +70,8 @@ enum cudbg_dbg_entity_type { CUDBG_HMA_INDIRECT = 67, CUDBG_HMA = 68, CUDBG_QDESC = 70, - CUDBG_MAX_ENTITY = 71, + CUDBG_FLASH = 71, + CUDBG_MAX_ENTITY = 72, }; struct cudbg_init { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 19c11568113a..557c591a6ce3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -14,6 +14,413 @@ #include "cudbg_entity.h" #include "cudbg_lib.h" #include "cudbg_zlib.h" +#include "cxgb4_tc_mqprio.h" + +static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = { + {0x7e40, 0x7e44, 0x020, 28}, /* t6_tp_pio_regs_20_to_3b */ + {0x7e40, 0x7e44, 0x040, 10}, /* t6_tp_pio_regs_40_to_49 */ + {0x7e40, 0x7e44, 0x050, 10}, /* t6_tp_pio_regs_50_to_59 */ + {0x7e40, 0x7e44, 0x060, 14}, /* t6_tp_pio_regs_60_to_6d */ + {0x7e40, 0x7e44, 0x06F, 1}, /* t6_tp_pio_regs_6f */ + {0x7e40, 0x7e44, 0x070, 6}, /* t6_tp_pio_regs_70_to_75 */ + 
{0x7e40, 0x7e44, 0x130, 18}, /* t6_tp_pio_regs_130_to_141 */ + {0x7e40, 0x7e44, 0x145, 19}, /* t6_tp_pio_regs_145_to_157 */ + {0x7e40, 0x7e44, 0x160, 1}, /* t6_tp_pio_regs_160 */ + {0x7e40, 0x7e44, 0x230, 25}, /* t6_tp_pio_regs_230_to_248 */ + {0x7e40, 0x7e44, 0x24a, 3}, /* t6_tp_pio_regs_24c */ + {0x7e40, 0x7e44, 0x8C0, 1} /* t6_tp_pio_regs_8c0 */ +}; + +static const u32 t5_tp_pio_array[][IREG_NUM_ELEM] = { + {0x7e40, 0x7e44, 0x020, 28}, /* t5_tp_pio_regs_20_to_3b */ + {0x7e40, 0x7e44, 0x040, 19}, /* t5_tp_pio_regs_40_to_52 */ + {0x7e40, 0x7e44, 0x054, 2}, /* t5_tp_pio_regs_54_to_55 */ + {0x7e40, 0x7e44, 0x060, 13}, /* t5_tp_pio_regs_60_to_6c */ + {0x7e40, 0x7e44, 0x06F, 1}, /* t5_tp_pio_regs_6f */ + {0x7e40, 0x7e44, 0x120, 4}, /* t5_tp_pio_regs_120_to_123 */ + {0x7e40, 0x7e44, 0x12b, 2}, /* t5_tp_pio_regs_12b_to_12c */ + {0x7e40, 0x7e44, 0x12f, 21}, /* t5_tp_pio_regs_12f_to_143 */ + {0x7e40, 0x7e44, 0x145, 19}, /* t5_tp_pio_regs_145_to_157 */ + {0x7e40, 0x7e44, 0x230, 25}, /* t5_tp_pio_regs_230_to_248 */ + {0x7e40, 0x7e44, 0x8C0, 1} /* t5_tp_pio_regs_8c0 */ +}; + +static const u32 t6_tp_tm_pio_array[][IREG_NUM_ELEM] = { + {0x7e18, 0x7e1c, 0x0, 12} +}; + +static const u32 t5_tp_tm_pio_array[][IREG_NUM_ELEM] = { + {0x7e18, 0x7e1c, 0x0, 12} +}; + +static const u32 t6_tp_mib_index_array[6][IREG_NUM_ELEM] = { + {0x7e50, 0x7e54, 0x0, 13}, + {0x7e50, 0x7e54, 0x10, 6}, + {0x7e50, 0x7e54, 0x18, 21}, + {0x7e50, 0x7e54, 0x30, 32}, + {0x7e50, 0x7e54, 0x50, 22}, + {0x7e50, 0x7e54, 0x68, 12} +}; + +static const u32 t5_tp_mib_index_array[9][IREG_NUM_ELEM] = { + {0x7e50, 0x7e54, 0x0, 13}, + {0x7e50, 0x7e54, 0x10, 6}, + {0x7e50, 0x7e54, 0x18, 8}, + {0x7e50, 0x7e54, 0x20, 13}, + {0x7e50, 0x7e54, 0x30, 16}, + {0x7e50, 0x7e54, 0x40, 16}, + {0x7e50, 0x7e54, 0x50, 16}, + {0x7e50, 0x7e54, 0x60, 6}, + {0x7e50, 0x7e54, 0x68, 4} +}; + +static const u32 t5_sge_dbg_index_array[2][IREG_NUM_ELEM] = { + {0x10cc, 0x10d0, 0x0, 16}, + {0x10cc, 0x10d4, 0x0, 16}, +}; + +static const u32 t6_sge_qbase_index_array[] = { + /* 1 addr reg SGE_QBASE_INDEX and 4 data reg SGE_QBASE_MAP[0-3] */ + 0x1250, 0x1240, 0x1244, 0x1248, 0x124c, +}; + +static const u32 t5_pcie_pdbg_array[][IREG_NUM_ELEM] = { + {0x5a04, 0x5a0c, 0x00, 0x20}, /* t5_pcie_pdbg_regs_00_to_20 */ + {0x5a04, 0x5a0c, 0x21, 0x20}, /* t5_pcie_pdbg_regs_21_to_40 */ + {0x5a04, 0x5a0c, 0x41, 0x10}, /* t5_pcie_pdbg_regs_41_to_50 */ +}; + +static const u32 t5_pcie_cdbg_array[][IREG_NUM_ELEM] = { + {0x5a10, 0x5a18, 0x00, 0x20}, /* t5_pcie_cdbg_regs_00_to_20 */ + {0x5a10, 0x5a18, 0x21, 0x18}, /* t5_pcie_cdbg_regs_21_to_37 */ +}; + +static const u32 t5_pm_rx_array[][IREG_NUM_ELEM] = { + {0x8FD0, 0x8FD4, 0x10000, 0x20}, /* t5_pm_rx_regs_10000_to_10020 */ + {0x8FD0, 0x8FD4, 0x10021, 0x0D}, /* t5_pm_rx_regs_10021_to_1002c */ +}; + +static const u32 t5_pm_tx_array[][IREG_NUM_ELEM] = { + {0x8FF0, 0x8FF4, 0x10000, 0x20}, /* t5_pm_tx_regs_10000_to_10020 */ + {0x8FF0, 0x8FF4, 0x10021, 0x1D}, /* t5_pm_tx_regs_10021_to_1003c */ +}; + +static const u32 t5_pcie_config_array[][2] = { + {0x0, 0x34}, + {0x3c, 0x40}, + {0x50, 0x64}, + {0x70, 0x80}, + {0x94, 0xa0}, + {0xb0, 0xb8}, + {0xd0, 0xd4}, + {0x100, 0x128}, + {0x140, 0x148}, + {0x150, 0x164}, + {0x170, 0x178}, + {0x180, 0x194}, + {0x1a0, 0x1b8}, + {0x1c0, 0x208}, +}; + +static const u32 t6_ma_ireg_array[][IREG_NUM_ELEM] = { + {0x78f8, 0x78fc, 0xa000, 23}, /* t6_ma_regs_a000_to_a016 */ + {0x78f8, 0x78fc, 0xa400, 30}, /* t6_ma_regs_a400_to_a41e */ + {0x78f8, 0x78fc, 0xa800, 20} /* t6_ma_regs_a800_to_a813 */ +}; + +static const u32 
t6_ma_ireg_array2[][IREG_NUM_ELEM] = { + {0x78f8, 0x78fc, 0xe400, 17}, /* t6_ma_regs_e400_to_e600 */ + {0x78f8, 0x78fc, 0xe640, 13} /* t6_ma_regs_e640_to_e7c0 */ +}; + +static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { + {0x7b50, 0x7b54, 0x2000, 0x20, 0}, /* up_cim_2000_to_207c */ + {0x7b50, 0x7b54, 0x2080, 0x1d, 0}, /* up_cim_2080_to_20fc */ + {0x7b50, 0x7b54, 0x00, 0x20, 0}, /* up_cim_00_to_7c */ + {0x7b50, 0x7b54, 0x80, 0x20, 0}, /* up_cim_80_to_fc */ + {0x7b50, 0x7b54, 0x100, 0x11, 0}, /* up_cim_100_to_14c */ + {0x7b50, 0x7b54, 0x200, 0x10, 0}, /* up_cim_200_to_23c */ + {0x7b50, 0x7b54, 0x240, 0x2, 0}, /* up_cim_240_to_244 */ + {0x7b50, 0x7b54, 0x250, 0x2, 0}, /* up_cim_250_to_254 */ + {0x7b50, 0x7b54, 0x260, 0x2, 0}, /* up_cim_260_to_264 */ + {0x7b50, 0x7b54, 0x270, 0x2, 0}, /* up_cim_270_to_274 */ + {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ + {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ + {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ + {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */ + {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */ + {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */ + {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */ + {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */ + {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */ + {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */ + {0x7b50, 0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */ + {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */ +}; + +static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { + {0x7b50, 0x7b54, 0x2000, 0x20, 0}, /* up_cim_2000_to_207c */ + {0x7b50, 0x7b54, 0x2080, 0x19, 0}, /* up_cim_2080_to_20ec */ + {0x7b50, 0x7b54, 0x00, 0x20, 0}, /* up_cim_00_to_7c */ + {0x7b50, 0x7b54, 0x80, 0x20, 0}, /* up_cim_80_to_fc */ + {0x7b50, 0x7b54, 0x100, 0x11, 0}, /* up_cim_100_to_14c */ + {0x7b50, 0x7b54, 0x200, 0x10, 0}, /* up_cim_200_to_23c */ + {0x7b50, 0x7b54, 0x240, 0x2, 0}, /* up_cim_240_to_244 */ + {0x7b50, 0x7b54, 0x250, 0x2, 0}, /* up_cim_250_to_254 */ + {0x7b50, 0x7b54, 0x260, 0x2, 0}, /* up_cim_260_to_264 */ + {0x7b50, 0x7b54, 0x270, 0x2, 0}, /* up_cim_270_to_274 */ + {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ + {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ + {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ +}; + +static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = { + {0x51320, 0x51324, 0xa000, 32} /* t6_hma_regs_a000_to_a01f */ +}; + +u32 cudbg_get_entity_length(struct adapter *adap, u32 entity) +{ + struct cudbg_tcam tcam_region = { 0 }; + u32 value, n = 0, len = 0; + + switch (entity) { + case CUDBG_REG_DUMP: + switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { + case CHELSIO_T4: + len = T4_REGMAP_SIZE; + break; + case CHELSIO_T5: + case CHELSIO_T6: + len = T5_REGMAP_SIZE; + break; + default: + break; + } + break; + case CUDBG_DEV_LOG: + len = adap->params.devlog.size; + break; + case CUDBG_CIM_LA: + if (is_t6(adap->params.chip)) { + len = adap->params.cim_la_size / 10 + 1; + len *= 10 * sizeof(u32); + } else { + len = adap->params.cim_la_size / 8; + len *= 8 * sizeof(u32); + } + len += sizeof(u32); /* for reading CIM LA configuration */ + break; + case CUDBG_CIM_MA_LA: + len = 2 * CIM_MALA_SIZE * 5 * sizeof(u32); + break; + case CUDBG_CIM_QCFG: + len = sizeof(struct cudbg_cim_qcfg); + break; + case CUDBG_CIM_IBQ_TP0: + case CUDBG_CIM_IBQ_TP1: + case CUDBG_CIM_IBQ_ULP: + case CUDBG_CIM_IBQ_SGE0: + 
case CUDBG_CIM_IBQ_SGE1: + case CUDBG_CIM_IBQ_NCSI: + len = CIM_IBQ_SIZE * 4 * sizeof(u32); + break; + case CUDBG_CIM_OBQ_ULP0: + len = cudbg_cim_obq_size(adap, 0); + break; + case CUDBG_CIM_OBQ_ULP1: + len = cudbg_cim_obq_size(adap, 1); + break; + case CUDBG_CIM_OBQ_ULP2: + len = cudbg_cim_obq_size(adap, 2); + break; + case CUDBG_CIM_OBQ_ULP3: + len = cudbg_cim_obq_size(adap, 3); + break; + case CUDBG_CIM_OBQ_SGE: + len = cudbg_cim_obq_size(adap, 4); + break; + case CUDBG_CIM_OBQ_NCSI: + len = cudbg_cim_obq_size(adap, 5); + break; + case CUDBG_CIM_OBQ_RXQ0: + len = cudbg_cim_obq_size(adap, 6); + break; + case CUDBG_CIM_OBQ_RXQ1: + len = cudbg_cim_obq_size(adap, 7); + break; + case CUDBG_EDC0: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & EDRAM0_ENABLE_F) { + value = t4_read_reg(adap, MA_EDRAM0_BAR_A); + len = EDRAM0_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; + case CUDBG_EDC1: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & EDRAM1_ENABLE_F) { + value = t4_read_reg(adap, MA_EDRAM1_BAR_A); + len = EDRAM1_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; + case CUDBG_MC0: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & EXT_MEM0_ENABLE_F) { + value = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A); + len = EXT_MEM0_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; + case CUDBG_MC1: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & EXT_MEM1_ENABLE_F) { + value = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A); + len = EXT_MEM1_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; + case CUDBG_RSS: + len = t4_chip_rss_size(adap) * sizeof(u16); + break; + case CUDBG_RSS_VF_CONF: + len = adap->params.arch.vfcount * + sizeof(struct cudbg_rss_vf_conf); + break; + case CUDBG_PATH_MTU: + len = NMTUS * sizeof(u16); + break; + case CUDBG_PM_STATS: + len = sizeof(struct cudbg_pm_stats); + break; + case CUDBG_HW_SCHED: + len = sizeof(struct cudbg_hw_sched); + break; + case CUDBG_TP_INDIRECT: + switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { + case CHELSIO_T5: + n = sizeof(t5_tp_pio_array) + + sizeof(t5_tp_tm_pio_array) + + sizeof(t5_tp_mib_index_array); + break; + case CHELSIO_T6: + n = sizeof(t6_tp_pio_array) + + sizeof(t6_tp_tm_pio_array) + + sizeof(t6_tp_mib_index_array); + break; + default: + break; + } + n = n / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + break; + case CUDBG_SGE_INDIRECT: + len = sizeof(struct ireg_buf) * 2 + + sizeof(struct sge_qbase_reg_field); + break; + case CUDBG_ULPRX_LA: + len = sizeof(struct cudbg_ulprx_la); + break; + case CUDBG_TP_LA: + len = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64); + break; + case CUDBG_MEMINFO: + len = sizeof(struct cudbg_ver_hdr) + + sizeof(struct cudbg_meminfo); + break; + case CUDBG_CIM_PIF_LA: + len = sizeof(struct cudbg_cim_pif_la); + len += 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32); + break; + case CUDBG_CLK: + len = sizeof(struct cudbg_clk_info); + break; + case CUDBG_PCIE_INDIRECT: + n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + break; + case CUDBG_PM_INDIRECT: + n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + break; + case CUDBG_TID_INFO: + len = sizeof(struct cudbg_tid_info_region_rev1); + break; + case CUDBG_PCIE_CONFIG: + len = sizeof(u32) * CUDBG_NUM_PCIE_CONFIG_REGS; + break; + case CUDBG_DUMP_CONTEXT: + len = cudbg_dump_context_size(adap); + break; + case CUDBG_MPS_TCAM: + 
len = sizeof(struct cudbg_mps_tcam) * + adap->params.arch.mps_tcam_size; + break; + case CUDBG_VPD_DATA: + len = sizeof(struct cudbg_vpd_data); + break; + case CUDBG_LE_TCAM: + cudbg_fill_le_tcam_info(adap, &tcam_region); + len = sizeof(struct cudbg_tcam) + + sizeof(struct cudbg_tid_data) * tcam_region.max_tid; + break; + case CUDBG_CCTRL: + len = sizeof(u16) * NMTUS * NCCTRL_WIN; + break; + case CUDBG_MA_INDIRECT: + if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + n = sizeof(t6_ma_ireg_array) / + (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + } + break; + case CUDBG_ULPTX_LA: + len = sizeof(struct cudbg_ver_hdr) + + sizeof(struct cudbg_ulptx_la); + break; + case CUDBG_UP_CIM_INDIRECT: + n = 0; + if (is_t5(adap->params.chip)) + n = sizeof(t5_up_cim_reg_array) / + ((IREG_NUM_ELEM + 1) * sizeof(u32)); + else if (is_t6(adap->params.chip)) + n = sizeof(t6_up_cim_reg_array) / + ((IREG_NUM_ELEM + 1) * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + break; + case CUDBG_PBT_TABLE: + len = sizeof(struct cudbg_pbt_tables); + break; + case CUDBG_MBOX_LOG: + len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size; + break; + case CUDBG_HMA_INDIRECT: + if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + n = sizeof(t6_hma_ireg_array) / + (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + } + break; + case CUDBG_HMA: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & HMA_MUX_F) { + /* In T6, there's no MC1. So, HMA shares MC1 + * address space. + */ + value = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A); + len = EXT_MEM1_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; + case CUDBG_QDESC: + cudbg_fill_qdesc_num_and_size(adap, NULL, &len); + break; + default: + break; + } + + return len; +} static int cudbg_do_compression(struct cudbg_init *pdbg_init, struct cudbg_buffer *pin_buff, @@ -1049,9 +1456,9 @@ static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init, } } -static unsigned long cudbg_mem_region_size(struct cudbg_init *pdbg_init, - struct cudbg_error *cudbg_err, - u8 mem_type) +static int cudbg_mem_region_size(struct cudbg_init *pdbg_init, + struct cudbg_error *cudbg_err, + u8 mem_type, unsigned long *region_size) { struct adapter *padap = pdbg_init->adap; struct cudbg_meminfo mem_info; @@ -1060,15 +1467,23 @@ static unsigned long cudbg_mem_region_size(struct cudbg_init *pdbg_init, memset(&mem_info, 0, sizeof(struct cudbg_meminfo)); rc = cudbg_fill_meminfo(padap, &mem_info); - if (rc) + if (rc) { + cudbg_err->sys_err = rc; return rc; + } cudbg_t4_fwcache(pdbg_init, cudbg_err); rc = cudbg_meminfo_get_mem_index(padap, &mem_info, mem_type, &mc_idx); - if (rc) + if (rc) { + cudbg_err->sys_err = rc; return rc; + } + + if (region_size) + *region_size = mem_info.avail[mc_idx].limit - + mem_info.avail[mc_idx].base; - return mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base; + return 0; } static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, @@ -1076,7 +1491,12 @@ static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, struct cudbg_error *cudbg_err, u8 mem_type) { - unsigned long size = cudbg_mem_region_size(pdbg_init, cudbg_err, mem_type); + unsigned long size = 0; + int rc; + + rc = cudbg_mem_region_size(pdbg_init, cudbg_err, mem_type, &size); + if (rc) + return rc; return cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, size, cudbg_err); @@ -1375,11 +1795,25 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer temp_buff = { 0 }; struct 
sge_qbase_reg_field *sge_qbase; struct ireg_buf *ch_sge_dbg; + u8 padap_running = 0; int i, rc; + u32 size; - rc = cudbg_get_buff(pdbg_init, dbg_buff, - sizeof(*ch_sge_dbg) * 2 + sizeof(*sge_qbase), - &temp_buff); + /* Accessing SGE_QBASE_MAP[0-3] and SGE_QBASE_INDEX regs can + * lead to SGE missing doorbells under heavy traffic. So, only + * collect them when adapter is idle. + */ + for_each_port(padap, i) { + padap_running = netif_running(padap->port[i]); + if (padap_running) + break; + } + + size = sizeof(*ch_sge_dbg) * 2; + if (!padap_running) + size += sizeof(*sge_qbase); + + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1401,7 +1835,8 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, ch_sge_dbg++; } - if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) { + if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5 && + !padap_running) { sge_qbase = (struct sge_qbase_reg_field *)ch_sge_dbg; /* 1 addr reg SGE_QBASE_INDEX and 4 data reg * SGE_QBASE_MAP[0-3] @@ -1962,7 +2397,6 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, u8 mem_type[CTXT_INGRESS + 1] = { 0 }; struct cudbg_buffer temp_buff = { 0 }; struct cudbg_ch_cntxt *buff; - u64 *dst_off, *src_off; u8 *ctx_buf; u8 i, k; int rc; @@ -2031,8 +2465,11 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, } for (j = 0; j < max_ctx_qid; j++) { + __be64 *dst_off; + u64 *src_off; + src_off = (u64 *)(ctx_buf + j * SGE_CTXT_SIZE); - dst_off = (u64 *)buff->data; + dst_off = (__be64 *)buff->data; /* The data is stored in 64-bit cpu order. Convert it * to big endian before parsing. @@ -2265,10 +2702,10 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, struct adapter *padap = pdbg_init->adap; struct cudbg_buffer temp_buff = { 0 }; char vpd_str[CUDBG_VPD_VER_LEN + 1]; - u32 scfg_vers, vpd_vers, fw_vers; struct cudbg_vpd_data *vpd_data; struct vpd_params vpd = { 0 }; - int rc, ret; + u32 vpd_vers, fw_vers; + int rc; rc = t4_get_raw_vpd_params(padap, &vpd); if (rc) @@ -2278,24 +2715,6 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, if (rc) return rc; - /* Serial Configuration Version is located beyond the PF's vpd size. - * Temporarily give access to entire EEPROM to get it. 
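[Editor's note] The cudbg_mem_region_size() rework above is a common hardening refactor: a function that returned an unsigned long size now returns 0 or a negative errno and passes the size through an out-parameter, so an error can no longer be misread by the caller as an enormous region size. The general form, with hypothetical names:

	static int region_size(struct adapter *adap, unsigned long *size)
	{
		struct region_info info;
		int rc;

		rc = query_region(adap, &info);	/* hypothetical HW query */
		if (rc)
			return rc;	/* caller sees the error, not a bogus size */

		if (size)
			*size = info.limit - info.base;
		return 0;
	}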
- */ - rc = pci_set_vpd_size(padap->pdev, EEPROMVSIZE); - if (rc < 0) - return rc; - - ret = cudbg_read_vpd_reg(padap, CUDBG_SCFG_VER_ADDR, CUDBG_SCFG_VER_LEN, - &scfg_vers); - - /* Restore back to original PF's vpd size */ - rc = pci_set_vpd_size(padap->pdev, CUDBG_VPD_PF_SIZE); - if (rc < 0) - return rc; - - if (ret) - return ret; - rc = cudbg_read_vpd_reg(padap, CUDBG_VPD_VER_ADDR, CUDBG_VPD_VER_LEN, vpd_str); if (rc) @@ -2316,7 +2735,7 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, memcpy(vpd_data->bn, vpd.pn, PN_LEN + 1); memcpy(vpd_data->na, vpd.na, MACADDR_LEN + 1); memcpy(vpd_data->mn, vpd.id, ID_LEN + 1); - vpd_data->scfg_vers = scfg_vers; + vpd_data->scfg_vers = t4_read_reg(padap, PCIE_STATIC_SPARE2_A); vpd_data->vpd_vers = vpd_vers; vpd_data->fw_major = FW_HDR_FW_VER_MAJOR_G(fw_vers); vpd_data->fw_minor = FW_HDR_FW_VER_MINOR_G(fw_vers); @@ -3040,7 +3459,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, for (i = 0; i < utxq->ntxq; i++) QDESC_GET_TXQ(&utxq->uldtxq[i].q, cudbg_uld_txq_to_qtype(j), - out_unlock); + out_unlock_uld); } } @@ -3057,7 +3476,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, for (i = 0; i < urxq->nrxq; i++) QDESC_GET_RXQ(&urxq->uldrxq[i].rspq, cudbg_uld_rxq_to_qtype(j), - out_unlock); + out_unlock_uld); } /* ULD FLQ */ @@ -3069,7 +3488,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, for (i = 0; i < urxq->nrxq; i++) QDESC_GET_FLQ(&urxq->uldrxq[i].fl, cudbg_uld_flq_to_qtype(j), - out_unlock); + out_unlock_uld); } /* ULD CIQ */ @@ -3082,29 +3501,34 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, for (i = 0; i < urxq->nciq; i++) QDESC_GET_RXQ(&urxq->uldrxq[base + i].rspq, cudbg_uld_ciq_to_qtype(j), - out_unlock); + out_unlock_uld); } } + mutex_unlock(&uld_mutex); + + if (!padap->tc_mqprio) + goto out; + mutex_lock(&padap->tc_mqprio->mqprio_mutex); /* ETHOFLD TXQ */ if (s->eohw_txq) for (i = 0; i < s->eoqsets; i++) QDESC_GET_TXQ(&s->eohw_txq[i].q, - CUDBG_QTYPE_ETHOFLD_TXQ, out); + CUDBG_QTYPE_ETHOFLD_TXQ, out_unlock_mqprio); /* ETHOFLD RXQ and FLQ */ if (s->eohw_rxq) { for (i = 0; i < s->eoqsets; i++) QDESC_GET_RXQ(&s->eohw_rxq[i].rspq, - CUDBG_QTYPE_ETHOFLD_RXQ, out); + CUDBG_QTYPE_ETHOFLD_RXQ, out_unlock_mqprio); for (i = 0; i < s->eoqsets; i++) QDESC_GET_FLQ(&s->eohw_rxq[i].fl, - CUDBG_QTYPE_ETHOFLD_FLQ, out); + CUDBG_QTYPE_ETHOFLD_FLQ, out_unlock_mqprio); } -out_unlock: - mutex_unlock(&uld_mutex); +out_unlock_mqprio: + mutex_unlock(&padap->tc_mqprio->mqprio_mutex); out: qdesc_info->qdesc_entry_size = sizeof(*qdesc_entry); @@ -3133,8 +3557,7 @@ out: } out_free: - if (data) - kvfree(data); + kvfree(data); #undef QDESC_GET_FLQ #undef QDESC_GET_RXQ @@ -3142,4 +3565,45 @@ out_free: #undef QDESC_GET return rc; + +out_unlock_uld: + mutex_unlock(&uld_mutex); + goto out; +} + +int cudbg_collect_flash(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + u32 count = padap->params.sf_size, n; + struct cudbg_buffer temp_buff = {0}; + u32 addr, i; + int rc; + + addr = FLASH_EXP_ROM_START; + + for (i = 0; i < count; i += SF_PAGE_SIZE) { + n = min_t(u32, count - i, SF_PAGE_SIZE); + + rc = cudbg_get_buff(pdbg_init, dbg_buff, n, &temp_buff); + if (rc) { + cudbg_err->sys_warn = CUDBG_STATUS_PARTIAL_DATA; + goto out; + } + rc = t4_read_flash(padap, addr, n, (u32 *)temp_buff.data, 0); + if (rc) + goto out; + + addr += (n * 4); + rc = cudbg_write_and_release_buff(pdbg_init, &temp_buff, + dbg_buff); + if (rc) { + cudbg_err->sys_warn = 
CUDBG_STATUS_PARTIAL_DATA; + goto out; + } + } + +out: + return rc; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h index 10ee6ed1d932..d6d6cd298930 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -162,7 +162,11 @@ int cudbg_collect_hma_meminfo(struct cudbg_init *pdbg_init, int cudbg_collect_qdesc(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); +int cudbg_collect_flash(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +u32 cudbg_get_entity_length(struct adapter *adap, u32 entity); struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, struct cudbg_entity_hdr *entity_hdr); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 8b7d156f79d3..5657ac8cfca0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -39,6 +39,7 @@ #include <linux/bitops.h> #include <linux/cache.h> +#include <linux/ethtool.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/netdevice.h> @@ -60,6 +61,7 @@ #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) extern struct list_head adapter_list; +extern struct list_head uld_list; extern struct mutex uld_mutex; /* Suspend an Ethernet Tx queue with fewer available descriptors than this. @@ -82,7 +84,6 @@ extern struct mutex uld_mutex; enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ - EC_LEN = 16, /* E/C length */ ID_LEN = 16, /* ID length */ PN_LEN = 16, /* Part Number length */ MACADDR_LEN = 12, /* MAC Address length */ @@ -138,6 +139,69 @@ enum cc_fec { FEC_BASER_RS = 1 << 2 /* BaseR/Reed-Solomon */ }; +enum { + CXGB4_ETHTOOL_FLASH_FW = 1, + CXGB4_ETHTOOL_FLASH_PHY = 2, + CXGB4_ETHTOOL_FLASH_BOOT = 3, + CXGB4_ETHTOOL_FLASH_BOOTCFG = 4 +}; + +enum cxgb4_netdev_tls_ops { + CXGB4_TLSDEV_OPS = 1, + CXGB4_XFRMDEV_OPS +}; + +struct cxgb4_bootcfg_data { + __le16 signature; + __u8 reserved[2]; +}; + +struct cxgb4_pcir_data { + __le32 signature; /* Signature. The string "PCIR" */ + __le16 vendor_id; /* Vendor Identification */ + __le16 device_id; /* Device Identification */ + __u8 vital_product[2]; /* Pointer to Vital Product Data */ + __u8 length[2]; /* PCIR Data Structure Length */ + __u8 revision; /* PCIR Data Structure Revision */ + __u8 class_code[3]; /* Class Code */ + __u8 image_length[2]; /* Image Length. Multiple of 512B */ + __u8 code_revision[2]; /* Revision Level of Code/Data */ + __u8 code_type; + __u8 indicator; + __u8 reserved[2]; +}; + +/* BIOS boot headers */ +struct cxgb4_pci_exp_rom_header { + __le16 signature; /* ROM Signature. Should be 0xaa55 */ + __u8 reserved[22]; /* Reserved per processor Architecture data */ + __le16 pcir_offset; /* Offset to PCI Data Structure */ +}; + +/* Legacy PCI Expansion ROM Header */ +struct legacy_pci_rom_hdr { + __u8 signature[2]; /* ROM Signature. 
Should be 0xaa55 */ + __u8 size512; /* Current Image Size in units of 512 bytes */ + __u8 initentry_point[4]; + __u8 cksum; /* Checksum computed on the entire Image */ + __u8 reserved[16]; /* Reserved */ + __le16 pcir_offset; /* Offset to PCI Data Structure */ +}; + +#define CXGB4_HDR_CODE1 0x00 +#define CXGB4_HDR_CODE2 0x03 +#define CXGB4_HDR_INDI 0x80 + +/* BOOT constants */ +enum { + BOOT_CFG_SIG = 0x4243, + BOOT_SIZE_INC = 512, + BOOT_SIGNATURE = 0xaa55, + BOOT_MIN_SIZE = sizeof(struct cxgb4_pci_exp_rom_header), + BOOT_MAX_SIZE = 1024 * BOOT_SIZE_INC, + PCIR_SIGNATURE = 0x52494350 +}; + struct port_stats { u64 tx_octets; /* total # of octets in good frames */ u64 tx_frames; /* all good frames */ @@ -326,7 +390,6 @@ struct tp_params { struct vpd_params { unsigned int cclk; - u8 ec[EC_LEN + 1]; u8 sn[SERNUM_LEN + 1]; u8 id[ID_LEN + 1]; u8 pn[PN_LEN + 1]; @@ -349,7 +412,6 @@ struct pf_resources { }; struct pci_params { - unsigned int vpd_cap_addr; unsigned char speed; unsigned char width; }; @@ -466,8 +528,6 @@ static inline struct mbox_cmd *mbox_cmd_log_entry(struct mbox_cmd_log *log, return &((struct mbox_cmd *)&(log)[1])[entry_idx]; } -#include "t4fw_api.h" - #define FW_VERSION(chip) ( \ FW_HDR_FW_VER_MAJOR_G(chip##FW_VERSION_MAJOR) | \ FW_HDR_FW_VER_MINOR_G(chip##FW_VERSION_MINOR) | \ @@ -475,6 +535,12 @@ static inline struct mbox_cmd *mbox_cmd_log_entry(struct mbox_cmd_log *log, FW_HDR_FW_VER_BUILD_G(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (FW_HDR_INTFVER_##intf) +struct cxgb4_ethtool_lb_test { + struct completion completion; + int result; + int loopback; +}; + struct fw_info { u8 chip; char *fs_name; @@ -493,6 +559,11 @@ struct trace_params { unsigned char port; }; +struct cxgb4_fw_data { + __be32 signature; + __u8 reserved[4]; +}; + /* Firmware Port Capabilities types.
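(Editorial aside: the boot-image structures and BOOT_* constants added earlier in this header describe a chain of PCI expansion ROM images — each legacy header points at a PCIR block whose indicator bit marks the final image. A sketch of the walk, mirroring cxgb4_validate_boot_image() later in this patch; the helper name is invented:)

/* Illustration only: total length of a well-formed expansion ROM chain. */
static u32 cxgb4_boot_rom_len(const u8 *data)
{
	const struct legacy_pci_rom_hdr *hdr;
	const struct cxgb4_pcir_data *pcir;
	const u8 *cur = data;
	u32 len = 0;

	do {
		hdr = (const struct legacy_pci_rom_hdr *)cur;
		pcir = (const struct cxgb4_pcir_data *)
			(cur + le16_to_cpu(hdr->pcir_offset));
		len += hdr->size512 * 512;	/* image size in 512B units */
		cur += hdr->size512 * 512;
	} while (!(pcir->indicator & CXGB4_HDR_INDI));	/* last-image flag */

	return len;
}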
*/ typedef u16 fw_port_cap16_t; /* 16-bit Port Capabilities integral value */ @@ -617,6 +688,13 @@ struct port_info { u8 rx_cchan; bool tc_block_shared; + + /* Mirror VI information */ + u16 viid_mirror; + u16 nmirrorqsets; + u32 vi_mirror_count; + struct mutex vi_mirror_mutex; /* Sync access to Mirror VI info */ + struct cxgb4_ethtool_lb_test ethtool_lb; }; struct dentry; @@ -640,8 +718,16 @@ enum { /* adapter flags */ enum { ULP_CRYPTO_LOOKASIDE = 1 << 0, ULP_CRYPTO_IPSEC_INLINE = 1 << 1, + ULP_CRYPTO_KTLS_INLINE = 1 << 3, }; +#define CXGB4_MIRROR_RXQ_DEFAULT_DESC_NUM 1024 +#define CXGB4_MIRROR_RXQ_DEFAULT_DESC_SIZE 64 +#define CXGB4_MIRROR_RXQ_DEFAULT_INTR_USEC 5 +#define CXGB4_MIRROR_RXQ_DEFAULT_PKT_CNT 8 + +#define CXGB4_MIRROR_FLQ_DEFAULT_DESC_NUM 72 + struct rx_sw_desc; struct sge_fl { /* SGE free-buffer queue state */ @@ -823,6 +909,13 @@ struct sge_uld_txq_info { u16 ntxq; /* # of egress uld queues */ }; +/* struct to maintain ULD list to reallocate ULD resources on hotplug */ +struct cxgb4_uld_list { + struct cxgb4_uld_info uld_info; + struct list_head list_node; + enum cxgb4_uld uld_type; +}; + enum sge_eosw_state { CXGB4_EO_STATE_CLOSED = 0, /* Not ready to accept traffic */ CXGB4_EO_STATE_FLOWC_OPEN_SEND, /* Send FLOWC open request */ @@ -884,12 +977,15 @@ struct sge { struct sge_eohw_txq *eohw_txq; struct sge_ofld_rxq *eohw_rxq; + struct sge_eth_rxq *mirror_rxq[NCHAN]; + u16 max_ethqsets; /* # of available Ethernet queue sets */ u16 ethqsets; /* # of active Ethernet queue sets */ u16 ethtxq_rover; /* Tx queue to clean up next */ u16 ofldqsets; /* # of active ofld queue sets */ u16 nqs_per_uld; /* # of Rx queues per ULD */ u16 eoqsets; /* # of ETHOFLD queues */ + u16 mirrorqsets; /* # of Mirror queues */ u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; @@ -996,6 +1092,17 @@ struct mps_entries_ref { refcount_t refcnt; }; +struct cxgb4_ethtool_filter_info { + u32 *loc_array; /* Array holding the actual TIDs set to filters */ + unsigned long *bmap; /* Bitmap for managing filters in use */ + u32 in_use; /* # of filters in use */ +}; + +struct cxgb4_ethtool_filter { + u32 nentries; /* Adapter wide number of supported filters */ + struct cxgb4_ethtool_filter_info *port; /* Per port entry */ +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -1012,9 +1119,7 @@ struct adapter { int msg_enable; __be16 vxlan_port; - u8 vxlan_port_cnt; __be16 geneve_port; - u8 geneve_port_cnt; struct adapter_params params; struct cxgb4_virt_res vres; @@ -1092,7 +1197,14 @@ struct adapter { /* TC u32 offload */ struct cxgb4_tc_u32_table *tc_u32; + struct chcr_ktls chcr_ktls; struct chcr_stats_debug chcr_stats; +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + struct ch_ktls_stats_debug ch_ktls_stats; +#endif +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + struct ch_ipsec_stats_debug ch_ipsec_stats; +#endif /* TC flower offload */ bool tc_flower_initialized; @@ -1120,25 +1232,29 @@ struct adapter { /* TC MATCHALL classifier offload */ struct cxgb4_tc_matchall *tc_matchall; + + /* Ethtool n-tuple */ + struct cxgb4_ethtool_filter *ethtool_filters; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be * programmed with various parameters. 
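(Editorial aside: a "sched-class" request fills the structure defined just below; for instance, an absolute 100 Mb/s class rate limit could be described as follows — illustrative values only, with the SCHED_CLASS_* constants assumed from sched.h:)

struct ch_sched_params p = {
	.type = SCHED_CLASS_TYPE_PACKET,
	.u.params = {
		.level    = SCHED_CLASS_LEVEL_CL_RL,	/* class rate limit */
		.mode     = SCHED_CLASS_MODE_CLASS,	/* per-class */
		.rateunit = SCHED_CLASS_RATEUNIT_BITS,	/* bit rate */
		.ratemode = SCHED_CLASS_RATEMODE_ABS,	/* absolute kbps */
		.channel  = 0,
		.class    = 0,
		.maxrate  = 100000,	/* 100 Mb/s expressed in kbps */
		.pktsize  = 1500,	/* average packet size */
	},
};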
*/ struct ch_sched_params { - s8 type; /* packet or flow */ + u8 type; /* packet or flow */ union { struct { - s8 level; /* scheduler hierarchy level */ - s8 mode; /* per-class or per-flow */ - s8 rateunit; /* bit or packet rate */ - s8 ratemode; /* %port relative or kbps absolute */ - s8 channel; /* scheduler channel [0..N] */ - s8 class; /* scheduler class [0..N] */ - s32 minrate; /* minimum rate */ - s32 maxrate; /* maximum rate */ - s16 weight; /* percent weight */ - s16 pktsize; /* average packet size */ + u8 level; /* scheduler hierarchy level */ + u8 mode; /* per-class or per-flow */ + u8 rateunit; /* bit or packet rate */ + u8 ratemode; /* %port relative or kbps absolute */ + u8 channel; /* scheduler channel [0..N] */ + u8 class; /* scheduler class [0..N] */ + u32 minrate; /* minimum rate */ + u32 maxrate; /* maximum rate */ + u16 weight; /* percent weight */ + u16 pktsize; /* average packet size */ + u16 burstsize; /* burst buffer size */ } params; } u; }; @@ -1331,6 +1447,8 @@ enum { NAT_MODE_ALL /* NAT on entire 4-tuple */ }; +#define CXGB4_FILTER_TYPE_MAX 2 + /* Host shadow copy of ingress filter entry. This is in host native format * and doesn't match the ordering or bit order, etc. of the hardware or the * firmware command. The use of bit-field structure elements is purely to @@ -1427,7 +1545,7 @@ static inline void t4_write_reg64(struct adapter *adap, u32 reg_addr, u64 val) static inline void t4_set_hw_addr(struct adapter *adapter, int port_idx, u8 hw_addr[]) { - ether_addr_copy(adapter->port[port_idx]->dev_addr, hw_addr); + eth_hw_addr_set(adapter->port[port_idx], hw_addr); ether_addr_copy(adapter->port[port_idx]->perm_addr, hw_addr); } @@ -1485,9 +1603,8 @@ static inline unsigned int qtimer_val(const struct adapter *adap, return idx < SGE_NTIMERS ?
adap->sge.timer_val[idx] : 0; } -/* driver version & name used for ethtool_drvinfo */ +/* driver name used for ethtool_drvinfo */ extern char cxgb4_driver_name[]; -extern const char cxgb4_driver_version[]; void t4_os_portmod_changed(struct adapter *adap, int port_id); void t4_os_link_changed(struct adapter *adap, int port_id, int link_stat); @@ -1496,6 +1613,7 @@ void t4_free_sge_resources(struct adapter *adap); void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q); irq_handler_t t4_intr_handler(struct adapter *adap); netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev); +int cxgb4_selftest_lb_pkt(struct net_device *netdev); int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl); int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb); @@ -1728,8 +1846,7 @@ int t4_get_pfres(struct adapter *adapter); int t4_read_flash(struct adapter *adapter, unsigned int addr, unsigned int nwords, u32 *data, int byte_oriented); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); -int t4_load_phy_fw(struct adapter *adap, - int win, spinlock_t *lock, +int t4_load_phy_fw(struct adapter *adap, int win, int (*phy_fw_version)(const u8 *, size_t), const u8 *phy_fw_data, size_t phy_fw_size); int t4_phy_fw_ver(struct adapter *adap, int *phy_fw_ver); @@ -1773,6 +1890,8 @@ int t4_init_rss_mode(struct adapter *adap, int mbox); int t4_init_portinfo(struct port_info *pi, int mbox, int port, int pf, int vf, u8 mac[]); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); +int t4_init_port_mirror(struct port_info *pi, u8 mbox, u8 port, u8 pf, u8 vf, + u16 *mirror_viid); void t4_fatal_err(struct adapter *adapter); unsigned int t4_chip_rss_size(struct adapter *adapter); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, @@ -1882,8 +2001,8 @@ int t4_free_vi(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int viid); int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid, - int mtu, int promisc, int all_multi, int bcast, int vlanex, - bool sleep_ok); + unsigned int viid_mirror, int mtu, int promisc, int all_multi, + int bcast, int vlanex, bool sleep_ok); int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid, const u8 *addr, const u8 *mask, unsigned int idx, u8 lookup_type, u8 port_id, bool sleep_ok); @@ -1954,9 +2073,10 @@ int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid, enum ctxt_type ctype, u32 *data); int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, enum ctxt_type ctype, u32 *data); -int t4_sched_params(struct adapter *adapter, int type, int level, int mode, - int rateunit, int ratemode, int channel, int class, - int minrate, int maxrate, int weight, int pktsize); +int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode, + u8 rateunit, u8 ratemode, u8 channel, u8 class, + u32 minrate, u32 maxrate, u16 weight, u16 pktsize, + u16 burstsize); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_idma_monitor_init(struct adapter *adapter, struct sge_idma_monitor_state *idma); @@ -1979,13 +2099,17 @@ void t4_register_netevent_notifier(void); int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port, unsigned int devid, unsigned int offset, unsigned int len, u8 *buf); +int t4_load_boot(struct adapter *adap, u8 *boot_data, + unsigned int boot_addr, unsigned int size); +int t4_load_bootcfg(struct adapter *adap, + const u8 *cfg_data, unsigned int 
size); void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); void free_tx_desc(struct adapter *adap, struct sge_txq *q, unsigned int n, bool unmap); void cxgb4_eosw_txq_free_desc(struct adapter *adap, struct sge_eosw_txq *txq, u32 ndesc); int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc); -void cxgb4_ethofld_restart(unsigned long data); +void cxgb4_ethofld_restart(struct tasklet_struct *t); int cxgb4_ethofld_rx_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *si); void free_txq(struct adapter *adap, struct sge_txq *q); @@ -1998,6 +2122,9 @@ void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q, struct ulptx_sgl *sgl, u64 *end, unsigned int start, const dma_addr_t *addr); +void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q, + struct ulptx_sgl *sgl, u64 *end, + const dma_addr_t *addr, u32 start, u32 send_len); void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n); int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf, u16 vlan); @@ -2048,8 +2175,11 @@ int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid, bool persistent, u8 *smt_idx); int cxgb4_get_msix_idx_from_bmap(struct adapter *adap); void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, u32 msix_idx); -int cxgb_open(struct net_device *dev); -int cxgb_close(struct net_device *dev); void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q); void cxgb4_quiesce_rx(struct sge_rspq *q); +int cxgb4_port_mirror_alloc(struct net_device *dev); +void cxgb4_port_mirror_free(struct net_device *dev); +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) +int cxgb4_set_ktls_feature(struct adapter *adap, bool enable); +#endif #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index e374b413d9ac..dd66b244466d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -66,249 +66,9 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { { CUDBG_HMA_INDIRECT, cudbg_collect_hma_indirect }, }; -static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) -{ - struct cudbg_tcam tcam_region = { 0 }; - u32 value, n = 0, len = 0; - - switch (entity) { - case CUDBG_REG_DUMP: - switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { - case CHELSIO_T4: - len = T4_REGMAP_SIZE; - break; - case CHELSIO_T5: - case CHELSIO_T6: - len = T5_REGMAP_SIZE; - break; - default: - break; - } - break; - case CUDBG_DEV_LOG: - len = adap->params.devlog.size; - break; - case CUDBG_CIM_LA: - if (is_t6(adap->params.chip)) { - len = adap->params.cim_la_size / 10 + 1; - len *= 10 * sizeof(u32); - } else { - len = adap->params.cim_la_size / 8; - len *= 8 * sizeof(u32); - } - len += sizeof(u32); /* for reading CIM LA configuration */ - break; - case CUDBG_CIM_MA_LA: - len = 2 * CIM_MALA_SIZE * 5 * sizeof(u32); - break; - case CUDBG_CIM_QCFG: - len = sizeof(struct cudbg_cim_qcfg); - break; - case CUDBG_CIM_IBQ_TP0: - case CUDBG_CIM_IBQ_TP1: - case CUDBG_CIM_IBQ_ULP: - case CUDBG_CIM_IBQ_SGE0: - case CUDBG_CIM_IBQ_SGE1: - case CUDBG_CIM_IBQ_NCSI: - len = CIM_IBQ_SIZE * 4 * sizeof(u32); - break; - case CUDBG_CIM_OBQ_ULP0: - len = cudbg_cim_obq_size(adap, 0); - break; - case CUDBG_CIM_OBQ_ULP1: - len = cudbg_cim_obq_size(adap, 1); - break; - case CUDBG_CIM_OBQ_ULP2: - len = cudbg_cim_obq_size(adap, 2); - 
break; - case CUDBG_CIM_OBQ_ULP3: - len = cudbg_cim_obq_size(adap, 3); - break; - case CUDBG_CIM_OBQ_SGE: - len = cudbg_cim_obq_size(adap, 4); - break; - case CUDBG_CIM_OBQ_NCSI: - len = cudbg_cim_obq_size(adap, 5); - break; - case CUDBG_CIM_OBQ_RXQ0: - len = cudbg_cim_obq_size(adap, 6); - break; - case CUDBG_CIM_OBQ_RXQ1: - len = cudbg_cim_obq_size(adap, 7); - break; - case CUDBG_EDC0: - value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); - if (value & EDRAM0_ENABLE_F) { - value = t4_read_reg(adap, MA_EDRAM0_BAR_A); - len = EDRAM0_SIZE_G(value); - } - len = cudbg_mbytes_to_bytes(len); - break; - case CUDBG_EDC1: - value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); - if (value & EDRAM1_ENABLE_F) { - value = t4_read_reg(adap, MA_EDRAM1_BAR_A); - len = EDRAM1_SIZE_G(value); - } - len = cudbg_mbytes_to_bytes(len); - break; - case CUDBG_MC0: - value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); - if (value & EXT_MEM0_ENABLE_F) { - value = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A); - len = EXT_MEM0_SIZE_G(value); - } - len = cudbg_mbytes_to_bytes(len); - break; - case CUDBG_MC1: - value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); - if (value & EXT_MEM1_ENABLE_F) { - value = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A); - len = EXT_MEM1_SIZE_G(value); - } - len = cudbg_mbytes_to_bytes(len); - break; - case CUDBG_RSS: - len = t4_chip_rss_size(adap) * sizeof(u16); - break; - case CUDBG_RSS_VF_CONF: - len = adap->params.arch.vfcount * - sizeof(struct cudbg_rss_vf_conf); - break; - case CUDBG_PATH_MTU: - len = NMTUS * sizeof(u16); - break; - case CUDBG_PM_STATS: - len = sizeof(struct cudbg_pm_stats); - break; - case CUDBG_HW_SCHED: - len = sizeof(struct cudbg_hw_sched); - break; - case CUDBG_TP_INDIRECT: - switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { - case CHELSIO_T5: - n = sizeof(t5_tp_pio_array) + - sizeof(t5_tp_tm_pio_array) + - sizeof(t5_tp_mib_index_array); - break; - case CHELSIO_T6: - n = sizeof(t6_tp_pio_array) + - sizeof(t6_tp_tm_pio_array) + - sizeof(t6_tp_mib_index_array); - break; - default: - break; - } - n = n / (IREG_NUM_ELEM * sizeof(u32)); - len = sizeof(struct ireg_buf) * n; - break; - case CUDBG_SGE_INDIRECT: - len = sizeof(struct ireg_buf) * 2 + - sizeof(struct sge_qbase_reg_field); - break; - case CUDBG_ULPRX_LA: - len = sizeof(struct cudbg_ulprx_la); - break; - case CUDBG_TP_LA: - len = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64); - break; - case CUDBG_MEMINFO: - len = sizeof(struct cudbg_ver_hdr) + - sizeof(struct cudbg_meminfo); - break; - case CUDBG_CIM_PIF_LA: - len = sizeof(struct cudbg_cim_pif_la); - len += 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32); - break; - case CUDBG_CLK: - len = sizeof(struct cudbg_clk_info); - break; - case CUDBG_PCIE_INDIRECT: - n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); - len = sizeof(struct ireg_buf) * n * 2; - break; - case CUDBG_PM_INDIRECT: - n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32)); - len = sizeof(struct ireg_buf) * n * 2; - break; - case CUDBG_TID_INFO: - len = sizeof(struct cudbg_tid_info_region_rev1); - break; - case CUDBG_PCIE_CONFIG: - len = sizeof(u32) * CUDBG_NUM_PCIE_CONFIG_REGS; - break; - case CUDBG_DUMP_CONTEXT: - len = cudbg_dump_context_size(adap); - break; - case CUDBG_MPS_TCAM: - len = sizeof(struct cudbg_mps_tcam) * - adap->params.arch.mps_tcam_size; - break; - case CUDBG_VPD_DATA: - len = sizeof(struct cudbg_vpd_data); - break; - case CUDBG_LE_TCAM: - cudbg_fill_le_tcam_info(adap, &tcam_region); - len = sizeof(struct cudbg_tcam) + - sizeof(struct cudbg_tid_data) * tcam_region.max_tid; 
- break; - case CUDBG_CCTRL: - len = sizeof(u16) * NMTUS * NCCTRL_WIN; - break; - case CUDBG_MA_INDIRECT: - if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { - n = sizeof(t6_ma_ireg_array) / - (IREG_NUM_ELEM * sizeof(u32)); - len = sizeof(struct ireg_buf) * n * 2; - } - break; - case CUDBG_ULPTX_LA: - len = sizeof(struct cudbg_ver_hdr) + - sizeof(struct cudbg_ulptx_la); - break; - case CUDBG_UP_CIM_INDIRECT: - n = 0; - if (is_t5(adap->params.chip)) - n = sizeof(t5_up_cim_reg_array) / - ((IREG_NUM_ELEM + 1) * sizeof(u32)); - else if (is_t6(adap->params.chip)) - n = sizeof(t6_up_cim_reg_array) / - ((IREG_NUM_ELEM + 1) * sizeof(u32)); - len = sizeof(struct ireg_buf) * n; - break; - case CUDBG_PBT_TABLE: - len = sizeof(struct cudbg_pbt_tables); - break; - case CUDBG_MBOX_LOG: - len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size; - break; - case CUDBG_HMA_INDIRECT: - if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { - n = sizeof(t6_hma_ireg_array) / - (IREG_NUM_ELEM * sizeof(u32)); - len = sizeof(struct ireg_buf) * n; - } - break; - case CUDBG_HMA: - value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); - if (value & HMA_MUX_F) { - /* In T6, there's no MC1. So, HMA shares MC1 - * address space. - */ - value = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A); - len = EXT_MEM1_SIZE_G(value); - } - len = cudbg_mbytes_to_bytes(len); - break; - case CUDBG_QDESC: - cudbg_fill_qdesc_num_and_size(adap, NULL, &len); - break; - default: - break; - } - - return len; -} +static const struct cxgb4_collect_entity cxgb4_collect_flash_dump[] = { + { CUDBG_FLASH, cudbg_collect_flash }, +}; u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag) { @@ -319,17 +79,20 @@ u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag) if (flag & CXGB4_ETH_DUMP_HW) { for (i = 0; i < ARRAY_SIZE(cxgb4_collect_hw_dump); i++) { entity = cxgb4_collect_hw_dump[i].entity; - len += cxgb4_get_entity_length(adap, entity); + len += cudbg_get_entity_length(adap, entity); } } if (flag & CXGB4_ETH_DUMP_MEM) { for (i = 0; i < ARRAY_SIZE(cxgb4_collect_mem_dump); i++) { entity = cxgb4_collect_mem_dump[i].entity; - len += cxgb4_get_entity_length(adap, entity); + len += cudbg_get_entity_length(adap, entity); } } + if (flag & CXGB4_ETH_DUMP_FLASH) + len += adap->params.sf_size; + /* If compression is enabled, a smaller destination buffer is enough */ wsize = cudbg_get_workspace_size(); if (wsize && len > CUDBG_DUMP_BUFF_SIZE) @@ -394,8 +157,7 @@ static int cudbg_alloc_compress_buff(struct cudbg_init *pdbg_init) static void cudbg_free_compress_buff(struct cudbg_init *pdbg_init) { - if (pdbg_init->compress_buff) - vfree(pdbg_init->compress_buff); + vfree(pdbg_init->compress_buff); } int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, @@ -468,6 +230,13 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, buf, &total_size); + if (flag & CXGB4_ETH_DUMP_FLASH) + cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff, + cxgb4_collect_flash_dump, + ARRAY_SIZE(cxgb4_collect_flash_dump), + buf, + &total_size); + cudbg_free_compress_buff(&cudbg_init); cudbg_hdr->data_len = total_size; if (cudbg_init.compress_type != CUDBG_COMPRESSION_NONE) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h index 66b805c7a92c..c04a49b6378d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h @@ -27,6 +27,7 @@ enum CXGB4_ETHTOOL_DUMP_FLAGS { CXGB4_ETH_DUMP_NONE = ETH_FW_DUMP_DISABLE, 
CXGB4_ETH_DUMP_MEM = (1 << 0), /* On-Chip Memory Dumps */ CXGB4_ETH_DUMP_HW = (1 << 1), /* various FW and HW dumps */ + CXGB4_ETH_DUMP_FLASH = (1 << 2), /* Dump flash memory */ }; #define CXGB4_ETH_DUMP_ALL (CXGB4_ETH_DUMP_MEM | CXGB4_ETH_DUMP_HW) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 4a872f328fea..7d5204834ee2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -85,7 +85,7 @@ static void cxgb4_dcb_cleanup_apps(struct net_device *dev) if (err) { dev_err(adap->pdev_dev, - "Failed DCB Clear %s Application Priority: sel=%d, prot=%d, , err=%d\n", + "Failed DCB Clear %s Application Priority: sel=%d, prot=%d, err=%d\n", dcb_ver_array[dcb->dcb_version], app.selector, app.protocol, -err); break; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h index d3c654b9989b..80c6627fe981 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h @@ -136,6 +136,9 @@ static inline __u8 bitswap_1(unsigned char val) ((val & 0x02) << 5) | ((val & 0x01) << 7); } + +extern const char * const dcb_ver_array[]; + #define CXGB4_DCB_ENABLED true #else /* !CONFIG_CHELSIO_T4_DCB */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index de30d61af065..14e0d989c3ba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -49,6 +49,7 @@ #include "cudbg_lib_common.h" #include "cudbg_entity.h" #include "cudbg_lib.h" +#include "cxgb4_tc_mqprio.h" /* generic seq_file support for showing a table of size rows x width. */ static void *seq_tab_get_idx(struct seq_tab *tb, loff_t pos) @@ -984,7 +985,7 @@ static const char * const devlog_facility_strings[] = { struct devlog_info { unsigned int nentries; /* number of entries in log[] */ unsigned int first; /* first [temporal] entry in log[] */ - struct fw_devlog_e log[0]; /* Firmware Device Log */ + struct fw_devlog_e log[]; /* Firmware Device Log */ }; /* Dump a Firmware Device Log entry. @@ -1812,12 +1813,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v) /* Inner header lookup */ if (lookup_type && (lookup_type != DATALKPTYPE_M)) { seq_printf(seq, - "%3u %02x:%02x:%02x:%02x:%02x:%02x " - "%012llx %06x %06x - - %3c" - " 'I' %4x " - "%3c %#x%4u%4d", idx, addr[0], - addr[1], addr[2], addr[3], - addr[4], addr[5], + "%3u %pM %012llx %06x %06x - - %3c 'I' %4x %3c %#x%4u%4d", + idx, addr, (unsigned long long)mask, vniy, (vnix | vniy), dip_hit ? 'Y' : 'N', @@ -1829,10 +1826,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v) T6_VF_G(cls_lo) : -1); } else { seq_printf(seq, - "%3u %02x:%02x:%02x:%02x:%02x:%02x " - "%012llx - - ", - idx, addr[0], addr[1], addr[2], - addr[3], addr[4], addr[5], + "%3u %pM %012llx - - ", + idx, addr, (unsigned long long)mask); if (vlan_vld) @@ -1850,10 +1845,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v) T6_VF_G(cls_lo) : -1); } } else - seq_printf(seq, "%3u %02x:%02x:%02x:%02x:%02x:%02x " - "%012llx%3c %#x%4u%4d", - idx, addr[0], addr[1], addr[2], addr[3], - addr[4], addr[5], (unsigned long long)mask, + seq_printf(seq, "%3u %pM %012llx%3c %#x%4u%4d", + idx, addr, (unsigned long long)mask, (cls_lo & SRAM_VLD_F) ?
'Y' : 'N', PORTMAP_G(cls_hi), PF_G(cls_lo), @@ -2386,7 +2379,6 @@ static const struct file_operations rss_vf_config_debugfs_fops = { }; #ifdef CONFIG_CHELSIO_T4_DCB -extern char *dcb_ver_array[]; /* Data Center Bridging information for each port. */ @@ -2657,32 +2649,19 @@ static int sge_qinfo_uld_ciq_entries(const struct adapter *adap, int uld) static int sge_qinfo_show(struct seq_file *seq, void *v) { - int eth_entries, ctrl_entries, eo_entries = 0; + int eth_entries, ctrl_entries, eohw_entries = 0, eosw_entries = 0; int uld_rxq_entries[CXGB4_ULD_MAX] = { 0 }; int uld_ciq_entries[CXGB4_ULD_MAX] = { 0 }; int uld_txq_entries[CXGB4_TX_MAX] = { 0 }; const struct sge_uld_txq_info *utxq_info; const struct sge_uld_rxq_info *urxq_info; + struct cxgb4_tc_port_mqprio *port_mqprio; struct adapter *adap = seq->private; - int i, n, r = (uintptr_t)v - 1; + int i, j, n, r = (uintptr_t)v - 1; struct sge *s = &adap->sge; eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4); ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4); - if (adap->sge.eohw_txq) - eo_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4); - - mutex_lock(&uld_mutex); - if (s->uld_txq_info) - for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++) - uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i); - - if (s->uld_rxq_info) { - for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) { - uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i); - uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i); - } - } if (r) seq_putc(seq, '\n'); @@ -2692,7 +2671,7 @@ do { \ seq_printf(seq, "%-12s", s); \ for (i = 0; i < n; ++i) \ seq_printf(seq, " %16" fmt_spec, v); \ - seq_putc(seq, '\n'); \ + seq_putc(seq, '\n'); \ } while (0) #define S(s, v) S3("s", s, v) #define T3(fmt_spec, s, v) S3(fmt_spec, s, tx[i].v) @@ -2759,11 +2738,73 @@ do { \ RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); - goto unlock; + goto out; } r -= eth_entries; - if (r < eo_entries) { + for_each_port(adap, j) { + struct port_info *pi = adap2pinfo(adap, j); + const struct sge_eth_rxq *rx; + + mutex_lock(&pi->vi_mirror_mutex); + if (!pi->vi_mirror_count) { + mutex_unlock(&pi->vi_mirror_mutex); + continue; + } + + if (r >= DIV_ROUND_UP(pi->nmirrorqsets, 4)) { + r -= DIV_ROUND_UP(pi->nmirrorqsets, 4); + mutex_unlock(&pi->vi_mirror_mutex); + continue; + } + + rx = &s->mirror_rxq[j][r * 4]; + n = min(4, pi->nmirrorqsets - 4 * r); + + S("QType:", "Mirror-Rxq"); + S("Interface:", + rx[i].rspq.netdev ?
rx[i].rspq.netdev->name : "N/A"); + R("RspQ ID:", rspq.abs_id); + R("RspQ size:", rspq.size); + R("RspQE size:", rspq.iqe_len); + R("RspQ CIDX:", rspq.cidx); + R("RspQ Gen:", rspq.gen); + S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); + S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]); + R("FL ID:", fl.cntxt_id); + R("FL size:", fl.size - 8); + R("FL pend:", fl.pend_cred); + R("FL avail:", fl.avail); + R("FL PIDX:", fl.pidx); + R("FL CIDX:", fl.cidx); + RL("RxPackets:", stats.pkts); + RL("RxCSO:", stats.rx_cso); + RL("VLANxtract:", stats.vlan_ex); + RL("LROmerged:", stats.lro_merged); + RL("LROpackets:", stats.lro_pkts); + RL("RxDrops:", stats.rx_drops); + RL("RxBadPkts:", stats.bad_rx_pkts); + RL("FLAllocErr:", fl.alloc_failed); + RL("FLLrgAlcErr:", fl.large_alloc_failed); + RL("FLMapErr:", fl.mapping_err); + RL("FLLow:", fl.low); + RL("FLStarving:", fl.starving); + + mutex_unlock(&pi->vi_mirror_mutex); + goto out; + } + + if (!adap->tc_mqprio) + goto skip_mqprio; + + mutex_lock(&adap->tc_mqprio->mqprio_mutex); + if (!refcount_read(&adap->tc_mqprio->refcnt)) { + mutex_unlock(&adap->tc_mqprio->mqprio_mutex); + goto skip_mqprio; + } + + eohw_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4); + if (r < eohw_entries) { int base_qset = r * 4; const struct sge_ofld_rxq *rx = &s->eohw_rxq[base_qset]; const struct sge_eohw_txq *tx = &s->eohw_txq[base_qset]; @@ -2808,10 +2849,71 @@ do { \ RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); - goto unlock; + mutex_unlock(&adap->tc_mqprio->mqprio_mutex); + goto out; + } + + r -= eohw_entries; + for (j = 0; j < adap->params.nports; j++) { + int entries; + u8 tc; + + port_mqprio = &adap->tc_mqprio->port_mqprio[j]; + entries = 0; + for (tc = 0; tc < port_mqprio->mqprio.qopt.num_tc; tc++) + entries += port_mqprio->mqprio.qopt.count[tc]; + + if (!entries) + continue; + + eosw_entries = DIV_ROUND_UP(entries, 4); + if (r < eosw_entries) { + const struct sge_eosw_txq *tx; + + n = min(4, entries - 4 * r); + tx = &port_mqprio->eosw_txq[4 * r]; + + S("QType:", "EOSW-TXQ"); + S("Interface:", + adap->port[j] ? 
adap->port[j]->name : "N/A"); + T("EOTID:", hwtid); + T("HWQID:", hwqid); + T("State:", state); + T("Size:", ndesc); + T("In-Use:", inuse); + T("Credits:", cred); + T("Compl:", ncompl); + T("Last-Compl:", last_compl); + T("PIDX:", pidx); + T("Last-PIDX:", last_pidx); + T("CIDX:", cidx); + T("Last-CIDX:", last_cidx); + T("FLOWC-IDX:", flowc_idx); + + mutex_unlock(&adap->tc_mqprio->mqprio_mutex); + goto out; + } + + r -= eosw_entries; + } + mutex_unlock(&adap->tc_mqprio->mqprio_mutex); + +skip_mqprio: + if (!is_uld(adap)) + goto skip_uld; + + mutex_lock(&uld_mutex); + if (s->uld_txq_info) + for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++) + uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i); + + if (s->uld_rxq_info) { + for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) { + uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i); + uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i); + } } - r -= eo_entries; if (r < uld_txq_entries[CXGB4_TX_OFLD]) { const struct sge_uld_txq *tx; @@ -2994,6 +3096,9 @@ do { \ } r -= uld_txq_entries[CXGB4_TX_CRYPTO]; + mutex_unlock(&uld_mutex); + +skip_uld: if (r < ctrl_entries) { const struct sge_ctrl_txq *tx = &s->ctrlq[r * 4]; @@ -3008,7 +3113,7 @@ do { \ TL("TxQFull:", q.stops); TL("TxQRestarts:", q.restarts); - goto unlock; + goto out; } r -= ctrl_entries; @@ -3026,11 +3131,9 @@ do { \ seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:", s->counter_val[evtq->pktcnt_idx]); - goto unlock; + goto out; } -unlock: - mutex_unlock(&uld_mutex); #undef R #undef RL #undef T @@ -3039,13 +3142,48 @@ unlock: #undef R3 #undef T3 #undef S3 +out: + return 0; + +unlock: + mutex_unlock(&uld_mutex); return 0; } -static int sge_queue_entries(const struct adapter *adap) +static int sge_queue_entries(struct adapter *adap) { - int tot_uld_entries = 0; - int i; + int i, tot_uld_entries = 0, eohw_entries = 0, eosw_entries = 0; + int mirror_rxq_entries = 0; + + if (adap->tc_mqprio) { + struct cxgb4_tc_port_mqprio *port_mqprio; + u8 tc; + + mutex_lock(&adap->tc_mqprio->mqprio_mutex); + if (adap->sge.eohw_txq) + eohw_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4); + + for (i = 0; i < adap->params.nports; i++) { + u32 entries = 0; + + port_mqprio = &adap->tc_mqprio->port_mqprio[i]; + for (tc = 0; tc < port_mqprio->mqprio.qopt.num_tc; tc++) + entries += port_mqprio->mqprio.qopt.count[tc]; + + if (entries) + eosw_entries += DIV_ROUND_UP(entries, 4); + } + mutex_unlock(&adap->tc_mqprio->mqprio_mutex); + } + + for_each_port(adap, i) { + struct port_info *pi = adap2pinfo(adap, i); + + mutex_lock(&pi->vi_mirror_mutex); + if (pi->vi_mirror_count) + mirror_rxq_entries += DIV_ROUND_UP(pi->nmirrorqsets, 4); + mutex_unlock(&pi->vi_mirror_mutex); + } if (!is_uld(adap)) goto lld_only; @@ -3061,9 +3199,8 @@ static int sge_queue_entries(const struct adapter *adap) mutex_unlock(&uld_mutex); lld_only: - return DIV_ROUND_UP(adap->sge.ethqsets, 4) + - (adap->sge.eohw_txq ? 
DIV_ROUND_UP(adap->sge.eoqsets, 4) : 0) + - tot_uld_entries + + return DIV_ROUND_UP(adap->sge.ethqsets, 4) + mirror_rxq_entries + + eohw_entries + eosw_entries + tot_uld_entries + DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1; } @@ -3244,6 +3381,10 @@ static int tid_info_show(struct seq_file *seq, void *v) if (t->nhpftids) seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base, t->hpftid_base + t->nhpftids - 1); + if (t->neotids) + seq_printf(seq, "EOTID range: %u..%u, in use: %u\n", + t->eotid_base, t->eotid_base + t->neotids - 1, + atomic_read(&t->eotids_in_use)); if (t->ntids) seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n", t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A), @@ -3277,7 +3418,7 @@ static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf, adap->sge.egr_sz, adap->sge.blocked_fl); len += sprintf(buf + len, "\n"); size = simple_read_from_buffer(ubuf, count, ppos, buf, len); - kvfree(buf); + kfree(buf); return size; } @@ -3288,18 +3429,18 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, unsigned long *t; struct adapter *adap = filp->private_data; - t = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz), sizeof(long), GFP_KERNEL); + t = bitmap_zalloc(adap->sge.egr_sz, GFP_KERNEL); if (!t) return -ENOMEM; err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz); if (err) { - kvfree(t); + bitmap_free(t); return err; } bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz); - kvfree(t); + bitmap_free(t); return count; } @@ -3386,6 +3527,10 @@ DEFINE_SHOW_ATTRIBUTE(meminfo); static int chcr_stats_show(struct seq_file *seq, void *v) { +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + struct ch_ktls_port_stats_debug *ktls_port; + int i = 0; +#endif struct adapter *adap = seq->private; seq_puts(seq, "Chelsio Crypto Accelerator Stats \n"); @@ -3401,15 +3546,47 @@ static int chcr_stats_show(struct seq_file *seq, void *v) atomic_read(&adap->chcr_stats.error)); seq_printf(seq, "Fallback: %10u \n", atomic_read(&adap->chcr_stats.fallback)); - seq_printf(seq, "IPSec PDU: %10u\n", - atomic_read(&adap->chcr_stats.ipsec_cnt)); seq_printf(seq, "TLS PDU Tx: %10u\n", atomic_read(&adap->chcr_stats.tls_pdu_tx)); seq_printf(seq, "TLS PDU Rx: %10u\n", atomic_read(&adap->chcr_stats.tls_pdu_rx)); seq_printf(seq, "TLS Keys (DDR) Count: %10u\n", atomic_read(&adap->chcr_stats.tls_key)); - +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + seq_puts(seq, "\nChelsio Inline IPsec Crypto Accelerator Stats\n"); + seq_printf(seq, "IPSec PDU: %10u\n", + atomic_read(&adap->ch_ipsec_stats.ipsec_cnt)); +#endif +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + seq_puts(seq, "\nChelsio KTLS Crypto Accelerator Stats\n"); + seq_printf(seq, "Tx TLS offload refcount: %20u\n", + refcount_read(&adap->chcr_ktls.ktls_refcount)); + seq_printf(seq, "Tx records send: %20llu\n", + atomic64_read(&adap->ch_ktls_stats.ktls_tx_send_records)); + seq_printf(seq, "Tx partial start of records: %20llu\n", + atomic64_read(&adap->ch_ktls_stats.ktls_tx_start_pkts)); + seq_printf(seq, "Tx partial middle of records: %20llu\n", + atomic64_read(&adap->ch_ktls_stats.ktls_tx_middle_pkts)); + seq_printf(seq, "Tx partial end of record: %20llu\n", + atomic64_read(&adap->ch_ktls_stats.ktls_tx_end_pkts)); + seq_printf(seq, "Tx complete records: %20llu\n", + atomic64_read(&adap->ch_ktls_stats.ktls_tx_complete_pkts)); + seq_printf(seq, "TX trim pkts : %20llu\n", + atomic64_read(&adap->ch_ktls_stats.ktls_tx_trimmed_pkts)); + seq_printf(seq, "TX sw fallback : %20llu\n", + atomic64_read(&adap->ch_ktls_stats.ktls_tx_fallback)); + while (i < 
MAX_NPORTS) { + ktls_port = &adap->ch_ktls_stats.ktls_port[i]; + seq_printf(seq, "Port %d\n", i); + seq_printf(seq, "Tx connection created: %20llu\n", + atomic64_read(&ktls_port->ktls_tx_connection_open)); + seq_printf(seq, "Tx connection failed: %20llu\n", + atomic64_read(&ktls_port->ktls_tx_connection_fail)); + seq_printf(seq, "Tx connection closed: %20llu\n", + atomic64_read(&ktls_port->ktls_tx_connection_close)); + i++; + } +#endif return 0; } DEFINE_SHOW_ATTRIBUTE(chcr_stats); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h index ba95e13d52da..1471cf0deb58 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h @@ -49,7 +49,7 @@ struct seq_tab { unsigned int rows; /* # of entries */ unsigned char width; /* size in bytes of each entry */ unsigned char skip_first; /* whether the first line is a header */ - char data[0]; /* the table data */ + char data[]; /* the table data */ }; static inline unsigned int hex2val(char c) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index c837382ee522..8477a93cee6b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -10,6 +10,8 @@ #include "t4_regs.h" #include "t4fw_api.h" #include "cxgb4_cudbg.h" +#include "cxgb4_filter.h" +#include "cxgb4_tc_flower.h" #define EEPROM_MAGIC 0x38E2F10C @@ -23,6 +25,23 @@ static void set_msglevel(struct net_device *dev, u32 val) netdev2adap(dev)->msg_enable = val; } +enum cxgb4_ethtool_tests { + CXGB4_ETHTOOL_LB_TEST, + CXGB4_ETHTOOL_MAX_TEST, +}; + +static const char cxgb4_selftest_strings[CXGB4_ETHTOOL_MAX_TEST][ETH_GSTRING_LEN] = { + "Loop back test (offline)", +}; + +static const char * const flash_region_strings[] = { + "All", + "Firmware", + "PHY Firmware", + "Boot", + "Boot CFG", +}; + static const char stats_strings[][ETH_GSTRING_LEN] = { "tx_octets_ok ", "tx_frames_ok ", @@ -98,6 +117,15 @@ static const char stats_strings[][ETH_GSTRING_LEN] = { "vlan_insertions ", "gro_packets ", "gro_merged ", +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + "tx_tls_encrypted_packets", + "tx_tls_encrypted_bytes ", + "tx_tls_ctx ", + "tx_tls_ooo ", + "tx_tls_skip_no_sync_data", + "tx_tls_drop_no_sync_data", + "tx_tls_drop_bypass_req ", +#endif }; static char adapter_stats_strings[][ETH_GSTRING_LEN] = { @@ -147,6 +175,8 @@ static int get_sset_count(struct net_device *dev, int sset) ARRAY_SIZE(loopback_stats_strings); case ETH_SS_PRIV_FLAGS: return ARRAY_SIZE(cxgb4_priv_flags_strings); + case ETH_SS_TEST: + return ARRAY_SIZE(cxgb4_selftest_strings); default: return -EOPNOTSUPP; } @@ -169,16 +199,12 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) struct adapter *adapter = netdev2adap(dev); u32 exprom_vers; - strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver)); - strlcpy(info->version, cxgb4_driver_version, - sizeof(info->version)); - strlcpy(info->bus_info, pci_name(adapter->pdev), + strscpy(info->driver, cxgb4_driver_name, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(adapter->pdev), sizeof(info->bus_info)); info->regdump_len = get_regs_len(dev); - if (!adapter->params.fw_vers) - strcpy(info->fw_version, "N/A"); - else + if (adapter->params.fw_vers) snprintf(info->fw_version, sizeof(info->fw_version), "%u.%u.%u.%u, TP %u.%u.%u.%u", FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers), @@ -213,6 +239,9 @@ static 
void get_strings(struct net_device *dev, u32 stringset, u8 *data) } else if (stringset == ETH_SS_PRIV_FLAGS) { memcpy(data, cxgb4_priv_flags_strings, sizeof(cxgb4_priv_flags_strings)); + } else if (stringset == ETH_SS_TEST) { + memcpy(data, cxgb4_selftest_strings, + sizeof(cxgb4_selftest_strings)); } } @@ -228,6 +257,15 @@ struct queue_port_stats { u64 vlan_ins; u64 gro_pkts; u64 gro_merged; +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + u64 tx_tls_encrypted_packets; + u64 tx_tls_encrypted_bytes; + u64 tx_tls_ctx; + u64 tx_tls_ooo; + u64 tx_tls_skip_no_sync_data; + u64 tx_tls_drop_no_sync_data; + u64 tx_tls_drop_bypass_req; +#endif }; struct adapter_stats { @@ -244,6 +282,9 @@ static void collect_sge_port_stats(const struct adapter *adap, { const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset]; const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset]; +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + const struct ch_ktls_port_stats_debug *ktls_stats; +#endif struct sge_eohw_txq *eohw_tx; unsigned int i; @@ -268,6 +309,21 @@ static void collect_sge_port_stats(const struct adapter *adap, s->vlan_ins += eohw_tx->vlan_ins; } } +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + ktls_stats = &adap->ch_ktls_stats.ktls_port[p->port_id]; + s->tx_tls_encrypted_packets = + atomic64_read(&ktls_stats->ktls_tx_encrypted_packets); + s->tx_tls_encrypted_bytes = + atomic64_read(&ktls_stats->ktls_tx_encrypted_bytes); + s->tx_tls_ctx = atomic64_read(&ktls_stats->ktls_tx_ctx); + s->tx_tls_ooo = atomic64_read(&ktls_stats->ktls_tx_ooo); + s->tx_tls_skip_no_sync_data = + atomic64_read(&ktls_stats->ktls_tx_skip_no_sync_data); + s->tx_tls_drop_no_sync_data = + atomic64_read(&ktls_stats->ktls_tx_drop_no_sync_data); + s->tx_tls_drop_bypass_req = + atomic64_read(&ktls_stats->ktls_tx_drop_bypass_req); +#endif } static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s) @@ -574,7 +630,7 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, /** * lmm_to_fw_caps - translate ethtool Link Mode Mask to Firmware * capabilities - * @et_lmm: ethtool Link Mode Mask + * @link_mode_mask: ethtool Link Mode Mask * * Translate ethtool Link Mode Mask into a Firmware Port capabilities * value. 
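(Editorial aside: the translation documented above is a bitmap-to-bitmap mapping; in spirit, for a single link mode — a fragment assuming the fw_port_cap32_t/FW_PORT_CAP32_* definitions from t4fw_api.h, not the driver's actual table-driven code:)

static fw_port_cap32_t lmm_speed_to_fw_cap(const unsigned long *link_mode_mask)
{
	fw_port_cap32_t caps = 0;

	if (test_bit(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, link_mode_mask))
		caps |= FW_PORT_CAP32_SPEED_100G;

	return caps;
}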
@@ -834,7 +890,9 @@ static int set_pauseparam(struct net_device *dev, return 0; } -static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e) +static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e, + struct kernel_ethtool_ringparam *kernel_e, + struct netlink_ext_ack *extack) { const struct port_info *pi = netdev_priv(dev); const struct sge *s = &pi->adapter->sge; @@ -850,7 +908,9 @@ static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e) e->tx_pending = s->ethtxq[pi->first_qset].q.size; } -static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e) +static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e, + struct kernel_ethtool_ringparam *kernel_e, + struct netlink_ext_ack *extack) { int i; const struct port_info *pi = netdev_priv(dev); @@ -1091,7 +1151,9 @@ static int set_dbqtimer_tickval(struct net_device *dev, } static int set_coalesce(struct net_device *dev, - struct ethtool_coalesce *coalesce) + struct ethtool_coalesce *coalesce, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { int ret; @@ -1107,7 +1169,9 @@ static int set_coalesce(struct net_device *dev, coalesce->tx_coalesce_usecs); } -static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { const struct port_info *pi = netdev_priv(dev); const struct adapter *adap = pi->adapter; @@ -1221,15 +1285,225 @@ out: return err; } -static int set_flash(struct net_device *netdev, struct ethtool_flash *ef) +static int cxgb4_ethtool_flash_bootcfg(struct net_device *netdev, + const u8 *data, u32 size) { + struct adapter *adap = netdev2adap(netdev); int ret; - const struct firmware *fw; + + ret = t4_load_bootcfg(adap, data, size); + if (ret) + dev_err(adap->pdev_dev, "Failed to load boot cfg image\n"); + + return ret; +} + +static int cxgb4_ethtool_flash_boot(struct net_device *netdev, + const u8 *bdata, u32 size) +{ + struct adapter *adap = netdev2adap(netdev); + unsigned int offset; + u8 *data; + int ret; + + data = kmemdup(bdata, size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + offset = OFFSET_G(t4_read_reg(adap, PF_REG(0, PCIE_PF_EXPROM_OFST_A))); + + ret = t4_load_boot(adap, data, offset, size); + if (ret) + dev_err(adap->pdev_dev, "Failed to load boot image\n"); + + kfree(data); + return ret; +} + +#define CXGB4_PHY_SIG 0x130000ea + +static int cxgb4_validate_phy_image(const u8 *data, u32 *size) +{ + struct cxgb4_fw_data *header; + + header = (struct cxgb4_fw_data *)data; + if (be32_to_cpu(header->signature) != CXGB4_PHY_SIG) + return -EINVAL; + + return 0; +} + +static int cxgb4_ethtool_flash_phy(struct net_device *netdev, + const u8 *data, u32 size) +{ + struct adapter *adap = netdev2adap(netdev); + int ret; + + ret = cxgb4_validate_phy_image(data, NULL); + if (ret) { + dev_err(adap->pdev_dev, "PHY signature mismatch\n"); + return ret; + } + + /* We have to RESET the chip/firmware because we need the + * chip in uninitialized state for loading new PHY image. + * Otherwise, the running firmware will only store the PHY + * image in local RAM which will be lost after next reset. + */ + ret = t4_fw_reset(adap, adap->mbox, PIORSTMODE_F | PIORST_F); + if (ret < 0) { + dev_err(adap->pdev_dev, + "Set FW to RESET for flashing PHY FW failed. 
ret: %d\n", + ret); + return ret; + } + + ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size); + if (ret < 0) { + dev_err(adap->pdev_dev, "Failed to load PHY FW. ret: %d\n", + ret); + return ret; + } + + return 0; +} + +static int cxgb4_ethtool_flash_fw(struct net_device *netdev, + const u8 *data, u32 size) +{ struct adapter *adap = netdev2adap(netdev); unsigned int mbox = PCIE_FW_MASTER_M + 1; - u32 pcie_fw; + int ret; + + /* If the adapter has been fully initialized then we'll go ahead and + * try to get the firmware's cooperation in upgrading to the new + * firmware image otherwise we'll try to do the entire job from the + * host ... and we always "force" the operation in this path. + */ + if (adap->flags & CXGB4_FULL_INIT_DONE) + mbox = adap->mbox; + + ret = t4_fw_upgrade(adap, mbox, data, size, 1); + if (ret) + dev_err(adap->pdev_dev, + "Failed to flash firmware\n"); + + return ret; +} + +static int cxgb4_ethtool_flash_region(struct net_device *netdev, + const u8 *data, u32 size, u32 region) +{ + struct adapter *adap = netdev2adap(netdev); + int ret; + + switch (region) { + case CXGB4_ETHTOOL_FLASH_FW: + ret = cxgb4_ethtool_flash_fw(netdev, data, size); + break; + case CXGB4_ETHTOOL_FLASH_PHY: + ret = cxgb4_ethtool_flash_phy(netdev, data, size); + break; + case CXGB4_ETHTOOL_FLASH_BOOT: + ret = cxgb4_ethtool_flash_boot(netdev, data, size); + break; + case CXGB4_ETHTOOL_FLASH_BOOTCFG: + ret = cxgb4_ethtool_flash_bootcfg(netdev, data, size); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + if (!ret) + dev_info(adap->pdev_dev, + "loading %s successful, reload cxgb4 driver\n", + flash_region_strings[region]); + return ret; +} + +#define CXGB4_FW_SIG 0x4368656c +#define CXGB4_FW_SIG_OFFSET 0x160 + +static int cxgb4_validate_fw_image(const u8 *data, u32 *size) +{ + struct cxgb4_fw_data *header; + + header = (struct cxgb4_fw_data *)&data[CXGB4_FW_SIG_OFFSET]; + if (be32_to_cpu(header->signature) != CXGB4_FW_SIG) + return -EINVAL; + + if (size) + *size = be16_to_cpu(((struct fw_hdr *)data)->len512) * 512; + + return 0; +} + +static int cxgb4_validate_bootcfg_image(const u8 *data, u32 *size) +{ + struct cxgb4_bootcfg_data *header; + + header = (struct cxgb4_bootcfg_data *)data; + if (le16_to_cpu(header->signature) != BOOT_CFG_SIG) + return -EINVAL; + + return 0; +} + +static int cxgb4_validate_boot_image(const u8 *data, u32 *size) +{ + struct cxgb4_pci_exp_rom_header *exp_header; + struct cxgb4_pcir_data *pcir_header; + struct legacy_pci_rom_hdr *header; + const u8 *cur_header = data; + u16 pcir_offset; + + exp_header = (struct cxgb4_pci_exp_rom_header *)data; + + if (le16_to_cpu(exp_header->signature) != BOOT_SIGNATURE) + return -EINVAL; + + if (size) { + do { + header = (struct legacy_pci_rom_hdr *)cur_header; + pcir_offset = le16_to_cpu(header->pcir_offset); + pcir_header = (struct cxgb4_pcir_data *)(cur_header + + pcir_offset); + + *size += header->size512 * 512; + cur_header += header->size512 * 512; + } while (!(pcir_header->indicator & CXGB4_HDR_INDI)); + } + + return 0; +} + +static int cxgb4_ethtool_get_flash_region(const u8 *data, u32 *size) +{ + if (!cxgb4_validate_fw_image(data, size)) + return CXGB4_ETHTOOL_FLASH_FW; + if (!cxgb4_validate_boot_image(data, size)) + return CXGB4_ETHTOOL_FLASH_BOOT; + if (!cxgb4_validate_phy_image(data, size)) + return CXGB4_ETHTOOL_FLASH_PHY; + if (!cxgb4_validate_bootcfg_image(data, size)) + return CXGB4_ETHTOOL_FLASH_BOOTCFG; + + return -EOPNOTSUPP; +} + +static int set_flash(struct net_device *netdev, struct ethtool_flash *ef) +{ + 
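(Editorial note: set_flash(), whose body continues below, now drives the region helpers above. A bare "ethtool -f <dev> <image>" arrives as ETHTOOL_FLASH_ALL_REGIONS and the file is walked section by section via cxgb4_ethtool_get_flash_region(), while an explicit region index — e.g. "ethtool -f <dev> phy_fw.bin 2" for CXGB4_ETHTOOL_FLASH_PHY — skips auto-detection and flashes only that section.)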
struct adapter *adap = netdev2adap(netdev); + const struct firmware *fw; unsigned int master; u8 master_vld = 0; + const u8 *fw_data; + size_t fw_size; + u32 size = 0; + u32 pcie_fw; + int region; + int ret; pcie_fw = t4_read_reg(adap, PCIE_FW_A); master = PCIE_FW_MASTER_G(pcie_fw); @@ -1247,19 +1521,32 @@ static int set_flash(struct net_device *netdev, struct ethtool_flash *ef) if (ret < 0) return ret; - /* If the adapter has been fully initialized then we'll go ahead and - * try to get the firmware's cooperation in upgrading to the new - * firmware image otherwise we'll try to do the entire job from the - * host ... and we always "force" the operation in this path. - */ - if (adap->flags & CXGB4_FULL_INIT_DONE) - mbox = adap->mbox; + fw_data = fw->data; + fw_size = fw->size; + if (ef->region == ETHTOOL_FLASH_ALL_REGIONS) { + while (fw_size > 0) { + size = 0; + region = cxgb4_ethtool_get_flash_region(fw_data, &size); + if (region < 0 || !size) { + ret = region; + goto out_free_fw; + } + + ret = cxgb4_ethtool_flash_region(netdev, fw_data, size, + region); + if (ret) + goto out_free_fw; + + fw_data += size; + fw_size -= size; + } + } else { + ret = cxgb4_ethtool_flash_region(netdev, fw_data, fw_size, + ef->region); + } - ret = t4_fw_upgrade(adap, mbox, fw->data, fw->size, 1); +out_free_fw: release_firmware(fw); - if (!ret) - dev_info(adap->pdev_dev, - "loaded firmware %s, reload cxgb4 driver\n", ef->data); return ret; } @@ -1341,10 +1628,118 @@ static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key, return -EPERM; } +static struct filter_entry *cxgb4_get_filter_entry(struct adapter *adap, + u32 ftid) +{ + struct tid_info *t = &adap->tids; + + if (ftid >= t->hpftid_base && ftid < t->hpftid_base + t->nhpftids) + return &t->hpftid_tab[ftid - t->hpftid_base]; + + if (ftid >= t->ftid_base && ftid < t->ftid_base + t->nftids) + return &t->ftid_tab[ftid - t->ftid_base]; + + return lookup_tid(t, ftid); +} + +static void cxgb4_fill_filter_rule(struct ethtool_rx_flow_spec *fs, + struct ch_filter_specification *dfs) +{ + switch (dfs->val.proto) { + case IPPROTO_TCP: + if (dfs->type) + fs->flow_type = TCP_V6_FLOW; + else + fs->flow_type = TCP_V4_FLOW; + break; + case IPPROTO_UDP: + if (dfs->type) + fs->flow_type = UDP_V6_FLOW; + else + fs->flow_type = UDP_V4_FLOW; + break; + } + + if (dfs->type) { + fs->h_u.tcp_ip6_spec.psrc = cpu_to_be16(dfs->val.fport); + fs->m_u.tcp_ip6_spec.psrc = cpu_to_be16(dfs->mask.fport); + fs->h_u.tcp_ip6_spec.pdst = cpu_to_be16(dfs->val.lport); + fs->m_u.tcp_ip6_spec.pdst = cpu_to_be16(dfs->mask.lport); + memcpy(&fs->h_u.tcp_ip6_spec.ip6src, &dfs->val.fip[0], + sizeof(fs->h_u.tcp_ip6_spec.ip6src)); + memcpy(&fs->m_u.tcp_ip6_spec.ip6src, &dfs->mask.fip[0], + sizeof(fs->m_u.tcp_ip6_spec.ip6src)); + memcpy(&fs->h_u.tcp_ip6_spec.ip6dst, &dfs->val.lip[0], + sizeof(fs->h_u.tcp_ip6_spec.ip6dst)); + memcpy(&fs->m_u.tcp_ip6_spec.ip6dst, &dfs->mask.lip[0], + sizeof(fs->m_u.tcp_ip6_spec.ip6dst)); + fs->h_u.tcp_ip6_spec.tclass = dfs->val.tos; + fs->m_u.tcp_ip6_spec.tclass = dfs->mask.tos; + } else { + fs->h_u.tcp_ip4_spec.psrc = cpu_to_be16(dfs->val.fport); + fs->m_u.tcp_ip4_spec.psrc = cpu_to_be16(dfs->mask.fport); + fs->h_u.tcp_ip4_spec.pdst = cpu_to_be16(dfs->val.lport); + fs->m_u.tcp_ip4_spec.pdst = cpu_to_be16(dfs->mask.lport); + memcpy(&fs->h_u.tcp_ip4_spec.ip4src, &dfs->val.fip[0], + sizeof(fs->h_u.tcp_ip4_spec.ip4src)); + memcpy(&fs->m_u.tcp_ip4_spec.ip4src, &dfs->mask.fip[0], + sizeof(fs->m_u.tcp_ip4_spec.ip4src)); + memcpy(&fs->h_u.tcp_ip4_spec.ip4dst, 
&dfs->val.lip[0], + sizeof(fs->h_u.tcp_ip4_spec.ip4dst)); + memcpy(&fs->m_u.tcp_ip4_spec.ip4dst, &dfs->mask.lip[0], + sizeof(fs->m_u.tcp_ip4_spec.ip4dst)); + fs->h_u.tcp_ip4_spec.tos = dfs->val.tos; + fs->m_u.tcp_ip4_spec.tos = dfs->mask.tos; + } + fs->h_ext.vlan_tci = cpu_to_be16(dfs->val.ivlan); + fs->m_ext.vlan_tci = cpu_to_be16(dfs->mask.ivlan); + fs->flow_type |= FLOW_EXT; + + if (dfs->action == FILTER_DROP) + fs->ring_cookie = RX_CLS_FLOW_DISC; + else + fs->ring_cookie = dfs->iq; +} + +static int cxgb4_ntuple_get_filter(struct net_device *dev, + struct ethtool_rxnfc *cmd, + unsigned int loc) +{ + const struct port_info *pi = netdev_priv(dev); + struct adapter *adap = netdev2adap(dev); + struct filter_entry *f; + int ftid; + + if (!(adap->flags & CXGB4_FULL_INIT_DONE)) + return -EAGAIN; + + /* Check for maximum filter range */ + if (!adap->ethtool_filters) + return -EOPNOTSUPP; + + if (loc >= adap->ethtool_filters->nentries) + return -ERANGE; + + if (!test_bit(loc, adap->ethtool_filters->port[pi->port_id].bmap)) + return -ENOENT; + + ftid = adap->ethtool_filters->port[pi->port_id].loc_array[loc]; + + /* Fetch filter_entry */ + f = cxgb4_get_filter_entry(adap, ftid); + + cxgb4_fill_filter_rule(&cmd->fs, &f->fs); + + return 0; +} + static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rules) { const struct port_info *pi = netdev_priv(dev); + struct adapter *adap = netdev2adap(dev); + unsigned int count = 0, index = 0; + int ret = 0; switch (info->cmd) { case ETHTOOL_GRXFH: { @@ -1400,10 +1795,154 @@ static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, case ETHTOOL_GRXRINGS: info->data = pi->nqsets; return 0; + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = + adap->ethtool_filters->port[pi->port_id].in_use; + return 0; + case ETHTOOL_GRXCLSRULE: + return cxgb4_ntuple_get_filter(dev, info, info->fs.location); + case ETHTOOL_GRXCLSRLALL: + info->data = adap->ethtool_filters->nentries; + while (count < info->rule_cnt) { + ret = cxgb4_ntuple_get_filter(dev, info, index); + if (!ret) + rules[count++] = index; + index++; + } + return 0; } + return -EOPNOTSUPP; } +static int cxgb4_ntuple_del_filter(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct cxgb4_ethtool_filter_info *filter_info; + struct adapter *adapter = netdev2adap(dev); + struct port_info *pi = netdev_priv(dev); + struct filter_entry *f; + u32 filter_id; + int ret; + + if (!(adapter->flags & CXGB4_FULL_INIT_DONE)) + return -EAGAIN; /* can still change nfilters */ + + if (!adapter->ethtool_filters) + return -EOPNOTSUPP; + + if (cmd->fs.location >= adapter->ethtool_filters->nentries) { + dev_err(adapter->pdev_dev, + "Location must be < %u", + adapter->ethtool_filters->nentries); + return -ERANGE; + } + + filter_info = &adapter->ethtool_filters->port[pi->port_id]; + + if (!test_bit(cmd->fs.location, filter_info->bmap)) + return -ENOENT; + + filter_id = filter_info->loc_array[cmd->fs.location]; + f = cxgb4_get_filter_entry(adapter, filter_id); + + if (f->fs.prio) + filter_id -= adapter->tids.hpftid_base; + else if (!f->fs.hash) + filter_id -= (adapter->tids.ftid_base - adapter->tids.nhpftids); + + ret = cxgb4_flow_rule_destroy(dev, f->fs.tc_prio, &f->fs, filter_id); + if (ret) + goto err; + + clear_bit(cmd->fs.location, filter_info->bmap); + filter_info->in_use--; + +err: + return ret; +} + +/* Add Ethtool n-tuple filters. 
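(Editorial note: together with cxgb4_ntuple_del_filter() above and the set handler below, this wires the driver into the standard ethtool n-tuple interface; e.g. "ethtool -N <dev> flow-type tcp4 dst-port 80 action -1 loc 0" inserts a drop rule at location 0, "ethtool -n <dev>" reads it back through ETHTOOL_GRXCLSRLCNT/GRXCLSRULE, and "ethtool -N <dev> delete 0" removes it.)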
*/ +static int cxgb4_ntuple_set_filter(struct net_device *netdev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec_input input = {}; + struct cxgb4_ethtool_filter_info *filter_info; + struct adapter *adapter = netdev2adap(netdev); + struct port_info *pi = netdev_priv(netdev); + struct ch_filter_specification fs; + struct ethtool_rx_flow_rule *flow; + u32 tid; + int ret; + + if (!(adapter->flags & CXGB4_FULL_INIT_DONE)) + return -EAGAIN; /* can still change nfilters */ + + if (!adapter->ethtool_filters) + return -EOPNOTSUPP; + + if (cmd->fs.location >= adapter->ethtool_filters->nentries) { + dev_err(adapter->pdev_dev, + "Location must be < %u", + adapter->ethtool_filters->nentries); + return -ERANGE; + } + + if (test_bit(cmd->fs.location, + adapter->ethtool_filters->port[pi->port_id].bmap)) + return -EEXIST; + + memset(&fs, 0, sizeof(fs)); + + input.fs = &cmd->fs; + flow = ethtool_rx_flow_rule_create(&input); + if (IS_ERR(flow)) { + ret = PTR_ERR(flow); + goto exit; + } + + fs.hitcnts = 1; + + ret = cxgb4_flow_rule_replace(netdev, flow->rule, cmd->fs.location, + NULL, &fs, &tid); + if (ret) + goto free; + + filter_info = &adapter->ethtool_filters->port[pi->port_id]; + + if (fs.prio) + tid += adapter->tids.hpftid_base; + else if (!fs.hash) + tid += (adapter->tids.ftid_base - adapter->tids.nhpftids); + + filter_info->loc_array[cmd->fs.location] = tid; + set_bit(cmd->fs.location, filter_info->bmap); + filter_info->in_use++; + +free: + ethtool_rx_flow_rule_destroy(flow); +exit: + return ret; +} + +static int set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + ret = cxgb4_ntuple_set_filter(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = cxgb4_ntuple_del_filter(dev, cmd); + break; + default: + break; + } + + return ret; +} + static int set_dump(struct net_device *dev, struct ethtool_dump *eth_dump) { struct adapter *adapter = netdev2adap(dev); @@ -1454,6 +1993,15 @@ static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump, return 0; } +static bool cxgb4_fw_mod_type_info_available(unsigned int fw_mod_type) +{ + /* Read port module EEPROM as long as it is plugged-in and + * safe to read. 
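For orientation, the n-tuple handlers above are reached through the classic ethtool rxnfc ioctls (from userspace, e.g. `ethtool -N <dev> flow-type tcp4 ... loc N` and `ethtool -N <dev> delete N`), and the interesting move is that cxgb4_ntuple_set_filter() converts the legacy flow spec into the common flow_rule representation so it can reuse the driver's TC flower backend. A rough sketch of that conversion step in isolation, with a made-up dispatch_to_backend() standing in for cxgb4_flow_rule_replace():

#include <linux/ethtool.h>

static int install_ntuple(struct net_device *dev,
                          const struct ethtool_rx_flow_spec *fs)
{
        struct ethtool_rx_flow_spec_input input = { .fs = fs };
        struct ethtool_rx_flow_rule *flow;
        int ret;

        flow = ethtool_rx_flow_rule_create(&input);
        if (IS_ERR(flow))
                return PTR_ERR(flow);

        /* flow->rule is a struct flow_rule, the same form TC flower
         * offload hands to drivers, so one backend serves both paths.
         */
        ret = dispatch_to_backend(dev, flow->rule);

        ethtool_rx_flow_rule_destroy(flow);
        return ret;
}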
+ */ + return (fw_mod_type != FW_PORT_MOD_TYPE_NONE && + fw_mod_type != FW_PORT_MOD_TYPE_ERROR); +} + static int cxgb4_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { @@ -1462,7 +2010,7 @@ static int cxgb4_get_module_info(struct net_device *dev, struct adapter *adapter = pi->adapter; int ret; - if (!t4_is_inserted_mod_type(pi->mod_type)) + if (!cxgb4_fw_mod_type_info_available(pi->mod_type)) return -EINVAL; switch (pi->port_type) { @@ -1480,12 +2028,15 @@ static int cxgb4_get_module_info(struct net_device *dev, if (ret) return ret; - if (!sff8472_comp || (sff_diag_type & 4)) { + if (!sff8472_comp || (sff_diag_type & SFP_DIAG_ADDRMODE)) { modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; } else { modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + if (sff_diag_type & SFP_DIAG_IMPLEMENTED) + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + else + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; } break; @@ -1579,7 +2130,49 @@ static int cxgb4_set_priv_flags(struct net_device *netdev, u32 flags) return 0; } +static void cxgb4_lb_test(struct net_device *netdev, u64 *lb_status) +{ + int dev_state = netif_running(netdev); + + if (dev_state) { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } + + *lb_status = cxgb4_selftest_lb_pkt(netdev); + + if (dev_state) { + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); + } +} + +static void cxgb4_self_test(struct net_device *netdev, + struct ethtool_test *eth_test, u64 *data) +{ + struct port_info *pi = netdev_priv(netdev); + struct adapter *adap = pi->adapter; + + memset(data, 0, sizeof(u64) * CXGB4_ETHTOOL_MAX_TEST); + + if (!(adap->flags & CXGB4_FULL_INIT_DONE) || + !(adap->flags & CXGB4_FW_OK)) { + eth_test->flags |= ETH_TEST_FL_FAILED; + return; + } + + if (eth_test->flags & ETH_TEST_FL_OFFLINE) + cxgb4_lb_test(netdev, &data[CXGB4_ETHTOOL_LB_TEST]); + + if (data[CXGB4_ETHTOOL_LB_TEST]) + eth_test->flags |= ETH_TEST_FL_FAILED; +} + static const struct ethtool_ops cxgb_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_RX_MAX_FRAMES | + ETHTOOL_COALESCE_TX_USECS_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_link_ksettings = get_link_ksettings, .set_link_ksettings = set_link_ksettings, .get_fecparam = get_fecparam, @@ -1605,9 +2198,11 @@ static const struct ethtool_ops cxgb_ethtool_ops = { .get_regs_len = get_regs_len, .get_regs = get_regs, .get_rxnfc = get_rxnfc, + .set_rxnfc = set_rxnfc, .get_rxfh_indir_size = get_rss_table_size, .get_rxfh = get_rss_table, .set_rxfh = set_rss_table, + .self_test = cxgb4_self_test, .flash_device = set_flash, .get_ts_info = get_ts_info, .set_dump = set_dump, @@ -1619,6 +2214,85 @@ static const struct ethtool_ops cxgb_ethtool_ops = { .set_priv_flags = cxgb4_set_priv_flags, }; +void cxgb4_cleanup_ethtool_filters(struct adapter *adap) +{ + struct cxgb4_ethtool_filter_info *eth_filter_info; + u8 i; + + if (!adap->ethtool_filters) + return; + + eth_filter_info = adap->ethtool_filters->port; + + if (eth_filter_info) { + for (i = 0; i < adap->params.nports; i++) { + kvfree(eth_filter_info[i].loc_array); + bitmap_free(eth_filter_info[i].bmap); + } + kfree(eth_filter_info); + } + + kfree(adap->ethtool_filters); +} + +int cxgb4_init_ethtool_filters(struct adapter *adap) +{ + struct cxgb4_ethtool_filter_info *eth_filter_info; + struct cxgb4_ethtool_filter *eth_filter; + struct tid_info *tids = &adap->tids; + u32 nentries, i; + int ret; + + eth_filter = 
kzalloc(sizeof(*eth_filter), GFP_KERNEL); + if (!eth_filter) + return -ENOMEM; + + eth_filter_info = kcalloc(adap->params.nports, + sizeof(*eth_filter_info), + GFP_KERNEL); + if (!eth_filter_info) { + ret = -ENOMEM; + goto free_eth_filter; + } + + eth_filter->port = eth_filter_info; + + nentries = tids->nhpftids + tids->nftids; + if (is_hashfilter(adap)) + nentries += tids->nhash + + (adap->tids.stid_base - adap->tids.tid_base); + eth_filter->nentries = nentries; + + for (i = 0; i < adap->params.nports; i++) { + eth_filter->port[i].loc_array = kvzalloc(nentries, GFP_KERNEL); + if (!eth_filter->port[i].loc_array) { + ret = -ENOMEM; + goto free_eth_finfo; + } + + eth_filter->port[i].bmap = bitmap_zalloc(nentries, GFP_KERNEL); + if (!eth_filter->port[i].bmap) { + ret = -ENOMEM; + goto free_eth_finfo; + } + } + + adap->ethtool_filters = eth_filter; + return 0; + +free_eth_finfo: + while (i-- > 0) { + bitmap_free(eth_filter->port[i].bmap); + kvfree(eth_filter->port[i].loc_array); + } + kfree(eth_filter_info); + +free_eth_filter: + kfree(eth_filter); + + return ret; +} + void cxgb4_set_ethtool_ops(struct net_device *netdev) { netdev->ethtool_ops = &cxgb_ethtool_ops; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 2a2938bbb93a..786ceae34488 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -145,13 +145,13 @@ static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) int err; /* do a set-tcb for smac-sel and CWR bit.. */ - err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); - if (err) - goto smac_err; - err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), TCB_SMAC_SEL_V(f->smt->idx), 1); + if (err) + goto smac_err; + + err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); if (!err) return 0; @@ -165,37 +165,40 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, unsigned int tid, bool dip, bool sip, bool dp, bool sp) { + u8 *nat_lp = (u8 *)&f->fs.nat_lport; + u8 *nat_fp = (u8 *)&f->fs.nat_fport; + if (dip) { if (f->fs.type) { set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W, WORD_MASK, f->fs.nat_lip[15] | f->fs.nat_lip[14] << 8 | f->fs.nat_lip[13] << 16 | - f->fs.nat_lip[12] << 24, 1); + (u64)f->fs.nat_lip[12] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 1, WORD_MASK, f->fs.nat_lip[11] | f->fs.nat_lip[10] << 8 | f->fs.nat_lip[9] << 16 | - f->fs.nat_lip[8] << 24, 1); + (u64)f->fs.nat_lip[8] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 2, WORD_MASK, f->fs.nat_lip[7] | f->fs.nat_lip[6] << 8 | f->fs.nat_lip[5] << 16 | - f->fs.nat_lip[4] << 24, 1); + (u64)f->fs.nat_lip[4] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 3, WORD_MASK, f->fs.nat_lip[3] | f->fs.nat_lip[2] << 8 | f->fs.nat_lip[1] << 16 | - f->fs.nat_lip[0] << 24, 1); + (u64)f->fs.nat_lip[0] << 24, 1); } else { set_tcb_field(adap, f, tid, TCB_RX_FRAG3_LEN_RAW_W, WORD_MASK, f->fs.nat_lip[3] | f->fs.nat_lip[2] << 8 | f->fs.nat_lip[1] << 16 | - f->fs.nat_lip[0] << 24, 1); + (u64)f->fs.nat_lip[0] << 24, 1); } } @@ -205,25 +208,25 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, WORD_MASK, f->fs.nat_fip[15] | f->fs.nat_fip[14] << 8 | f->fs.nat_fip[13] << 16 | - f->fs.nat_fip[12] << 24, 1); + (u64)f->fs.nat_fip[12] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 1, WORD_MASK, f->fs.nat_fip[11] | f->fs.nat_fip[10] << 8 | f->fs.nat_fip[9] << 
16 | - f->fs.nat_fip[8] << 24, 1); + (u64)f->fs.nat_fip[8] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 2, WORD_MASK, f->fs.nat_fip[7] | f->fs.nat_fip[6] << 8 | f->fs.nat_fip[5] << 16 | - f->fs.nat_fip[4] << 24, 1); + (u64)f->fs.nat_fip[4] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 3, WORD_MASK, f->fs.nat_fip[3] | f->fs.nat_fip[2] << 8 | f->fs.nat_fip[1] << 16 | - f->fs.nat_fip[0] << 24, 1); + (u64)f->fs.nat_fip[0] << 24, 1); } else { set_tcb_field(adap, f, tid, @@ -231,13 +234,14 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, WORD_MASK, f->fs.nat_fip[3] | f->fs.nat_fip[2] << 8 | f->fs.nat_fip[1] << 16 | - f->fs.nat_fip[0] << 24, 1); + (u64)f->fs.nat_fip[0] << 24, 1); } } set_tcb_field(adap, f, tid, TCB_PDU_HDR_LEN_W, WORD_MASK, - (dp ? f->fs.nat_lport : 0) | - (sp ? f->fs.nat_fport << 16 : 0), 1); + (dp ? (nat_lp[1] | nat_lp[0] << 8) : 0) | + (sp ? (nat_fp[1] << 16 | (u64)nat_fp[0] << 24) : 0), + 1); } /* Validate filter spec against configuration done on the card. */ @@ -438,13 +442,118 @@ int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, return get_filter_count(adapter, fidx, hitcnt, bytecnt, hash); } -int cxgb4_get_free_ftid(struct net_device *dev, int family) +static bool cxgb4_filter_prio_in_range(struct tid_info *t, u32 idx, u8 nslots, + u32 prio) +{ + struct filter_entry *prev_tab, *next_tab, *prev_fe, *next_fe; + u32 prev_ftid, next_ftid; + + /* Only insert the rule if both of the following conditions + * are met: + * 1. The immediate previous rule has priority <= @prio. + * 2. The immediate next rule has priority >= @prio. + */ + + /* High Priority (HPFILTER) region always has higher priority + * than normal FILTER region. So, all rules in HPFILTER region + * must have prio value <= rules in normal FILTER region. + */ + if (idx < t->nhpftids) { + /* Don't insert if there's a rule already present at @idx + * in HPFILTER region. + */ + if (test_bit(idx, t->hpftid_bmap)) + return false; + + next_tab = t->hpftid_tab; + next_ftid = find_next_bit(t->hpftid_bmap, t->nhpftids, idx); + if (next_ftid >= t->nhpftids) { + /* No next entry found in HPFILTER region. + * See if there's any next entry in normal + * FILTER region. + */ + next_ftid = find_first_bit(t->ftid_bmap, t->nftids); + if (next_ftid >= t->nftids) + next_ftid = idx; + else + next_tab = t->ftid_tab; + } + + /* Search for the closest previous filter entry in HPFILTER + * region. No need to search in normal FILTER region because + * there can never be any entry in normal FILTER region whose + * prio value is < last entry in HPFILTER region. + */ + prev_ftid = find_last_bit(t->hpftid_bmap, idx); + if (prev_ftid >= idx) + prev_ftid = idx; + + prev_tab = t->hpftid_tab; + } else { + idx -= t->nhpftids; + + /* Don't insert if there's a rule already present at @idx + * in normal FILTER region. + */ + if (test_bit(idx, t->ftid_bmap)) + return false; + + prev_tab = t->ftid_tab; + prev_ftid = find_last_bit(t->ftid_bmap, idx); + if (prev_ftid >= idx) { + /* No previous entry found in normal FILTER + * region. See if there's any previous entry + * in HPFILTER region. + */ + prev_ftid = find_last_bit(t->hpftid_bmap, t->nhpftids); + if (prev_ftid >= t->nhpftids) + prev_ftid = idx; + else + prev_tab = t->hpftid_tab; + } + + /* Search for the closest next filter entry in normal + * FILTER region. 
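The (u64) casts threaded through set_nat_params() above are functional, not cosmetic: a u8 shifted left is promoted to int first, so `ip[0] << 24` can land in the sign bit of a 32-bit signed value, and widening that to the u64 parameter of set_tcb_field() then sign-extends ones into the upper half. The byte-wise nat_lp[]/nat_fp[] reads serve a similar purpose for the 16-bit ports. A self-contained illustration (plain C, outside the kernel):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t b = 0x80;

        /* b << 24 is computed as int; bit 31 is the sign bit, so the
         * widening conversion smears ones into the top 32 bits.
         */
        uint64_t bad  = b << 24;                /* 0xffffffff80000000 */
        uint64_t good = (uint64_t)b << 24;      /* 0x0000000080000000 */

        /* Rebuilding a 16-bit port from bytes stored in network order
         * gives the same answer on any host -- the nat_lp[] trick.
         */
        uint8_t wire[2] = { 0x12, 0x34 };
        uint16_t port = wire[1] | wire[0] << 8; /* 0x1234 everywhere */

        printf("%llx %llx %x\n", (unsigned long long)bad,
               (unsigned long long)good, port);
        return 0;
}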
No need to search in HPFILTER region + * because there can never be any entry in HPFILTER + * region whose prio value is > first entry in normal + * FILTER region. + */ + next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx); + if (next_ftid >= t->nftids) + next_ftid = idx; + + next_tab = t->ftid_tab; + } + + next_fe = &next_tab[next_ftid]; + + /* See if the filter entry belongs to an IPv6 rule, which + * occupy 4 slots on T5 and 2 slots on T6. Adjust the + * reference to the previously inserted filter entry + * accordingly. + */ + prev_fe = &prev_tab[prev_ftid & ~(nslots - 1)]; + if (!prev_fe->fs.type) + prev_fe = &prev_tab[prev_ftid]; + + if ((prev_fe->valid && prev_fe->fs.tc_prio > prio) || + (next_fe->valid && next_fe->fs.tc_prio < prio)) + return false; + + return true; +} + +int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en, + u32 tc_prio) { struct adapter *adap = netdev2adap(dev); struct tid_info *t = &adap->tids; + u32 bmap_ftid, max_ftid; + struct filter_entry *f; + unsigned long *bmap; bool found = false; - u8 i, n, cnt; - int ftid; + u8 i, cnt, n; + int ftid = 0; /* IPv4 occupy 1 slot. IPv6 occupy 2 slots on T6 and 4 slots * on T5. @@ -456,34 +565,124 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family) n += 2; } - if (n > t->nftids) - return -ENOMEM; - - /* Find free filter slots from the end of TCAM. Appropriate - * checks must be done by caller later to ensure the prio - * passed by TC doesn't conflict with prio saved by existing - * rules in the TCAM. + /* There are 3 filter regions available in hardware in + * following order of priority: + * + * 1. High Priority (HPFILTER) region (Highest Priority). + * 2. HASH region. + * 3. Normal FILTER region (Lowest Priority). + * + * Entries in HPFILTER and normal FILTER region have index + * 0 as the highest priority and the rules will be scanned + * in ascending order until either a rule hits or end of + * the region is reached. + * + * All HASH region entries have same priority. The set of + * fields to match in headers are pre-determined. The same + * set of header match fields must be compulsorily specified + * in all the rules wanting to get inserted in HASH region. + * Hence, HASH region is an exact-match region. A HASH is + * generated for a rule based on the values in the + * pre-determined set of header match fields. The generated + * HASH serves as an index into the HASH region. There can + * never be 2 rules having the same HASH. Hardware will + * compute a HASH for every incoming packet based on the + * values in the pre-determined set of header match fields + * and uses it as an index to check if there's a rule + * inserted in the HASH region at the specified index. If + * there's a rule inserted, then it's considered as a filter + * hit. Otherwise, it's a filter miss and normal FILTER region + * is scanned afterwards. */ + spin_lock_bh(&t->ftid_lock); - ftid = t->nftids - 1; - while (ftid >= n - 1) { + + ftid = (tc_prio <= t->nhpftids) ? 0 : t->nhpftids; + max_ftid = t->nftids + t->nhpftids; + while (ftid < max_ftid) { + if (ftid < t->nhpftids) { + /* If the new rule wants to get inserted into + * HPFILTER region, but its prio is greater + * than the rule with the highest prio in HASH + * region, or if there's not enough slots + * available in HPFILTER region, then skip + * trying to insert this rule into HPFILTER + * region and directly go to the next region. 
+ */ + if ((t->tc_hash_tids_max_prio && + tc_prio > t->tc_hash_tids_max_prio) || + (ftid + n) > t->nhpftids) { + ftid = t->nhpftids; + continue; + } + + bmap = t->hpftid_bmap; + bmap_ftid = ftid; + } else if (hash_en) { + /* Ensure priority is >= last rule in HPFILTER + * region. + */ + ftid = find_last_bit(t->hpftid_bmap, t->nhpftids); + if (ftid < t->nhpftids) { + f = &t->hpftid_tab[ftid]; + if (f->valid && tc_prio < f->fs.tc_prio) + break; + } + + /* Ensure priority is <= first rule in normal + * FILTER region. + */ + ftid = find_first_bit(t->ftid_bmap, t->nftids); + if (ftid < t->nftids) { + f = &t->ftid_tab[ftid]; + if (f->valid && tc_prio > f->fs.tc_prio) + break; + } + + found = true; + ftid = t->nhpftids; + goto out_unlock; + } else { + /* If the new rule wants to get inserted into + * normal FILTER region, but its prio is less + * than the rule with the highest prio in HASH + * region, then reject the rule. + */ + if (t->tc_hash_tids_max_prio && + tc_prio < t->tc_hash_tids_max_prio) + break; + + if (ftid + n > max_ftid) + break; + + bmap = t->ftid_bmap; + bmap_ftid = ftid - t->nhpftids; + } + cnt = 0; for (i = 0; i < n; i++) { - if (test_bit(ftid - i, t->ftid_bmap)) + if (test_bit(bmap_ftid + i, bmap)) break; cnt++; } + if (cnt == n) { - ftid &= ~(n - 1); - found = true; - break; + /* Ensure the new rule's prio doesn't conflict + * with existing rules. + */ + if (cxgb4_filter_prio_in_range(t, ftid, n, + tc_prio)) { + ftid &= ~(n - 1); + found = true; + break; + } } - ftid -= n; + ftid += n; } - spin_unlock_bh(&t->ftid_lock); - ftid += t->nhpftids; +out_unlock: + spin_unlock_bh(&t->ftid_lock); return found ? ftid : -ENOMEM; } @@ -555,73 +754,6 @@ static void cxgb4_clear_hpftid(struct tid_info *t, int fidx, int family) spin_unlock_bh(&t->ftid_lock); } -bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) -{ - struct filter_entry *prev_fe, *next_fe, *tab; - struct adapter *adap = netdev2adap(dev); - u32 prev_ftid, next_ftid, max_tid; - struct tid_info *t = &adap->tids; - unsigned long *bmap; - bool valid = true; - - if (idx < t->nhpftids) { - bmap = t->hpftid_bmap; - tab = t->hpftid_tab; - max_tid = t->nhpftids; - } else { - idx -= t->nhpftids; - bmap = t->ftid_bmap; - tab = t->ftid_tab; - max_tid = t->nftids; - } - - /* Only insert the rule if both of the following conditions - * are met: - * 1. The immediate previous rule has priority <= @prio. - * 2. The immediate next rule has priority >= @prio. - */ - spin_lock_bh(&t->ftid_lock); - - /* Don't insert if there's a rule already present at @idx. */ - if (test_bit(idx, bmap)) { - valid = false; - goto out_unlock; - } - - next_ftid = find_next_bit(bmap, max_tid, idx); - if (next_ftid >= max_tid) - next_ftid = idx; - - next_fe = &tab[next_ftid]; - - prev_ftid = find_last_bit(bmap, idx); - if (prev_ftid >= idx) - prev_ftid = idx; - - /* See if the filter entry belongs to an IPv6 rule, which - * occupy 4 slots on T5 and 2 slots on T6. Adjust the - * reference to the previously inserted filter entry - * accordingly. - */ - if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) { - prev_fe = &tab[prev_ftid & ~0x3]; - if (!prev_fe->fs.type) - prev_fe = &tab[prev_ftid]; - } else { - prev_fe = &tab[prev_ftid & ~0x1]; - if (!prev_fe->fs.type) - prev_fe = &tab[prev_ftid]; - } - - if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) || - (next_fe->valid && prio > next_fe->fs.tc_prio)) - valid = false; - -out_unlock: - spin_unlock_bh(&t->ftid_lock); - return valid; -} - /* Delete the filter at a specified index. 
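Stripped of the HPFILTER/FILTER bookkeeping, the invariant that cxgb4_filter_prio_in_range() enforces above is compact: the TCAM is matched in ascending index order, so rule priorities must be non-decreasing with index, and a slot is usable only if both occupied neighbours agree. The core test, sketched with the simplifying assumption that a zero priority marks an empty slot (the driver uses validity bits instead):

static bool prio_fits(const u32 *prio_tab, u32 nslots, u32 idx, u32 prio)
{
        u32 i;

        if (prio_tab[idx])                      /* slot already occupied */
                return false;

        /* The closest occupied slot before idx must not outrank us. */
        for (i = idx; i-- > 0; ) {
                if (prio_tab[i]) {
                        if (prio_tab[i] > prio)
                                return false;
                        break;
                }
        }

        /* The closest occupied slot after idx must not underrank us. */
        for (i = idx + 1; i < nslots; i++) {
                if (prio_tab[i]) {
                        if (prio_tab[i] < prio)
                                return false;
                        break;
                }
        }

        return true;
}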
*/ static int del_filter_wr(struct adapter *adapter, int fidx) { @@ -730,6 +862,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || @@ -747,7 +880,8 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; + if (f->fs.newsmac) + fwr->smac_sel = f->smt->idx; fwr->rx_chan_rx_rpl_iq = htons(FW_FILTER_WR_RX_CHAN_V(0) | FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); @@ -778,6 +912,9 @@ int set_filter_wr(struct adapter *adapter, int fidx) fwr->fpm = htons(f->fs.mask.fport); if (adapter->params.filter2_wr_support) { + u8 *nat_lp = (u8 *)&f->fs.nat_lport; + u8 *nat_fp = (u8 *)&f->fs.nat_fport; + fwr->natmode_to_ulp_type = FW_FILTER2_WR_ULP_TYPE_V(f->fs.nat_mode ? ULP_MODE_TCPDDP : @@ -785,8 +922,8 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER2_WR_NATMODE_V(f->fs.nat_mode); memcpy(fwr->newlip, f->fs.nat_lip, sizeof(fwr->newlip)); memcpy(fwr->newfip, f->fs.nat_fip, sizeof(fwr->newfip)); - fwr->newlport = htons(f->fs.nat_lport); - fwr->newfport = htons(f->fs.nat_fport); + fwr->newlport = htons(nat_lp[1] | nat_lp[0] << 8); + fwr->newfport = htons(nat_fp[1] | nat_fp[0] << 8); } /* Mark the filter as "pending" and ship off the Filter Work Request. @@ -842,7 +979,7 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) { struct port_info *pi = netdev_priv(f->dev); - /* If the new or old filter have loopback rewriteing rules then we'll + /* If the new or old filter have loopback rewriting rules then we'll * need to free any existing L2T, SMT, CLIP entries of filter * rule. 
*/ @@ -902,15 +1039,15 @@ void clear_all_filters(struct adapter *adapter) adapter->tids.tid_tab[i]; if (f && (f->valid || f->pending)) - cxgb4_del_filter(dev, i, &f->fs); + cxgb4_del_filter(dev, f->tid, &f->fs); } - sb = t4_read_reg(adapter, LE_DB_SRVR_START_INDEX_A); + sb = adapter->tids.stid_base; for (i = 0; i < sb; i++) { f = (struct filter_entry *)adapter->tids.tid_tab[i]; if (f && (f->valid || f->pending)) - cxgb4_del_filter(dev, i, &f->fs); + cxgb4_del_filter(dev, f->tid, &f->fs); } } } @@ -974,16 +1111,16 @@ static bool is_addr_all_mask(u8 *ipmask, int family) struct in_addr *addr; addr = (struct in_addr *)ipmask; - if (addr->s_addr == 0xffffffff) + if (addr->s_addr == htonl(0xffffffff)) return true; } else if (family == AF_INET6) { struct in6_addr *addr6; addr6 = (struct in6_addr *)ipmask; - if (addr6->s6_addr32[0] == 0xffffffff && - addr6->s6_addr32[1] == 0xffffffff && - addr6->s6_addr32[2] == 0xffffffff && - addr6->s6_addr32[3] == 0xffffffff) + if (addr6->s6_addr32[0] == htonl(0xffffffff) && + addr6->s6_addr32[1] == htonl(0xffffffff) && + addr6->s6_addr32[2] == htonl(0xffffffff) && + addr6->s6_addr32[3] == htonl(0xffffffff)) return true; } return false; @@ -1021,6 +1158,11 @@ bool is_filter_exact_match(struct adapter *adap, if (!is_hashfilter(adap)) return false; + if ((atomic_read(&adap->tids.hash_tids_in_use) + + atomic_read(&adap->tids.tids_in_use)) >= + (adap->tids.nhash + (adap->tids.stid_base - adap->tids.tid_base))) + return false; + /* Keep tunnel VNI match disabled for hash-filters for now */ if (fs->mask.encap_vld) return false; @@ -1183,11 +1325,8 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | - CONG_CNTRL_V((f->fs.action == FILTER_DROP) | - (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | - ((f->fs.dirsteerhash) << 1)) | - CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); + ((f->fs.dirsteerhash) << 1))); } static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, @@ -1223,11 +1362,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | - CONG_CNTRL_V((f->fs.action == FILTER_DROP) | - (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | - ((f->fs.dirsteerhash) << 1)) | - CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); + ((f->fs.dirsteerhash) << 1))); } static int cxgb4_set_hash_filter(struct net_device *dev, @@ -1305,7 +1441,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev, } else if (iconf & USE_ENC_IDX_F) { if (f->fs.val.encap_vld) { struct port_info *pi = netdev_priv(f->dev); - u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; + static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; /* allocate MPS TCAM entry */ ret = t4_alloc_encap_mac_filt(adapter, pi->viid, @@ -1552,7 +1688,7 @@ int __cxgb4_set_filter(struct net_device *dev, int ftid, } else if (iconf & USE_ENC_IDX_F) { if (f->fs.val.encap_vld) { struct port_info *pi = netdev_priv(f->dev); - u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; + static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; /* allocate MPS TCAM entry */ ret = t4_alloc_encap_mac_filt(adapter, pi->viid, @@ -1768,13 +1904,16 @@ out: static int configure_filter_tcb(struct adapter *adap, unsigned int tid, struct filter_entry *f) { - if (f->fs.hitcnts) + if (f->fs.hitcnts) { set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W, - TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) | + TCB_TIMESTAMP_V(TCB_TIMESTAMP_M), + 
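The is_addr_all_mask() hunk above deserves a note: in_addr.s_addr and s6_addr32[] hold network-byte-order words, so they should only ever be compared against htonl()-wrapped constants. For the all-ones mask the old comparison happened to work, since 0xffffffff reads the same in either byte order; the htonl() form is what keeps sparse quiet and stays correct for any non-palindromic constant, e.g.:

#include <arpa/inet.h>
#include <stdbool.h>

/* Network-order field vs. network-order constant.  A /24 mask reads
 * back as 0x00ffffff on a little-endian host, so comparing against
 * the raw constant would silently fail there.
 */
static bool is_slash24(struct in_addr a)
{
        return a.s_addr == htonl(0xffffff00);
}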
TCB_TIMESTAMP_V(0ULL), + 1); + set_tcb_field(adap, f, tid, TCB_RTT_TS_RECENT_AGE_W, TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M), - TCB_TIMESTAMP_V(0ULL) | TCB_RTT_TS_RECENT_AGE_V(0ULL), 1); + } if (f->fs.newdmac) set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1, @@ -1896,6 +2035,20 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl) } return; } + switch (f->fs.action) { + case FILTER_PASS: + if (f->fs.dirsteer) + set_tcb_tflag(adap, f, tid, + TF_DIRECT_STEER_S, 1, 1); + break; + case FILTER_DROP: + set_tcb_tflag(adap, f, tid, TF_DROP_S, 1, 1); + break; + case FILTER_SWITCH: + set_tcb_tflag(adap, f, tid, TF_LPBK_S, 1, 1); + break; + } + break; default: @@ -1963,22 +2116,11 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) if (ctx) ctx->result = 0; } else if (ret == FW_FILTER_WR_FLT_ADDED) { - int err = 0; - - if (f->fs.newsmac) - err = configure_filter_smac(adap, f); - - if (!err) { - f->pending = 0; /* async setup completed */ - f->valid = 1; - if (ctx) { - ctx->result = 0; - ctx->tid = idx; - } - } else { - clear_filter(adap, f); - if (ctx) - ctx->result = err; + f->pending = 0; /* async setup completed */ + f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; } } else { /* Something went wrong. Issue a warning about the diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index b3e4a645043d..807a8dafec45 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -53,5 +53,6 @@ void clear_all_filters(struct adapter *adapter); void init_hash_filter(struct adapter *adap); bool is_filter_exact_match(struct adapter *adap, struct ch_filter_specification *fs); -bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio); +void cxgb4_cleanup_ethtool_filters(struct adapter *adap); +int cxgb4_init_ethtool_filters(struct adapter *adap); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 97f90edbc068..9cbce1faab26 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -66,6 +66,9 @@ #include <linux/crash_dump.h> #include <net/udp_tunnel.h> #include <net/xfrm.h> +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) +#include <net/tls.h> +#endif #include "cxgb4.h" #include "cxgb4_filter.h" @@ -90,11 +93,6 @@ char cxgb4_driver_name[] = KBUILD_MODNAME; -#ifdef DRV_VERSION -#undef DRV_VERSION -#endif -#define DRV_VERSION "2.0.0-ko" -const char cxgb4_driver_version[] = DRV_VERSION; #define DRV_DESC "Chelsio T4/T5/T6 Network Driver" #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ @@ -137,7 +135,6 @@ const char cxgb4_driver_version[] = DRV_VERSION; MODULE_DESCRIPTION(DRV_DESC); MODULE_AUTHOR("Chelsio Communications"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl); MODULE_FIRMWARE(FW4_FNAME); MODULE_FIRMWARE(FW5_FNAME); @@ -186,6 +183,7 @@ static struct dentry *cxgb4_debugfs_root; LIST_HEAD(adapter_list); DEFINE_MUTEX(uld_mutex); +LIST_HEAD(uld_list); static int cfg_queues(struct adapter *adap); @@ -437,8 +435,8 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) __dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync); __dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync); - return t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu, - (dev->flags & 
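The configure_filter_tcb() change above fixes a quietly broken masked write: TCB_TIMESTAMP and TCB_RTT_TS_RECENT_AGE live in different TCB words, and the old single set_tcb_field() call combined both fields' masks and values into one write of the timestamp word, clobbering unrelated bits there while never touching the age field at all. The fix issues one properly paired mask/value update per word. The read-modify-write contract in miniature (u32 as in linux/types.h):

/* Only bits inside @mask can change; value bits outside @mask are
 * silently dropped -- which is why mask and value must describe the
 * same hardware word.
 */
static inline u32 masked_update(u32 old, u32 mask, u32 val)
{
        return (old & ~mask) | (val & mask);
}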
IFF_PROMISC) ? 1 : 0, + return t4_set_rxmode(adapter, adapter->mbox, pi->viid, pi->viid_mirror, + mtu, (dev->flags & IFF_PROMISC) ? 1 : 0, (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1, sleep_ok); } @@ -451,7 +449,7 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) * or -1 * @addr: the new MAC address value * @persist: whether a new MAC allocation should be persistent - * @add_smt: if true also add the address to the HW SMT + * @smt_idx: the destination to store the new SMT index. * * Modifies an MPS filter and sets it to the new MAC address if * @tcam_idx >= 0, or adds the MAC address to a new filter if @@ -505,15 +503,16 @@ set_hash: */ static int link_start(struct net_device *dev) { - int ret; struct port_info *pi = netdev_priv(dev); - unsigned int mb = pi->adapter->pf; + unsigned int mb = pi->adapter->mbox; + int ret; /* * We do not set address filters and promiscuity here, the stack does * that step explicitly. */ - ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1, + ret = t4_set_rxmode(pi->adapter, mb, pi->viid, pi->viid_mirror, + dev->mtu, -1, -1, -1, !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true); if (ret == 0) ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt, @@ -824,6 +823,31 @@ static void adap_config_hpfilter(struct adapter *adapter) "HP filter region isn't supported by FW\n"); } +static int cxgb4_config_rss(const struct port_info *pi, u16 *rss, + u16 rss_size, u16 viid) +{ + struct adapter *adap = pi->adapter; + int ret; + + ret = t4_config_rss_range(adap, adap->mbox, viid, 0, rss_size, rss, + rss_size); + if (ret) + return ret; + + /* If Tunnel All Lookup isn't specified in the global RSS + * Configuration, then we need to specify a default Ingress + * Queue for any ingress packets which aren't hashed. We'll + * use our first ingress queue ... + */ + return t4_config_vi_rss(adap, adap->mbox, viid, + FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F | + FW_RSS_VI_CONFIG_CMD_UDPEN_F, + rss[0]); +} + /** * cxgb4_write_rss - write the RSS table for a given port * @pi: the port @@ -835,10 +859,10 @@ static void adap_config_hpfilter(struct adapter *adapter) */ int cxgb4_write_rss(const struct port_info *pi, const u16 *queues) { - u16 *rss; - int i, err; struct adapter *adapter = pi->adapter; const struct sge_eth_rxq *rxq; + int i, err; + u16 *rss; rxq = &adapter->sge.ethrxq[pi->first_qset]; rss = kmalloc_array(pi->rss_size, sizeof(u16), GFP_KERNEL); @@ -849,21 +873,7 @@ int cxgb4_write_rss(const struct port_info *pi, const u16 *queues) for (i = 0; i < pi->rss_size; i++, queues++) rss[i] = rxq[*queues].rspq.abs_id; - err = t4_config_rss_range(adapter, adapter->pf, pi->viid, 0, - pi->rss_size, rss, pi->rss_size); - /* If Tunnel All Lookup isn't specified in the global RSS - * Configuration, then we need to specify a default Ingress - * Queue for any ingress packets which aren't hashed. We'll - * use our first ingress queue ... 
- */ - if (!err) - err = t4_config_vi_rss(adapter, adapter->mbox, pi->viid, - FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F | - FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F | - FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F | - FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F | - FW_RSS_VI_CONFIG_CMD_UDPEN_F, - rss[0]); + err = cxgb4_config_rss(pi, rss, pi->rss_size, pi->viid); kfree(rss); return err; } @@ -1166,6 +1176,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, txq = netdev_pick_tx(dev, skb, sb_dev); if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) || skb->encapsulation || + cxgb4_is_ktls_skb(skb) || (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) txq = txq % pi->nqsets; @@ -1261,15 +1272,15 @@ int cxgb4_set_rspq_intr_params(struct sge_rspq *q, static int cxgb_set_features(struct net_device *dev, netdev_features_t features) { - const struct port_info *pi = netdev_priv(dev); netdev_features_t changed = dev->features ^ features; + const struct port_info *pi = netdev_priv(dev); int err; if (!(changed & NETIF_F_HW_VLAN_CTAG_RX)) return 0; - err = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, -1, - -1, -1, -1, + err = t4_set_rxmode(pi->adapter, pi->adapter->mbox, pi->viid, + pi->viid_mirror, -1, -1, -1, -1, !!(features & NETIF_F_HW_VLAN_CTAG_RX), true); if (unlikely(err)) dev->features = features ^ NETIF_F_HW_VLAN_CTAG_RX; @@ -1287,6 +1298,292 @@ static int setup_debugfs(struct adapter *adap) return 0; } +static void cxgb4_port_mirror_free_rxq(struct adapter *adap, + struct sge_eth_rxq *mirror_rxq) +{ + if ((adap->flags & CXGB4_FULL_INIT_DONE) && + !(adap->flags & CXGB4_SHUTTING_DOWN)) + cxgb4_quiesce_rx(&mirror_rxq->rspq); + + if (adap->flags & CXGB4_USING_MSIX) { + cxgb4_clear_msix_aff(mirror_rxq->msix->vec, + mirror_rxq->msix->aff_mask); + free_irq(mirror_rxq->msix->vec, &mirror_rxq->rspq); + cxgb4_free_msix_idx_in_bmap(adap, mirror_rxq->msix->idx); + } + + free_rspq_fl(adap, &mirror_rxq->rspq, &mirror_rxq->fl); +} + +static int cxgb4_port_mirror_alloc_queues(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + struct sge_eth_rxq *mirror_rxq; + struct sge *s = &adap->sge; + int ret = 0, msix = 0; + u16 i, rxqid; + u16 *rss; + + if (!pi->vi_mirror_count) + return 0; + + if (s->mirror_rxq[pi->port_id]) + return 0; + + mirror_rxq = kcalloc(pi->nmirrorqsets, sizeof(*mirror_rxq), GFP_KERNEL); + if (!mirror_rxq) + return -ENOMEM; + + s->mirror_rxq[pi->port_id] = mirror_rxq; + + if (!(adap->flags & CXGB4_USING_MSIX)) + msix = -((int)adap->sge.intrq.abs_id + 1); + + for (i = 0, rxqid = 0; i < pi->nmirrorqsets; i++, rxqid++) { + mirror_rxq = &s->mirror_rxq[pi->port_id][i]; + + /* Allocate Mirror Rxqs */ + if (msix >= 0) { + msix = cxgb4_get_msix_idx_from_bmap(adap); + if (msix < 0) { + ret = msix; + goto out_free_queues; + } + + mirror_rxq->msix = &adap->msix_info[msix]; + snprintf(mirror_rxq->msix->desc, + sizeof(mirror_rxq->msix->desc), + "%s-mirrorrxq%d", dev->name, i); + } + + init_rspq(adap, &mirror_rxq->rspq, + CXGB4_MIRROR_RXQ_DEFAULT_INTR_USEC, + CXGB4_MIRROR_RXQ_DEFAULT_PKT_CNT, + CXGB4_MIRROR_RXQ_DEFAULT_DESC_NUM, + CXGB4_MIRROR_RXQ_DEFAULT_DESC_SIZE); + + mirror_rxq->fl.size = CXGB4_MIRROR_FLQ_DEFAULT_DESC_NUM; + + ret = t4_sge_alloc_rxq(adap, &mirror_rxq->rspq, false, + dev, msix, &mirror_rxq->fl, + t4_ethrx_handler, NULL, 0); + if (ret) + goto out_free_msix_idx; + + /* Setup MSI-X vectors for Mirror Rxqs */ + if (adap->flags & CXGB4_USING_MSIX) { + ret = request_irq(mirror_rxq->msix->vec, + t4_sge_intr_msix, 0, + 
mirror_rxq->msix->desc, + &mirror_rxq->rspq); + if (ret) + goto out_free_rxq; + + cxgb4_set_msix_aff(adap, mirror_rxq->msix->vec, + &mirror_rxq->msix->aff_mask, i); + } + + /* Start NAPI for Mirror Rxqs */ + cxgb4_enable_rx(adap, &mirror_rxq->rspq); + } + + /* Setup RSS for Mirror Rxqs */ + rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL); + if (!rss) { + ret = -ENOMEM; + goto out_free_queues; + } + + mirror_rxq = &s->mirror_rxq[pi->port_id][0]; + for (i = 0; i < pi->rss_size; i++) + rss[i] = mirror_rxq[i % pi->nmirrorqsets].rspq.abs_id; + + ret = cxgb4_config_rss(pi, rss, pi->rss_size, pi->viid_mirror); + kfree(rss); + if (ret) + goto out_free_queues; + + return 0; + +out_free_rxq: + free_rspq_fl(adap, &mirror_rxq->rspq, &mirror_rxq->fl); + +out_free_msix_idx: + cxgb4_free_msix_idx_in_bmap(adap, mirror_rxq->msix->idx); + +out_free_queues: + while (rxqid-- > 0) + cxgb4_port_mirror_free_rxq(adap, + &s->mirror_rxq[pi->port_id][rxqid]); + + kfree(s->mirror_rxq[pi->port_id]); + s->mirror_rxq[pi->port_id] = NULL; + return ret; +} + +static void cxgb4_port_mirror_free_queues(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + struct sge *s = &adap->sge; + u16 i; + + if (!pi->vi_mirror_count) + return; + + if (!s->mirror_rxq[pi->port_id]) + return; + + for (i = 0; i < pi->nmirrorqsets; i++) + cxgb4_port_mirror_free_rxq(adap, + &s->mirror_rxq[pi->port_id][i]); + + kfree(s->mirror_rxq[pi->port_id]); + s->mirror_rxq[pi->port_id] = NULL; +} + +static int cxgb4_port_mirror_start(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + int ret, idx = -1; + + if (!pi->vi_mirror_count) + return 0; + + /* Mirror VIs can be created dynamically after stack had + * already setup Rx modes like MTU, promisc, allmulti, etc. + * on main VI. So, parse what the stack had setup on the + * main VI and update the same on the mirror VI. + */ + ret = t4_set_rxmode(adap, adap->mbox, pi->viid, pi->viid_mirror, + dev->mtu, (dev->flags & IFF_PROMISC) ? 1 : 0, + (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, + !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true); + if (ret) { + dev_err(adap->pdev_dev, + "Failed start up Rx mode for Mirror VI 0x%x, ret: %d\n", + pi->viid_mirror, ret); + return ret; + } + + /* Enable replication bit for the device's MAC address + * in MPS TCAM, so that the packets for the main VI are + * replicated to mirror VI. + */ + ret = cxgb4_update_mac_filt(pi, pi->viid_mirror, &idx, + dev->dev_addr, true, NULL); + if (ret) { + dev_err(adap->pdev_dev, + "Failed updating MAC filter for Mirror VI 0x%x, ret: %d\n", + pi->viid_mirror, ret); + return ret; + } + + /* Enabling a Virtual Interface can result in an interrupt + * during the processing of the VI Enable command and, in some + * paths, result in an attempt to issue another command in the + * interrupt context. Thus, we disable interrupts during the + * course of the VI Enable command ... 
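A small idiom worth naming in cxgb4_port_mirror_alloc_queues() above: the loop counter rxqid doubles as a tally of queues that were fully set up, so the out_free_queues path can release exactly those, in reverse order, with `while (rxqid-- > 0)`. The generic shape, with placeholder setup_one()/teardown_one():

static int setup_array(struct item *items, unsigned int n)
{
        unsigned int i;
        int ret;

        for (i = 0; i < n; i++) {
                ret = setup_one(&items[i]);
                if (ret)
                        goto unwind;
        }

        return 0;

unwind:
        while (i-- > 0)                 /* undo only what succeeded */
                teardown_one(&items[i]);
        return ret;
}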
+ */ + local_bh_disable(); + ret = t4_enable_vi_params(adap, adap->mbox, pi->viid_mirror, true, true, + false); + local_bh_enable(); + if (ret) + dev_err(adap->pdev_dev, + "Failed starting Mirror VI 0x%x, ret: %d\n", + pi->viid_mirror, ret); + + return ret; +} + +static void cxgb4_port_mirror_stop(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + if (!pi->vi_mirror_count) + return; + + t4_enable_vi_params(adap, adap->mbox, pi->viid_mirror, false, false, + false); +} + +int cxgb4_port_mirror_alloc(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + int ret = 0; + + if (!pi->nmirrorqsets) + return -EOPNOTSUPP; + + mutex_lock(&pi->vi_mirror_mutex); + if (pi->viid_mirror) { + pi->vi_mirror_count++; + goto out_unlock; + } + + ret = t4_init_port_mirror(pi, adap->mbox, pi->port_id, adap->pf, 0, + &pi->viid_mirror); + if (ret) + goto out_unlock; + + pi->vi_mirror_count = 1; + + if (adap->flags & CXGB4_FULL_INIT_DONE) { + ret = cxgb4_port_mirror_alloc_queues(dev); + if (ret) + goto out_free_vi; + + ret = cxgb4_port_mirror_start(dev); + if (ret) + goto out_free_queues; + } + + mutex_unlock(&pi->vi_mirror_mutex); + return 0; + +out_free_queues: + cxgb4_port_mirror_free_queues(dev); + +out_free_vi: + pi->vi_mirror_count = 0; + t4_free_vi(adap, adap->mbox, adap->pf, 0, pi->viid_mirror); + pi->viid_mirror = 0; + +out_unlock: + mutex_unlock(&pi->vi_mirror_mutex); + return ret; +} + +void cxgb4_port_mirror_free(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + mutex_lock(&pi->vi_mirror_mutex); + if (!pi->viid_mirror) + goto out_unlock; + + if (pi->vi_mirror_count > 1) { + pi->vi_mirror_count--; + goto out_unlock; + } + + cxgb4_port_mirror_stop(dev); + cxgb4_port_mirror_free_queues(dev); + + pi->vi_mirror_count = 0; + t4_free_vi(adap, adap->mbox, adap->pf, 0, pi->viid_mirror); + pi->viid_mirror = 0; + +out_unlock: + mutex_unlock(&pi->vi_mirror_mutex); +} + /* * upper-layer driver support */ @@ -1585,6 +1882,7 @@ static int tid_init(struct tid_info *t) atomic_set(&t->tids_in_use, 0); atomic_set(&t->conns_in_use, 0); atomic_set(&t->hash_tids_in_use, 0); + atomic_set(&t->eotids_in_use, 0); /* Setup the free list for atid_tab and clear the stid bitmap. */ if (natids) { @@ -1616,6 +1914,7 @@ static int tid_init(struct tid_info *t) * @stid: the server TID * @sip: local IP address to bind server to * @sport: the server's TCP port + * @vlan: the VLAN header information * @queue: queue to direct messages from this server to * * Create an IP server for the given port and address. 
@@ -2344,6 +2643,9 @@ static void detach_ulds(struct adapter *adap) { unsigned int i; + if (!is_uld(adap)) + return; + mutex_lock(&uld_mutex); list_del(&adap->list_node); @@ -2535,7 +2837,7 @@ static void cxgb_down(struct adapter *adapter) /* * net_device operations */ -int cxgb_open(struct net_device *dev) +static int cxgb_open(struct net_device *dev) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -2557,12 +2859,33 @@ int cxgb_open(struct net_device *dev) return err; err = link_start(dev); - if (!err) - netif_tx_start_all_queues(dev); + if (err) + return err; + + if (pi->nmirrorqsets) { + mutex_lock(&pi->vi_mirror_mutex); + err = cxgb4_port_mirror_alloc_queues(dev); + if (err) + goto out_unlock; + + err = cxgb4_port_mirror_start(dev); + if (err) + goto out_free_queues; + mutex_unlock(&pi->vi_mirror_mutex); + } + + netif_tx_start_all_queues(dev); + return 0; + +out_free_queues: + cxgb4_port_mirror_free_queues(dev); + +out_unlock: + mutex_unlock(&pi->vi_mirror_mutex); return err; } -int cxgb_close(struct net_device *dev) +static int cxgb_close(struct net_device *dev) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -2576,7 +2899,17 @@ int cxgb_close(struct net_device *dev) cxgb4_dcb_reset(dev); dcb_tx_queue_prio_enable(dev, false); #endif - return ret; + if (ret) + return ret; + + if (pi->nmirrorqsets) { + mutex_lock(&pi->vi_mirror_mutex); + cxgb4_port_mirror_stop(dev); + cxgb4_port_mirror_free_queues(dev); + mutex_unlock(&pi->vi_mirror_mutex); + } + + return 0; } int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, @@ -2610,7 +2943,7 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, /* Clear out filter specifications */ memset(&f->fs, 0, sizeof(struct ch_filter_specification)); - f->fs.val.lport = cpu_to_be16(sport); + f->fs.val.lport = be16_to_cpu(sport); f->fs.mask.lport = ~0; val = (u8 *)&sip; if ((val[0] | val[1] | val[2] | val[3]) != 0) { @@ -2842,11 +3175,11 @@ static void cxgb_set_rxmode(struct net_device *dev) static int cxgb_change_mtu(struct net_device *dev, int new_mtu) { - int ret; struct port_info *pi = netdev_priv(dev); + int ret; - ret = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, new_mtu, -1, - -1, -1, -1, true); + ret = t4_set_rxmode(pi->adapter, pi->adapter->mbox, pi->viid, + pi->viid_mirror, new_mtu, -1, -1, -1, -1, true); if (!ret) dev->mtu = new_mtu; return ret; @@ -2871,8 +3204,6 @@ static void cxgb4_mgmt_fill_vf_station_mac_addr(struct adapter *adap) int err; u8 *na; - adap->params.pci.vpd_cap_addr = pci_find_capability(adap->pdev, - PCI_CAP_ID_VPD); err = t4_get_raw_vpd_params(adap, &adap->params.vpd); if (err) return; @@ -3027,7 +3358,7 @@ static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf, SCHED_CLASS_RATEUNIT_BITS, SCHED_CLASS_RATEMODE_ABS, pi->tx_chan, class_id, 0, - max_tx_rate * 1000, 0, pktsize); + max_tx_rate * 1000, 0, pktsize, 0); if (ret) { dev_err(adap->pdev_dev, "Err %d for Traffic Class config\n", ret); @@ -3137,8 +3468,7 @@ static int cxgb_set_mac_addr(struct net_device *dev, void *p) if (ret < 0) return ret; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - pi->xact_addr_filt = ret; + eth_hw_addr_set(dev, addr->sa_data); return 0; } @@ -3404,129 +3734,71 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, } } -static void cxgb_del_udp_tunnel(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int cxgb_udp_tunnel_unset_port(struct net_device *netdev, + 
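One-liner worth pausing on in cxgb4_create_server_filter() above: `sport` arrives as a big-endian __be16 while the filter spec keeps ports in host order, so be16_to_cpu() is the semantically correct conversion, not cpu_to_be16(). On any one machine the two helpers generate identical code (both swap on little-endian, both are the identity on big-endian), so this is a type-correctness fix that sparse (make C=1) can verify rather than a behaviour change. A sketch of the annotation at work:

#include <linux/types.h>

struct spec { u16 lport; };             /* host order */

static void set_lport(struct spec *s, __be16 sport)
{
        /* be16_to_cpu() states the true direction: network order in,
         * host order out.  cpu_to_be16() here would produce the same
         * bytes but trip sparse's byte-order checking.
         */
        s->lport = be16_to_cpu(sport);
}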
unsigned int table, unsigned int entry, + struct udp_tunnel_info *ti) { struct port_info *pi = netdev_priv(netdev); struct adapter *adapter = pi->adapter; - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; int ret = 0, i; - if (chip_ver < CHELSIO_T6) - return; - switch (ti->type) { case UDP_TUNNEL_TYPE_VXLAN: - if (!adapter->vxlan_port_cnt || - adapter->vxlan_port != ti->port) - return; /* Invalid VxLAN destination port */ - - adapter->vxlan_port_cnt--; - if (adapter->vxlan_port_cnt) - return; - adapter->vxlan_port = 0; t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A, 0); break; case UDP_TUNNEL_TYPE_GENEVE: - if (!adapter->geneve_port_cnt || - adapter->geneve_port != ti->port) - return; /* Invalid GENEVE destination port */ - - adapter->geneve_port_cnt--; - if (adapter->geneve_port_cnt) - return; - adapter->geneve_port = 0; t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0); break; default: - return; + return -EINVAL; } /* Matchall mac entries can be deleted only after all tunnel ports * are brought down or removed. */ if (!adapter->rawf_cnt) - return; + return 0; for_each_port(adapter, i) { pi = adap2pinfo(adapter, i); ret = t4_free_raw_mac_filt(adapter, pi->viid, match_all_mac, match_all_mac, - adapter->rawf_start + - pi->port_id, + adapter->rawf_start + pi->port_id, 1, pi->port_id, false); if (ret < 0) { netdev_info(netdev, "Failed to free mac filter entry, for port %d\n", i); - return; + return ret; } } + + return 0; } -static void cxgb_add_udp_tunnel(struct net_device *netdev, - struct udp_tunnel_info *ti) +static int cxgb_udp_tunnel_set_port(struct net_device *netdev, + unsigned int table, unsigned int entry, + struct udp_tunnel_info *ti) { struct port_info *pi = netdev_priv(netdev); struct adapter *adapter = pi->adapter; - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; int i, ret; - if (chip_ver < CHELSIO_T6 || !adapter->rawf_cnt) - return; - switch (ti->type) { case UDP_TUNNEL_TYPE_VXLAN: - /* Callback for adding vxlan port can be called with the same - * port for both IPv4 and IPv6. We should not disable the - * offloading when the same port for both protocols is added - * and later one of them is removed. 
- */ - if (adapter->vxlan_port_cnt && - adapter->vxlan_port == ti->port) { - adapter->vxlan_port_cnt++; - return; - } - - /* We will support only one VxLAN port */ - if (adapter->vxlan_port_cnt) { - netdev_info(netdev, "UDP port %d already offloaded, not adding port %d\n", - be16_to_cpu(adapter->vxlan_port), - be16_to_cpu(ti->port)); - return; - } - adapter->vxlan_port = ti->port; - adapter->vxlan_port_cnt = 1; - t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A, VXLAN_V(be16_to_cpu(ti->port)) | VXLAN_EN_F); break; case UDP_TUNNEL_TYPE_GENEVE: - if (adapter->geneve_port_cnt && - adapter->geneve_port == ti->port) { - adapter->geneve_port_cnt++; - return; - } - - /* We will support only one GENEVE port */ - if (adapter->geneve_port_cnt) { - netdev_info(netdev, "UDP port %d already offloaded, not adding port %d\n", - be16_to_cpu(adapter->geneve_port), - be16_to_cpu(ti->port)); - return; - } - adapter->geneve_port = ti->port; - adapter->geneve_port_cnt = 1; - t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F); break; default: - return; + return -EINVAL; } /* Create a 'match all' mac filter entry for inner mac, @@ -3541,18 +3813,27 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev, ret = t4_alloc_raw_mac_filt(adapter, pi->viid, match_all_mac, match_all_mac, - adapter->rawf_start + - pi->port_id, + adapter->rawf_start + pi->port_id, 1, pi->port_id, false); if (ret < 0) { netdev_info(netdev, "Failed to allocate a mac filter entry, not adding port %d\n", be16_to_cpu(ti->port)); - cxgb_del_udp_tunnel(netdev, ti); - return; + return ret; } } + + return 0; } +static const struct udp_tunnel_nic_info cxgb_udp_tunnels = { + .set_port = cxgb_udp_tunnel_set_port, + .unset_port = cxgb_udp_tunnel_unset_port, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, + }, +}; + static netdev_features_t cxgb_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -3591,7 +3872,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { .ndo_set_mac_address = cxgb_set_mac_addr, .ndo_set_features = cxgb_set_features, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = cxgb_ioctl, + .ndo_eth_ioctl = cxgb_ioctl, .ndo_change_mtu = cxgb_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = cxgb_netpoll, @@ -3602,8 +3883,6 @@ static const struct net_device_ops cxgb4_netdev_ops = { #endif /* CONFIG_CHELSIO_T4_FCOE */ .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, .ndo_setup_tc = cxgb_setup_tc, - .ndo_udp_tunnel_add = cxgb_add_udp_tunnel, - .ndo_udp_tunnel_del = cxgb_del_udp_tunnel, .ndo_features_check = cxgb_features_check, .ndo_fix_features = cxgb_fix_features, }; @@ -3618,23 +3897,21 @@ static const struct net_device_ops cxgb4_mgmt_netdev_ops = { .ndo_set_vf_vlan = cxgb4_mgmt_set_vf_vlan, .ndo_set_vf_link_state = cxgb4_mgmt_set_vf_link_state, }; -#endif static void cxgb4_mgmt_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct adapter *adapter = netdev2adap(dev); - strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver)); - strlcpy(info->version, cxgb4_driver_version, - sizeof(info->version)); - strlcpy(info->bus_info, pci_name(adapter->pdev), + strscpy(info->driver, cxgb4_driver_name, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(adapter->pdev), sizeof(info->bus_info)); } static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = { .get_drvinfo = cxgb4_mgmt_get_drvinfo, }; +#endif static void 
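The move above from ndo_udp_tunnel_add/del to a udp_tunnel_nic_info table is the structural change of this hunk: every piece of deleted bookkeeping (vxlan_port_cnt/geneve_port_cnt, the duplicate-port and single-slot checks) is now handled generically by the udp_tunnel_nic core, driven by the declared table sizes, leaving the driver callbacks as bare register writes. A skeletal user of the same API, with a made-up device register (my_write_reg/MY_VXLAN_REG are hypothetical):

static int my_set_port(struct net_device *dev, unsigned int table,
                       unsigned int entry, struct udp_tunnel_info *ti)
{
        /* Core has already deduplicated, refcounted and capacity-
         * checked the port; just program the hardware.
         */
        my_write_reg(netdev_priv(dev), MY_VXLAN_REG,
                     be16_to_cpu(ti->port) | MY_VXLAN_EN);
        return 0;
}

static int my_unset_port(struct net_device *dev, unsigned int table,
                         unsigned int entry, struct udp_tunnel_info *ti)
{
        my_write_reg(netdev_priv(dev), MY_VXLAN_REG, 0);
        return 0;
}

static const struct udp_tunnel_nic_info my_udp_tunnels = {
        .set_port   = my_set_port,
        .unset_port = my_unset_port,
        .tables = {
                { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
        },
};

/* at probe time: netdev->udp_tunnel_nic_info = &my_udp_tunnels; */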
notify_fatal_err(struct work_struct *work) { @@ -3731,7 +4008,7 @@ static void adap_free_hma_mem(struct adapter *adapter) if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) { dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl, - adapter->hma.sgt->nents, PCI_DMA_BIDIRECTIONAL); + adapter->hma.sgt->nents, DMA_BIDIRECTIONAL); adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG; } @@ -4150,8 +4427,7 @@ static int adap_init0_phy(struct adapter *adap) /* Load PHY Firmware onto adapter. */ - ret = t4_load_phy_fw(adap, MEMWIN_NIC, &adap->win0_lock, - phy_info->phy_fw_version, + ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version, (u8 *)phyf->data, phyf->size); if (ret < 0) dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n", @@ -4771,23 +5047,20 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* Allocate the memory for the vaious egress queue bitmaps * ie starving_fl, txq_maperr and blocked_fl. */ - adap->sge.starving_fl = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz), - sizeof(long), GFP_KERNEL); + adap->sge.starving_fl = bitmap_zalloc(adap->sge.egr_sz, GFP_KERNEL); if (!adap->sge.starving_fl) { ret = -ENOMEM; goto bye; } - adap->sge.txq_maperr = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz), - sizeof(long), GFP_KERNEL); + adap->sge.txq_maperr = bitmap_zalloc(adap->sge.egr_sz, GFP_KERNEL); if (!adap->sge.txq_maperr) { ret = -ENOMEM; goto bye; } #ifdef CONFIG_DEBUG_FS - adap->sge.blocked_fl = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz), - sizeof(long), GFP_KERNEL); + adap->sge.blocked_fl = bitmap_zalloc(adap->sge.egr_sz, GFP_KERNEL); if (!adap->sge.blocked_fl) { ret = -ENOMEM; goto bye; @@ -4860,7 +5133,7 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* See if FW supports FW_FILTER2 work request */ if (is_t4(adap->params.chip)) { - adap->params.filter2_wr_support = 0; + adap->params.filter2_wr_support = false; } else { params[0] = FW_PARAM_DEV(FILTER2_WR); ret = t4_query_params(adap, adap->mbox, adap->pf, 0, @@ -5140,10 +5413,10 @@ bye: adap_free_hma_mem(adap); kfree(adap->sge.egr_map); kfree(adap->sge.ingr_map); - kfree(adap->sge.starving_fl); - kfree(adap->sge.txq_maperr); + bitmap_free(adap->sge.starving_fl); + bitmap_free(adap->sge.txq_maperr); #ifdef CONFIG_DEBUG_FS - kfree(adap->sge.blocked_fl); + bitmap_free(adap->sge.blocked_fl); #endif if (ret != -ETIMEDOUT && ret != -EIO) t4_fw_bye(adap, adap->mbox); @@ -5381,10 +5654,10 @@ static inline bool is_x_10g_port(const struct link_config *lc) static int cfg_queues(struct adapter *adap) { u32 avail_qsets, avail_eth_qsets, avail_uld_qsets; - u32 i, n10g = 0, qidx = 0, n1g = 0; u32 ncpus = num_online_cpus(); u32 niqflint, neq, num_ulds; struct sge *s = &adap->sge; + u32 i, n10g = 0, qidx = 0; u32 q10g = 0, q1g; /* Reduce memory usage in kdump environment, disable all offload. 
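The allocator conversions above replace the open-coded kcalloc(BITS_TO_LONGS(n), sizeof(long), ...) pattern with bitmap_zalloc()/bitmap_free(), which state the intent directly and keep allocation and release visibly paired. Typical usage, for reference:

#include <linux/bitmap.h>

static int track_queues(unsigned int nqueues)
{
        unsigned long *busy = bitmap_zalloc(nqueues, GFP_KERNEL);

        if (!busy)
                return -ENOMEM;

        set_bit(3, busy);               /* mark queue 3 busy */
        if (test_bit(3, busy))
                clear_bit(3, busy);

        bitmap_free(busy);              /* pairs with bitmap_zalloc() */
        return 0;
}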
*/ @@ -5430,7 +5703,6 @@ static int cfg_queues(struct adapter *adap) if (n10g) q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g; - n1g = adap->params.nports - n10g; #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging support we need to be able to support up * to 8 Traffic Priorities; each of which will be assigned to its @@ -5448,7 +5720,8 @@ static int cfg_queues(struct adapter *adap) else q10g = max(8U, q10g); - while ((q10g * n10g) > (avail_eth_qsets - n1g * q1g)) + while ((q10g * n10g) > + (avail_eth_qsets - (adap->params.nports - n10g) * q1g)) q10g--; #else /* !CONFIG_CHELSIO_T4_DCB */ @@ -5506,6 +5779,19 @@ static int cfg_queues(struct adapter *adap) avail_qsets -= s->eoqsets; } + /* Mirror queues must follow same scheme as normal Ethernet + * Queues, when there are enough queues available. Otherwise, + * allocate at least 1 queue per port. If even 1 queue is not + * available, then disable mirror queues support. + */ + if (avail_qsets >= s->max_ethqsets) + s->mirrorqsets = s->max_ethqsets; + else if (avail_qsets >= adap->params.nports) + s->mirrorqsets = adap->params.nports; + else + s->mirrorqsets = 0; + avail_qsets -= s->mirrorqsets; + for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { struct sge_eth_rxq *r = &s->ethrxq[i]; @@ -5564,8 +5850,7 @@ static int alloc_msix_info(struct adapter *adap, u32 num_vec) if (!msix_info) return -ENOMEM; - adap->msix_bmap.msix_bmap = kcalloc(BITS_TO_LONGS(num_vec), - sizeof(long), GFP_KERNEL); + adap->msix_bmap.msix_bmap = bitmap_zalloc(num_vec, GFP_KERNEL); if (!adap->msix_bmap.msix_bmap) { kfree(msix_info); return -ENOMEM; @@ -5580,7 +5865,7 @@ static int alloc_msix_info(struct adapter *adap, u32 num_vec) static void free_msix_info(struct adapter *adap) { - kfree(adap->msix_bmap.msix_bmap); + bitmap_free(adap->msix_bmap.msix_bmap); kfree(adap->msix_info); } @@ -5619,8 +5904,8 @@ void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, static int enable_msix(struct adapter *adap) { - u32 eth_need, uld_need = 0, ethofld_need = 0; - u32 ethqsets = 0, ofldqsets = 0, eoqsets = 0; + u32 eth_need, uld_need = 0, ethofld_need = 0, mirror_need = 0; + u32 ethqsets = 0, ofldqsets = 0, eoqsets = 0, mirrorqsets = 0; u8 num_uld = 0, nchan = adap->params.nports; u32 i, want, need, num_vec; struct sge *s = &adap->sge; @@ -5651,6 +5936,12 @@ static int enable_msix(struct adapter *adap) need += ethofld_need; } + if (s->mirrorqsets) { + want += s->mirrorqsets; + mirror_need = nchan; + need += mirror_need; + } + want += EXTRA_VECS; need += EXTRA_VECS; @@ -5684,8 +5975,10 @@ static int enable_msix(struct adapter *adap) adap->params.ethofld = 0; s->ofldqsets = 0; s->eoqsets = 0; + s->mirrorqsets = 0; uld_need = 0; ethofld_need = 0; + mirror_need = 0; } num_vec = allocated; @@ -5699,6 +5992,8 @@ static int enable_msix(struct adapter *adap) ofldqsets = nchan; if (is_ethofld(adap)) eoqsets = ethofld_need; + if (s->mirrorqsets) + mirrorqsets = mirror_need; num_vec -= need; while (num_vec) { @@ -5730,12 +6025,25 @@ static int enable_msix(struct adapter *adap) num_vec -= uld_need; } } + + if (s->mirrorqsets) { + while (num_vec) { + if (num_vec < mirror_need || + mirrorqsets > s->mirrorqsets) + break; + + mirrorqsets++; + num_vec -= mirror_need; + } + } } else { ethqsets = s->max_ethqsets; if (is_uld(adap)) ofldqsets = s->ofldqsets; if (is_ethofld(adap)) eoqsets = s->eoqsets; + if (s->mirrorqsets) + mirrorqsets = s->mirrorqsets; } if (ethqsets < s->max_ethqsets) { @@ -5751,6 +6059,15 @@ static int enable_msix(struct adapter *adap) if (is_ethofld(adap)) s->eoqsets 
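The enable_msix() rework extends the driver's two-phase strategy to mirror queues: first secure the per-feature minimums (`need`), then, if more vectors were granted, hand out the surplus one chunk at a time (one vector per channel for mirror queues), stopping when a group hits its cap. That distribution loop, abstracted:

/* Sketch: grow one group from a pool of spare vectors, one chunk
 * (= one vector per channel) per step, up to the group's cap.
 */
static void share_leftovers(u32 *nqueues, u32 cap, u32 chunk, u32 *spare)
{
        while (*spare >= chunk && *nqueues < cap) {
                (*nqueues)++;
                *spare -= chunk;
        }
}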
= eoqsets; + if (s->mirrorqsets) { + s->mirrorqsets = mirrorqsets; + for_each_port(adap, i) { + pi = adap2pinfo(adap, i); + pi->nmirrorqsets = s->mirrorqsets / nchan; + mutex_init(&pi->vi_mirror_mutex); + } + } + /* map for msix */ ret = alloc_msix_info(adap, allocated); if (ret) @@ -5762,8 +6079,9 @@ static int enable_msix(struct adapter *adap) } dev_info(adap->pdev_dev, - "%d MSI-X vectors allocated, nic %d eoqsets %d per uld %d\n", - allocated, s->max_ethqsets, s->eoqsets, s->nqs_per_uld); + "%d MSI-X vectors allocated, nic %d eoqsets %d per uld %d mirrorqsets %d\n", + allocated, s->max_ethqsets, s->eoqsets, s->nqs_per_uld, + s->mirrorqsets); kfree(entries); return 0; @@ -5840,8 +6158,7 @@ static void print_port_info(const struct net_device *dev) --bufp; sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type)); - netdev_info(dev, "%s: Chelsio %s (%s) %s\n", - dev->name, adap->params.vpd.id, adap->name, buf); + netdev_info(dev, "Chelsio %s %s\n", adap->params.vpd.id, buf); } /* @@ -5864,12 +6181,13 @@ static void free_some_resources(struct adapter *adapter) cxgb4_cleanup_tc_mqprio(adapter); cxgb4_cleanup_tc_flower(adapter); cxgb4_cleanup_tc_u32(adapter); + cxgb4_cleanup_ethtool_filters(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); - kfree(adapter->sge.starving_fl); - kfree(adapter->sge.txq_maperr); + bitmap_free(adapter->sge.starving_fl); + bitmap_free(adapter->sge.txq_maperr); #ifdef CONFIG_DEBUG_FS - kfree(adapter->sge.blocked_fl); + bitmap_free(adapter->sge.blocked_fl); #endif disable_msi(adapter); @@ -6071,6 +6389,213 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) } #endif /* CONFIG_PCI_IOV */ +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) || IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + +static int chcr_offload_state(struct adapter *adap, + enum cxgb4_netdev_tls_ops op_val) +{ + switch (op_val) { +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + case CXGB4_TLSDEV_OPS: + if (!adap->uld[CXGB4_ULD_KTLS].handle) { + dev_dbg(adap->pdev_dev, "ch_ktls driver is not loaded\n"); + return -EOPNOTSUPP; + } + if (!adap->uld[CXGB4_ULD_KTLS].tlsdev_ops) { + dev_dbg(adap->pdev_dev, + "ch_ktls driver has no registered tlsdev_ops\n"); + return -EOPNOTSUPP; + } + break; +#endif /* CONFIG_CHELSIO_TLS_DEVICE */ +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + case CXGB4_XFRMDEV_OPS: + if (!adap->uld[CXGB4_ULD_IPSEC].handle) { + dev_dbg(adap->pdev_dev, "chipsec driver is not loaded\n"); + return -EOPNOTSUPP; + } + if (!adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops) { + dev_dbg(adap->pdev_dev, + "chipsec driver has no registered xfrmdev_ops\n"); + return -EOPNOTSUPP; + } + break; +#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + default: + dev_dbg(adap->pdev_dev, + "driver has no support for offload %d\n", op_val); + return -EOPNOTSUPP; + } + + return 0; +} + +#endif /* CONFIG_CHELSIO_TLS_DEVICE || CONFIG_CHELSIO_IPSEC_INLINE */ + +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + +static int cxgb4_ktls_dev_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 tcp_sn) +{ + struct adapter *adap = netdev2adap(netdev); + int ret; + + mutex_lock(&uld_mutex); + ret = chcr_offload_state(adap, CXGB4_TLSDEV_OPS); + if (ret) + goto out_unlock; + + ret = cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_ENABLE); + if (ret) + goto out_unlock; + + ret = adap->uld[CXGB4_ULD_KTLS].tlsdev_ops->tls_dev_add(netdev, sk, + direction, + crypto_info, + tcp_sn); + /* if there is a failure, clear the refcount */ + if 
(ret) + cxgb4_set_ktls_feature(adap, + FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE); +out_unlock: + mutex_unlock(&uld_mutex); + return ret; +} + +static void cxgb4_ktls_dev_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct adapter *adap = netdev2adap(netdev); + + mutex_lock(&uld_mutex); + if (chcr_offload_state(adap, CXGB4_TLSDEV_OPS)) + goto out_unlock; + + adap->uld[CXGB4_ULD_KTLS].tlsdev_ops->tls_dev_del(netdev, tls_ctx, + direction); + +out_unlock: + cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE); + mutex_unlock(&uld_mutex); +} + +static const struct tlsdev_ops cxgb4_ktls_ops = { + .tls_dev_add = cxgb4_ktls_dev_add, + .tls_dev_del = cxgb4_ktls_dev_del, +}; +#endif /* CONFIG_CHELSIO_TLS_DEVICE */ + +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + +static int cxgb4_xfrm_add_state(struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + int ret; + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return -EBUSY; + } + ret = chcr_offload_state(adap, CXGB4_XFRMDEV_OPS); + if (ret) + goto out_unlock; + + ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_add(x); + +out_unlock: + mutex_unlock(&uld_mutex); + + return ret; +} + +static void cxgb4_xfrm_del_state(struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return; + } + if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) + goto out_unlock; + + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_delete(x); + +out_unlock: + mutex_unlock(&uld_mutex); +} + +static void cxgb4_xfrm_free_state(struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return; + } + if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) + goto out_unlock; + + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_free(x); + +out_unlock: + mutex_unlock(&uld_mutex); +} + +static bool cxgb4_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + bool ret = false; + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return ret; + } + if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) + goto out_unlock; + + ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_offload_ok(skb, x); + +out_unlock: + mutex_unlock(&uld_mutex); + return ret; +} + +static void cxgb4_advance_esn_state(struct xfrm_state *x) +{ + struct adapter *adap = netdev2adap(x->xso.dev); + + if (!mutex_trylock(&uld_mutex)) { + dev_dbg(adap->pdev_dev, + "crypto uld critical resource is under use\n"); + return; + } + if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) + goto out_unlock; + + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_advance_esn(x); + +out_unlock: + mutex_unlock(&uld_mutex); +} + +static const struct xfrmdev_ops cxgb4_xfrmdev_ops = { + .xdo_dev_state_add = cxgb4_xfrm_add_state, + .xdo_dev_state_delete = cxgb4_xfrm_del_state, + .xdo_dev_state_free = cxgb4_xfrm_free_state, + .xdo_dev_offload_ok = cxgb4_ipsec_offload_ok, + .xdo_dev_state_advance_esn = cxgb4_advance_esn_state, +}; + +#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ + static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; @@ -6078,7 +6603,6 @@ 
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) static int adap_idx = 1; int s_qpp, qpp, num_seg; struct port_info *pi; - bool highdma = false; enum chip_type chip; void __iomem *regs; int func, chip_ver; @@ -6086,8 +6610,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int i, err; u32 whoami; - printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION); - err = pci_request_regions(pdev, KBUILD_MODNAME); if (err) { /* Just info, some other driver may have claimed the device. */ @@ -6159,20 +6681,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - highdma = true; - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - dev_err(&pdev->dev, "unable to obtain 64-bit DMA for " - "coherent allocations\n"); - goto out_free_adapter; - } - } else { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "no usable DMA configuration\n"); - goto out_free_adapter; - } + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "no usable DMA configuration\n"); + goto out_free_adapter; } pci_enable_pcie_error_reporting(pdev); @@ -6260,13 +6772,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) setup_memwin(adapter); err = adap_init0(adapter, 0); -#ifdef CONFIG_DEBUG_FS - bitmap_zero(adapter->sge.blocked_fl, adapter->sge.egr_sz); -#endif - setup_memwin_rdma(adapter); if (err) goto out_unmap_bar; + setup_memwin_rdma(adapter); + /* configure SGE_STAT_CFG_A to read WC stats */ if (!is_t4(adapter->params.chip)) t4_write_reg(adapter, SGE_STAT_CFG_A, STATSOURCE_T5_V(7) | @@ -6303,7 +6813,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_GRO | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_TC; + NETIF_F_HW_TC | NETIF_F_NTUPLE | NETIF_F_HIGHDMA; if (chip_ver > CHELSIO_T5) { netdev->hw_enc_features |= NETIF_F_IP_CSUM | @@ -6316,12 +6826,28 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_HW_TLS_RECORD; + + if (adapter->rawf_cnt) + netdev->udp_tunnel_nic_info = &cxgb_udp_tunnels; } - if (highdma) - netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; netdev->vlan_features = netdev->features & VLAN_FEAT; +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + if (pi->adapter->params.crypto & FW_CAPS_CONFIG_TLS_HW) { + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->tlsdev_ops = &cxgb4_ktls_ops; + /* initialize the refcount */ + refcount_set(&pi->adapter->chcr_ktls.ktls_refcount, 0); + } +#endif /* CONFIG_CHELSIO_TLS_DEVICE */ +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) + if (pi->adapter->params.crypto & FW_CAPS_CONFIG_IPSEC_INLINE) { + netdev->hw_enc_features |= NETIF_F_HW_ESP; + netdev->features |= NETIF_F_HW_ESP; + netdev->xfrmdev_ops = &cxgb4_xfrmdev_ops; + } +#endif /* CONFIG_CHELSIO_IPSEC_INLINE */ netdev->priv_flags |= IFF_UNICAST_FLT; @@ -6419,6 +6945,24 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) i); } + if (is_offload(adapter) || is_hashfilter(adapter)) { + if (t4_read_reg(adapter, LE_DB_CONFIG_A) & HASHEN_F) { + u32 v; + + v = t4_read_reg(adapter, LE_DB_HASH_CONFIG_A); + if (chip_ver <= CHELSIO_T5) { + adapter->tids.nhash = 1 << 
HASHTIDSIZE_G(v); + v = t4_read_reg(adapter, LE_DB_TID_HASHBASE_A); + adapter->tids.hash_base = v / 4; + } else { + adapter->tids.nhash = HASHTBLSIZE_G(v) << 3; + v = t4_read_reg(adapter, + T6_LE_DB_HASH_TID_BASE_A); + adapter->tids.hash_base = v; + } + } + } + if (tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); @@ -6440,22 +6984,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (cxgb4_init_tc_matchall(adapter)) dev_warn(&pdev->dev, "could not offload tc matchall, continuing\n"); - } - - if (is_offload(adapter) || is_hashfilter(adapter)) { - if (t4_read_reg(adapter, LE_DB_CONFIG_A) & HASHEN_F) { - u32 hash_base, hash_reg; - - if (chip_ver <= CHELSIO_T5) { - hash_reg = LE_DB_TID_HASHBASE_A; - hash_base = t4_read_reg(adapter, hash_reg); - adapter->tids.hash_base = hash_base / 4; - } else { - hash_reg = T6_LE_DB_HASH_TID_BASE_A; - hash_base = t4_read_reg(adapter, hash_reg); - adapter->tids.hash_base = hash_base; - } - } + if (cxgb4_init_ethtool_filters(adapter)) + dev_warn(&pdev->dev, + "could not initialize ethtool filters, continuing\n"); } /* See what interrupts we'll be using */ @@ -6529,11 +7060,8 @@ fw_attach_fail: /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ pdev->needs_freset = 1; - if (is_uld(adapter)) { - mutex_lock(&uld_mutex); - list_add_tail(&adapter->list_node, &adapter_list); - mutex_unlock(&uld_mutex); - } + if (is_uld(adapter)) + cxgb4_uld_enable(adapter); if (!is_t4(adapter->params.chip)) cxgb4_ptp_init(adapter); @@ -6596,10 +7124,13 @@ static void remove_one(struct pci_dev *pdev) */ destroy_workqueue(adapter->workq); - if (is_uld(adapter)) { - detach_ulds(adapter); - t4_uld_clean_up(adapter); - } + detach_ulds(adapter); + + for_each_port(adapter, i) + if (adapter->port[i]->reg_state == NETREG_REGISTERED) + unregister_netdev(adapter->port[i]); + + t4_uld_clean_up(adapter); adap_free_hma_mem(adapter); @@ -6607,10 +7138,6 @@ static void remove_one(struct pci_dev *pdev) cxgb4_free_mps_ref_entries(adapter); - for_each_port(adapter, i) - if (adapter->port[i]->reg_state == NETREG_REGISTERED) - unregister_netdev(adapter->port[i]); - debugfs_remove_recursive(adapter->debugfs_root); if (!is_t4(adapter->params.chip)) @@ -6682,6 +7209,10 @@ static void shutdown_one(struct pci_dev *pdev) if (adapter->port[i]->reg_state == NETREG_REGISTERED) cxgb_close(adapter->port[i]); + rtnl_lock(); + cxgb4_mqprio_stop_offload(adapter); + rtnl_unlock(); + if (is_uld(adapter)) { detach_ulds(adapter); t4_uld_clean_up(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c index b1a073eea60b..a020e8490681 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c @@ -229,7 +229,7 @@ void cxgb4_free_mps_ref_entries(struct adapter *adap) { struct mps_entries_ref *mps_entry, *tmp; - if (!list_empty(&adap->mps_ref)) + if (list_empty(&adap->mps_ref)) return; spin_lock(&adap->mps_ref_lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c index 58a039c3224a..5bf117d2179f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -194,6 +194,7 @@ int cxgb4_ptp_redirect_rx_packet(struct adapter *adapter, struct port_info *pi) } /** + * cxgb4_ptp_adjfreq - Adjust frequency of PHC cycle counter * @ptp: ptp clock structure * @ppb: Desired frequency change in parts per billion 
* @@ -229,7 +230,7 @@ static int cxgb4_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) /** * cxgb4_ptp_fineadjtime - Shift the time of the hardware clock - * @ptp: ptp clock structure + * @adapter: board private structure * @delta: Desired change in nanoseconds * * Adjust the timer by resetting the timecounter structure. @@ -246,6 +247,9 @@ static int cxgb4_ptp_fineadjtime(struct adapter *adapter, s64 delta) FW_PTP_CMD_PORTID_V(0)); c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); c.u.ts.sc = FW_PTP_SC_ADJ_FTIME; + c.u.ts.sign = (delta < 0) ? 1 : 0; + if (delta < 0) + delta = -delta; c.u.ts.tm = cpu_to_be64(delta); err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), NULL); @@ -308,32 +312,17 @@ static int cxgb4_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) */ static int cxgb4_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { - struct adapter *adapter = (struct adapter *)container_of(ptp, - struct adapter, ptp_clock_info); - struct fw_ptp_cmd c; + struct adapter *adapter = container_of(ptp, struct adapter, + ptp_clock_info); u64 ns; - int err; - memset(&c, 0, sizeof(c)); - c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PTP_CMD) | - FW_CMD_REQUEST_F | - FW_CMD_READ_F | - FW_PTP_CMD_PORTID_V(0)); - c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); - c.u.ts.sc = FW_PTP_SC_GET_TIME; - - err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), &c); - if (err < 0) { - dev_err(adapter->pdev_dev, - "PTP: %s error %d\n", __func__, -err); - return err; - } + ns = t4_read_reg(adapter, T5_PORT_REG(0, MAC_PORT_PTP_SUM_LO_A)); + ns |= (u64)t4_read_reg(adapter, + T5_PORT_REG(0, MAC_PORT_PTP_SUM_HI_A)) << 32; /* convert to timespec*/ - ns = be64_to_cpu(c.u.ts.tm); *ts = ns_to_timespec64(ns); - - return err; + return 0; } /** @@ -457,7 +446,7 @@ void cxgb4_ptp_init(struct adapter *adapter) } /** - * cxgb4_ptp_remove - disable PTP device and stop the overflow check + * cxgb4_ptp_stop - disable PTP device and stop the overflow check * @adapter: board private structure * * Stop the PTP support. 
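The cxgb4_ptp_gettime() hunk above trades a sleeping firmware mailbox round-trip for two MMIO reads of a split 64-bit nanosecond counter. A minimal sketch of that read pattern, assuming (as the new code implies) that reading the LO half latches a coherent HI half; hardware without such latching would need a hi/lo/hi retry loop instead. The helper name is hypothetical; t4_read_reg(), T5_PORT_REG() and the MAC_PORT_PTP_SUM_* registers are taken from the hunk above.

static u64 cxgb4_ptp_read_sum_ns(struct adapter *adapter)
{
	u64 ns;

	/* LO half first: assumed to latch the HI half so that the two
	 * 32-bit reads form one coherent 64-bit sample.
	 */
	ns = t4_read_reg(adapter, T5_PORT_REG(0, MAC_PORT_PTP_SUM_LO_A));
	ns |= (u64)t4_read_reg(adapter,
			       T5_PORT_REG(0, MAC_PORT_PTP_SUM_HI_A)) << 32;
	return ns;
}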
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index bb5513bdd293..dd9be229819a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -58,12 +58,91 @@ static struct ch_tc_pedit_fields pedits[] = { PEDIT_FIELDS(IP6_, DST_63_32, 4, nat_lip, 4), PEDIT_FIELDS(IP6_, DST_95_64, 4, nat_lip, 8), PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12), - PEDIT_FIELDS(TCP_, SPORT, 2, nat_fport, 0), - PEDIT_FIELDS(TCP_, DPORT, 2, nat_lport, 0), - PEDIT_FIELDS(UDP_, SPORT, 2, nat_fport, 0), - PEDIT_FIELDS(UDP_, DPORT, 2, nat_lport, 0), }; +static const struct cxgb4_natmode_config cxgb4_natmode_config_array[] = { + /* Default supported NAT modes */ + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_NONE, + .natmode = NAT_MODE_NONE, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP, + .natmode = NAT_MODE_DIP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT, + .natmode = NAT_MODE_DIP_DP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT | + CXGB4_ACTION_NATMODE_SIP, + .natmode = NAT_MODE_DIP_DP_SIP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT | + CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_DIP_DP_SP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_SIP | CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_SIP_SP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP | + CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_DIP_SIP_SP, + }, + { + .chip = CHELSIO_T5, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP | + CXGB4_ACTION_NATMODE_DPORT | + CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_ALL, + }, + /* T6+ can ignore L4 ports when they're disabled. */ + { + .chip = CHELSIO_T6, + .flags = CXGB4_ACTION_NATMODE_SIP, + .natmode = NAT_MODE_SIP_SP, + }, + { + .chip = CHELSIO_T6, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SPORT, + .natmode = NAT_MODE_DIP_DP_SP, + }, + { + .chip = CHELSIO_T6, + .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP, + .natmode = NAT_MODE_ALL, + }, +}; + +static void cxgb4_action_natmode_tweak(struct ch_filter_specification *fs, + u8 natmode_flags) +{ + u8 i = 0; + + /* Translate the enabled NAT 4-tuple fields to one of the + * hardware supported NAT mode configurations. This ensures + * that we pick a valid combination, where the disabled fields + * do not get overwritten to 0. 
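+ * If no table entry matches the requested combination, fs->nat_mode is left unchanged.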
+ */ + for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) { + if (cxgb4_natmode_config_array[i].flags == natmode_flags) { + fs->nat_mode = cxgb4_natmode_config_array[i].natmode; + return; + } + } +} + static struct ch_tc_flower_entry *allocate_flower_entry(void) { struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); @@ -81,10 +160,9 @@ static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap, } static void cxgb4_process_flow_match(struct net_device *dev, - struct flow_cls_offload *cls, + struct flow_rule *rule, struct ch_filter_specification *fs) { - struct flow_rule *rule = flow_cls_offload_flow_rule(cls); u16 addr_type = 0; if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { @@ -92,6 +170,10 @@ static void cxgb4_process_flow_match(struct net_device *dev, flow_rule_match_control(rule, &match); addr_type = match.key->addr_type; + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { @@ -156,14 +238,14 @@ static void cxgb4_process_flow_match(struct net_device *dev, struct flow_match_ports match; flow_rule_match_ports(rule, &match); - fs->val.lport = cpu_to_be16(match.key->dst); - fs->mask.lport = cpu_to_be16(match.mask->dst); - fs->val.fport = cpu_to_be16(match.key->src); - fs->mask.fport = cpu_to_be16(match.mask->src); + fs->val.lport = be16_to_cpu(match.key->dst); + fs->mask.lport = be16_to_cpu(match.mask->dst); + fs->val.fport = be16_to_cpu(match.key->src); + fs->mask.fport = be16_to_cpu(match.mask->src); /* also initialize nat_lport/fport to same values */ - fs->nat_lport = cpu_to_be16(match.key->dst); - fs->nat_fport = cpu_to_be16(match.key->src); + fs->nat_lport = fs->val.lport; + fs->nat_fport = fs->val.fport; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { @@ -224,9 +306,8 @@ static void cxgb4_process_flow_match(struct net_device *dev, } static int cxgb4_validate_flow_match(struct net_device *dev, - struct flow_cls_offload *cls) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; u16 ethtype_mask = 0; u16 ethtype_key = 0; @@ -291,7 +372,8 @@ static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask, } static void process_pedit_field(struct ch_filter_specification *fs, u32 val, - u32 mask, u32 offset, u8 htype) + u32 mask, u32 offset, u8 htype, + u8 *natmode_flags) { switch (htype) { case FLOW_ACT_MANGLE_HDR_TYPE_ETH: @@ -316,66 +398,94 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, switch (offset) { case PEDIT_IP4_SRC: offload_pedit(fs, val, mask, IP4_SRC); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP4_DST: offload_pedit(fs, val, mask, IP4_DST); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; } - fs->nat_mode = NAT_MODE_ALL; break; case FLOW_ACT_MANGLE_HDR_TYPE_IP6: switch (offset) { case PEDIT_IP6_SRC_31_0: offload_pedit(fs, val, mask, IP6_SRC_31_0); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP6_SRC_63_32: offload_pedit(fs, val, mask, IP6_SRC_63_32); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP6_SRC_95_64: offload_pedit(fs, val, mask, IP6_SRC_95_64); + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP6_SRC_127_96: offload_pedit(fs, val, mask, IP6_SRC_127_96); + 
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP; break; case PEDIT_IP6_DST_31_0: offload_pedit(fs, val, mask, IP6_DST_31_0); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; case PEDIT_IP6_DST_63_32: offload_pedit(fs, val, mask, IP6_DST_63_32); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; case PEDIT_IP6_DST_95_64: offload_pedit(fs, val, mask, IP6_DST_95_64); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; case PEDIT_IP6_DST_127_96: offload_pedit(fs, val, mask, IP6_DST_127_96); + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; } - fs->nat_mode = NAT_MODE_ALL; break; case FLOW_ACT_MANGLE_HDR_TYPE_TCP: switch (offset) { case PEDIT_TCP_SPORT_DPORT: - if (~mask & PEDIT_TCP_UDP_SPORT_MASK) - offload_pedit(fs, cpu_to_be32(val) >> 16, - cpu_to_be32(mask) >> 16, - TCP_SPORT); - else - offload_pedit(fs, cpu_to_be32(val), - cpu_to_be32(mask), TCP_DPORT); + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) { + fs->nat_fport = val; + *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT; + } else { + fs->nat_lport = val >> 16; + *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; + } } - fs->nat_mode = NAT_MODE_ALL; break; case FLOW_ACT_MANGLE_HDR_TYPE_UDP: switch (offset) { case PEDIT_UDP_SPORT_DPORT: - if (~mask & PEDIT_TCP_UDP_SPORT_MASK) - offload_pedit(fs, cpu_to_be32(val) >> 16, - cpu_to_be32(mask) >> 16, - UDP_SPORT); - else - offload_pedit(fs, cpu_to_be32(val), - cpu_to_be32(mask), UDP_DPORT); + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) { + fs->nat_fport = val; + *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT; + } else { + fs->nat_lport = val >> 16; + *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; + } } - fs->nat_mode = NAT_MODE_ALL; + break; + } +} + +static int cxgb4_action_natmode_validate(struct adapter *adap, u8 natmode_flags, + struct netlink_ext_ack *extack) +{ + u8 i = 0; + + /* Extract the NAT mode to enable based on what 4-tuple fields + * are enabled to be overwritten. This ensures that the + * disabled fields don't get overwritten to 0. 
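+ * An entry applies to its .chip version and newer, so the T6 relaxations extend the T5 set rather than replace it.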
+ */ + for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) { + const struct cxgb4_natmode_config *c; + + c = &cxgb4_natmode_config_array[i]; + if (CHELSIO_CHIP_VERSION(adap->params.chip) >= c->chip && + natmode_flags == c->flags) + return 0; } + NL_SET_ERR_MSG_MOD(extack, "Unsupported NAT mode 4-tuple combination"); + return -EOPNOTSUPP; } void cxgb4_process_flow_actions(struct net_device *in, @@ -383,6 +493,7 @@ void cxgb4_process_flow_actions(struct net_device *in, struct ch_filter_specification *fs) { struct flow_action_entry *act; + u8 natmode_flags = 0; int i; flow_action_for_each(i, act, actions) { @@ -393,6 +504,7 @@ void cxgb4_process_flow_actions(struct net_device *in, case FLOW_ACTION_DROP: fs->action = FILTER_DROP; break; + case FLOW_ACTION_MIRRED: case FLOW_ACTION_REDIRECT: { struct net_device *out = act->dev; struct port_info *pi = netdev_priv(out); @@ -433,13 +545,22 @@ void cxgb4_process_flow_actions(struct net_device *in, val = act->mangle.val; offset = act->mangle.offset; - process_pedit_field(fs, val, mask, offset, htype); + process_pedit_field(fs, val, mask, offset, htype, + &natmode_flags); } break; + case FLOW_ACTION_QUEUE: + fs->action = FILTER_PASS; + fs->dirsteer = 1; + fs->iq = act->queue.index; + break; default: break; } } + if (natmode_flags) + cxgb4_action_natmode_tweak(fs, natmode_flags); + } static bool valid_l4_mask(u32 mask) @@ -456,7 +577,8 @@ static bool valid_l4_mask(u32 mask) } static bool valid_pedit_action(struct net_device *dev, - const struct flow_action_entry *act) + const struct flow_action_entry *act, + u8 *natmode_flags) { u32 mask, offset; u8 htype; @@ -481,7 +603,10 @@ static bool valid_pedit_action(struct net_device *dev, case FLOW_ACT_MANGLE_HDR_TYPE_IP4: switch (offset) { case PEDIT_IP4_SRC: + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; + break; case PEDIT_IP4_DST: + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; default: netdev_err(dev, "%s: Unsupported pedit field\n", @@ -495,10 +620,13 @@ static bool valid_pedit_action(struct net_device *dev, case PEDIT_IP6_SRC_63_32: case PEDIT_IP6_SRC_95_64: case PEDIT_IP6_SRC_127_96: + *natmode_flags |= CXGB4_ACTION_NATMODE_SIP; + break; case PEDIT_IP6_DST_31_0: case PEDIT_IP6_DST_63_32: case PEDIT_IP6_DST_95_64: case PEDIT_IP6_DST_127_96: + *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; default: netdev_err(dev, "%s: Unsupported pedit field\n", @@ -514,6 +642,10 @@ static bool valid_pedit_action(struct net_device *dev, __func__); return false; } + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT; + else + *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; break; default: netdev_err(dev, "%s: Unsupported pedit field\n", @@ -529,6 +661,10 @@ static bool valid_pedit_action(struct net_device *dev, __func__); return false; } + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT; + else + *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; break; default: netdev_err(dev, "%s: Unsupported pedit field\n", @@ -544,25 +680,39 @@ static bool valid_pedit_action(struct net_device *dev, } int cxgb4_validate_flow_actions(struct net_device *dev, - struct flow_action *actions) + struct flow_action *actions, + struct netlink_ext_ack *extack, + u8 matchall_filter) { + struct adapter *adap = netdev2adap(dev); struct flow_action_entry *act; bool act_redir = false; bool act_pedit = false; bool act_vlan = false; + u8 natmode_flags = 0; int i; + if (!flow_action_basic_hw_stats_check(actions, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, 
actions) { switch (act->id) { case FLOW_ACTION_ACCEPT: case FLOW_ACTION_DROP: /* Do nothing */ break; + case FLOW_ACTION_MIRRED: case FLOW_ACTION_REDIRECT: { - struct adapter *adap = netdev2adap(dev); struct net_device *n_dev, *target_dev; - unsigned int i; bool found = false; + unsigned int i; + + if (act->id == FLOW_ACTION_MIRRED && + !matchall_filter) { + NL_SET_ERR_MSG_MOD(extack, + "Egress mirror action is only supported for tc-matchall"); + return -EOPNOTSUPP; + } target_dev = act->dev; for_each_port(adap, i) { @@ -609,13 +759,17 @@ int cxgb4_validate_flow_actions(struct net_device *dev, } break; case FLOW_ACTION_MANGLE: { - bool pedit_valid = valid_pedit_action(dev, act); + bool pedit_valid = valid_pedit_action(dev, act, + &natmode_flags); if (!pedit_valid) return -EOPNOTSUPP; act_pedit = true; } break; + case FLOW_ACTION_QUEUE: + /* Do nothing. cxgb4_set_filter will validate */ + break; default: netdev_err(dev, "%s: Unsupported action\n", __func__); return -EOPNOTSUPP; @@ -628,96 +782,173 @@ int cxgb4_validate_flow_actions(struct net_device *dev, return -EINVAL; } + if (act_pedit) { + int ret; + + ret = cxgb4_action_natmode_validate(adap, natmode_flags, + extack); + if (ret) + return ret; + } + return 0; } -int cxgb4_tc_flower_replace(struct net_device *dev, - struct flow_cls_offload *cls) +static void cxgb4_tc_flower_hash_prio_add(struct adapter *adap, u32 tc_prio) +{ + spin_lock_bh(&adap->tids.ftid_lock); + if (adap->tids.tc_hash_tids_max_prio < tc_prio) + adap->tids.tc_hash_tids_max_prio = tc_prio; + spin_unlock_bh(&adap->tids.ftid_lock); +} + +static void cxgb4_tc_flower_hash_prio_del(struct adapter *adap, u32 tc_prio) +{ + struct tid_info *t = &adap->tids; + struct ch_tc_flower_entry *fe; + struct rhashtable_iter iter; + u32 found = 0; + + spin_lock_bh(&t->ftid_lock); + /* Bail if the current rule is not the one with the max + * prio. + */ + if (t->tc_hash_tids_max_prio != tc_prio) + goto out_unlock; + + /* Search for the next rule having the same or next lower + * max prio. + */ + rhashtable_walk_enter(&adap->flower_tbl, &iter); + do { + rhashtable_walk_start(&iter); + + fe = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(fe)) { + if (fe->fs.hash && + fe->fs.tc_prio <= t->tc_hash_tids_max_prio) { + t->tc_hash_tids_max_prio = fe->fs.tc_prio; + found++; + + /* Bail if we found another rule + * having the same prio as the + * current max one. 
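+ * In that case the max prio is unchanged and the walk can stop.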
+ */ + if (fe->fs.tc_prio == tc_prio) + break; + } + + fe = rhashtable_walk_next(&iter); + } + + rhashtable_walk_stop(&iter); + } while (fe == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + + if (!found) + t->tc_hash_tids_max_prio = 0; + +out_unlock: + spin_unlock_bh(&t->ftid_lock); +} + +int cxgb4_flow_rule_replace(struct net_device *dev, struct flow_rule *rule, + u32 tc_prio, struct netlink_ext_ack *extack, + struct ch_filter_specification *fs, u32 *tid) { - struct flow_rule *rule = flow_cls_offload_flow_rule(cls); - struct netlink_ext_ack *extack = cls->common.extack; struct adapter *adap = netdev2adap(dev); - struct ch_tc_flower_entry *ch_flower; - struct ch_filter_specification *fs; struct filter_ctx ctx; + u8 inet_family; int fidx, ret; - if (cxgb4_validate_flow_actions(dev, &rule->action)) + if (cxgb4_validate_flow_actions(dev, &rule->action, extack, 0)) return -EOPNOTSUPP; - if (cxgb4_validate_flow_match(dev, cls)) + if (cxgb4_validate_flow_match(dev, rule)) return -EOPNOTSUPP; - ch_flower = allocate_flower_entry(); - if (!ch_flower) { - netdev_err(dev, "%s: ch_flower alloc failed.\n", __func__); - return -ENOMEM; - } - - fs = &ch_flower->fs; - fs->hitcnts = 1; - cxgb4_process_flow_match(dev, cls, fs); + cxgb4_process_flow_match(dev, rule, fs); cxgb4_process_flow_actions(dev, &rule->action, fs); fs->hash = is_filter_exact_match(adap, fs); - if (fs->hash) { - fidx = 0; - } else { - u8 inet_family; - - inet_family = fs->type ? PF_INET6 : PF_INET; + inet_family = fs->type ? PF_INET6 : PF_INET; - /* Note that TC uses prio 0 to indicate stack to - * generate automatic prio and hence doesn't pass prio - * 0 to driver. However, the hardware TCAM index - * starts from 0. Hence, the -1 here. - */ - if (cls->common.prio <= (adap->tids.nftids + - adap->tids.nhpftids)) { - fidx = cls->common.prio - 1; - if (fidx < adap->tids.nhpftids) - fs->prio = 1; - } else { - fidx = cxgb4_get_free_ftid(dev, inet_family); - } + /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. + */ + fidx = cxgb4_get_free_ftid(dev, inet_family, fs->hash, + tc_prio); + if (fidx < 0) { + NL_SET_ERR_MSG_MOD(extack, + "No free LETCAM index available"); + return -ENOMEM; + } - /* Only insert FLOWER rule if its priority doesn't - * conflict with existing rules in the LETCAM. - */ - if (fidx < 0 || - !cxgb4_filter_prio_in_range(dev, fidx, cls->common.prio)) { - NL_SET_ERR_MSG_MOD(extack, - "No free LETCAM index available"); - ret = -ENOMEM; - goto free_entry; - } + if (fidx < adap->tids.nhpftids) { + fs->prio = 1; + fs->hash = 0; } - fs->tc_prio = cls->common.prio; - fs->tc_cookie = cls->cookie; + /* If the rule can be inserted into HASH region, then ignore + * the index to normal FILTER region. 
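+ * Hash filters are located by their 4-tuple rather than by a LETCAM index, so fidx has no meaning for them.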
+ */ + if (fs->hash) + fidx = 0; + + fs->tc_prio = tc_prio; init_completion(&ctx.completion); ret = __cxgb4_set_filter(dev, fidx, fs, &ctx); if (ret) { netdev_err(dev, "%s: filter creation err %d\n", __func__, ret); - goto free_entry; + return ret; } /* Wait for reply */ ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); - if (!ret) { - ret = -ETIMEDOUT; - goto free_entry; - } + if (!ret) + return -ETIMEDOUT; - ret = ctx.result; /* Check if hw returned error for filter creation */ + if (ctx.result) + return ctx.result; + + *tid = ctx.tid; + + if (fs->hash) + cxgb4_tc_flower_hash_prio_add(adap, tc_prio); + + return 0; +} + +int cxgb4_tc_flower_replace(struct net_device *dev, + struct flow_cls_offload *cls) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct netlink_ext_ack *extack = cls->common.extack; + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_entry *ch_flower; + struct ch_filter_specification *fs; + int ret; + + ch_flower = allocate_flower_entry(); + if (!ch_flower) { + netdev_err(dev, "%s: ch_flower alloc failed.\n", __func__); + return -ENOMEM; + } + + fs = &ch_flower->fs; + fs->hitcnts = 1; + fs->tc_cookie = cls->cookie; + + ret = cxgb4_flow_rule_replace(dev, rule, cls->common.prio, extack, fs, + &ch_flower->filter_id); if (ret) goto free_entry; ch_flower->tc_flower_cookie = cls->cookie; - ch_flower->filter_id = ctx.tid; ret = rhashtable_insert_fast(&adap->flower_tbl, &ch_flower->node, adap->flower_ht_params); if (ret) @@ -726,6 +957,9 @@ int cxgb4_tc_flower_replace(struct net_device *dev, return 0; del_filter: + if (fs->hash) + cxgb4_tc_flower_hash_prio_del(adap, cls->common.prio); + cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs); free_entry: @@ -733,6 +967,25 @@ free_entry: return ret; } +int cxgb4_flow_rule_destroy(struct net_device *dev, u32 tc_prio, + struct ch_filter_specification *fs, int tid) +{ + struct adapter *adap = netdev2adap(dev); + u8 hash; + int ret; + + hash = fs->hash; + + ret = cxgb4_del_filter(dev, tid, fs); + if (ret) + return ret; + + if (hash) + cxgb4_tc_flower_hash_prio_del(adap, tc_prio); + + return ret; +} + int cxgb4_tc_flower_destroy(struct net_device *dev, struct flow_cls_offload *cls) { @@ -744,19 +997,16 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, if (!ch_flower) return -ENOENT; - ret = cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs); + rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node, + adap->flower_ht_params); + + ret = cxgb4_flow_rule_destroy(dev, ch_flower->fs.tc_prio, + &ch_flower->fs, ch_flower->filter_id); if (ret) - goto err; + netdev_err(dev, "Flow rule destroy failed for tid: %u, ret: %d", + ch_flower->filter_id, ret); - ret = rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node, - adap->flower_ht_params); - if (ret) { - netdev_err(dev, "Flow remove from rhashtable failed"); - goto err; - } kfree_rcu(ch_flower, rcu); - -err: return ret; } @@ -835,8 +1085,9 @@ int cxgb4_tc_flower_stats(struct net_device *dev, if (ofld_stats->prev_packet_count != packets) ofld_stats->last_used = jiffies; flow_stats_update(&cls->stats, bytes - ofld_stats->byte_count, - packets - ofld_stats->packet_count, - ofld_stats->last_used); + packets - ofld_stats->packet_count, 0, + ofld_stats->last_used, + FLOW_ACTION_HW_STATS_IMMEDIATE); ofld_stats->packet_count = packets; ofld_stats->byte_count = bytes; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index e132516e9868..3a2fa00c8cde 100644 --- 
a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -108,11 +108,28 @@ struct ch_tc_pedit_fields { #define PEDIT_TCP_SPORT_DPORT 0x0 #define PEDIT_UDP_SPORT_DPORT 0x0 +enum cxgb4_action_natmode_flags { + CXGB4_ACTION_NATMODE_NONE = 0, + CXGB4_ACTION_NATMODE_DIP = (1 << 0), + CXGB4_ACTION_NATMODE_SIP = (1 << 1), + CXGB4_ACTION_NATMODE_DPORT = (1 << 2), + CXGB4_ACTION_NATMODE_SPORT = (1 << 3), +}; + +/* TC PEDIT action to NATMODE translation entry */ +struct cxgb4_natmode_config { + enum chip_type chip; + u8 flags; + u8 natmode; +}; + void cxgb4_process_flow_actions(struct net_device *in, struct flow_action *actions, struct ch_filter_specification *fs); int cxgb4_validate_flow_actions(struct net_device *dev, - struct flow_action *actions); + struct flow_action *actions, + struct netlink_ext_ack *extack, + u8 matchall_filter); int cxgb4_tc_flower_replace(struct net_device *dev, struct flow_cls_offload *cls); @@ -120,6 +137,11 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, struct flow_cls_offload *cls); int cxgb4_tc_flower_stats(struct net_device *dev, struct flow_cls_offload *cls); +int cxgb4_flow_rule_replace(struct net_device *dev, struct flow_rule *rule, + u32 tc_prio, struct netlink_ext_ack *extack, + struct ch_filter_specification *fs, u32 *tid); +int cxgb4_flow_rule_destroy(struct net_device *dev, u32 tc_prio, + struct ch_filter_specification *fs, int tid); int cxgb4_init_tc_flower(struct adapter *adap); void cxgb4_cleanup_tc_flower(struct adapter *adap); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 1b7681a4eb32..1672d3afe5be 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -8,6 +8,46 @@ #include "cxgb4_filter.h" #include "cxgb4_tc_flower.h" +static int cxgb4_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload does not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int cxgb4_matchall_egress_validate(struct net_device *dev, struct tc_cls_matchall_offload *cls) { @@ -48,6 +88,10 @@ static int cxgb4_matchall_egress_validate(struct net_device *dev, flow_action_for_each(i, entry, actions) { switch (entry->id) { case FLOW_ACTION_POLICE: + ret = cxgb4_policer_validate(actions, entry, extack); + if (ret) + return ret; + /* Convert bytes per second to bits per second */ if (entry->police.rate_bytes_ps * 8 > max_link_rate) {
NL_SET_ERR_MSG_MOD(extack, @@ -146,6 +190,10 @@ static int cxgb4_matchall_alloc_tc(struct net_device *dev, if (entry->id == FLOW_ACTION_POLICE) break; + ret = cxgb4_policer_validate(&cls->rule->action, entry, extack); + if (ret) + return ret; + /* Convert from bytes per second to Kbps */ p.u.params.maxrate = div_u64(entry->police.rate_bytes_ps * 8, 1000); p.u.params.channel = pi->tx_chan; @@ -188,45 +236,98 @@ static void cxgb4_matchall_free_tc(struct net_device *dev) tc_port_matchall->egress.state = CXGB4_MATCHALL_STATE_DISABLED; } -static int cxgb4_matchall_alloc_filter(struct net_device *dev, +static int cxgb4_matchall_mirror_alloc(struct net_device *dev, struct tc_cls_matchall_offload *cls) { struct netlink_ext_ack *extack = cls->common.extack; struct cxgb4_tc_port_matchall *tc_port_matchall; struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = netdev2adap(dev); + struct flow_action_entry *act; + int ret; + u32 i; + + tc_port_matchall = &adap->tc_matchall->port_matchall[pi->port_id]; + flow_action_for_each(i, act, &cls->rule->action) { + if (act->id == FLOW_ACTION_MIRRED) { + ret = cxgb4_port_mirror_alloc(dev); + if (ret) { + NL_SET_ERR_MSG_MOD(extack, + "Couldn't allocate mirror"); + return ret; + } + + tc_port_matchall->ingress.viid_mirror = pi->viid_mirror; + break; + } + } + + return 0; +} + +static void cxgb4_matchall_mirror_free(struct net_device *dev) +{ + struct cxgb4_tc_port_matchall *tc_port_matchall; + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + tc_port_matchall = &adap->tc_matchall->port_matchall[pi->port_id]; + if (!tc_port_matchall->ingress.viid_mirror) + return; + + cxgb4_port_mirror_free(dev); + tc_port_matchall->ingress.viid_mirror = 0; +} + +static int cxgb4_matchall_del_filter(struct net_device *dev, u8 filter_type) +{ + struct cxgb4_tc_port_matchall *tc_port_matchall; + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + int ret; + + tc_port_matchall = &adap->tc_matchall->port_matchall[pi->port_id]; + ret = cxgb4_del_filter(dev, tc_port_matchall->ingress.tid[filter_type], + &tc_port_matchall->ingress.fs[filter_type]); + if (ret) + return ret; + + tc_port_matchall->ingress.tid[filter_type] = 0; + return 0; +} + +static int cxgb4_matchall_add_filter(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + u8 filter_type) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct cxgb4_tc_port_matchall *tc_port_matchall; + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); struct ch_filter_specification *fs; int ret, fidx; - /* Note that TC uses prio 0 to indicate stack to generate - * automatic prio and hence doesn't pass prio 0 to driver. - * However, the hardware TCAM index starts from 0. Hence, the - * -1 here. 1 slot is enough to create a wildcard matchall - * VIID rule. + /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. */ - if (cls->common.prio <= (adap->tids.nftids + adap->tids.nhpftids)) - fidx = cls->common.prio - 1; - else - fidx = cxgb4_get_free_ftid(dev, PF_INET); - - /* Only insert MATCHALL rule if its priority doesn't conflict - * with existing rules in the LETCAM. - */ - if (fidx < 0 || - !cxgb4_filter_prio_in_range(dev, fidx, cls->common.prio)) { + fidx = cxgb4_get_free_ftid(dev, filter_type ? 
PF_INET6 : PF_INET, + false, cls->common.prio); + if (fidx < 0) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); return -ENOMEM; } tc_port_matchall = &adap->tc_matchall->port_matchall[pi->port_id]; - fs = &tc_port_matchall->ingress.fs; + fs = &tc_port_matchall->ingress.fs[filter_type]; memset(fs, 0, sizeof(*fs)); if (fidx < adap->tids.nhpftids) fs->prio = 1; fs->tc_prio = cls->common.prio; fs->tc_cookie = cls->cookie; + fs->type = filter_type; fs->hitcnts = 1; fs->val.pfvf_vld = 1; @@ -239,9 +340,39 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, if (ret) return ret; - tc_port_matchall->ingress.tid = fidx; + tc_port_matchall->ingress.tid[filter_type] = fidx; + return 0; +} + +static int cxgb4_matchall_alloc_filter(struct net_device *dev, + struct tc_cls_matchall_offload *cls) +{ + struct cxgb4_tc_port_matchall *tc_port_matchall; + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + int ret, i; + + tc_port_matchall = &adap->tc_matchall->port_matchall[pi->port_id]; + + ret = cxgb4_matchall_mirror_alloc(dev, cls); + if (ret) + return ret; + + for (i = 0; i < CXGB4_FILTER_TYPE_MAX; i++) { + ret = cxgb4_matchall_add_filter(dev, cls, i); + if (ret) + goto out_free; + } + tc_port_matchall->ingress.state = CXGB4_MATCHALL_STATE_ENABLED; return 0; + +out_free: + while (i-- > 0) + cxgb4_matchall_del_filter(dev, i); + + cxgb4_matchall_mirror_free(dev); + return ret; } static int cxgb4_matchall_free_filter(struct net_device *dev) @@ -250,18 +381,21 @@ static int cxgb4_matchall_free_filter(struct net_device *dev) struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = netdev2adap(dev); int ret; + u8 i; tc_port_matchall = &adap->tc_matchall->port_matchall[pi->port_id]; - ret = cxgb4_del_filter(dev, tc_port_matchall->ingress.tid, - &tc_port_matchall->ingress.fs); - if (ret) - return ret; + for (i = 0; i < CXGB4_FILTER_TYPE_MAX; i++) { + ret = cxgb4_matchall_del_filter(dev, i); + if (ret) + return ret; + } + + cxgb4_matchall_mirror_free(dev); tc_port_matchall->ingress.packets = 0; tc_port_matchall->ingress.bytes = 0; tc_port_matchall->ingress.last_used = 0; - tc_port_matchall->ingress.tid = 0; tc_port_matchall->ingress.state = CXGB4_MATCHALL_STATE_DISABLED; return 0; } @@ -286,7 +420,8 @@ int cxgb4_tc_matchall_replace(struct net_device *dev, } ret = cxgb4_validate_flow_actions(dev, - &cls_matchall->rule->action); + &cls_matchall->rule->action, + extack, 1); if (ret) return ret; @@ -316,8 +451,12 @@ int cxgb4_tc_matchall_destroy(struct net_device *dev, tc_port_matchall = &adap->tc_matchall->port_matchall[pi->port_id]; if (ingress) { + /* All the filter types of this matchall rule save the + * same cookie. So, checking for the first one is + * enough. 
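+ * (cxgb4_matchall_add_filter() stores cls->cookie in every fs[filter_type].tc_cookie.)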
+ */ if (cls_matchall->cookie != - tc_port_matchall->ingress.fs.tc_cookie) + tc_port_matchall->ingress.fs[0].tc_cookie) return -ENOENT; return cxgb4_matchall_free_filter(dev); @@ -333,27 +472,36 @@ int cxgb4_tc_matchall_destroy(struct net_device *dev, int cxgb4_tc_matchall_stats(struct net_device *dev, struct tc_cls_matchall_offload *cls_matchall) { + u64 tmp_packets, tmp_bytes, packets = 0, bytes = 0; struct cxgb4_tc_port_matchall *tc_port_matchall; + struct cxgb4_matchall_ingress_entry *ingress; struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = netdev2adap(dev); - u64 packets, bytes; int ret; + u8 i; tc_port_matchall = &adap->tc_matchall->port_matchall[pi->port_id]; if (tc_port_matchall->ingress.state == CXGB4_MATCHALL_STATE_DISABLED) return -ENOENT; - ret = cxgb4_get_filter_counters(dev, tc_port_matchall->ingress.tid, - &packets, &bytes, - tc_port_matchall->ingress.fs.hash); - if (ret) - return ret; + ingress = &tc_port_matchall->ingress; + for (i = 0; i < CXGB4_FILTER_TYPE_MAX; i++) { + ret = cxgb4_get_filter_counters(dev, ingress->tid[i], + &tmp_packets, &tmp_bytes, + ingress->fs[i].hash); + if (ret) + return ret; + + packets += tmp_packets; + bytes += tmp_bytes; + } if (tc_port_matchall->ingress.packets != packets) { flow_stats_update(&cls_matchall->stats, bytes - tc_port_matchall->ingress.bytes, packets - tc_port_matchall->ingress.packets, - tc_port_matchall->ingress.last_used); + 0, tc_port_matchall->ingress.last_used, + FLOW_ACTION_HW_STATS_IMMEDIATE); tc_port_matchall->ingress.packets = packets; tc_port_matchall->ingress.bytes = bytes; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.h index ab6b5683dfd3..fe7ec423a4c9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.h @@ -19,8 +19,10 @@ struct cxgb4_matchall_egress_entry { struct cxgb4_matchall_ingress_entry { enum cxgb4_matchall_state state; /* Current MATCHALL offload state */ - u32 tid; /* Index to hardware filter entry */ - struct ch_filter_specification fs; /* Filter entry */ + u32 tid[CXGB4_FILTER_TYPE_MAX]; /* Index to hardware filter entries */ + /* Filter entries */ + struct ch_filter_specification fs[CXGB4_FILTER_TYPE_MAX]; + u16 viid_mirror; /* Identifier for allocated Mirror VI */ u64 bytes; /* # of bytes hitting the filter */ u64 packets; /* # of packets hitting the filter */ u64 last_used; /* Last updated jiffies time */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c index ec3eb45ee3b4..338b04f339b3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c @@ -114,8 +114,7 @@ static int cxgb4_init_eosw_txq(struct net_device *dev, eosw_txq->cred = adap->params.ofldq_wr_cred; eosw_txq->hwqid = hwqid; eosw_txq->netdev = dev; - tasklet_init(&eosw_txq->qresume_tsk, cxgb4_ethofld_restart, - (unsigned long)eosw_txq); + tasklet_setup(&eosw_txq->qresume_tsk, cxgb4_ethofld_restart); return 0; } @@ -301,6 +300,7 @@ static void cxgb4_mqprio_free_hw_resources(struct net_device *dev) cxgb4_clear_msix_aff(eorxq->msix->vec, eorxq->msix->aff_mask); free_irq(eorxq->msix->vec, &eorxq->rspq); + cxgb4_free_msix_idx_in_bmap(adap, eorxq->msix->idx); } free_rspq_fl(adap, &eorxq->rspq, &eorxq->fl); @@ -341,6 +341,13 @@ static int cxgb4_mqprio_alloc_tc(struct net_device *dev, p.u.params.minrate = div_u64(mqprio->min_rate[i] * 8, 1000); 
p.u.params.maxrate = div_u64(mqprio->max_rate[i] * 8, 1000); + /* Request larger burst buffer for smaller MTU, so + * that hardware can work on more data per burst + * cycle. + */ + if (dev->mtu <= ETH_DATA_LEN) + p.u.params.burstsize = 8 * dev->mtu; + e = cxgb4_sched_class_alloc(dev, &p); if (!e) { ret = -ENOMEM; @@ -566,6 +573,7 @@ static void cxgb4_mqprio_disable_offload(struct net_device *dev) int cxgb4_setup_tc_mqprio(struct net_device *dev, struct tc_mqprio_qopt_offload *mqprio) { + struct adapter *adap = netdev2adap(dev); bool needs_bring_up = false; int ret; @@ -573,13 +581,16 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev, if (ret) return ret; + mutex_lock(&adap->tc_mqprio->mqprio_mutex); + /* To configure tc params, the current allocated EOTIDs must * be freed up. However, they can't be freed up if there's * traffic running on the interface. So, ensure interface is * down before configuring tc params. */ if (netif_running(dev)) { - cxgb_close(dev); + netif_tx_stop_all_queues(dev); + netif_carrier_off(dev); needs_bring_up = true; } @@ -605,12 +616,39 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev, } out: - if (needs_bring_up) - cxgb_open(dev); + if (needs_bring_up) { + netif_tx_start_all_queues(dev); + netif_carrier_on(dev); + } + mutex_unlock(&adap->tc_mqprio->mqprio_mutex); return ret; } +void cxgb4_mqprio_stop_offload(struct adapter *adap) +{ + struct cxgb4_tc_port_mqprio *tc_port_mqprio; + struct net_device *dev; + u8 i; + + if (!adap->tc_mqprio || !adap->tc_mqprio->port_mqprio) + return; + + mutex_lock(&adap->tc_mqprio->mqprio_mutex); + for_each_port(adap, i) { + dev = adap->port[i]; + if (!dev) + continue; + + tc_port_mqprio = &adap->tc_mqprio->port_mqprio[i]; + if (!tc_port_mqprio->mqprio.qopt.num_tc) + continue; + + cxgb4_mqprio_disable_offload(dev); + } + mutex_unlock(&adap->tc_mqprio->mqprio_mutex); +} + int cxgb4_init_tc_mqprio(struct adapter *adap) { struct cxgb4_tc_port_mqprio *tc_port_mqprio, *port_mqprio; @@ -630,6 +668,8 @@ int cxgb4_init_tc_mqprio(struct adapter *adap) goto out_free_mqprio; } + mutex_init(&tc_mqprio->mqprio_mutex); + tc_mqprio->port_mqprio = tc_port_mqprio; for (i = 0; i < adap->params.nports; i++) { port_mqprio = &tc_mqprio->port_mqprio[i]; @@ -664,6 +704,7 @@ void cxgb4_cleanup_tc_mqprio(struct adapter *adap) u8 i; if (adap->tc_mqprio) { + mutex_lock(&adap->tc_mqprio->mqprio_mutex); if (adap->tc_mqprio->port_mqprio) { for (i = 0; i < adap->params.nports; i++) { struct net_device *dev = adap->port[i]; @@ -675,6 +716,7 @@ void cxgb4_cleanup_tc_mqprio(struct adapter *adap) } kfree(adap->tc_mqprio->port_mqprio); } + mutex_unlock(&adap->tc_mqprio->mqprio_mutex); kfree(adap->tc_mqprio); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h index c532f1ef8451..be96f1dc0372 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h @@ -33,11 +33,13 @@ struct cxgb4_tc_port_mqprio { struct cxgb4_tc_mqprio { refcount_t refcnt; /* Refcount for adapter-wide resources */ + struct mutex mqprio_mutex; /* Lock for accessing MQPRIO info */ struct cxgb4_tc_port_mqprio *port_mqprio; /* Per port MQPRIO info */ }; int cxgb4_setup_tc_mqprio(struct net_device *dev, struct tc_mqprio_qopt_offload *mqprio); +void cxgb4_mqprio_stop_offload(struct adapter *adap); int cxgb4_init_tc_mqprio(struct adapter *adap); void cxgb4_cleanup_tc_mqprio(struct adapter *adap); #endif /* __CXGB4_TC_MQPRIO_H__ */ diff --git 
a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 269b8d9e25e0..a5d2f84dcdd5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -48,7 +48,7 @@ static int fill_match_fields(struct adapter *adap, bool next_header) { unsigned int i, j; - u32 val, mask; + __be32 val, mask; int off, err; bool found; @@ -155,9 +155,10 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) struct ch_filter_specification fs; struct cxgb4_tc_u32_table *t; struct cxgb4_link *link; - unsigned int filter_id; u32 uhtid, link_uhtid; bool is_ipv6 = false; + u8 inet_family; + int filter_id; int ret; if (!can_tc_u32_offload(dev)) @@ -166,18 +167,15 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6)) return -EOPNOTSUPP; - /* Note that TC uses prio 0 to indicate stack to generate - * automatic prio and hence doesn't pass prio 0 to driver. - * However, the hardware TCAM index starts from 0. Hence, the - * -1 here. - */ - filter_id = TC_U32_NODE(cls->knode.handle) - 1; + inet_family = (protocol == htons(ETH_P_IPV6)) ? PF_INET6 : PF_INET; - /* Only insert U32 rule if its priority doesn't conflict with - * existing rules in the LETCAM. + /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. */ - if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids || - !cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) { + filter_id = cxgb4_get_free_ftid(dev, inet_family, false, + TC_U32_NODE(cls->knode.handle)); + if (filter_id < 0) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); return -ENOMEM; @@ -230,7 +228,7 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) const struct cxgb4_next_header *next; bool found = false; unsigned int i, j; - u32 val, mask; + __be32 val, mask; int off; if (t->table[link_uhtid - 1].link_handle) { @@ -244,10 +242,10 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Try to find matches that allow jumps to next header. */ for (i = 0; next[i].jump; i++) { - if (next[i].offoff != cls->knode.sel->offoff || - next[i].shift != cls->knode.sel->offshift || - next[i].mask != cls->knode.sel->offmask || - next[i].offset != cls->knode.sel->off) + if (next[i].sel.offoff != cls->knode.sel->offoff || + next[i].sel.offshift != cls->knode.sel->offshift || + next[i].sel.offmask != cls->knode.sel->offmask || + next[i].sel.off != cls->knode.sel->off) continue; /* Found a possible candidate. Find a key that @@ -259,9 +257,9 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) val = cls->knode.sel->keys[j].val; mask = cls->knode.sel->keys[j].mask; - if (next[i].match_off == off && - next[i].match_val == val && - next[i].match_mask == mask) { + if (next[i].key.off == off && + next[i].key.val == val && + next[i].key.mask == mask) { found = true; break; } @@ -358,23 +356,65 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) struct cxgb4_link *link = NULL; struct cxgb4_tc_u32_table *t; struct filter_entry *f; + bool found = false; u32 handle, uhtid; + u8 nslots; int ret; if (!can_tc_u32_offload(dev)) return -EOPNOTSUPP; /* Fetch the location to delete the filter. 
*/ - filter_id = TC_U32_NODE(cls->knode.handle) - 1; - if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids) - return -ERANGE; + max_tids = adapter->tids.nhpftids + adapter->tids.nftids; + + spin_lock_bh(&adapter->tids.ftid_lock); + filter_id = 0; + while (filter_id < max_tids) { + if (filter_id < adapter->tids.nhpftids) { + i = filter_id; + f = &adapter->tids.hpftid_tab[i]; + if (f->valid && f->fs.tc_cookie == cls->knode.handle) { + found = true; + break; + } - if (filter_id < adapter->tids.nhpftids) - f = &adapter->tids.hpftid_tab[filter_id]; - else - f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + i = find_next_bit(adapter->tids.hpftid_bmap, + adapter->tids.nhpftids, i + 1); + if (i >= adapter->tids.nhpftids) { + filter_id = adapter->tids.nhpftids; + continue; + } + + filter_id = i; + } else { + i = filter_id - adapter->tids.nhpftids; + f = &adapter->tids.ftid_tab[i]; + if (f->valid && f->fs.tc_cookie == cls->knode.handle) { + found = true; + break; + } + + i = find_next_bit(adapter->tids.ftid_bmap, + adapter->tids.nftids, i + 1); + if (i >= adapter->tids.nftids) + break; + + filter_id = i + adapter->tids.nhpftids; + } + + nslots = 0; + if (f->fs.type) { + nslots++; + if (CHELSIO_CHIP_VERSION(adapter->params.chip) < + CHELSIO_T6) + nslots += 2; + } + + filter_id += nslots; + } + spin_unlock_bh(&adapter->tids.ftid_lock); - if (cls->knode.handle != f->fs.tc_cookie) + if (!found) return -ERANGE; t = adapter->tc_u32; @@ -407,7 +447,6 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* If a link is being deleted, then delete all filters * associated with the link. */ - max_tids = adapter->tids.nftids; for (i = 0; i < t->size; i++) { link = &t->table[i]; @@ -485,13 +524,9 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) out_no_mem: for (i = 0; i < t->size; i++) { struct cxgb4_link *link = &t->table[i]; - - if (link->tid_map) - kvfree(link->tid_map); + kvfree(link->tid_map); } - - if (t) - kvfree(t); + kvfree(t); return NULL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h index a4b99edcc339..f59dd4b2ae6f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -38,12 +38,12 @@ struct cxgb4_match_field { int off; /* Offset from the beginning of the header to match */ /* Fill the value/mask pair in the spec if matched */ - int (*val)(struct ch_filter_specification *f, u32 val, u32 mask); + int (*val)(struct ch_filter_specification *f, __be32 val, __be32 mask); }; /* IPv4 match fields */ static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.tos = (ntohl(val) >> 16) & 0x000000FF; f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF; @@ -52,7 +52,7 @@ static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { u32 mask_val; u8 frag_val; @@ -74,7 +74,7 @@ static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.proto = (ntohl(val) >> 16) & 0x000000FF; f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF; @@ -83,7 +83,7 @@ static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, } static inline 
int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[0], &val, sizeof(u32)); memcpy(&f->mask.fip[0], &mask, sizeof(u32)); @@ -92,7 +92,7 @@ static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[0], &val, sizeof(u32)); memcpy(&f->mask.lip[0], &mask, sizeof(u32)); @@ -111,7 +111,7 @@ static const struct cxgb4_match_field cxgb4_ipv4_fields[] = { /* IPv6 match fields */ static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.tos = (ntohl(val) >> 20) & 0x000000FF; f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF; @@ -120,7 +120,7 @@ static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.proto = (ntohl(val) >> 8) & 0x000000FF; f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF; @@ -129,7 +129,7 @@ static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[0], &val, sizeof(u32)); memcpy(&f->mask.fip[0], &mask, sizeof(u32)); @@ -138,7 +138,7 @@ static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[4], &val, sizeof(u32)); memcpy(&f->mask.fip[4], &mask, sizeof(u32)); @@ -147,7 +147,7 @@ static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[8], &val, sizeof(u32)); memcpy(&f->mask.fip[8], &mask, sizeof(u32)); @@ -156,7 +156,7 @@ static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.fip[12], &val, sizeof(u32)); memcpy(&f->mask.fip[12], &mask, sizeof(u32)); @@ -165,7 +165,7 @@ static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[0], &val, sizeof(u32)); memcpy(&f->mask.lip[0], &mask, sizeof(u32)); @@ -174,7 +174,7 @@ static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[4], &val, sizeof(u32)); memcpy(&f->mask.lip[4], &mask, sizeof(u32)); @@ -183,7 +183,7 @@ static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { memcpy(&f->val.lip[8], &val, sizeof(u32)); memcpy(&f->mask.lip[8], &mask, sizeof(u32)); @@ -192,7 +192,7 @@ static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, } static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 
val, __be32 mask) { memcpy(&f->val.lip[12], &val, sizeof(u32)); memcpy(&f->mask.lip[12], &mask, sizeof(u32)); @@ -216,7 +216,7 @@ static const struct cxgb4_match_field cxgb4_ipv6_fields[] = { /* TCP/UDP match */ static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f, - u32 val, u32 mask) + __be32 val, __be32 mask) { f->val.fport = ntohl(val) >> 16; f->mask.fport = ntohl(mask) >> 16; @@ -237,19 +237,13 @@ static const struct cxgb4_match_field cxgb4_udp_fields[] = { }; struct cxgb4_next_header { - unsigned int offset; /* Offset to next header */ - /* offset, shift, and mask added to offset above + /* Offset, shift, and mask added to beginning of the header * to get to next header. Useful when using a header * field's value to jump to next header such as IHL field * in IPv4 header. */ - unsigned int offoff; - u32 shift; - u32 mask; - /* match criteria to make this jump */ - unsigned int match_off; - u32 match_val; - u32 match_mask; + struct tc_u32_sel sel; + struct tc_u32_key key; /* location of jump to make */ const struct cxgb4_match_field *jump; }; @@ -258,26 +252,74 @@ struct cxgb4_next_header { * IPv4 header. */ static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = { - { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, - .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00, - .jump = cxgb4_tcp_fields }, - { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, - .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00, - .jump = cxgb4_udp_fields }, - { .jump = NULL } + { + /* TCP Jump */ + .sel = { + .off = 0, + .offoff = 0, + .offshift = 6, + .offmask = cpu_to_be16(0x0f00), + }, + .key = { + .off = 8, + .val = cpu_to_be32(0x00060000), + .mask = cpu_to_be32(0x00ff0000), + }, + .jump = cxgb4_tcp_fields, + }, + { + /* UDP Jump */ + .sel = { + .off = 0, + .offoff = 0, + .offshift = 6, + .offmask = cpu_to_be16(0x0f00), + }, + .key = { + .off = 8, + .val = cpu_to_be32(0x00110000), + .mask = cpu_to_be32(0x00ff0000), + }, + .jump = cxgb4_udp_fields, + }, + { .jump = NULL }, }; /* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header * to get to transport layer header. 
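Storing the jump-table match values as __be32 and reusing the uapi struct tc_u32_sel / struct tc_u32_key means the comparison against cls->knode.sel needs no byte swapping, and sparse can flag any accidental mixing with host-order u32. A sketch of the matching rule itself (u32_key_matches is an illustrative helper):

    #include <linux/types.h>
    #include <asm/byteorder.h>

    /* Both sides stay big-endian: a key matches when the masked packet
     * word equals the key value. E.g. TCP in the IPv4 protocol field
     * (the word at offset 8) is val cpu_to_be32(0x00060000) with mask
     * cpu_to_be32(0x00ff0000), exactly as in cxgb4_ipv4_jumps[] above.
     */
    static bool u32_key_matches(__be32 pkt_word, __be32 val, __be32 mask)
    {
            return (pkt_word & mask) == val;
    }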
*/ static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { - { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, - .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000, - .jump = cxgb4_tcp_fields }, - { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, - .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000, - .jump = cxgb4_udp_fields }, - { .jump = NULL } + { + /* TCP Jump */ + .sel = { + .off = 40, + .offoff = 0, + .offshift = 0, + .offmask = 0, + }, + .key = { + .off = 4, + .val = cpu_to_be32(0x00000600), + .mask = cpu_to_be32(0x0000ff00), + }, + .jump = cxgb4_tcp_fields, + }, + { + /* UDP Jump */ + .sel = { + .off = 40, + .offoff = 0, + .offshift = 0, + .offmask = 0, + }, + .key = { + .off = 4, + .val = cpu_to_be32(0x00001100), + .mask = cpu_to_be32(0x0000ff00), + }, + .jump = cxgb4_udp_fields, + }, + { .jump = NULL }, }; struct cxgb4_link { @@ -289,6 +331,6 @@ struct cxgb4_link { struct cxgb4_tc_u32_table { unsigned int size; /* number of entries in table */ - struct cxgb4_link table[0]; /* Jump table */ + struct cxgb4_link table[]; /* Jump table */ }; #endif /* __CXGB4_TC_U32_PARSE_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_thermal.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_thermal.c index 3de8a5e83b6c..9a6d65243334 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_thermal.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_thermal.c @@ -62,6 +62,7 @@ static struct thermal_zone_device_ops cxgb4_thermal_ops = { int cxgb4_thermal_init(struct adapter *adap) { struct ch_thermal *ch_thermal = &adap->ch_thermal; + char ch_tz_name[THERMAL_NAME_LENGTH]; int num_trip = CXGB4_NUM_TRIPS; u32 param, val; int ret; @@ -82,7 +83,8 @@ int cxgb4_thermal_init(struct adapter *adap) ch_thermal->trip_type = THERMAL_TRIP_CRITICAL; } - ch_thermal->tzdev = thermal_zone_device_register("cxgb4", num_trip, + snprintf(ch_tz_name, sizeof(ch_tz_name), "cxgb4_%s", adap->name); + ch_thermal->tzdev = thermal_zone_device_register(ch_tz_name, num_trip, 0, adap, &cxgb4_thermal_ops, NULL, 0, 0); @@ -92,12 +94,22 @@ int cxgb4_thermal_init(struct adapter *adap) ch_thermal->tzdev = NULL; return ret; } + + ret = thermal_zone_device_enable(ch_thermal->tzdev); + if (ret) { + dev_err(adap->pdev_dev, "Failed to enable thermal zone\n"); + thermal_zone_device_unregister(adap->ch_thermal.tzdev); + return ret; + } + return 0; } int cxgb4_thermal_remove(struct adapter *adap) { - if (adap->ch_thermal.tzdev) + if (adap->ch_thermal.tzdev) { thermal_zone_device_unregister(adap->ch_thermal.tzdev); + adap->ch_thermal.tzdev = NULL; + } return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index cce33d279094..17faac715882 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -174,13 +174,14 @@ static int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; - int i, ret = 0; + int i, ret; - ret = !(!alloc_uld_rxqs(adap, rxq_info, lro)); + ret = alloc_uld_rxqs(adap, rxq_info, lro); + if (ret) + return ret; /* Tell uP to route control queue completions to rdma rspq */ - if (adap->flags & CXGB4_FULL_INIT_DONE && - !ret && uld_type == CXGB4_ULD_RDMA) { + if (adap->flags & CXGB4_FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) { struct sge *s = &adap->sge; unsigned int cmplqid; u32 param, cmdop; @@ -580,6 +581,9 @@ void t4_uld_clean_up(struct adapter *adap) { unsigned int i; + if 
(!is_uld(adap)) + return; + mutex_lock(&uld_mutex); for (i = 0; i < CXGB4_ULD_MAX; i++) { if (!adap->uld[i].handle) @@ -662,6 +666,129 @@ static int uld_attach(struct adapter *adap, unsigned int uld) return 0; } +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) +static bool cxgb4_uld_in_use(struct adapter *adap) +{ + const struct tid_info *t = &adap->tids; + + return (atomic_read(&t->conns_in_use) || t->stids_in_use); +} + +/* cxgb4_set_ktls_feature: request FW to enable/disable ktls settings. + * @adap: adapter info + * @enable: 1 to enable / 0 to disable ktls settings. + */ +int cxgb4_set_ktls_feature(struct adapter *adap, bool enable) +{ + int ret = 0; + u32 params = + FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_KTLS_HW) | + FW_PARAMS_PARAM_Y_V(enable) | + FW_PARAMS_PARAM_Z_V(FW_PARAMS_PARAM_DEV_KTLS_HW_USER_ENABLE); + + if (enable) { + if (!refcount_read(&adap->chcr_ktls.ktls_refcount)) { + /* If any ULD connections are up at this moment, another + * ULD is already active; return failure. + */ + if (cxgb4_uld_in_use(adap)) { + dev_dbg(adap->pdev_dev, + "ULD connections (tid/stid) active. Can't enable kTLS\n"); + return -EINVAL; + } + ret = t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, &params, &params); + if (ret) + return ret; + refcount_set(&adap->chcr_ktls.ktls_refcount, 1); + pr_debug("kTLS has been enabled. Restrictions placed on ULD support\n"); + } else { + /* ktls settings already up, just increment refcount. */ + refcount_inc(&adap->chcr_ktls.ktls_refcount); + } + } else { + /* return failure if refcount is already 0. */ + if (!refcount_read(&adap->chcr_ktls.ktls_refcount)) + return -EINVAL; + /* decrement refcount and test, if 0, disable ktls feature, + * else return command success. + */ + if (refcount_dec_and_test(&adap->chcr_ktls.ktls_refcount)) { + ret = t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, &params, &params); + if (ret) + return ret; + pr_debug("kTLS is disabled.
Restrictions on ULD support removed\n"); + } + } + + return ret; +} +#endif + +static void cxgb4_uld_alloc_resources(struct adapter *adap, + enum cxgb4_uld type, + const struct cxgb4_uld_info *p) +{ + int ret = 0; + + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + return; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + return; + ret = cfg_queues_uld(adap, type, p); + if (ret) + goto out; + ret = setup_sge_queues_uld(adap, type, p->lro); + if (ret) + goto free_queues; + if (adap->flags & CXGB4_USING_MSIX) { + ret = request_msix_queue_irqs_uld(adap, type); + if (ret) + goto free_rxq; + } + if (adap->flags & CXGB4_FULL_INIT_DONE) + enable_rx_uld(adap, type); + if (adap->uld[type].add) + goto free_irq; + ret = setup_sge_txq_uld(adap, type, p); + if (ret) + goto free_irq; + adap->uld[type] = *p; + ret = uld_attach(adap, type); + if (ret) + goto free_txq; + return; +free_txq: + release_sge_txq_uld(adap, type); +free_irq: + if (adap->flags & CXGB4_FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & CXGB4_USING_MSIX) + free_msix_queue_irqs_uld(adap, type); +free_rxq: + free_sge_queues_uld(adap, type); +free_queues: + free_queues_uld(adap, type); +out: + dev_warn(adap->pdev_dev, + "ULD registration failed for uld type %d\n", type); +} + +void cxgb4_uld_enable(struct adapter *adap) +{ + struct cxgb4_uld_list *uld_entry; + + mutex_lock(&uld_mutex); + list_add_tail(&adap->list_node, &adapter_list); + list_for_each_entry(uld_entry, &uld_list, list_node) + cxgb4_uld_alloc_resources(adap, uld_entry->uld_type, + &uld_entry->uld_info); + mutex_unlock(&uld_mutex); +} + /* cxgb4_register_uld - register an upper-layer driver * @type: the ULD type * @p: the ULD methods @@ -672,57 +799,23 @@ static int uld_attach(struct adapter *adap, unsigned int uld) void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) { + struct cxgb4_uld_list *uld_entry; struct adapter *adap; - int ret = 0; if (type >= CXGB4_ULD_MAX) return; + uld_entry = kzalloc(sizeof(*uld_entry), GFP_KERNEL); + if (!uld_entry) + return; + + memcpy(&uld_entry->uld_info, p, sizeof(struct cxgb4_uld_info)); mutex_lock(&uld_mutex); - list_for_each_entry(adap, &adapter_list, list_node) { - if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || - (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) - continue; - if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) - continue; - ret = cfg_queues_uld(adap, type, p); - if (ret) - goto out; - ret = setup_sge_queues_uld(adap, type, p->lro); - if (ret) - goto free_queues; - if (adap->flags & CXGB4_USING_MSIX) { - ret = request_msix_queue_irqs_uld(adap, type); - if (ret) - goto free_rxq; - } - if (adap->flags & CXGB4_FULL_INIT_DONE) - enable_rx_uld(adap, type); - if (adap->uld[type].add) - goto free_irq; - ret = setup_sge_txq_uld(adap, type, p); - if (ret) - goto free_irq; - adap->uld[type] = *p; - ret = uld_attach(adap, type); - if (ret) - goto free_txq; - continue; -free_txq: - release_sge_txq_uld(adap, type); -free_irq: - if (adap->flags & CXGB4_FULL_INIT_DONE) - quiesce_rx_uld(adap, type); - if (adap->flags & CXGB4_USING_MSIX) - free_msix_queue_irqs_uld(adap, type); -free_rxq: - free_sge_queues_uld(adap, type); -free_queues: - free_queues_uld(adap, type); -out: - dev_warn(adap->pdev_dev, - "ULD registration failed for uld type %d\n", type); - } + list_for_each_entry(adap, &adapter_list, list_node) + cxgb4_uld_alloc_resources(adap, type, p); + + uld_entry->uld_type = type; + 
list_add_tail(&uld_entry->list_node, &uld_list); mutex_unlock(&uld_mutex); return; } @@ -736,6 +829,7 @@ EXPORT_SYMBOL(cxgb4_register_uld); */ int cxgb4_unregister_uld(enum cxgb4_uld type) { + struct cxgb4_uld_list *uld_entry, *tmp; struct adapter *adap; if (type >= CXGB4_ULD_MAX) @@ -751,6 +845,13 @@ int cxgb4_unregister_uld(enum cxgb4_uld type) cxgb4_shutdown_uld_adapter(adap, type); } + + list_for_each_entry_safe(uld_entry, tmp, &uld_list, list_node) { + if (uld_entry->uld_type == type) { + list_del(&uld_entry->list_node); + kfree(uld_entry); + } + } mutex_unlock(&uld_mutex); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index d9d27bc1ae67..34546f5312ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -40,9 +40,14 @@ #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/atomic.h> +#include <net/tls.h> #include "cxgb4.h" #define MAX_ULD_QSETS 16 +#define MAX_ULD_NPORTS 4 + +/* ulp_mem_io + ulptx_idata + payload + padding */ +#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8) /* CPL message priority levels */ enum { @@ -106,6 +111,8 @@ struct tid_info { unsigned long *stid_bmap; unsigned int nstids; unsigned int stid_base; + + unsigned int nhash; unsigned int hash_base; union aopen_entry *atid_tab; @@ -147,8 +154,13 @@ struct tid_info { /* TIDs in the HASH */ atomic_t hash_tids_in_use; atomic_t conns_in_use; + /* ETHOFLD TIDs used for rate limiting */ + atomic_t eotids_in_use; + /* lock for setting/clearing filter bitmap */ spinlock_t ftid_lock; + + unsigned int tc_hash_tids_max_prio; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@ -219,12 +231,14 @@ static inline void cxgb4_alloc_eotid(struct tid_info *t, u32 eotid, void *data) { set_bit(eotid, t->eotid_bmap); t->eotid_tab[eotid].data = data; + atomic_inc(&t->eotids_in_use); } static inline void cxgb4_free_eotid(struct tid_info *t, u32 eotid) { clear_bit(eotid, t->eotid_bmap); t->eotid_tab[eotid].data = NULL; + atomic_dec(&t->eotids_in_use); } int cxgb4_alloc_atid(struct tid_info *t, void *data); @@ -261,9 +275,14 @@ struct filter_ctx { u32 tid; /* to store tid */ }; +struct chcr_ktls { + refcount_t ktls_refcount; +}; + struct ch_filter_specification; -int cxgb4_get_free_ftid(struct net_device *dev, int family); +int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en, + u32 tc_prio); int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs, struct filter_ctx *ctx); @@ -288,7 +307,9 @@ enum cxgb4_uld { CXGB4_ULD_ISCSI, CXGB4_ULD_ISCSIT, CXGB4_ULD_CRYPTO, + CXGB4_ULD_IPSEC, CXGB4_ULD_TLS, + CXGB4_ULD_KTLS, CXGB4_ULD_MAX }; @@ -319,6 +340,7 @@ enum cxgb4_control { CXGB4_CONTROL_DB_DROP, }; +struct adapter; struct pci_dev; struct l2t_data; struct net_device; @@ -346,6 +368,33 @@ struct cxgb4_virt_res { /* virtualized HW resources */ struct cxgb4_range ppod_edram; }; +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) +struct ch_ktls_port_stats_debug { + atomic64_t ktls_tx_connection_open; + atomic64_t ktls_tx_connection_fail; + atomic64_t ktls_tx_connection_close; + atomic64_t ktls_tx_encrypted_packets; + atomic64_t ktls_tx_encrypted_bytes; + atomic64_t ktls_tx_ctx; + atomic64_t ktls_tx_ooo; + atomic64_t ktls_tx_skip_no_sync_data; + atomic64_t ktls_tx_drop_no_sync_data; + atomic64_t ktls_tx_drop_bypass_req; +}; + +struct ch_ktls_stats_debug { + struct ch_ktls_port_stats_debug ktls_port[MAX_ULD_NPORTS]; + 
atomic64_t ktls_tx_send_records; + atomic64_t ktls_tx_end_pkts; + atomic64_t ktls_tx_start_pkts; + atomic64_t ktls_tx_middle_pkts; + atomic64_t ktls_tx_retransmit_pkts; + atomic64_t ktls_tx_complete_pkts; + atomic64_t ktls_tx_trimmed_pkts; + atomic64_t ktls_tx_fallback; +}; +#endif + struct chcr_stats_debug { atomic_t cipher_rqst; atomic_t digest_rqst; @@ -353,12 +402,17 @@ struct chcr_stats_debug { atomic_t complete; atomic_t error; atomic_t fallback; - atomic_t ipsec_cnt; atomic_t tls_pdu_tx; atomic_t tls_pdu_rx; atomic_t tls_key; }; +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) +struct ch_ipsec_stats_debug { + atomic_t ipsec_cnt; +}; +#endif + #define OCQ_WIN_OFFSET(pdev, vres) \ (pci_resource_len((pdev), 2) - roundup_pow_of_two((vres)->ocq.size)) @@ -435,8 +489,20 @@ struct cxgb4_uld_info { struct napi_struct *napi); void (*lro_flush)(struct t4_lro_mgr *); int (*tx_handler)(struct sk_buff *skb, struct net_device *dev); +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + const struct tlsdev_ops *tlsdev_ops; +#endif +#if IS_ENABLED(CONFIG_XFRM_OFFLOAD) + const struct xfrmdev_ops *xfrmdev_ops; +#endif }; +static inline bool cxgb4_is_ktls_skb(struct sk_buff *skb) +{ + return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); +} + +void cxgb4_uld_enable(struct adapter *adap); void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 1a16449e9deb..a10a6862a9a4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -59,7 +59,7 @@ struct l2t_data { rwlock_t lock; atomic_t nfree; /* number of free entries */ struct l2t_entry *rover; /* starting point for next allocation */ - struct l2t_entry l2tab[0]; /* MUST BE LAST */ + struct l2t_entry l2tab[]; /* MUST BE LAST */ }; static inline unsigned int vlan_prio(const struct l2t_entry *e) @@ -231,7 +231,7 @@ again: if (e->state == L2T_STATE_STALE) e->state = L2T_STATE_VALID; spin_unlock_bh(&e->lock); - /* fall through */ + fallthrough; case L2T_STATE_VALID: /* fast-path, send the packet on */ return t4_ofld_send(adap, skb); case L2T_STATE_RESOLVING: @@ -503,40 +503,19 @@ u64 cxgb4_select_ntuple(struct net_device *dev, EXPORT_SYMBOL(cxgb4_select_ntuple); /* - * Called when address resolution fails for an L2T entry to handle packets - * on the arpq head. If a packet specifies a failure handler it is invoked, - * otherwise the packet is sent to the device. - */ -static void handle_failed_resolution(struct adapter *adap, struct l2t_entry *e) -{ - struct sk_buff *skb; - - while ((skb = __skb_dequeue(&e->arpq)) != NULL) { - const struct l2t_skb_cb *cb = L2T_SKB_CB(skb); - - spin_unlock(&e->lock); - if (cb->arp_err_handler) - cb->arp_err_handler(cb->handle, skb); - else - t4_ofld_send(adap, skb); - spin_lock(&e->lock); - } -} - -/* * Called when the host's neighbor layer makes a change to some entry that is * loaded into the HW L2 table. 
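In the l2t.c hunk below, handle_failed_resolution() is folded into t4_l2t_update(); the locking pattern it used survives unchanged and is worth isolating: the entry lock is dropped around each callback or send, since the error handlers may block or re-enter the L2T code. A condensed sketch (drain_arpq is an illustrative name; e->lock is assumed held on entry):

    static void drain_arpq(struct adapter *adap, struct l2t_entry *e)
    {
            struct sk_buff *skb;

            while ((skb = __skb_dequeue(&e->arpq)) != NULL) {
                    const struct l2t_skb_cb *cb = L2T_SKB_CB(skb);

                    spin_unlock(&e->lock);          /* handler may block */
                    if (cb->arp_err_handler)
                            cb->arp_err_handler(cb->handle, skb);
                    else
                            t4_ofld_send(adap, skb);
                    spin_lock(&e->lock);
            }
    }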
*/ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) { - struct l2t_entry *e; - struct sk_buff_head *arpq = NULL; - struct l2t_data *d = adap->l2t; unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *) neigh->primary_key; - int ifidx = neigh->dev->ifindex; - int hash = addr_hash(d, addr, addr_len, ifidx); + int hash, ifidx = neigh->dev->ifindex; + struct sk_buff_head *arpq = NULL; + struct l2t_data *d = adap->l2t; + struct l2t_entry *e; + hash = addr_hash(d, addr, addr_len, ifidx); read_lock_bh(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) if (!addreq(e, addr) && e->ifindex == ifidx) { @@ -569,8 +548,25 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) write_l2e(adap, e, 0); } - if (arpq) - handle_failed_resolution(adap, e); + if (arpq) { + struct sk_buff *skb; + + /* Called when address resolution fails for an L2T + * entry to handle packets on the arpq head. If a + * packet specifies a failure handler it is invoked, + * otherwise the packet is sent to the device. + */ + while ((skb = __skb_dequeue(&e->arpq)) != NULL) { + const struct l2t_skb_cb *cb = L2T_SKB_CB(skb); + + spin_unlock(&e->lock); + if (cb->arp_err_handler) + cb->arp_err_handler(cb->handle, skb); + else + t4_ofld_send(adap, skb); + spin_lock(&e->lock); + } + } spin_unlock_bh(&e->lock); } @@ -613,6 +609,7 @@ struct l2t_entry *t4_l2t_alloc_switching(struct adapter *adap, u16 vlan, } /** + * cxgb4_l2t_alloc_switching - Allocates an L2T entry for switch filters * @dev: net_device pointer * @vlan: VLAN Id * @port: Associated port @@ -700,6 +697,17 @@ static char l2e_state(const struct l2t_entry *e) } } +bool cxgb4_check_l2t_valid(struct l2t_entry *e) +{ + bool valid; + + spin_lock(&e->lock); + valid = (e->state == L2T_STATE_VALID); + spin_unlock(&e->lock); + return valid; +} +EXPORT_SYMBOL(cxgb4_check_l2t_valid); + static int l2t_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h index 79665bd8f881..340fecb28a13 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h @@ -122,6 +122,7 @@ struct l2t_entry *t4_l2t_alloc_switching(struct adapter *adap, u16 vlan, u8 port, u8 *dmac); struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end); void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl); +bool cxgb4_check_l2t_valid(struct l2t_entry *e); extern const struct file_operations t4_l2t_fops; #endif /* __CXGB4_L2T_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c index cebe1412d960..a1b14468d1ff 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -57,7 +57,8 @@ static int t4_sched_class_fw_cmd(struct port_info *pi, p->u.params.ratemode, p->u.params.channel, e->idx, p->u.params.minrate, p->u.params.maxrate, - p->u.params.weight, p->u.params.pktsize); + p->u.params.weight, p->u.params.pktsize, + p->u.params.burstsize); break; default: err = -ENOTSUPP; @@ -597,7 +598,7 @@ struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev, /** * cxgb4_sched_class_free - free a scheduling class * @dev: net_device pointer - * @e: scheduling class + * @classid: scheduling class id to free * * Frees a scheduling class if there are no users. 
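The [0] to [] conversions in this series (l2tab[], smtab[], and tab[] in the sched.h hunk that follows) replace the old GCC zero-length-array idiom with a C99 flexible array member, which lets the compiler and fortified memcpy() see the real object bounds. Allocation pairs naturally with struct_size(); a self-contained sketch on an illustrative type:

    #include <linux/overflow.h>
    #include <linux/slab.h>

    struct jump_table {              /* illustrative, not a driver type */
            unsigned int size;
            unsigned long entries[]; /* flexible array member, was [0] */
    };

    static struct jump_table *jump_table_alloc(unsigned int n)
    {
            /* struct_size() = sizeof(*t) + n * sizeof(t->entries[0]),
             * with integer-overflow checking built in.
             */
            struct jump_table *t = kzalloc(struct_size(t, entries, n),
                                           GFP_KERNEL);

            if (t)
                    t->size = n;
            return t;
    }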
*/ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h index 5cc74a5a1774..5f8b871d79af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.h +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h @@ -82,7 +82,7 @@ struct sched_class { struct sched_table { /* per port scheduling table */ u8 sched_size; - struct sched_class tab[0]; + struct sched_class tab[]; }; static inline bool can_sched(struct net_device *dev) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 97cda501e7e8..46809e2d94ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -302,7 +302,7 @@ static void deferred_unmap_destructor(struct sk_buff *skb) /** * free_tx_desc - reclaims Tx descriptors and their buffers - * @adapter: the adapter + * @adap: the adapter * @q: the Tx queue to reclaim descriptors from * @n: the number of descriptors to reclaim * @unmap: whether the buffers should be unmapped for DMA @@ -443,7 +443,7 @@ static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n) if (is_buf_mapped(d)) dma_unmap_page(adap->pdev_dev, get_buf_addr(d), get_buf_size(adap, d), - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); put_page(d->page); d->page = NULL; if (++q->cidx == q->size) @@ -469,7 +469,7 @@ static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q) if (is_buf_mapped(d)) dma_unmap_page(adap->pdev_dev, get_buf_addr(d), - get_buf_size(adap, d), PCI_DMA_FROMDEVICE); + get_buf_size(adap, d), DMA_FROM_DEVICE); d->page = NULL; if (++q->cidx == q->size) q->cidx = 0; @@ -566,7 +566,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE << s->fl_pg_order, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { __free_pages(pg, s->fl_pg_order); q->mapping_err++; @@ -596,7 +596,7 @@ alloc_small_pages: } mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { put_page(pg); q->mapping_err++; @@ -722,6 +722,7 @@ static inline unsigned int flits_to_desc(unsigned int n) /** * is_eth_imm - can an Ethernet packet be sent as immediate data? * @skb: the packet + * @chip_ver: chip version * * Returns whether an Ethernet packet is small enough to fit as * immediate data. Return value corresponds to headroom required. @@ -749,6 +750,7 @@ static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver) /** * calc_tx_flits - calculate the number of flits for a packet Tx WR * @skb: the packet + * @chip_ver: chip version * * Returns the number of flits needed for a Tx WR for the given Ethernet * packet, including the needed WR and CPL headers. @@ -804,6 +806,7 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb, /** * calc_tx_descs - calculate the number of Tx descriptors for a packet * @skb: the packet + * @chip_ver: chip version * * Returns the number of Tx descriptors needed for the given Ethernet * packet, including the needed WR and CPL headers. 
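The PCI_DMA_FROMDEVICE to DMA_FROM_DEVICE switch in the sge.c hunks below moves from the legacy PCI DMA wrappers to the generic DMA API; the numeric value is unchanged, but enum dma_data_direction works for any bus. A minimal sketch of the receive-buffer mapping pattern used here (illustrative helper names):

    #include <linux/dma-mapping.h>
    #include <linux/gfp.h>

    static int map_rx_page(struct device *dev, struct page *pg,
                           dma_addr_t *mapping)
    {
            *mapping = dma_map_page(dev, pg, 0, PAGE_SIZE, DMA_FROM_DEVICE);
            if (dma_mapping_error(dev, *mapping))
                    return -ENOMEM;         /* caller frees the page */
            return 0;
    }

    static void unmap_rx_page(struct device *dev, dma_addr_t mapping)
    {
            dma_unmap_page(dev, mapping, PAGE_SIZE, DMA_FROM_DEVICE);
    }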
@@ -887,6 +890,114 @@ void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q, } EXPORT_SYMBOL(cxgb4_write_sgl); +/* cxgb4_write_partial_sgl - populate SGL for partial packet + * @skb: the packet + * @q: the Tx queue we are writing into + * @sgl: starting location for writing the SGL + * @end: points right after the end of the SGL + * @addr: the list of bus addresses for the SGL elements + * @start: start offset in the SKB where partial data starts + * @len: length of data from @start to send out + * + * This API will handle sending out partial data of a skb if required. + * Unlike cxgb4_write_sgl, @start can be any offset into the skb data, + * and @len will decide how much data after @start offset to send out. + */ +void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q, + struct ulptx_sgl *sgl, u64 *end, + const dma_addr_t *addr, u32 start, u32 len) +{ + struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1] = {0}, *to; + u32 frag_size, skb_linear_data_len = skb_headlen(skb); + struct skb_shared_info *si = skb_shinfo(skb); + u8 i = 0, frag_idx = 0, nfrags = 0; + skb_frag_t *frag; + + /* Fill the first SGL either from linear data or from partial + * frag based on @start. + */ + if (unlikely(start < skb_linear_data_len)) { + frag_size = min(len, skb_linear_data_len - start); + sgl->len0 = htonl(frag_size); + sgl->addr0 = cpu_to_be64(addr[0] + start); + len -= frag_size; + nfrags++; + } else { + start -= skb_linear_data_len; + frag = &si->frags[frag_idx]; + frag_size = skb_frag_size(frag); + /* find the first frag */ + while (start >= frag_size) { + start -= frag_size; + frag_idx++; + frag = &si->frags[frag_idx]; + frag_size = skb_frag_size(frag); + } + + frag_size = min(len, skb_frag_size(frag) - start); + sgl->len0 = cpu_to_be32(frag_size); + sgl->addr0 = cpu_to_be64(addr[frag_idx + 1] + start); + len -= frag_size; + nfrags++; + frag_idx++; + } + + /* If the entire partial data fit in one SGL, then send it out + * now. + */ + if (!len) + goto done; + + /* Most of the complexity below deals with the possibility we hit the + * end of the queue in the middle of writing the SGL. For this case + * only we create the SGL in a temporary buffer and then copy it. + */ + to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge; + + /* If the skb couldn't fit in first SGL completely, fill the + * rest of the frags in subsequent SGLs. Note that each SGL + * pair can store 2 frags. + */ + while (len) { + frag_size = min(len, skb_frag_size(&si->frags[frag_idx])); + to->len[i & 1] = cpu_to_be32(frag_size); + to->addr[i & 1] = cpu_to_be64(addr[frag_idx + 1]); + if (i && (i & 1)) + to++; + nfrags++; + frag_idx++; + i++; + len -= frag_size; + } + + /* If we ended in an odd boundary, then set the second SGL's + * length in the pair to 0. + */ + if (i & 1) + to->len[1] = cpu_to_be32(0); + + /* Copy from temporary buffer to Tx ring, in case we hit the + * end of the queue in the middle of writing the SGL. + */ + if (unlikely((u8 *)end > (u8 *)q->stat)) { + u32 part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1; + + if (likely(part0)) + memcpy(sgl->sge, buf, part0); + part1 = (u8 *)end - (u8 *)q->stat; + memcpy(q->desc, (u8 *)buf + part0, part1); + end = (void *)q->desc + part1; + } + + /* 0-pad to multiple of 16 */ + if ((uintptr_t)end & 8) + *end = 0; +done: + sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) | + ULPTX_NSGE_V(nfrags)); +} +EXPORT_SYMBOL(cxgb4_write_partial_sgl); + /* This function copies 64 byte coalesced work request to * memory mapped BAR2 space. 
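cxgb4_write_partial_sgl() above reuses a trick from cxgb4_write_sgl(): when the SGL might run past the end of the descriptor ring (q->stat), it is composed in a stack buffer and then copied out in two pieces. The wrap copy in isolation, with hypothetical names:

    #include <linux/string.h>
    #include <linux/minmax.h>

    /* Copy len bytes to pos inside a ring that ends at ring_end and
     * wraps to ring_base: fill up to the end of the ring first, then
     * place the remainder at the base.
     */
    static void ring_copy_wrapped(void *ring_base, void *ring_end,
                                  void *pos, const void *buf, size_t len)
    {
            size_t part0 = min_t(size_t, len,
                                 (size_t)((u8 *)ring_end - (u8 *)pos));

            memcpy(pos, buf, part0);
            if (len > part0)
                    memcpy(ring_base, (const u8 *)buf + part0, len - part0);
    }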
For coalesced WR SGE fetches * data from the FIFO instead of from Host. @@ -1307,8 +1418,9 @@ static inline void *write_tso_wr(struct adapter *adap, struct sk_buff *skb, int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq, int maxreclaim) { + unsigned int reclaimed, hw_cidx; struct sge_txq *q = &eq->q; - unsigned int reclaimed; + int hw_in_use; if (!q->in_use || !__netif_tx_trylock(eq->txq)) return 0; @@ -1316,12 +1428,17 @@ int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq, /* Reclaim pending completed TX Descriptors. */ reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true); + hw_cidx = ntohs(READ_ONCE(q->stat->cidx)); + hw_in_use = q->pidx - hw_cidx; + if (hw_in_use < 0) + hw_in_use += q->size; + /* If the TX Queue is currently stopped and there's now more than half * the queue available, restart it. Otherwise bail out since the rest * of what we want do here is with the possibility of shipping any * currently buffered Coalesced TX Work Request. */ - if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) { + if (netif_tx_queue_stopped(eq->txq) && hw_in_use < (q->size / 2)) { netif_tx_wake_queue(eq->txq); eq->q.restarts++; } @@ -1407,19 +1524,23 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) pi = netdev_priv(dev); adap = pi->adapter; ssi = skb_shinfo(skb); -#ifdef CONFIG_CHELSIO_IPSEC_INLINE +#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) if (xfrm_offload(skb) && !ssi->gso_size) - return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev); + return adap->uld[CXGB4_ULD_IPSEC].tx_handler(skb, dev); #endif /* CHELSIO_IPSEC_INLINE */ +#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) + if (cxgb4_is_ktls_skb(skb) && + (skb->len - skb_tcp_all_headers(skb))) + return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev); +#endif /* CHELSIO_TLS_DEVICE */ + qidx = skb_get_queue_mapping(skb); if (ptp_enabled) { - spin_lock(&adap->ptp_lock); if (!(adap->ptp_tx_skb)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; adap->ptp_tx_skb = skb_get(skb); } else { - spin_unlock(&adap->ptp_lock); goto out_free; } q = &adap->sge.ptptxq; @@ -1433,11 +1554,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef CONFIG_CHELSIO_T4_FCOE ret = cxgb_fcoe_offload(skb, adap, pi, &cntrl); - if (unlikely(ret == -ENOTSUPP)) { - if (ptp_enabled) - spin_unlock(&adap->ptp_lock); + if (unlikely(ret == -EOPNOTSUPP)) goto out_free; - } #endif /* CONFIG_CHELSIO_T4_FCOE */ chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); @@ -1450,8 +1568,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) dev_err(adap->pdev_dev, "%s: Tx ring %u full while queue awake!\n", dev->name, qidx); - if (ptp_enabled) - spin_unlock(&adap->ptp_lock); return NETDEV_TX_BUSY; } @@ -1470,8 +1586,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); q->mapping_err++; - if (ptp_enabled) - spin_unlock(&adap->ptp_lock); goto out_free; } @@ -1486,15 +1600,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) * has opened up. */ eth_txq_stop(q); - - /* If we're using the SGE Doorbell Queue Timer facility, we - * don't need to ask the Firmware to send us Egress Queue CIDX - * Updates: the Hardware will do this automatically. 
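The egress-update change above gauges queue fullness by the hardware consumer index in the status page rather than the software count, so a stopped queue is only woken once the chip has actually consumed descriptors. The wraparound arithmetic, isolated:

    /* In-flight descriptors as the hardware sees them. The status-page
     * CIDX is DMA'ed by the chip, so it is read once with READ_ONCE();
     * the pidx/cidx difference is then wrapped modulo the ring size.
     */
    static int hw_descs_in_use(const struct sge_txq *q)
    {
            unsigned int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
            int in_use = q->pidx - hw_cidx;

            return in_use < 0 ? in_use + q->size : in_use;
    }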
And - * since we send the Ingress Queue CIDX Updates to the - * corresponding Ethernet Response Queue, we'll get them very - * quickly. - */ - if (!q->dbqt) + if (chip_ver > CHELSIO_T5) wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } @@ -1531,8 +1637,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (iph->version == 4) { iph->check = 0; iph->tot_len = 0; - iph->check = (u16)(~ip_fast_csum((u8 *)iph, - iph->ihl)); + iph->check = ~ip_fast_csum((u8 *)iph, iph->ihl); } if (skb->ip_summed == CHECKSUM_PARTIAL) cntrl = hwcsum(adap->params.chip, skb); @@ -1628,8 +1733,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) txq_advance(&q->q, ndesc); cxgb4_ring_tx_db(adap, &q->q, ndesc); - if (ptp_enabled) - spin_unlock(&adap->ptp_lock); return NETDEV_TX_OK; out_free: @@ -1730,6 +1833,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, struct adapter *adapter; int qidx, credits, ret; size_t fw_hdr_copy_len; + unsigned int chip_ver; u64 cntrl, *end; u32 wr_mid; @@ -1738,8 +1842,10 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, * (including the VLAN tag) into the header so we reject anything * smaller than that ... */ - fw_hdr_copy_len = sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) + - sizeof(wr->ethtype) + sizeof(wr->vlantci); + BUILD_BUG_ON(sizeof(wr->firmware) != + (sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) + + sizeof(wr->ethtype) + sizeof(wr->vlantci))); + fw_hdr_copy_len = sizeof(wr->firmware); ret = cxgb4_validate_skb(skb, dev, fw_hdr_copy_len); if (ret) goto out_free; @@ -1794,6 +1900,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, goto out_free; } + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)); if (unlikely(credits < ETHTXQ_STOP_THRES)) { /* After we're done injecting the Work Request for this @@ -1805,15 +1912,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, * has opened up. */ eth_txq_stop(txq); - - /* If we're using the SGE Doorbell Queue Timer facility, we - * don't need to ask the Firmware to send us Egress Queue CIDX - * Updates: the Hardware will do this automatically. And - * since we send the Ingress Queue CIDX Updates to the - * corresponding Ethernet Response Queue, we'll get them very - * quickly. 
- */ - if (!txq->dbqt) + if (chip_ver > CHELSIO_T5) wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } @@ -1827,7 +1926,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, wr->equiq_to_len16 = cpu_to_be32(wr_mid); wr->r3[0] = cpu_to_be32(0); wr->r3[1] = cpu_to_be32(0); - skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len); + skb_copy_from_linear_data(skb, &wr->firmware, fw_hdr_copy_len); end = (u64 *)wr + flits; /* If this is a Large Send Offload packet we'll put in an LSO CPL @@ -1867,7 +1966,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, */ cpl = (void *)(lso + 1); - if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) + if (chip_ver <= CHELSIO_T5) cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len); else cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len); @@ -2098,10 +2197,9 @@ static inline u8 ethofld_calc_tx_flits(struct adapter *adap, return flits + nsgl; } -static inline void *write_eo_wr(struct adapter *adap, - struct sge_eosw_txq *eosw_txq, - struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr, - u32 hdr_len, u32 wrlen) +static void *write_eo_wr(struct adapter *adap, struct sge_eosw_txq *eosw_txq, + struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr, + u32 hdr_len, u32 wrlen) { const struct skb_shared_info *ssi = skb_shinfo(skb); struct cpl_tx_pkt_core *cpl; @@ -2120,7 +2218,8 @@ static inline void *write_eo_wr(struct adapter *adap, immd_len += hdr_len; if (!eosw_txq->ncompl || - eosw_txq->last_compl >= adap->params.ofldq_wr_cred / 2) { + (eosw_txq->last_compl + wrlen16) >= + (adap->params.ofldq_wr_cred / 2)) { compl = true; eosw_txq->ncompl++; eosw_txq->last_compl = 0; @@ -2160,8 +2259,8 @@ static inline void *write_eo_wr(struct adapter *adap, return cpl; } -static void ethofld_hard_xmit(struct net_device *dev, - struct sge_eosw_txq *eosw_txq) +static int ethofld_hard_xmit(struct net_device *dev, + struct sge_eosw_txq *eosw_txq) { struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = netdev2adap(dev); @@ -2174,8 +2273,8 @@ static void ethofld_hard_xmit(struct net_device *dev, bool skip_eotx_wr = false; struct tx_sw_desc *d; struct sk_buff *skb; + int left, ret = 0; u8 flits, ndesc; - int left; eohw_txq = &adap->sge.eohw_txq[eosw_txq->hwqid]; spin_lock(&eohw_txq->lock); @@ -2205,15 +2304,26 @@ static void ethofld_hard_xmit(struct net_device *dev, wrlen = flits * 8; wrlen16 = DIV_ROUND_UP(wrlen, 16); - /* If there are no CPL credits, then wait for credits - * to come back and retry again + left = txq_avail(&eohw_txq->q) - ndesc; + + /* If there are no descriptors left in hardware queues or no + * CPL credits left in software queues, then wait for them + * to come back and retry again. Note that we always request + * for credits update via interrupt for every half credits + * consumed. So, the interrupt will eventually restore the + * credits and invoke the Tx path again. 
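The comment above encodes the ETHOFLD credit policy: request a firmware completion on the first WR, and again whenever the credits consumed since the last update would reach half of the total WR credits, so the interrupt replenishes credits before the queue can stall. The same condition as a standalone predicate (illustrative name):

    static bool want_fw_completion(u32 ncompl, u32 since_last_compl,
                                   u32 wrlen16, u32 total_wr_cred)
    {
            return !ncompl ||
                   since_last_compl + wrlen16 >= total_wr_cred / 2;
    }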
*/ - if (unlikely(wrlen16 > eosw_txq->cred)) + if (unlikely(left < 0 || wrlen16 > eosw_txq->cred)) { + ret = -ENOMEM; goto out_unlock; + } if (unlikely(skip_eotx_wr)) { start = (u64 *)wr; eosw_txq->state = next_state; + eosw_txq->cred -= wrlen16; + eosw_txq->ncompl++; + eosw_txq->last_compl = 0; goto write_wr_headers; } @@ -2235,7 +2345,8 @@ write_wr_headers: sgl = (u64 *)inline_tx_skb_header(skb, &eohw_txq->q, (void *)start, hdr_len); if (data_len) { - if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, d->addr))) { + ret = cxgb4_map_skb(adap->pdev_dev, skb, d->addr); + if (unlikely(ret)) { memset(d->addr, 0, sizeof(d->addr)); eohw_txq->mapping_err++; goto out_unlock; @@ -2281,12 +2392,13 @@ write_wr_headers: out_unlock: spin_unlock(&eohw_txq->lock); + return ret; } static void ethofld_xmit(struct net_device *dev, struct sge_eosw_txq *eosw_txq) { struct sk_buff *skb; - int pktcount; + int pktcount, ret; switch (eosw_txq->state) { case CXGB4_EO_STATE_ACTIVE: @@ -2311,7 +2423,9 @@ static void ethofld_xmit(struct net_device *dev, struct sge_eosw_txq *eosw_txq) continue; } - ethofld_hard_xmit(dev, eosw_txq); + ret = ethofld_hard_xmit(dev, eosw_txq); + if (ret) + break; } } @@ -2369,14 +2483,52 @@ netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(qid >= pi->nqsets)) return cxgb4_ethofld_xmit(skb, dev); + if (is_ptp_enabled(skb, dev)) { + struct adapter *adap = netdev2adap(dev); + netdev_tx_t ret; + + spin_lock(&adap->ptp_lock); + ret = cxgb4_eth_xmit(skb, dev); + spin_unlock(&adap->ptp_lock); + return ret; + } + return cxgb4_eth_xmit(skb, dev); } +static void eosw_txq_flush_pending_skbs(struct sge_eosw_txq *eosw_txq) +{ + int pktcount = eosw_txq->pidx - eosw_txq->last_pidx; + int pidx = eosw_txq->pidx; + struct sk_buff *skb; + + if (!pktcount) + return; + + if (pktcount < 0) + pktcount += eosw_txq->ndesc; + + while (pktcount--) { + pidx--; + if (pidx < 0) + pidx += eosw_txq->ndesc; + + skb = eosw_txq->desc[pidx].skb; + if (skb) { + dev_consume_skb_any(skb); + eosw_txq->desc[pidx].skb = NULL; + eosw_txq->inuse--; + } + } + + eosw_txq->pidx = eosw_txq->last_pidx + 1; +} + /** * cxgb4_ethofld_send_flowc - Send ETHOFLD flowc request to bind eotid to tc. - * @dev - netdevice - * @eotid - ETHOFLD tid to bind/unbind - * @tc - traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid + * @dev: netdevice + * @eotid: ETHOFLD tid to bind/unbind + * @tc: traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid * * Send a FLOWC work request to bind an ETHOFLD TID to a traffic class. 
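eosw_txq_flush_pending_skbs() above walks the software ring backwards from pidx to last_pidx, freeing everything queued but not yet sent so the termination FLOWC always has a slot. The backwards modular walk as a standalone sketch (illustrative names):

    #include <linux/skbuff.h>

    static void flush_ring_tail(struct sk_buff **desc, int ndesc,
                                int pidx, int count)
    {
            while (count--) {
                    pidx = pidx ? pidx - 1 : ndesc - 1; /* wrap below 0 */
                    if (desc[pidx]) {
                            dev_consume_skb_any(desc[pidx]);
                            desc[pidx] = NULL;
                    }
            }
    }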
* If @tc is set to FW_SCHED_CLS_NONE, then the @eotid is unbound from @@ -2395,7 +2547,7 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc) struct sk_buff *skb; int ret = 0; - len = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval) * nparams; + len = struct_size(flowc, mnemval, nparams); len16 = DIV_ROUND_UP(len, 16); entry = cxgb4_lookup_eotid(&adap->tids, eotid); @@ -2406,6 +2558,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc) if (!eosw_txq) return -ENOMEM; + if (!(adap->flags & CXGB4_FW_OK)) { + /* Don't stall caller when access to FW is lost */ + complete(&eosw_txq->completion); + return -EIO; + } + skb = alloc_skb(len, GFP_KERNEL); if (!skb) return -ENOMEM; @@ -2413,12 +2571,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc) spin_lock_bh(&eosw_txq->lock); if (tc != FW_SCHED_CLS_NONE) { if (eosw_txq->state != CXGB4_EO_STATE_CLOSED) - goto out_unlock; + goto out_free_skb; next_state = CXGB4_EO_STATE_FLOWC_OPEN_SEND; } else { if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE) - goto out_unlock; + goto out_free_skb; next_state = CXGB4_EO_STATE_FLOWC_CLOSE_SEND; } @@ -2447,22 +2605,26 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc) FW_FLOWC_MNEM_EOSTATE_CLOSING : FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); - eosw_txq->cred -= len16; - eosw_txq->ncompl++; - eosw_txq->last_compl = 0; + /* Free up any pending skbs to ensure there's room for + * termination FLOWC. + */ + if (tc == FW_SCHED_CLS_NONE) + eosw_txq_flush_pending_skbs(eosw_txq); ret = eosw_txq_enqueue(eosw_txq, skb); - if (ret) { - dev_consume_skb_any(skb); - goto out_unlock; - } + if (ret) + goto out_free_skb; eosw_txq->state = next_state; eosw_txq->flowc_idx = eosw_txq->pidx; eosw_txq_advance(eosw_txq, 1); ethofld_xmit(dev, eosw_txq); -out_unlock: + spin_unlock_bh(&eosw_txq->lock); + return 0; + +out_free_skb: + dev_consume_skb_any(skb); spin_unlock_bh(&eosw_txq->lock); return ret; } @@ -2498,6 +2660,84 @@ static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr) } } +#define CXGB4_SELFTEST_LB_STR "CHELSIO_SELFTEST" + +int cxgb4_selftest_lb_pkt(struct net_device *netdev) +{ + struct port_info *pi = netdev_priv(netdev); + struct adapter *adap = pi->adapter; + struct cxgb4_ethtool_lb_test *lb; + int ret, i = 0, pkt_len, credits; + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + u32 ctrl0, ndesc, flits; + struct sge_eth_txq *q; + u8 *sgl; + + pkt_len = ETH_HLEN + sizeof(CXGB4_SELFTEST_LB_STR); + + flits = DIV_ROUND_UP(pkt_len + sizeof(*cpl) + sizeof(*wr), + sizeof(__be64)); + ndesc = flits_to_desc(flits); + + lb = &pi->ethtool_lb; + lb->loopback = 1; + + q = &adap->sge.ethtxq[pi->first_qset]; + __netif_tx_lock(q->txq, smp_processor_id()); + + reclaim_completed_tx(adap, &q->q, -1, true); + credits = txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + __netif_tx_unlock(q->txq); + return -ENOMEM; + } + + wr = (void *)&q->q.desc[q->q.pidx]; + memset(wr, 0, sizeof(struct tx_desc)); + + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(pkt_len + + sizeof(*cpl))); + wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2))); + wr->r3 = cpu_to_be64(0); + + cpl = (void *)(wr + 1); + sgl = (u8 *)(cpl + 1); + + ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_PF_V(adap->pf) | + TXPKT_INTF_V(pi->tx_chan + 4); + + cpl->ctrl0 = htonl(ctrl0); + cpl->pack = htons(0); + cpl->len = htons(pkt_len); + cpl->ctrl1 = cpu_to_be64(TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F); + + eth_broadcast_addr(sgl); + 
i += ETH_ALEN; + ether_addr_copy(&sgl[i], netdev->dev_addr); + i += ETH_ALEN; + + snprintf(&sgl[i], sizeof(CXGB4_SELFTEST_LB_STR), "%s", + CXGB4_SELFTEST_LB_STR); + + init_completion(&lb->completion); + txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(adap, &q->q, ndesc); + __netif_tx_unlock(q->txq); + + /* wait for the pkt to return */ + ret = wait_for_completion_timeout(&lb->completion, 10 * HZ); + if (!ret) + ret = -ETIMEDOUT; + else + ret = lb->result; + + lb->loopback = 0; + + return ret; +} + /** * ctrl_xmit - send a packet through an SGE control Tx queue * @q: the control queue @@ -2543,15 +2783,15 @@ static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb) /** * restart_ctrlq - restart a suspended control queue - * @data: the control queue to restart + * @t: pointer to the tasklet associated with this handler * * Resumes transmission on a suspended Tx control queue. */ -static void restart_ctrlq(unsigned long data) +static void restart_ctrlq(struct tasklet_struct *t) { struct sk_buff *skb; unsigned int written = 0; - struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data; + struct sge_ctrl_txq *q = from_tasklet(q, t, qresume_tsk); spin_lock(&q->sendq.lock); reclaim_completed_tx_imm(&q->q); @@ -2616,17 +2856,22 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb) * @skb: the packet * * Returns true if a packet can be sent as an offload WR with immediate - * data. We currently use the same limit as for Ethernet packets. + * data. + * FW_OFLD_TX_DATA_WR limits the payload to 255 bytes due to 8-bit field. + * However, FW_ULPTX_WR commands have a 256 byte immediate only + * payload limit. */ static inline int is_ofld_imm(const struct sk_buff *skb) { struct work_request_hdr *req = (struct work_request_hdr *)skb->data; unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi)); - if (opcode == FW_CRYPTO_LOOKASIDE_WR) + if (unlikely(opcode == FW_ULPTX_WR)) + return skb->len <= MAX_IMM_ULPTX_WR_LEN; + else if (opcode == FW_CRYPTO_LOOKASIDE_WR) return skb->len <= SGE_MAX_WR_LEN; else - return skb->len <= MAX_IMM_TX_PKT_LEN; + return skb->len <= MAX_IMM_OFLD_TX_DATA_WR_LEN; } /** @@ -2653,7 +2898,6 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb) /** * txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion - * @adap: the adapter * @q: the queue to stop * * Mark a Tx queue stopped due to I/O MMU exhaustion and resulting @@ -2702,6 +2946,7 @@ static void ofldtxq_stop(struct sge_uld_txq *q, struct fw_wr_hdr *wr) * is ever running at a time ... */ static void service_ofldq(struct sge_uld_txq *q) + __must_hold(&q->sendq.lock) { u64 *pos, *before, *end; int credits; @@ -2844,13 +3089,13 @@ static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb) /** * restart_ofldq - restart a suspended offload queue - * @data: the offload queue to restart + * @t: pointer to the tasklet associated with this handler * * Resumes transmission on a suspended Tx offload queue. 
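restart_ctrlq() above and restart_ofldq() below are converted to the type-safe tasklet API: tasklet_setup() registers a callback that receives the tasklet pointer, and from_tasklet() (a container_of() wrapper) recovers the enclosing queue, replacing the old opaque unsigned long cast. A minimal sketch on an illustrative container:

    #include <linux/interrupt.h>

    struct demo_txq {               /* illustrative container */
            struct tasklet_struct qresume_tsk;
            int full;
    };

    static void demo_restart(struct tasklet_struct *t)
    {
            struct demo_txq *q = from_tasklet(q, t, qresume_tsk);

            q->full = 0;            /* resume transmission */
    }

    static void demo_init(struct demo_txq *q)
    {
            tasklet_setup(&q->qresume_tsk, demo_restart);
    }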
*/ -static void restart_ofldq(unsigned long data) +static void restart_ofldq(struct tasklet_struct *t) { - struct sge_uld_txq *q = (struct sge_uld_txq *)data; + struct sge_uld_txq *q = from_tasklet(q, t, qresume_tsk); spin_lock(&q->sendq.lock); q->full = 0; /* the queue actually is completely empty now */ @@ -2899,6 +3144,7 @@ static inline int uld_send(struct adapter *adap, struct sk_buff *skb, txq_info = adap->sge.uld_txq_info[tx_uld_type]; if (unlikely(!txq_info)) { WARN_ON(true); + kfree_skb(skb); return NET_XMIT_DROP; } @@ -3247,7 +3493,7 @@ enum { /** * t4_systim_to_hwstamp - read hardware time stamp - * @adap: the adapter + * @adapter: the adapter * @skb: the packet * * Read Time Stamp from MPS packet and insert in skb which @@ -3274,15 +3520,16 @@ static noinline int t4_systim_to_hwstamp(struct adapter *adapter, hwtstamps = skb_hwtstamps(skb); memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*((u64 *)data))); + hwtstamps->hwtstamp = ns_to_ktime(get_unaligned_be64(data)); return RX_PTP_PKT_SUC; } /** * t4_rx_hststamp - Recv PTP Event Message - * @adap: the adapter + * @adapter: the adapter * @rsp: the response queue descriptor holding the RX_PKT message + * @rxq: the response queue holding the RX_PKT message * @skb: the packet * * PTP enabled and MPS packet, read HW timestamp @@ -3306,7 +3553,7 @@ static int t4_rx_hststamp(struct adapter *adapter, const __be64 *rsp, /** * t4_tx_hststamp - Loopback PTP Transmit Event Message - * @adap: the adapter + * @adapter: the adapter * @skb: the packet * @dev: the ingress net device * @@ -3372,9 +3619,8 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq, txq = &s->ethtxq[pi->first_qset + rspq->idx]; /* We've got the Hardware Consumer Index Update in the Egress Update - * message. If we're using the SGE Doorbell Queue Timer mechanism, - * these Egress Update messages will be our sole CIDX Updates we get - * since we don't want to chew up PCIe bandwidth for both Ingress + * message. These Egress Update messages will be our sole CIDX Updates + * we get since we don't want to chew up PCIe bandwidth for both Ingress * Messages and Status Page writes. However, The code which manages * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value * stored in the Status Page at the end of the TX Queue. It's easiest @@ -3383,7 +3629,7 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq, * considered here since both are Big Endian and we're just copying * bytes consistently ... 
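t4_systim_to_hwstamp() above now reads the MPS timestamp with get_unaligned_be64(): dereferencing a cast u64 pointer into packet data is undefined behaviour on strict-alignment architectures, while the helper compiles to byte loads where needed and a single load elsewhere. In isolation:

    #include <asm/unaligned.h>
    #include <linux/ktime.h>

    static ktime_t read_hw_ts(const void *data)
    {
            /* safe for any alignment of data */
            return ns_to_ktime(get_unaligned_be64(data));
    }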
*/ - if (txq->dbqt) { + if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) { struct cpl_sge_egr_update *egr; egr = (struct cpl_sge_egr_update *)rsp; @@ -3393,6 +3639,31 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq, t4_sge_eth_txq_egress_update(adapter, txq, -1); } +static int cxgb4_validate_lb_pkt(struct port_info *pi, const struct pkt_gl *si) +{ + struct adapter *adap = pi->adapter; + struct cxgb4_ethtool_lb_test *lb; + struct sge *s = &adap->sge; + struct net_device *netdev; + u8 *data; + int i; + + netdev = adap->port[pi->port_id]; + lb = &pi->ethtool_lb; + data = si->va + s->pktshift; + + i = ETH_ALEN; + if (!ether_addr_equal(data + i, netdev->dev_addr)) + return -1; + + i += ETH_ALEN; + if (strcmp(&data[i], CXGB4_SELFTEST_LB_STR)) + lb->result = -EIO; + + complete(&lb->completion); + return 0; +} + /** * t4_ethrx_handler - process an ingress ethernet packet * @q: the response queue that received the packet @@ -3416,6 +3687,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, struct port_info *pi; int ret = 0; + pi = netdev_priv(q->netdev); /* If we're looking at TX Queue CIDX Update, handle that separately * and return. */ @@ -3443,6 +3715,12 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, if (err_vec) rxq->stats.bad_rx_pkts++; + if (unlikely(pi->ethtool_lb.loopback && pkt->iff >= NCHAN)) { + ret = cxgb4_validate_lb_pkt(pi, si); + if (!ret) + return 0; + } + if (((pkt->l2info & htonl(RXF_TCP_F)) || tnl_hdr_len) && (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) { @@ -3456,7 +3734,6 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, rxq->stats.rx_drops++; return 0; } - pi = netdev_priv(q->netdev); /* Handle PTP Event Rx packet */ if (unlikely(pi->ptp_enable)) { @@ -3757,9 +4034,10 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) return work_done; } -void cxgb4_ethofld_restart(unsigned long data) +void cxgb4_ethofld_restart(struct tasklet_struct *t) { - struct sge_eosw_txq *eosw_txq = (struct sge_eosw_txq *)data; + struct sge_eosw_txq *eosw_txq = from_tasklet(eosw_txq, t, + qresume_tsk); int pktcount; spin_lock(&eosw_txq->lock); @@ -4189,7 +4467,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, if (ret) goto err; - netif_napi_add(dev, &iq->napi, napi_rx_handler, 64); + netif_napi_add(dev, &iq->napi, napi_rx_handler); iq->cur_desc = iq->desc; iq->cidx = 0; iq->gen = 1; @@ -4343,11 +4621,15 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, * write the CIDX Updates into the Status Page at the end of the * TX Queue. */ - c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | + c.autoequiqe_to_viid = htonl(((chip_ver <= CHELSIO_T5) ? + FW_EQ_ETH_CMD_AUTOEQUIQE_F : + FW_EQ_ETH_CMD_AUTOEQUEQE_F) | FW_EQ_ETH_CMD_VIID_V(pi->viid)); c.fetchszm_to_iqid = - htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V((chip_ver <= CHELSIO_T5) ? 
+ HOSTFCMODE_INGRESS_QUEUE_X : + HOSTFCMODE_STATUS_PAGE_X) | FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); @@ -4358,6 +4640,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, : FETCHBURSTMIN_64B_T6_X) | FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_ETH_CMD_CIDXFTHRESHO_V(chip_ver == CHELSIO_T5) | FW_EQ_ETH_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); @@ -4450,7 +4733,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid))); txq->adap = adap; skb_queue_head_init(&txq->sendq); - tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq); + tasklet_setup(&txq->qresume_tsk, restart_ctrlq); txq->full = 0; return 0; } @@ -4540,7 +4823,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq, txq->q.q_type = CXGB4_TXQ_ULD; txq->adap = adap; skb_queue_head_init(&txq->sendq); - tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq); + tasklet_setup(&txq->qresume_tsk, restart_ofldq); txq->full = 0; txq->mapping_err = 0; return 0; @@ -4742,9 +5025,6 @@ void t4_sge_stop(struct adapter *adap) int i; struct sge *s = &adap->sge; - if (in_interrupt()) /* actions below require waiting */ - return; - if (s->rx_timer.function) del_timer_sync(&s->rx_timer); if (s->tx_timer.function) diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c index 01c65d13fc0e..e617e4aabbcc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/smt.c +++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c @@ -55,7 +55,7 @@ struct smt_data *t4_init_smt(void) for (i = 0; i < s->smt_size; ++i) { s->smtab[i].idx = i; s->smtab[i].state = SMT_STATE_UNUSED; - memset(&s->smtab[i].src_mac, 0, ETH_ALEN); + eth_zero_addr(s->smtab[i].src_mac); spin_lock_init(&s->smtab[i].lock); s->smtab[i].refcnt = 0; } @@ -103,6 +103,7 @@ static void t4_smte_free(struct smt_entry *e) } /** + * cxgb4_smt_release - Release SMT entry * @e: smt entry to release * * Releases ref count and frees up an smt entry from SMT table @@ -231,6 +232,7 @@ static struct smt_entry *t4_smt_alloc_switching(struct adapter *adap, u16 pfvf, } /** + * cxgb4_smt_alloc_switching - Allocates an SMT entry for switch filters. 
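The smt.c hunk below swaps a raw memset() for eth_zero_addr(); together with ether_addr_copy() and eth_broadcast_addr() (used by the loopback selftest above), these <linux/etherdevice.h> helpers make MAC-address intent explicit and encode the fixed ETH_ALEN length. Sketch:

    #include <linux/etherdevice.h>

    static void mac_helpers_demo(u8 *dst, const u8 *src)
    {
            eth_zero_addr(dst);             /* memset(dst, 0, ETH_ALEN) */
            ether_addr_copy(dst, src);      /* memcpy(dst, src, ETH_ALEN) */
            eth_broadcast_addr(dst);        /* ff:ff:ff:ff:ff:ff */
    }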
* @dev: net_device pointer * @smac: MAC address to add to SMT * Returns pointer to the SMT entry created diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.h b/drivers/net/ethernet/chelsio/cxgb4/smt.h index 1268d6e93a47..541249d78914 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/smt.h +++ b/drivers/net/ethernet/chelsio/cxgb4/smt.h @@ -66,7 +66,7 @@ struct smt_entry { struct smt_data { unsigned int smt_size; rwlock_t lock; - struct smt_entry smtab[0]; + struct smt_entry smtab[]; }; struct smt_data *t4_init_smt(void); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 844fdcf55118..8d719f82854a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -1379,8 +1379,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9608, 0x9638, 0x9640, 0x96f4, 0x9800, 0x9808, - 0x9820, 0x983c, - 0x9850, 0x9864, + 0x9810, 0x9864, 0x9c00, 0x9c6c, 0x9c80, 0x9cec, 0x9d00, 0x9d6c, @@ -1389,7 +1388,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9e80, 0x9eec, 0x9f00, 0x9f6c, 0x9f80, 0xa020, - 0xd004, 0xd004, + 0xd000, 0xd004, 0xd010, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0x1106c, @@ -1430,10 +1429,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1a0b0, 0x1a0e4, 0x1a0ec, 0x1a0f8, 0x1a100, 0x1a108, - 0x1a114, 0x1a120, - 0x1a128, 0x1a130, - 0x1a138, 0x1a138, - 0x1a190, 0x1a1c4, + 0x1a114, 0x1a130, + 0x1a138, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e008, 0x1e00c, 0x1e040, 0x1e044, @@ -2093,7 +2090,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1190, 0x1194, 0x11a0, 0x11a4, 0x11b0, 0x11b4, - 0x11fc, 0x1274, + 0x11fc, 0x123c, + 0x1254, 0x1274, 0x1280, 0x133c, 0x1800, 0x18fc, 0x3000, 0x302c, @@ -2162,8 +2160,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9640, 0x9704, 0x9710, 0x971c, 0x9800, 0x9808, - 0x9820, 0x983c, - 0x9850, 0x9864, + 0x9810, 0x9864, 0x9c00, 0x9c6c, 0x9c80, 0x9cec, 0x9d00, 0x9d6c, @@ -2172,7 +2169,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x9e80, 0x9eec, 0x9f00, 0x9f6c, 0x9f80, 0xa020, - 0xd004, 0xd03c, + 0xd000, 0xd03c, 0xd100, 0xd118, 0xd200, 0xd214, 0xd220, 0xd234, @@ -2240,10 +2237,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1a0b0, 0x1a0e4, 0x1a0ec, 0x1a0f8, 0x1a100, 0x1a108, - 0x1a114, 0x1a120, - 0x1a128, 0x1a130, - 0x1a138, 0x1a138, - 0x1a190, 0x1a1c4, + 0x1a114, 0x1a130, + 0x1a138, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e008, 0x1e00c, 0x1e040, 0x1e044, @@ -2695,7 +2690,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) #define VPD_BASE 0x400 #define VPD_BASE_OLD 0 #define VPD_LEN 1024 -#define CHELSIO_VPD_UNIQUE_ID 0x82 /** * t4_eeprom_ptov - translate a physical EEPROM address to virtual @@ -2749,10 +2743,9 @@ int t4_seeprom_wp(struct adapter *adapter, bool enable) */ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) { - int i, ret = 0, addr; - int ec, sn, pn, na; - u8 *vpd, csum; - unsigned int vpdr_len, kw_offset, id_len; + unsigned int id_len, pn_len, sn_len, na_len; + int id, sn, pn, na, addr, ret = 0; + u8 *vpd, base_val = 0; vpd = vmalloc(VPD_LEN); if (!vpd) @@ -2761,90 +2754,62 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) /* Card information normally starts at VPD_BASE but early cards had * it at 0. 
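The smtab[0] to smtab[] change above (mirrored later in t4_msg.h and t4fw_api.h) is the standard flexible-array-member conversion. A sketch of how such a trailing array is sized and allocated, with illustrative names, not the driver's:

#include <linux/overflow.h>
#include <linux/slab.h>

struct demo_entry {
	int idx;
};

struct demo_table {
	unsigned int size;
	struct demo_entry tab[];	/* was: struct demo_entry tab[0]; */
};

static struct demo_table *demo_table_alloc(unsigned int n)
{
	/* struct_size() computes sizeof(*t) + n * sizeof(t->tab[0]) with
	 * integer-overflow checking; the C99 flexible array, unlike the
	 * old GNU zero-length array, also lets the compiler reason about
	 * the bounds of t->tab. */
	struct demo_table *t = kzalloc(struct_size(t, tab, n), GFP_KERNEL);

	if (t)
		t->size = n;
	return t;
}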
*/ - ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(u32), vpd); + ret = pci_read_vpd(adapter->pdev, VPD_BASE, 1, &base_val); if (ret < 0) goto out; - /* The VPD shall have a unique identifier specified by the PCI SIG. - * For chelsio adapters, the identifier is 0x82. The first byte of a VPD - * shall be CHELSIO_VPD_UNIQUE_ID (0x82). The VPD programming software - * is expected to automatically put this entry at the - * beginning of the VPD. - */ - addr = *vpd == CHELSIO_VPD_UNIQUE_ID ? VPD_BASE : VPD_BASE_OLD; + addr = base_val == PCI_VPD_LRDT_ID_STRING ? VPD_BASE : VPD_BASE_OLD; ret = pci_read_vpd(adapter->pdev, addr, VPD_LEN, vpd); if (ret < 0) goto out; - if (vpd[0] != PCI_VPD_LRDT_ID_STRING) { - dev_err(adapter->pdev_dev, "missing VPD ID string\n"); - ret = -EINVAL; + ret = pci_vpd_find_id_string(vpd, VPD_LEN, &id_len); + if (ret < 0) goto out; - } + id = ret; - id_len = pci_vpd_lrdt_size(vpd); - if (id_len > ID_LEN) - id_len = ID_LEN; - - i = pci_vpd_find_tag(vpd, 0, VPD_LEN, PCI_VPD_LRDT_RO_DATA); - if (i < 0) { - dev_err(adapter->pdev_dev, "missing VPD-R section\n"); + ret = pci_vpd_check_csum(vpd, VPD_LEN); + if (ret) { + dev_err(adapter->pdev_dev, "VPD checksum incorrect or missing\n"); ret = -EINVAL; goto out; } - vpdr_len = pci_vpd_lrdt_size(&vpd[i]); - kw_offset = i + PCI_VPD_LRDT_TAG_SIZE; - if (vpdr_len + kw_offset > VPD_LEN) { - dev_err(adapter->pdev_dev, "bad VPD-R length %u\n", vpdr_len); - ret = -EINVAL; + ret = pci_vpd_find_ro_info_keyword(vpd, VPD_LEN, + PCI_VPD_RO_KEYWORD_SERIALNO, &sn_len); + if (ret < 0) goto out; - } - -#define FIND_VPD_KW(var, name) do { \ - var = pci_vpd_find_info_keyword(vpd, kw_offset, vpdr_len, name); \ - if (var < 0) { \ - dev_err(adapter->pdev_dev, "missing VPD keyword " name "\n"); \ - ret = -EINVAL; \ - goto out; \ - } \ - var += PCI_VPD_INFO_FLD_HDR_SIZE; \ -} while (0) + sn = ret; - FIND_VPD_KW(i, "RV"); - for (csum = 0; i >= 0; i--) - csum += vpd[i]; - - if (csum) { - dev_err(adapter->pdev_dev, - "corrupted VPD EEPROM, actual csum %u\n", csum); - ret = -EINVAL; + ret = pci_vpd_find_ro_info_keyword(vpd, VPD_LEN, + PCI_VPD_RO_KEYWORD_PARTNO, &pn_len); + if (ret < 0) goto out; - } + pn = ret; - FIND_VPD_KW(ec, "EC"); - FIND_VPD_KW(sn, "SN"); - FIND_VPD_KW(pn, "PN"); - FIND_VPD_KW(na, "NA"); -#undef FIND_VPD_KW + ret = pci_vpd_find_ro_info_keyword(vpd, VPD_LEN, "NA", &na_len); + if (ret < 0) + goto out; + na = ret; - memcpy(p->id, vpd + PCI_VPD_LRDT_TAG_SIZE, id_len); + memcpy(p->id, vpd + id, min_t(unsigned int, id_len, ID_LEN)); strim(p->id); - memcpy(p->ec, vpd + ec, EC_LEN); - strim(p->ec); - i = pci_vpd_info_field_size(vpd + sn - PCI_VPD_INFO_FLD_HDR_SIZE); - memcpy(p->sn, vpd + sn, min(i, SERNUM_LEN)); + memcpy(p->sn, vpd + sn, min_t(unsigned int, sn_len, SERNUM_LEN)); strim(p->sn); - i = pci_vpd_info_field_size(vpd + pn - PCI_VPD_INFO_FLD_HDR_SIZE); - memcpy(p->pn, vpd + pn, min(i, PN_LEN)); + memcpy(p->pn, vpd + pn, min_t(unsigned int, pn_len, PN_LEN)); strim(p->pn); - memcpy(p->na, vpd + na, min(i, MACADDR_LEN)); - strim((char *)p->na); + memcpy(p->na, vpd + na, min_t(unsigned int, na_len, MACADDR_LEN)); + strim(p->na); out: vfree(vpd); - return ret < 0 ? 
ret : 0; + if (ret < 0) { + dev_err(adapter->pdev_dev, "error reading VPD\n"); + return ret; + } + + return 0; } /** @@ -3072,16 +3037,19 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr, * @addr: the start address to write * @n: length of data to write in bytes * @data: the data to write + * @byte_oriented: whether to store data as bytes or as words * * Writes up to a page of data (256 bytes) to the serial flash starting * at the given address. All the data must be written to the same page. + * If @byte_oriented is set the write data is stored as byte stream + * (i.e. matches what on disk), otherwise in big-endian. */ static int t4_write_flash(struct adapter *adapter, unsigned int addr, - unsigned int n, const u8 *data) + unsigned int n, const u8 *data, bool byte_oriented) { - int ret; - u32 buf[64]; unsigned int i, c, left, val, offset = addr & 0xff; + u32 buf[64]; + int ret; if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE) return -EINVAL; @@ -3092,10 +3060,14 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr, (ret = sf1_write(adapter, 4, 1, 1, val)) != 0) goto unlock; - for (left = n; left; left -= c) { + for (left = n; left; left -= c, data += c) { c = min(left, 4U); - for (val = 0, i = 0; i < c; ++i) - val = (val << 8) + *data++; + for (val = 0, i = 0; i < c; ++i) { + if (byte_oriented) + val = (val << 8) + data[i]; + else + val = (val << 8) + data[c - i - 1]; + } ret = sf1_write(adapter, c, c != left, 1, val); if (ret) @@ -3108,7 +3080,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr, t4_write_reg(adapter, SF_OP_A, 0); /* unlock SF */ /* Read the page to verify the write succeeded */ - ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1); + ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, + byte_oriented); if (ret) return ret; @@ -3169,7 +3142,7 @@ int t4_get_tp_version(struct adapter *adapter, u32 *vers) /** * t4_get_exprom_version - return the Expansion ROM version (if any) - * @adapter: the adapter + * @adap: the adapter * @vers: where to place the version * * Reads the Expansion ROM header from FLASH and returns the version @@ -3499,7 +3472,7 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, drv_fw = &fw_info->fw_hdr; /* Read the header of the firmware on the card */ - ret = -t4_read_flash(adap, FLASH_FW_START, + ret = t4_read_flash(adap, FLASH_FW_START, sizeof(*card_fw) / sizeof(uint32_t), (uint32_t *)card_fw, 1); if (ret == 0) { @@ -3528,8 +3501,8 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, should_install_fs_fw(adap, card_fw_usable, be32_to_cpu(fs_fw->fw_ver), be32_to_cpu(card_fw->fw_ver))) { - ret = -t4_fw_upgrade(adap, adap->mbox, fw_data, - fw_size, 0); + ret = t4_fw_upgrade(adap, adap->mbox, fw_data, + fw_size, 0); if (ret != 0) { dev_err(adap->pdev_dev, "failed to install firmware: %d\n", ret); @@ -3560,7 +3533,7 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, FW_HDR_FW_VER_MICRO_G(c), FW_HDR_FW_VER_BUILD_G(c), FW_HDR_FW_VER_MAJOR_G(k), FW_HDR_FW_VER_MINOR_G(k), FW_HDR_FW_VER_MICRO_G(k), FW_HDR_FW_VER_BUILD_G(k)); - ret = EINVAL; + ret = -EINVAL; goto bye; } @@ -3704,7 +3677,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); - ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true); if (ret) goto 
out; @@ -3712,14 +3685,14 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data); + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true); if (ret) goto out; } - ret = t4_write_flash(adap, - fw_start + offsetof(struct fw_hdr, fw_ver), - sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); + ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver), + sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver, + true); out: if (ret) dev_err(adap->pdev_dev, "firmware download failed, error %d\n", @@ -3748,7 +3721,7 @@ int t4_phy_fw_ver(struct adapter *adap, int *phy_fw_ver) FW_PARAMS_PARAM_Z_V(FW_PARAMS_PARAM_DEV_PHYFW_VERSION)); ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, ¶m, &val); - if (ret < 0) + if (ret) return ret; *phy_fw_ver = val; return 0; @@ -3758,7 +3731,6 @@ int t4_phy_fw_ver(struct adapter *adap, int *phy_fw_ver) * t4_load_phy_fw - download port PHY firmware * @adap: the adapter * @win: the PCI-E Memory Window index to use for t4_memory_rw() - * @win_lock: the lock to use to guard the memory copy * @phy_fw_version: function to check PHY firmware versions * @phy_fw_data: the PHY firmware image to write * @phy_fw_size: image size @@ -3767,9 +3739,7 @@ int t4_phy_fw_ver(struct adapter *adap, int *phy_fw_ver) * @phy_fw_version is supplied, then it will be used to determine if * it's necessary to perform the transfer by comparing the version * of any existing adapter PHY firmware with that of the passed in - * PHY firmware image. If @win_lock is non-NULL then it will be used - * around the call to t4_memory_rw() which transfers the PHY firmware - * to the adapter. + * PHY firmware image. * * A negative error number will be returned if an error occurs. If * version number support is available and there's no need to upgrade @@ -3781,14 +3751,13 @@ int t4_phy_fw_ver(struct adapter *adap, int *phy_fw_ver) * contents. Thus, loading PHY firmware on such adapters must happen * after any FW_RESET_CMDs ... */ -int t4_load_phy_fw(struct adapter *adap, - int win, spinlock_t *win_lock, +int t4_load_phy_fw(struct adapter *adap, int win, int (*phy_fw_version)(const u8 *, size_t), const u8 *phy_fw_data, size_t phy_fw_size) { + int cur_phy_fw_ver = 0, new_phy_fw_vers = 0; unsigned long mtype = 0, maddr = 0; u32 param, val; - int cur_phy_fw_ver = 0, new_phy_fw_vers = 0; int ret; /* If we have version number support, then check to see if the adapter @@ -3828,13 +3797,11 @@ int t4_load_phy_fw(struct adapter *adap, /* Copy the supplied PHY Firmware image to the adapter memory location * allocated by the adapter firmware. 
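The t4_get_raw_vpd_params() rewrite above drops the hand-rolled VPD walking (FIND_VPD_KW, manual checksum loop) in favor of the PCI core's helpers. A rough sketch of that helper API in isolation; the pci_vpd_alloc() convenience shown here assumes a simpler caller, since the driver itself still uses pci_read_vpd() to probe the old-card base-address quirk:

#include <linux/minmax.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/string.h>

static int demo_read_serial(struct pci_dev *pdev, char *sn, size_t sn_sz)
{
	unsigned int vpd_size, kw_len, n;
	void *vpd;
	int off;

	/* Reads and returns the whole validated VPD image, so callers
	 * stop guessing buffer sizes. */
	vpd = pci_vpd_alloc(pdev, &vpd_size);
	if (IS_ERR(vpd))
		return PTR_ERR(vpd);

	/* Returns the offset of the keyword's data and its length. */
	off = pci_vpd_find_ro_info_keyword(vpd, vpd_size,
					   PCI_VPD_RO_KEYWORD_SERIALNO,
					   &kw_len);
	if (off >= 0) {
		n = min_t(unsigned int, kw_len, sn_sz - 1);
		memcpy(sn, vpd + off, n);
		sn[n] = '\0';
	}

	kfree(vpd);
	return off < 0 ? off : 0;
}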
*/ - if (win_lock) - spin_lock_bh(win_lock); + spin_lock_bh(&adap->win0_lock); ret = t4_memory_rw(adap, win, mtype, maddr, phy_fw_size, (__be32 *)phy_fw_data, T4_MEMORY_WRITE); - if (win_lock) - spin_unlock_bh(win_lock); + spin_unlock_bh(&adap->win0_lock); if (ret) return ret; @@ -4480,7 +4447,7 @@ static void tp_intr_handler(struct adapter *adapter) */ static void sge_intr_handler(struct adapter *adapter) { - u64 v; + u32 v = 0, perr; u32 err; static const struct intr_info sge_intr_info[] = { @@ -4515,13 +4482,29 @@ static void sge_intr_handler(struct adapter *adapter) { 0 } }; - v = (u64)t4_read_reg(adapter, SGE_INT_CAUSE1_A) | - ((u64)t4_read_reg(adapter, SGE_INT_CAUSE2_A) << 32); - if (v) { - dev_alert(adapter->pdev_dev, "SGE parity error (%#llx)\n", - (unsigned long long)v); - t4_write_reg(adapter, SGE_INT_CAUSE1_A, v); - t4_write_reg(adapter, SGE_INT_CAUSE2_A, v >> 32); + perr = t4_read_reg(adapter, SGE_INT_CAUSE1_A); + if (perr) { + v |= perr; + dev_alert(adapter->pdev_dev, "SGE Cause1 Parity Error %#x\n", + perr); + } + + perr = t4_read_reg(adapter, SGE_INT_CAUSE2_A); + if (perr) { + v |= perr; + dev_alert(adapter->pdev_dev, "SGE Cause2 Parity Error %#x\n", + perr); + } + + if (CHELSIO_CHIP_VERSION(adapter->params.chip) >= CHELSIO_T5) { + perr = t4_read_reg(adapter, SGE_INT_CAUSE5_A); + /* Parity error (CRC) for err_T_RxCRC is trivial, ignore it */ + perr &= ~ERR_T_RXCRC_F; + if (perr) { + v |= perr; + dev_alert(adapter->pdev_dev, + "SGE Cause5 Parity Error %#x\n", perr); + } } v |= t4_handle_intr_status(adapter, SGE_INT_CAUSE3_A, sge_intr_info); @@ -4743,9 +4726,11 @@ static void le_intr_handler(struct adapter *adap) static struct intr_info t6_le_intr_info[] = { { T6_LIPMISS_F, "LE LIP miss", -1, 0 }, { T6_LIP0_F, "LE 0 LIP error", -1, 0 }, + { CMDTIDERR_F, "LE cmd tid error", -1, 1 }, { TCAMINTPERR_F, "LE parity error", -1, 1 }, { T6_UNKNOWNCMD_F, "LE unknown command", -1, 1 }, { SSRAMINTPERR_F, "LE request queue parity error", -1, 1 }, + { HASHTBLMEMCRCERR_F, "LE hash table mem crc error", -1, 0 }, { 0 } }; @@ -5300,7 +5285,7 @@ static unsigned int t4_use_ldst(struct adapter *adap) * @cmd: TP fw ldst address space type * @vals: where the indirect register values are stored/written * @nregs: how many indirect registers to read/write - * @start_idx: index of first indirect register to read/write + * @start_index: index of first indirect register to read/write * @rw: Read (1) or Write (0) * @sleep_ok: if true we may sleep while awaiting command completion * @@ -6105,7 +6090,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) /** * compute_mps_bg_map - compute the MPS Buffer Group Map for a Port - * @adap: the adapter + * @adapter: the adapter * @pidx: the port index * * Computes and returns a bitmap indicating which MPS buffer groups are @@ -6242,7 +6227,7 @@ static unsigned int t4_get_tp_e2c_map(struct adapter *adapter, int pidx) /** * t4_get_tp_ch_map - return TP ingress channels associated with a port - * @adapter: the adapter + * @adap: the adapter * @pidx: the port index * * Returns a bitmap indicating which TP Ingress Channels are associated @@ -6579,7 +6564,7 @@ int t4_mdio_rd(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, * @phy_addr: the PHY address * @mmd: the PHY MMD to access (0 for clause 22 PHYs) * @reg: the register to write - * @valp: value to write + * @val: value to write * * Issues a FW command through the given mailbox to write a PHY register. 
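The byte_oriented flag added to t4_write_flash() earlier in this patch only changes how each group of four data bytes is packed into the 32-bit word handed to the flash controller. A worked illustration for the bytes 0x11 0x22 0x33 0x44, distilled from the loop in the diff:

#include <linux/types.h>

/* byte_oriented = true  packs {0x11, 0x22, 0x33, 0x44} as 0x11223344,
 * the byte-stream order of the image file on disk;
 * byte_oriented = false packs them reversed, as 0x44332211, keeping
 * the legacy word layout used for firmware images. */
static u32 demo_pack4(const u8 *data, bool byte_oriented)
{
	u32 val = 0;
	int i;

	for (i = 0; i < 4; i++)
		val = (val << 8) + (byte_oriented ? data[i]
						  : data[4 - i - 1]);
	return val;
}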
*/ @@ -6605,7 +6590,7 @@ int t4_mdio_wr(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, /** * t4_sge_decode_idma_state - decode the idma state - * @adap: the adapter + * @adapter: the adapter * @state: the state idma is stuck in */ void t4_sge_decode_idma_state(struct adapter *adapter, int state) @@ -6772,7 +6757,7 @@ void t4_sge_decode_idma_state(struct adapter *adapter, int state) * t4_sge_ctxt_flush - flush the SGE context cache * @adap: the adapter * @mbox: mailbox to use for the FW command - * @ctx_type: Egress or Ingress + * @ctxt_type: Egress or Ingress * * Issues a FW command through the given mailbox to flush the * SGE context cache. @@ -6799,7 +6784,7 @@ int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type) /** * t4_read_sge_dbqtimers - read SGE Doorbell Queue Timer values - * @adap - the adapter + * @adap: the adapter * @ndbqtimers: size of the provided SGE Doorbell Queue Timer table * @dbqtimers: SGE Doorbell Queue Timer table * @@ -6985,7 +6970,7 @@ int t4_fw_bye(struct adapter *adap, unsigned int mbox) } /** - * t4_init_cmd - ask FW to initialize the device + * t4_early_init - ask FW to initialize the device * @adap: the adapter * @mbox: mailbox to use for the FW command * @@ -7082,6 +7067,7 @@ static int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force) /** * t4_fw_restart - restart the firmware by taking the uP out of RESET * @adap: the adapter + * @mbox: mailbox to use for the FW command * @reset: if we want to do a RESET to restart things * * Restart firmware previously halted by t4_fw_halt(). On successful @@ -7620,6 +7606,8 @@ int t4_cfg_pfvf(struct adapter *adap, unsigned int mbox, unsigned int pf, * @nmac: number of MAC addresses needed (1 to 5) * @mac: the MAC addresses of the VI * @rss_size: size of RSS table slice associated with this VI + * @vivld: the destination to store the VI Valid value. + * @vin: the destination to store the VIN value. * * Allocates a virtual interface for the given physical port. If @mac is * not %NULL it contains the MAC addresses of the VI as assigned by FW. @@ -7651,13 +7639,13 @@ int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port, switch (nmac) { case 5: memcpy(mac + 24, c.nmac3, sizeof(c.nmac3)); - /* Fall through */ + fallthrough; case 4: memcpy(mac + 18, c.nmac2, sizeof(c.nmac2)); - /* Fall through */ + fallthrough; case 3: memcpy(mac + 12, c.nmac1, sizeof(c.nmac1)); - /* Fall through */ + fallthrough; case 2: memcpy(mac + 6, c.nmac0, sizeof(c.nmac0)); } @@ -7706,6 +7694,7 @@ int t4_free_vi(struct adapter *adap, unsigned int mbox, unsigned int pf, * @adap: the adapter * @mbox: mailbox to use for the FW command * @viid: the VI id + * @viid_mirror: the mirror VI id * @mtu: the new MTU or -1 * @promisc: 1 to enable promiscuous mode, 0 to disable it, -1 no change * @all_multi: 1 to enable all-multi mode, 0 to disable it, -1 no change @@ -7716,10 +7705,11 @@ int t4_free_vi(struct adapter *adap, unsigned int mbox, unsigned int pf, * Sets Rx properties of a virtual interface. 
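The comment-to-keyword changes in t4_alloc_vi() above are mechanical: the fallthrough pseudo-keyword expands to __attribute__((fallthrough)) where supported, so -Wimplicit-fallthrough can verify that every unterminated case is deliberate, which a /* Fall through */ comment cannot guarantee. A reduced sketch of the same cascade shape:

#include <linux/compiler_attributes.h>

static int demo_count_macs(int nmac)
{
	int copied = 0;

	switch (nmac) {
	case 3:
		copied++;
		fallthrough;	/* each case deliberately adds one more */
	case 2:
		copied++;
		fallthrough;
	case 1:
		copied++;
	}
	return copied;
}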
*/ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid, - int mtu, int promisc, int all_multi, int bcast, int vlanex, - bool sleep_ok) + unsigned int viid_mirror, int mtu, int promisc, int all_multi, + int bcast, int vlanex, bool sleep_ok) { - struct fw_vi_rxmode_cmd c; + struct fw_vi_rxmode_cmd c, c_mirror; + int ret; /* convert to FW values */ if (mtu < 0) @@ -7744,7 +7734,24 @@ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid, FW_VI_RXMODE_CMD_ALLMULTIEN_V(all_multi) | FW_VI_RXMODE_CMD_BROADCASTEN_V(bcast) | FW_VI_RXMODE_CMD_VLANEXEN_V(vlanex)); - return t4_wr_mbox_meat(adap, mbox, &c, sizeof(c), NULL, sleep_ok); + + if (viid_mirror) { + memcpy(&c_mirror, &c, sizeof(c_mirror)); + c_mirror.op_to_viid = + cpu_to_be32(FW_CMD_OP_V(FW_VI_RXMODE_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + FW_VI_RXMODE_CMD_VIID_V(viid_mirror)); + } + + ret = t4_wr_mbox_meat(adap, mbox, &c, sizeof(c), NULL, sleep_ok); + if (ret) + return ret; + + if (viid_mirror) + ret = t4_wr_mbox_meat(adap, mbox, &c_mirror, sizeof(c_mirror), + NULL, sleep_ok); + + return ret; } /** @@ -7762,7 +7769,6 @@ int t4_free_encap_mac_filt(struct adapter *adap, unsigned int viid, int idx, bool sleep_ok) { struct fw_vi_mac_exact *p; - u8 addr[] = {0, 0, 0, 0, 0, 0}; struct fw_vi_mac_cmd c; int ret = 0; u32 exact; @@ -7779,7 +7785,7 @@ int t4_free_encap_mac_filt(struct adapter *adap, unsigned int viid, p = c.u.exact; p->valid_to_idx = cpu_to_be16(FW_VI_MAC_CMD_VALID_F | FW_VI_MAC_CMD_IDX_V(idx)); - memcpy(p->macaddr, addr, sizeof(p->macaddr)); + eth_zero_addr(p->macaddr); ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok); return ret; } @@ -7838,7 +7844,7 @@ int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid, * t4_alloc_encap_mac_filt - Adds a mac entry in mps tcam with VNI support * @adap: the adapter * @viid: the VI id - * @mac: the MAC address + * @addr: the MAC address * @mask: the mask * @vni: the VNI id for the tunnel protocol * @vni_mask: mask for the VNI id @@ -7887,11 +7893,11 @@ int t4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid, * t4_alloc_raw_mac_filt - Adds a mac entry in mps tcam * @adap: the adapter * @viid: the VI id - * @mac: the MAC address + * @addr: the MAC address * @mask: the mask * @idx: index at which to add this entry - * @port_id: the port index * @lookup_type: MAC address for inner (1) or outer (0) header + * @port_id: the port index * @sleep_ok: call is allowed to sleep * * Adds the mac entry at the specified index using raw mac interface. @@ -8116,7 +8122,7 @@ int t4_free_mac_filt(struct adapter *adap, unsigned int mbox, * @idx: index of existing filter for old value of MAC address, or -1 * @addr: the new MAC address value * @persist: whether a new MAC allocation should be persistent - * @add_smt: if true also add the address to the HW SMT + * @smt_idx: the destination to store the new SMT index. * * Modifies an exact-match filter and sets it to the new MAC address. * Note that in general it is not possible to modify the value of a given @@ -8438,7 +8444,6 @@ int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, /** * t4_link_down_rc_str - return a string for a Link Down Reason Code - * @adap: the adapter * @link_down_rc: Link Down Reason Code * * Returns a string representation of the Link Down Reason Code. 
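eth_zero_addr() in the hunk above, and eth_hw_addr_set() in the hunks below, replace open-coded memset()/memcpy() on MAC addresses. The second one matters beyond style: dev_addr in struct net_device is const in recent kernels, so writes must go through the helper. A short sketch with hypothetical function names:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

static void demo_clear_slot(u8 *slot_mac)
{
	eth_zero_addr(slot_mac);	/* == memset(slot_mac, 0, ETH_ALEN) */
}

static void demo_install_mac(struct net_device *dev, const u8 *mac)
{
	/* eth_hw_addr_set() is the sanctioned way to write dev->dev_addr
	 * now that the field is const; it lets the core track changes. */
	if (!is_zero_ether_addr(mac))
		eth_hw_addr_set(dev, mac);
}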
@@ -8462,9 +8467,7 @@ static const char *t4_link_down_rc_str(unsigned char link_down_rc) return reason[link_down_rc]; } -/** - * Return the highest speed set in the port capabilities, in Mb/s. - */ +/* Return the highest speed set in the port capabilities, in Mb/s. */ static unsigned int fwcap_to_speed(fw_port_cap32_t caps) { #define TEST_SPEED_RETURN(__caps_speed, __speed) \ @@ -9100,7 +9103,6 @@ found: /** * t4_prep_adapter - prepare SW and HW for operation * @adapter: the adapter - * @reset: if true perform a HW reset * * Initialize adapter SW state for the various HW modules, set initial * values for some adapter tunables, take PHYs out of reset, and @@ -9704,12 +9706,28 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) if (ret) return ret; - memcpy(adap->port[i]->dev_addr, addr, ETH_ALEN); + eth_hw_addr_set(adap->port[i], addr); j++; } return 0; } +int t4_init_port_mirror(struct port_info *pi, u8 mbox, u8 port, u8 pf, u8 vf, + u16 *mirror_viid) +{ + int ret; + + ret = t4_alloc_vi(pi->adapter, mbox, port, pf, vf, 1, NULL, NULL, + NULL, NULL); + if (ret < 0) + return ret; + + if (mirror_viid) + *mirror_viid = ret; + + return 0; +} + /** * t4_read_cimq_cfg - read CIM queue configuration * @adap: the adapter @@ -10176,7 +10194,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) n = size - i; else n = SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, n, cfg_data); + ret = t4_write_flash(adap, addr, n, cfg_data, true); if (ret) goto out; @@ -10192,7 +10210,7 @@ out: } /** - * t4_set_vf_mac - Set MAC address for the specified VF + * t4_set_vf_mac_acl - Set MAC address for the specified VF * @adapter: The adapter * @vf: one of the VFs instantiated by the specified PF * @naddr: the number of MAC addresses @@ -10351,9 +10369,10 @@ int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, return ret; } -int t4_sched_params(struct adapter *adapter, int type, int level, int mode, - int rateunit, int ratemode, int channel, int class, - int minrate, int maxrate, int weight, int pktsize) +int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode, + u8 rateunit, u8 ratemode, u8 channel, u8 class, + u32 minrate, u32 maxrate, u16 weight, u16 pktsize, + u16 burstsize) { struct fw_sched_cmd cmd; @@ -10375,6 +10394,7 @@ int t4_sched_params(struct adapter *adapter, int type, int level, int mode, cmd.u.params.max = cpu_to_be32(maxrate); cmd.u.params.weight = cpu_to_be16(weight); cmd.u.params.pktsize = cpu_to_be16(pktsize); + cmd.u.params.burstsize = cpu_to_be16(burstsize); return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd), NULL, 1); @@ -10383,6 +10403,7 @@ int t4_sched_params(struct adapter *adapter, int type, int level, int mode, /** * t4_i2c_rd - read I2C data from adapter * @adap: the adapter + * @mbox: mailbox to use for the FW command * @port: Port number if per-port device; <0 if not * @devid: per-port device ID or absolute device ID * @offset: byte offset into device I2C space @@ -10438,7 +10459,7 @@ int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port, /** * t4_set_vlan_acl - Set a VLAN id for the specified VF - * @adapter: the adapter + * @adap: the adapter * @mbox: mailbox to use for the FW command * @vf: one of the VFs instantiated by the specified PF * @vlan: The vlanid to be set @@ -10469,3 +10490,282 @@ int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf, return t4_wr_mbox(adap, adap->mbox, &vlan_cmd, sizeof(vlan_cmd), NULL); } + +/** + * modify_device_id - Modifies the device ID 
of the Boot BIOS image + * @device_id: the device ID to write. + * @boot_data: the boot image to modify. + * + * Write the supplied device ID to the boot BIOS image. + */ +static void modify_device_id(int device_id, u8 *boot_data) +{ + struct cxgb4_pcir_data *pcir_header; + struct legacy_pci_rom_hdr *header; + u8 *cur_header = boot_data; + u16 pcir_offset; + + /* Loop through all chained images and change the device ID's */ + do { + header = (struct legacy_pci_rom_hdr *)cur_header; + pcir_offset = le16_to_cpu(header->pcir_offset); + pcir_header = (struct cxgb4_pcir_data *)(cur_header + + pcir_offset); + + /** + * Only modify the Device ID if code type is Legacy or HP. + * 0x00: Okay to modify + * 0x01: FCODE. Do not modify + * 0x03: Okay to modify + * 0x04-0xFF: Do not modify + */ + if (pcir_header->code_type == CXGB4_HDR_CODE1) { + u8 csum = 0; + int i; + + /** + * Modify Device ID to match current adapter + */ + pcir_header->device_id = cpu_to_le16(device_id); + + /** + * Set checksum temporarily to 0. + * We will recalculate it later. + */ + header->cksum = 0x0; + + /** + * Calculate and update checksum + */ + for (i = 0; i < (header->size512 * 512); i++) + csum += cur_header[i]; + + /** + * Invert summed value to create the checksum + * Writing new checksum value directly to the boot data + */ + cur_header[7] = -csum; + + } else if (pcir_header->code_type == CXGB4_HDR_CODE2) { + /** + * Modify Device ID to match current adapter + */ + pcir_header->device_id = cpu_to_le16(device_id); + } + + /** + * Move header pointer up to the next image in the ROM. + */ + cur_header += header->size512 * 512; + } while (!(pcir_header->indicator & CXGB4_HDR_INDI)); +} + +/** + * t4_load_boot - download boot flash + * @adap: the adapter + * @boot_data: the boot image to write + * @boot_addr: offset in flash to write boot_data + * @size: image size + * + * Write the supplied boot image to the card's serial flash. + * The boot image has the following sections: a 28-byte header and the + * boot image. + */ +int t4_load_boot(struct adapter *adap, u8 *boot_data, + unsigned int boot_addr, unsigned int size) +{ + unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; + unsigned int boot_sector = (boot_addr * 1024); + struct cxgb4_pci_exp_rom_header *header; + struct cxgb4_pcir_data *pcir_header; + int pcir_offset; + unsigned int i; + u16 device_id; + int ret, addr; + + /** + * Make sure the boot image does not encroach on the firmware region + */ + if ((boot_sector + size) >> 16 > FLASH_FW_START_SEC) { + dev_err(adap->pdev_dev, "boot image encroaching on firmware region\n"); + return -EFBIG; + } + + /* Get boot header */ + header = (struct cxgb4_pci_exp_rom_header *)boot_data; + pcir_offset = le16_to_cpu(header->pcir_offset); + /* PCIR Data Structure */ + pcir_header = (struct cxgb4_pcir_data *)&boot_data[pcir_offset]; + + /** + * Perform some primitive sanity testing to avoid accidentally + * writing garbage over the boot sectors. We ought to check for + * more but it's not worth it for now ...
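modify_device_id() above leans on the option-ROM checksum rule: the byte sum of the whole image must be zero modulo 256, so the checksum byte holds the negated sum of everything else. Isolated for clarity (using byte 7 as the checksum slot, as the code above does; treat the layout as illustrative):

#include <linux/types.h>

static void demo_rom_fix_csum(u8 *img, unsigned int len)
{
	unsigned int i;
	u8 sum = 0;

	img[7] = 0;		/* zero the slot so it drops out of the sum */
	for (i = 0; i < len; i++)
		sum += img[i];
	img[7] = -sum;		/* the image now sums to 0 modulo 256 */
}

For example, if the other bytes sum to 0x2f3, the u8 accumulator wraps to 0xf3, img[7] becomes 0x0d, and 0xf3 + 0x0d == 0x100 == 0 (mod 256).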
+ */ + if (size < BOOT_MIN_SIZE || size > BOOT_MAX_SIZE) { + dev_err(adap->pdev_dev, "boot image too small/large\n"); + return -EFBIG; + } + + if (le16_to_cpu(header->signature) != BOOT_SIGNATURE) { + dev_err(adap->pdev_dev, "Boot image missing signature\n"); + return -EINVAL; + } + + /* Check PCI header signature */ + if (le32_to_cpu(pcir_header->signature) != PCIR_SIGNATURE) { + dev_err(adap->pdev_dev, "PCI header missing signature\n"); + return -EINVAL; + } + + /* Check Vendor ID matches Chelsio ID */ + if (le16_to_cpu(pcir_header->vendor_id) != PCI_VENDOR_ID_CHELSIO) { + dev_err(adap->pdev_dev, "Vendor ID missing signature\n"); + return -EINVAL; + } + + /** + * The boot sector is comprised of the Expansion-ROM boot, iSCSI boot, + * and Boot configuration data sections. These 3 boot sections span + * sectors 0 to 7 in flash and live right before the FW image location. + */ + i = DIV_ROUND_UP(size ? size : FLASH_FW_START, sf_sec_size); + ret = t4_flash_erase_sectors(adap, boot_sector >> 16, + (boot_sector >> 16) + i - 1); + + /** + * If size == 0 then we're simply erasing the FLASH sectors associated + * with the on-adapter option ROM file + */ + if (ret || size == 0) + goto out; + /* Retrieve adapter's device ID */ + pci_read_config_word(adap->pdev, PCI_DEVICE_ID, &device_id); + /* Want to deal with PF 0 so I strip off PF 4 indicator */ + device_id = device_id & 0xf0ff; + + /* Check PCIE Device ID */ + if (le16_to_cpu(pcir_header->device_id) != device_id) { + /** + * Change the device ID in the Boot BIOS image to match + * the Device ID of the current adapter. + */ + modify_device_id(device_id, boot_data); + } + + /** + * Skip over the first SF_PAGE_SIZE worth of data and write it after + * we finish copying the rest of the boot image. This will ensure + * that the BIOS boot header will only be written if the boot image + * was written in full. + */ + addr = boot_sector; + for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { + addr += SF_PAGE_SIZE; + boot_data += SF_PAGE_SIZE; + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data, + false); + if (ret) + goto out; + } + + ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE, + (const u8 *)header, false); + +out: + if (ret) + dev_err(adap->pdev_dev, "boot image load failed, error %d\n", + ret); + return ret; +} + +/** + * t4_flash_bootcfg_addr - return the address of the flash + * optionrom configuration + * @adapter: the adapter + * + * Return the address within the flash where the OptionROM Configuration + * is stored, or an error if the device FLASH is too small to contain + * an OptionROM Configuration. + */ +static int t4_flash_bootcfg_addr(struct adapter *adapter) +{ + /** + * If the device FLASH isn't large enough to hold an OptionROM + * Configuration File, return an error.
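t4_load_bootcfg() below pads the flashed size to a 4-byte boundary via npad = ((size + 4 - 1) & ~3) - size. That mask expression is the usual round-up-to-multiple idiom and matches what the kernel's ALIGN() macro expands to for a power-of-two alignment:

#include <linux/align.h>

static unsigned int demo_pad_to_u32(unsigned int size)
{
	/* size = 10: (10 + 3) & ~3 = 12, so the pad is 2 bytes;
	 * size = 12: (12 + 3) & ~3 = 12, so the pad is 0 bytes. */
	return ALIGN(size, 4) - size;
}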
+ */ + if (adapter->params.sf_size < + FLASH_BOOTCFG_START + FLASH_BOOTCFG_MAX_SIZE) + return -ENOSPC; + + return FLASH_BOOTCFG_START; +} + +int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) +{ + unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; + struct cxgb4_bootcfg_data *header; + unsigned int flash_cfg_start_sec; + unsigned int addr, npad; + int ret, i, n, cfg_addr; + + cfg_addr = t4_flash_bootcfg_addr(adap); + if (cfg_addr < 0) + return cfg_addr; + + addr = cfg_addr; + flash_cfg_start_sec = addr / SF_SEC_SIZE; + + if (size > FLASH_BOOTCFG_MAX_SIZE) { + dev_err(adap->pdev_dev, "bootcfg file too large, max is %u bytes\n", + FLASH_BOOTCFG_MAX_SIZE); + return -EFBIG; + } + + header = (struct cxgb4_bootcfg_data *)cfg_data; + if (le16_to_cpu(header->signature) != BOOT_CFG_SIG) { + dev_err(adap->pdev_dev, "Wrong bootcfg signature\n"); + ret = -EINVAL; + goto out; + } + + i = DIV_ROUND_UP(FLASH_BOOTCFG_MAX_SIZE, + sf_sec_size); + ret = t4_flash_erase_sectors(adap, flash_cfg_start_sec, + flash_cfg_start_sec + i - 1); + + /** + * If size == 0 then we're simply erasing the FLASH sectors associated + * with the on-adapter OptionROM Configuration File. + */ + if (ret || size == 0) + goto out; + + /* this will write to the flash up to SF_PAGE_SIZE at a time */ + for (i = 0; i < size; i += SF_PAGE_SIZE) { + n = min_t(u32, size - i, SF_PAGE_SIZE); + + ret = t4_write_flash(adap, addr, n, cfg_data, false); + if (ret) + goto out; + + addr += SF_PAGE_SIZE; + cfg_data += SF_PAGE_SIZE; + } + + npad = ((size + 4 - 1) & ~3) - size; + for (i = 0; i < npad; i++) { + u8 data = 0; + + ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data, + false); + if (ret) + goto out; + } + +out: + if (ret) + dev_err(adap->pdev_dev, "boot config data %s failed %d\n", + (size == 0 ? 
"clear" : "download"), ret); + return ret; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 002fc62ea726..63bc956d2037 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -293,6 +293,8 @@ enum { #define I2C_PAGE_SIZE 0x100 #define SFP_DIAG_TYPE_ADDR 0x5c #define SFP_DIAG_TYPE_LEN 0x1 +#define SFP_DIAG_ADDRMODE BIT(2) +#define SFP_DIAG_IMPLEMENTED BIT(6) #define SFF_8472_COMP_ADDR 0x5e #define SFF_8472_COMP_LEN 0x1 #define SFF_REV_ADDR 0x1 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 575c6abcdae7..fed5f93bf620 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -47,6 +47,7 @@ enum { CPL_CLOSE_LISTSRV_REQ = 0x9, CPL_ABORT_REQ = 0xA, CPL_ABORT_RPL = 0xB, + CPL_TX_DATA = 0xC, CPL_RX_DATA_ACK = 0xD, CPL_TX_PKT = 0xE, CPL_L2T_WRITE_REQ = 0x12, @@ -705,6 +706,14 @@ struct cpl_set_tcb_field { __be64 val; }; +struct cpl_set_tcb_field_core { + union opcode_tid ot; + __be16 reply_ctrl; + __be16 word_cookie; + __be64 mask; + __be64 val; +}; + /* cpl_set_tcb_field.word_cookie fields */ #define TCB_WORD_S 0 #define TCB_WORD_V(x) ((x) << TCB_WORD_S) @@ -1462,6 +1471,16 @@ struct cpl_tx_data { #define TX_FORCE_S 13 #define TX_FORCE_V(x) ((x) << TX_FORCE_S) +#define TX_DATA_MSS_S 16 +#define TX_DATA_MSS_M 0xFFFF +#define TX_DATA_MSS_V(x) ((x) << TX_DATA_MSS_S) +#define TX_DATA_MSS_G(x) (((x) >> TX_DATA_MSS_S) & TX_DATA_MSS_M) + +#define TX_LENGTH_S 0 +#define TX_LENGTH_M 0xFFFF +#define TX_LENGTH_V(x) ((x) << TX_LENGTH_S) +#define TX_LENGTH_G(x) (((x) >> TX_LENGTH_S) & TX_LENGTH_M) + #define T6_TX_FORCE_S 20 #define T6_TX_FORCE_V(x) ((x) << T6_TX_FORCE_S) #define T6_TX_FORCE_F T6_TX_FORCE_V(1U) @@ -1471,6 +1490,15 @@ struct cpl_tx_data { #define TX_SHOVE_S 14 #define TX_SHOVE_V(x) ((x) << TX_SHOVE_S) +#define TX_SHOVE_F TX_SHOVE_V(1U) + +#define TX_BYPASS_S 21 +#define TX_BYPASS_V(x) ((x) << TX_BYPASS_S) +#define TX_BYPASS_F TX_BYPASS_V(1U) + +#define TX_PUSH_S 22 +#define TX_PUSH_V(x) ((x) << TX_PUSH_S) +#define TX_PUSH_F TX_PUSH_V(1U) #define TX_ULP_MODE_S 10 #define TX_ULP_MODE_M 0x7 @@ -1511,7 +1539,7 @@ struct ulptx_sgl { __be32 cmd_nsge; __be32 len0; __be64 addr0; - struct ulptx_sge_pair sge[0]; + struct ulptx_sge_pair sge[]; }; struct ulptx_idata { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 0c5373462ced..0b1b5f9c67d4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -219,6 +219,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6089), /* Custom T62100-KR */ CH_PCI_ID_TABLE_FENTRY(0x608a), /* Custom T62100-CR */ CH_PCI_ID_TABLE_FENTRY(0x608b), /* Custom T6225-CR */ + CH_PCI_ID_TABLE_FENTRY(0x6092), /* Custom T62100-CR-LOM */ CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index a957a6e4d4c4..695916ba0405 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -487,6 +487,12 @@ #define ERROR_QID_M 0x1ffffU #define ERROR_QID_G(x) (((x) >> ERROR_QID_S) & ERROR_QID_M) +#define SGE_INT_CAUSE5_A 0x110c + +#define ERR_T_RXCRC_S 31 +#define ERR_T_RXCRC_V(x) ((x) << ERR_T_RXCRC_S) +#define ERR_T_RXCRC_F ERR_T_RXCRC_V(1U) + #define 
HP_INT_THRESH_S 28 #define HP_INT_THRESH_M 0xfU #define HP_INT_THRESH_V(x) ((x) << HP_INT_THRESH_S) @@ -557,6 +563,12 @@ #define AIVEC_V(x) ((x) << AIVEC_S) #define PCIE_PF_CLI_A 0x44 + +#define PCIE_PF_EXPROM_OFST_A 0x4c +#define OFFSET_S 10 +#define OFFSET_M 0x3fffU +#define OFFSET_G(x) (((x) >> OFFSET_S) & OFFSET_M) + #define PCIE_INT_CAUSE_A 0x3004 #define UNXSPLCPLERR_S 29 @@ -872,6 +884,12 @@ #define TDUE_V(x) ((x) << TDUE_S) #define TDUE_F TDUE_V(1U) +/* SPARE2 register contains 32-bit value at offset 0x6 in Serial INIT + * Configuration flashed on EEPROM. This value corresponds to 32-bit + * Serial Configuration Version information. + */ +#define PCIE_STATIC_SPARE2_A 0x5bfc + /* registers for module MC */ #define MC_INT_CAUSE_A 0x7518 #define MC_P_INT_CAUSE_A 0x41318 @@ -1900,6 +1918,9 @@ #define MAC_PORT_CFG2_A 0x818 +#define MAC_PORT_PTP_SUM_LO_A 0x990 +#define MAC_PORT_PTP_SUM_HI_A 0x994 + #define MPS_CMN_CTL_A 0x9000 #define COUNTPAUSEMCRX_S 5 @@ -3002,6 +3023,14 @@ #define REV_V(x) ((x) << REV_S) #define REV_G(x) (((x) >> REV_S) & REV_M) +#define HASHTBLMEMCRCERR_S 27 +#define HASHTBLMEMCRCERR_V(x) ((x) << HASHTBLMEMCRCERR_S) +#define HASHTBLMEMCRCERR_F HASHTBLMEMCRCERR_V(1U) + +#define CMDTIDERR_S 22 +#define CMDTIDERR_V(x) ((x) << CMDTIDERR_S) +#define CMDTIDERR_F CMDTIDERR_V(1U) + #define T6_UNKNOWNCMD_S 3 #define T6_UNKNOWNCMD_V(x) ((x) << T6_UNKNOWNCMD_S) #define T6_UNKNOWNCMD_F T6_UNKNOWNCMD_V(1U) @@ -3029,6 +3058,10 @@ #define HASHTIDSIZE_M 0x3fU #define HASHTIDSIZE_G(x) (((x) >> HASHTIDSIZE_S) & HASHTIDSIZE_M) +#define HASHTBLSIZE_S 3 +#define HASHTBLSIZE_M 0x1ffffU +#define HASHTBLSIZE_G(x) (((x) >> HASHTBLSIZE_S) & HASHTBLSIZE_M) + #define LE_DB_HASH_TID_BASE_A 0x19c30 #define LE_DB_HASH_TBL_BASE_ADDR_A 0x19c30 #define LE_DB_INT_CAUSE_A 0x19c3c diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 1b9afb192f7f..22a0220123ad 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -35,6 +35,18 @@ #ifndef __T4_TCB_H #define __T4_TCB_H +#define TCB_L2T_IX_W 0 +#define TCB_L2T_IX_S 12 +#define TCB_L2T_IX_M 0xfffULL +#define TCB_L2T_IX_V(x) ((x) << TCB_L2T_IX_S) + +#define TCB_T_FLAGS_W 1 +#define TCB_T_FLAGS_S 0 +#define TCB_T_FLAGS_M 0xffffffffffffffffULL +#define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S) + +#define TCB_FIELD_COOKIE_TFLAG 1 + #define TCB_SMAC_SEL_W 0 #define TCB_SMAC_SEL_S 24 #define TCB_SMAC_SEL_M 0xffULL @@ -45,10 +57,9 @@ #define TCB_T_FLAGS_M 0xffffffffffffffffULL #define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S) -#define TCB_RQ_START_W 30 -#define TCB_RQ_START_S 0 -#define TCB_RQ_START_M 0x3ffffffULL -#define TCB_RQ_START_V(x) ((x) << TCB_RQ_START_S) +#define TF_DROP_S 22 +#define TF_DIRECT_STEER_S 23 +#define TF_LPBK_S 59 #define TF_CCTRL_ECE_S 60 #define TF_CCTRL_CWR_S 61 @@ -59,6 +70,11 @@ #define TCB_RSS_INFO_M 0x3ffULL #define TCB_RSS_INFO_V(x) ((x) << TCB_RSS_INFO_S) +#define TCB_T_STATE_W 3 +#define TCB_T_STATE_S 16 +#define TCB_T_STATE_M 0xfULL +#define TCB_T_STATE_V(x) ((x) << TCB_T_STATE_S) + #define TCB_TIMESTAMP_W 5 #define TCB_TIMESTAMP_S 0 #define TCB_TIMESTAMP_M 0xffffffffULL @@ -69,13 +85,60 @@ #define TCB_RTT_TS_RECENT_AGE_M 0xffffffffULL #define TCB_RTT_TS_RECENT_AGE_V(x) ((x) << TCB_RTT_TS_RECENT_AGE_S) +#define TCB_T_RTSEQ_RECENT_W 7 +#define TCB_T_RTSEQ_RECENT_S 0 +#define TCB_T_RTSEQ_RECENT_M 0xffffffffULL +#define TCB_T_RTSEQ_RECENT_V(x) ((x) << TCB_T_RTSEQ_RECENT_S) + +#define TCB_TX_MAX_W 9 +#define 
TCB_TX_MAX_S 0 +#define TCB_TX_MAX_M 0xffffffffULL +#define TCB_TX_MAX_V(x) ((x) << TCB_TX_MAX_S) + #define TCB_SND_UNA_RAW_W 10 +#define TCB_SND_UNA_RAW_S 0 +#define TCB_SND_UNA_RAW_M 0xfffffffULL +#define TCB_SND_UNA_RAW_V(x) ((x) << TCB_SND_UNA_RAW_S) + +#define TCB_SND_NXT_RAW_W 10 +#define TCB_SND_NXT_RAW_S 28 +#define TCB_SND_NXT_RAW_M 0xfffffffULL +#define TCB_SND_NXT_RAW_V(x) ((x) << TCB_SND_NXT_RAW_S) + +#define TCB_SND_MAX_RAW_W 11 +#define TCB_SND_MAX_RAW_S 24 +#define TCB_SND_MAX_RAW_M 0xfffffffULL +#define TCB_SND_MAX_RAW_V(x) ((x) << TCB_SND_MAX_RAW_S) + +#define TCB_RCV_NXT_W 16 +#define TCB_RCV_NXT_S 10 +#define TCB_RCV_NXT_M 0xffffffffULL +#define TCB_RCV_NXT_V(x) ((x) << TCB_RCV_NXT_S) + +#define TCB_RCV_WND_W 17 +#define TCB_RCV_WND_S 10 +#define TCB_RCV_WND_M 0xffffffULL +#define TCB_RCV_WND_V(x) ((x) << TCB_RCV_WND_S) + #define TCB_RX_FRAG2_PTR_RAW_W 27 #define TCB_RX_FRAG3_LEN_RAW_W 29 #define TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W 30 #define TCB_PDU_HDR_LEN_W 31 +#define TCB_RQ_START_W 30 +#define TCB_RQ_START_S 0 +#define TCB_RQ_START_M 0x3ffffffULL +#define TCB_RQ_START_V(x) ((x) << TCB_RQ_START_S) + #define TF_RX_PDU_OUT_S 49 #define TF_RX_PDU_OUT_V(x) ((__u64)(x) << TF_RX_PDU_OUT_S) +#define TF_CORE_BYPASS_S 63 +#define TF_CORE_BYPASS_V(x) ((__u64)(x) << TF_CORE_BYPASS_S) +#define TF_CORE_BYPASS_F TF_CORE_BYPASS_V(1) + +#define TF_NON_OFFLOAD_S 1 +#define TF_NON_OFFLOAD_V(x) ((x) << TF_NON_OFFLOAD_S) +#define TF_NON_OFFLOAD_F TF_NON_OFFLOAD_V(1) + #endif /* __T4_TCB_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index accad1101ad1..2419459a0b85 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -737,7 +737,7 @@ struct fw_flowc_mnemval { struct fw_flowc_wr { __be32 op_to_nparams; __be32 flowid_len16; - struct fw_flowc_mnemval mnemval[0]; + struct fw_flowc_mnemval mnemval[]; }; #define FW_FLOWC_WR_NPARAMS_S 0 @@ -794,10 +794,12 @@ struct fw_eth_tx_pkt_vm_wr { __be32 op_immdlen; __be32 equiq_to_len16; __be32 r3[2]; - u8 ethmacdst[6]; - u8 ethmacsrc[6]; - __be16 ethtype; - __be16 vlantci; + struct_group(firmware, + u8 ethmacdst[ETH_ALEN]; + u8 ethmacsrc[ETH_ALEN]; + __be16 ethtype; + __be16 vlantci; + ); }; #define FW_CMD_MAX_TIMEOUT 10000 @@ -1205,6 +1207,7 @@ enum fw_caps_config_crypto { FW_CAPS_CONFIG_CRYPTO_LOOKASIDE = 0x00000001, FW_CAPS_CONFIG_TLS_INLINE = 0x00000002, FW_CAPS_CONFIG_IPSEC_INLINE = 0x00000004, + FW_CAPS_CONFIG_TLS_HW = 0x00000008, }; enum fw_caps_config_fcoe { @@ -1328,6 +1331,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A, FW_PARAMS_PARAM_DEV_NUM_TM_CLASS = 0x2B, FW_PARAMS_PARAM_DEV_FILTER = 0x2E, + FW_PARAMS_PARAM_DEV_KTLS_HW = 0x31, }; /* @@ -1410,6 +1414,12 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20, }; +enum fw_params_param_dev_ktls_hw { + FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE = 0x00, + FW_PARAMS_PARAM_DEV_KTLS_HW_ENABLE = 0x01, + FW_PARAMS_PARAM_DEV_KTLS_HW_USER_ENABLE = 0x01, +}; + enum fw_params_param_dev_phyfw { FW_PARAMS_PARAM_DEV_PHYFW_DOWNLOAD = 0x00, FW_PARAMS_PARAM_DEV_PHYFW_VERSION = 0x01, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 3782e48dada2..03cb1410d6fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -40,6 +40,7 @@ #ifndef __CXGB4VF_ADAPTER_H__ #define __CXGB4VF_ADAPTER_H__ +#include <linux/etherdevice.h> #include 
<linux/interrupt.h> #include <linux/pci.h> #include <linux/spinlock.h> @@ -507,7 +508,7 @@ static inline const char *port_name(struct adapter *adapter, int pidx) static inline void t4_os_set_hw_addr(struct adapter *adapter, int pidx, u8 hw_addr[]) { - memcpy(adapter->port[pidx]->dev_addr, hw_addr, ETH_ALEN); + eth_hw_addr_set(adapter->port[pidx], hw_addr); } /** @@ -562,7 +563,7 @@ int t4vf_sge_alloc_eth_txq(struct adapter *, struct sge_eth_txq *, unsigned int); void t4vf_free_sge_resources(struct adapter *); -int t4vf_eth_xmit(struct sk_buff *, struct net_device *); +netdev_tx_t t4vf_eth_xmit(struct sk_buff *, struct net_device *); int t4vf_ethrx_handler(struct sge_rspq *, const __be64 *, const struct pkt_gl *); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index f4d41f968afa..63b2bd084130 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -55,7 +55,6 @@ /* * Generic information about the driver. */ -#define DRV_VERSION "2.0.0-ko" #define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver" /* @@ -261,8 +260,7 @@ static int cxgb4vf_set_addr_hash(struct port_info *pi) * @tcam_idx: TCAM index of existing filter for old value of MAC address, * or -1 * @addr: the new MAC address value - * @persist: whether a new MAC allocation should be persistent - * @add_smt: if true also add the address to the HW SMT + * @persistent: whether a new MAC allocation should be persistent * * Modifies an MPS filter and sets it to the new MAC address if * @tcam_idx >= 0, or adds the MAC address to a new filter if @@ -519,7 +517,7 @@ static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp, } cpl = (void *)p; } - /* Fall through */ + fallthrough; case CPL_SGE_EGR_UPDATE: { /* @@ -860,7 +858,7 @@ static int cxgb4vf_open(struct net_device *dev) */ err = t4vf_update_port_info(pi); if (err < 0) - return err; + goto err_unwind; /* * Note that this interface is up and start everything up ... @@ -1220,7 +1218,7 @@ static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr) if (ret < 0) return ret; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + eth_hw_addr_set(dev, addr->sa_data); return 0; } @@ -1555,9 +1553,8 @@ static void cxgb4vf_get_drvinfo(struct net_device *dev, { struct adapter *adapter = netdev2adap(dev); - strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); - strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)), + strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); + strscpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)), sizeof(drvinfo->bus_info)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%u.%u.%u.%u, TP %u.%u.%u.%u", @@ -1594,7 +1591,9 @@ static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel) * first Queue Set. */ static void cxgb4vf_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *rp) + struct ethtool_ringparam *rp, + struct kernel_ethtool_ringparam *kernel_rp, + struct netlink_ext_ack *extack) { const struct port_info *pi = netdev_priv(dev); const struct sge *s = &pi->adapter->sge; @@ -1617,7 +1616,9 @@ static void cxgb4vf_get_ringparam(struct net_device *dev, * device -- after vetting them of course! 
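The extra arguments sprouting on the ringparam and coalesce callbacks here come from the ethtool core, which now passes netlink-only extended settings alongside the legacy structure. A sketch of the new get_coalesce shape; the values are placeholders, the real driver reads its queue state:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

static int demo_get_coalesce(struct net_device *dev,
			     struct ethtool_coalesce *c,
			     struct kernel_ethtool_coalesce *kernel_c,
			     struct netlink_ext_ack *extack)
{
	/* Legacy fields still travel in *c; newer knobs arrive via
	 * *kernel_c, and *extack can carry an error string back over
	 * the netlink reply. */
	c->rx_coalesce_usecs = 64;
	c->rx_max_coalesced_frames = 32;
	return 0;
}

The companion supported_coalesce_params mask, added in the hunk below, declares which fields the driver honours so the core can reject everything else before the callback runs.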
*/ static int cxgb4vf_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *rp) + struct ethtool_ringparam *rp, + struct kernel_ethtool_ringparam *kernel_rp, + struct netlink_ext_ack *extack) { const struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -1650,7 +1651,9 @@ static int cxgb4vf_set_ringparam(struct net_device *dev, * interrupt holdoff timer to be read on all of the device's Queue Sets. */ static int cxgb4vf_get_coalesce(struct net_device *dev, - struct ethtool_coalesce *coalesce) + struct ethtool_coalesce *coalesce, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { const struct port_info *pi = netdev_priv(dev); const struct adapter *adapter = pi->adapter; @@ -1670,7 +1673,9 @@ static int cxgb4vf_get_coalesce(struct net_device *dev, * the interrupt holdoff timer on any of the device's Queue Sets. */ static int cxgb4vf_set_coalesce(struct net_device *dev, - struct ethtool_coalesce *coalesce) + struct ethtool_coalesce *coalesce, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { const struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -1921,6 +1926,8 @@ static void cxgb4vf_get_wol(struct net_device *dev, NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA) static const struct ethtool_ops cxgb4vf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_RX_MAX_FRAMES, .get_link_ksettings = cxgb4vf_get_link_ksettings, .get_fecparam = cxgb4vf_get_fecparam, .get_drvinfo = cxgb4vf_get_drvinfo, @@ -2018,33 +2025,14 @@ static void mboxlog_stop(struct seq_file *seq, void *v) { } -static const struct seq_operations mboxlog_seq_ops = { +static const struct seq_operations mboxlog_sops = { .start = mboxlog_start, .next = mboxlog_next, .stop = mboxlog_stop, .show = mboxlog_show }; -static int mboxlog_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &mboxlog_seq_ops); - - if (!res) { - struct seq_file *seq = file->private_data; - - seq->private = inode->i_private; - } - return res; -} - -static const struct file_operations mboxlog_fops = { - .owner = THIS_MODULE, - .open = mboxlog_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - +DEFINE_SEQ_ATTRIBUTE(mboxlog); /* * Show SGE Queue Set information. We display QPL Queues Sets per line. */ @@ -2172,31 +2160,14 @@ static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos) return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; } -static const struct seq_operations sge_qinfo_seq_ops = { +static const struct seq_operations sge_qinfo_sops = { .start = sge_queue_start, .next = sge_queue_next, .stop = sge_queue_stop, .show = sge_qinfo_show }; -static int sge_qinfo_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &sge_qinfo_seq_ops); - - if (!res) { - struct seq_file *seq = file->private_data; - seq->private = inode->i_private; - } - return res; -} - -static const struct file_operations sge_qinfo_debugfs_fops = { - .owner = THIS_MODULE, - .open = sge_qinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(sge_qinfo); /* * Show SGE Queue Set statistics. We display QPL Queues Sets per line. @@ -2318,31 +2289,14 @@ static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos) return *pos < entries ? 
(void *)((uintptr_t)*pos + 1) : NULL; } -static const struct seq_operations sge_qstats_seq_ops = { +static const struct seq_operations sge_qstats_sops = { .start = sge_qstats_start, .next = sge_qstats_next, .stop = sge_qstats_stop, .show = sge_qstats_show }; -static int sge_qstats_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &sge_qstats_seq_ops); - - if (res == 0) { - struct seq_file *seq = file->private_data; - seq->private = inode->i_private; - } - return res; -} - -static const struct file_operations sge_qstats_proc_fops = { - .owner = THIS_MODULE, - .open = sge_qstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(sge_qstats); /* * Show PCI-E SR-IOV Virtual Function Resource Limits. @@ -2416,31 +2370,14 @@ static void interfaces_stop(struct seq_file *seq, void *v) { } -static const struct seq_operations interfaces_seq_ops = { +static const struct seq_operations interfaces_sops = { .start = interfaces_start, .next = interfaces_next, .stop = interfaces_stop, .show = interfaces_show }; -static int interfaces_open(struct inode *inode, struct file *file) -{ - int res = seq_open(file, &interfaces_seq_ops); - - if (res == 0) { - struct seq_file *seq = file->private_data; - seq->private = inode->i_private; - } - return res; -} - -static const struct file_operations interfaces_proc_fops = { - .owner = THIS_MODULE, - .open = interfaces_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(interfaces); /* * /sys/kernel/debugfs/cxgb4vf/ files list. @@ -2453,10 +2390,10 @@ struct cxgb4vf_debugfs_entry { static struct cxgb4vf_debugfs_entry debugfs_files[] = { { "mboxlog", 0444, &mboxlog_fops }, - { "sge_qinfo", 0444, &sge_qinfo_debugfs_fops }, - { "sge_qstats", 0444, &sge_qstats_proc_fops }, + { "sge_qinfo", 0444, &sge_qinfo_fops }, + { "sge_qstats", 0444, &sge_qstats_fops }, { "resources", 0444, &resources_fops }, - { "interfaces", 0444, &interfaces_proc_fops }, + { "interfaces", 0444, &interfaces_fops }, }; /* @@ -2480,7 +2417,7 @@ static int setup_debugfs(struct adapter *adapter) for (i = 0; i < ARRAY_SIZE(debugfs_files); i++) debugfs_create_file(debugfs_files[i].name, debugfs_files[i].mode, - adapter->debugfs_root, (void *)adapter, + adapter->debugfs_root, adapter, debugfs_files[i].fops); return 0; @@ -2908,7 +2845,7 @@ static const struct net_device_ops cxgb4vf_netdev_ops = { .ndo_set_rx_mode = cxgb4vf_set_rxmode, .ndo_set_mac_address = cxgb4vf_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = cxgb4vf_do_ioctl, + .ndo_eth_ioctl = cxgb4vf_do_ioctl, .ndo_change_mtu = cxgb4vf_change_mtu, .ndo_fix_features = cxgb4vf_fix_features, .ndo_set_features = cxgb4vf_set_features, @@ -2917,6 +2854,39 @@ static const struct net_device_ops cxgb4vf_netdev_ops = { #endif }; +/** + * cxgb4vf_get_port_mask - Get port mask for the VF based on mac + * address stored on the adapter + * @adapter: The adapter + * + * Find the port mask for the VF based on the index of mac + * address stored in the adapter. If no mac address is stored on + * the adapter for the VF, use the port mask received from the + * firmware. 
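Each DEFINE_SEQ_ATTRIBUTE(name) used above expects a name_sops seq_operations table and generates the name_open()/name_fops boilerplate, including the inode->i_private to seq->private hand-off that the deleted open functions performed by hand. Minimal usage against a hypothetical single-record file:

#include <linux/seq_file.h>

static void *demo_start(struct seq_file *seq, loff_t *pos)
{
	return *pos ? NULL : seq->private;	/* single record */
}

static void *demo_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void demo_stop(struct seq_file *seq, void *v)
{
}

static int demo_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "private=%p\n", v);
	return 0;
}

static const struct seq_operations demo_sops = {
	.start = demo_start,
	.next = demo_next,
	.stop = demo_stop,
	.show = demo_show,
};

/* Emits demo_open() and demo_fops, ready for debugfs_create_file(). */
DEFINE_SEQ_ATTRIBUTE(demo);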
+ */ +static unsigned int cxgb4vf_get_port_mask(struct adapter *adapter) +{ + unsigned int naddr = 1, pidx = 0; + unsigned int pmask, rmask = 0; + u8 mac[ETH_ALEN]; + int err; + + pmask = adapter->params.vfres.pmask; + while (pmask) { + if (pmask & 1) { + err = t4vf_get_vf_mac_acl(adapter, pidx, &naddr, mac); + if (!err && !is_zero_ether_addr(mac)) + rmask |= (1 << pidx); + } + pmask >>= 1; + pidx++; + } + if (!rmask) + rmask = adapter->params.vfres.pmask; + + return rmask; +} + /* * "Probe" a device: initialize a device and construct all kernel and driver * state needed to manage the device. This routine is called "init_one" in @@ -2925,28 +2895,18 @@ static const struct net_device_ops cxgb4vf_netdev_ops = { static int cxgb4vf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - int pci_using_dac; - int err, pidx; - unsigned int pmask; struct adapter *adapter; - struct port_info *pi; struct net_device *netdev; - unsigned int pf; - - /* - * Print our driver banner the first time we're called to initialize a - * device. - */ - pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION); + struct port_info *pi; + unsigned int pmask; + int err, pidx; /* * Initialize generic PCI device state. */ err = pci_enable_device(pdev); - if (err) { - dev_err(&pdev->dev, "cannot enable PCI device\n"); - return err; - } + if (err) + return dev_err_probe(&pdev->dev, err, "cannot enable PCI device\n"); /* * Reserve PCI resources for the device. If we can't get them some @@ -2959,25 +2919,12 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, } /* - * Set up our DMA mask: try for 64-bit address masking first and - * fall back to 32-bit if we can't get 64 bits ... + * Set up our DMA mask */ - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err == 0) { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - dev_err(&pdev->dev, "unable to obtain 64-bit DMA for" - " coherent allocations\n"); - goto err_release_regions; - } - pci_using_dac = 1; - } else { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err != 0) { - dev_err(&pdev->dev, "no usable DMA configuration\n"); - goto err_release_regions; - } - pci_using_dac = 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "no usable DMA configuration\n"); + goto err_release_regions; } /* @@ -3080,8 +3027,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, /* * Allocate our "adapter ports" and stitch everything together. 
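The probe rework above also retires the 64-then-32-bit DMA mask dance: dma_set_mask_and_coherent() sets the streaming and coherent masks together, and a 64-bit request does not fail on platforms where a 32-bit one would have succeeded, so the fallback path was effectively dead code. A sketch of the resulting pattern, paired with dev_err_probe():

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int demo_probe_dma(struct pci_dev *pdev)
{
	int err;

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err)
		/* dev_err_probe() logs (staying quiet for -EPROBE_DEFER)
		 * and returns err, folding the old log-then-return pair
		 * into one statement. */
		return dev_err_probe(&pdev->dev, err,
				     "no usable DMA configuration\n");

	return 0;
}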
*/ - pmask = adapter->params.vfres.pmask; - pf = t4vf_get_pf_from_vf(adapter); + pmask = cxgb4vf_get_port_mask(adapter); for_each_port(adapter, pidx) { int port_id, viid; u8 mac[ETH_ALEN]; @@ -3124,9 +3070,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_GRO | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - netdev->features = netdev->hw_features; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA; netdev->vlan_features = netdev->features & VLAN_FEAT; netdev->priv_flags |= IFF_UNICAST_FLT; @@ -3164,7 +3108,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, goto err_free_dev; } - err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac); + err = t4vf_get_vf_mac_acl(adapter, port_id, &naddr, mac); if (err) { dev_err(&pdev->dev, "unable to determine MAC ACL address, " @@ -3246,6 +3190,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, } if (adapter->registered_device_map == 0) { dev_err(&pdev->dev, "could not register any net devices\n"); + err = -EINVAL; goto err_disable_interrupts; } @@ -3454,7 +3399,6 @@ static void cxgb4vf_pci_shutdown(struct pci_dev *pdev) MODULE_DESCRIPTION(DRV_DESC); MODULE_AUTHOR("Chelsio Communications"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl); static struct pci_driver cxgb4vf_driver = { diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index f71c973398ec..2d0cf76fb3c5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -478,7 +478,7 @@ static void free_rx_bufs(struct adapter *adapter, struct sge_fl *fl, int n) if (is_buf_mapped(sdesc)) dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc), get_buf_size(adapter, sdesc), - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); put_page(sdesc->page); sdesc->page = NULL; if (++fl->cidx == fl->size) @@ -507,7 +507,7 @@ static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl) if (is_buf_mapped(sdesc)) dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc), get_buf_size(adapter, sdesc), - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); sdesc->page = NULL; if (++fl->cidx == fl->size) fl->cidx = 0; @@ -644,7 +644,7 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl, dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE << s->fl_pg_order, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) { /* * We've run out of DMA mapping space. Free up the @@ -682,7 +682,7 @@ alloc_small_pages: poison_buf(page, PAGE_SIZE); dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) { put_page(page); break; @@ -954,7 +954,7 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq, } /** - * check_ring_tx_db - check and potentially ring a TX queue's doorbell + * ring_tx_db - check and potentially ring a TX queue's doorbell * @adapter: the adapter * @tq: the TX queue * @n: number of new descriptors to give to HW @@ -1154,7 +1154,7 @@ static inline void txq_advance(struct sge_txq *tq, unsigned int n) * * Add a packet to an SGE Ethernet TX queue. Runs with softirqs disabled. 
*/ -int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) { u32 wr_mid; u64 cntrl, *end; @@ -1167,10 +1167,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) struct cpl_tx_pkt_core *cpl; const struct skb_shared_info *ssi; dma_addr_t addr[MAX_SKB_FRAGS + 1]; - const size_t fw_hdr_copy_len = (sizeof(wr->ethmacdst) + - sizeof(wr->ethmacsrc) + - sizeof(wr->ethtype) + - sizeof(wr->vlantci)); + const size_t fw_hdr_copy_len = sizeof(wr->firmware); /* * The chip minimum packet length is 10 octets but the firmware @@ -1267,7 +1264,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) wr->equiq_to_len16 = cpu_to_be32(wr_mid); wr->r3[0] = cpu_to_be32(0); wr->r3[1] = cpu_to_be32(0); - skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len); + skb_copy_from_linear_data(skb, &wr->firmware, fw_hdr_copy_len); end = (u64 *)wr + flits; /* @@ -1692,7 +1689,7 @@ static inline bool is_new_response(const struct rsp_ctrl *rc, * restore_rx_bufs - put back a packet's RX buffers * @gl: the packet gather list * @fl: the SGE Free List - * @nfrags: how many fragments in @si + * @frags: how many fragments in @si * * Called when we find out that the current packet, @si, can't be * processed right away for some reason. This is a very rare event and @@ -2054,7 +2051,7 @@ irq_handler_t t4vf_intr_handler(struct adapter *adapter) /** * sge_rx_timer_cb - perform periodic maintenance of SGE RX queues - * @data: the adapter + * @t: Rx timer * * Runs periodically from a timer to perform maintenance of SGE RX queues. * @@ -2113,7 +2110,7 @@ static void sge_rx_timer_cb(struct timer_list *t) /** * sge_tx_timer_cb - perform periodic maintenance of SGE Tx queues - * @data: the adapter + * @t: Tx timer * * Runs periodically from a timer to perform maintenance of SGE TX queues. * @@ -2339,7 +2336,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, if (ret) goto err; - netif_napi_add(dev, &rspq->napi, napi_rx_handler, 64); + netif_napi_add(dev, &rspq->napi, napi_rx_handler); rspq->cur_desc = rspq->desc; rspq->cidx = 0; rspq->gen = 1; @@ -2405,6 +2402,7 @@ err: * t4vf_sge_alloc_eth_txq - allocate an SGE Ethernet TX Queue * @adapter: the adapter * @txq: pointer to the new txq to be filled in + * @dev: the network device * @devq: the network TX queue associated with the new txq * @iqid: the relative ingress queue ID to which events relating to * the new txq should be directed diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 57cfd10a99ec..03777145efec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -415,7 +415,7 @@ int t4vf_eth_eq_free(struct adapter *, unsigned int); int t4vf_update_port_info(struct port_info *pi); int t4vf_handle_fw_rpl(struct adapter *, const __be64 *); int t4vf_prep_adapter(struct adapter *); -int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, +int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int port, unsigned int *naddr, u8 *addr); int t4vf_get_vf_vlan_acl(struct adapter *adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 9d49ff211cc1..1c52592d3b65 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -33,6 +33,7 @@ * SOFTWARE. 
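As background on the t4vf_eth_xmit() prototype change at the top of this hunk: .ndo_start_xmit implementations are expected to return netdev_tx_t rather than int, with NETDEV_TX_OK and NETDEV_TX_BUSY as the meaningful values. A minimal conforming sketch (both functions are hypothetical illustrations):

	/* Hypothetical helper: true when no TX descriptors are free. */
	static bool example_tx_ring_full(const struct net_device *dev)
	{
		return false;
	}

	static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		if (unlikely(example_tx_ring_full(dev)))
			return NETDEV_TX_BUSY;	/* the stack will requeue the skb */

		/* ... map the skb and hand it to the hardware ring ... */
		return NETDEV_TX_OK;		/* skb is now owned by the driver */
	}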
*/ +#include <linux/ethtool.h> #include <linux/pci.h> #include "t4vf_common.h" @@ -389,9 +390,7 @@ static inline enum cc_fec fwcap_to_cc_fec(fw_port_cap32_t fw_fec) return cc_fec; } -/** - * Return the highest speed set in the port capabilities, in Mb/s. - */ +/* Return the highest speed set in the port capabilities, in Mb/s. */ static unsigned int fwcap_to_speed(fw_port_cap32_t caps) { #define TEST_SPEED_RETURN(__caps_speed, __speed) \ @@ -878,7 +877,7 @@ int t4vf_get_sge_params(struct adapter *adapter) /* T4 uses a single control field to specify both the PCIe Padding and * Packing Boundary. T5 introduced the ability to specify these - * separately with the Padding Boundary in SGE_CONTROL and and Packing + * separately with the Padding Boundary in SGE_CONTROL and Packing * Boundary in SGE_CONTROL2. So for T5 and later we need to grab * SGE_CONTROL in order to determine how ingress packet data will be * laid out in Packed Buffer Mode. Unfortunately, older versions of @@ -1467,6 +1466,7 @@ int t4vf_identify_port(struct adapter *adapter, unsigned int viid, * @bcast: 1 to enable broadcast Rx, 0 to disable it, -1 no change * @vlanex: 1 to enable hardware VLAN Tag extraction, 0 to disable it, * -1 no change + * @sleep_ok: call is allowed to sleep * * Sets Rx properties of a virtual interface. */ @@ -1906,7 +1906,7 @@ static const char *t4vf_link_down_rc_str(unsigned char link_down_rc) /** * t4vf_handle_get_port_info - process a FW reply message * @pi: the port info - * @rpl: start of the FW message + * @cmd: start of the FW message * * Processes a GET_PORT_INFO FW reply message. */ @@ -2137,8 +2137,6 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl) return 0; } -/** - */ int t4vf_prep_adapter(struct adapter *adapter) { int err; @@ -2190,14 +2188,14 @@ int t4vf_prep_adapter(struct adapter *adapter) * t4vf_get_vf_mac_acl - Get the MAC address to be set to * the VI of this VF. * @adapter: The adapter - * @pf: The pf associated with vf + * @port: The port associated with vf * @naddr: the number of ACL MAC addresses returned in addr * @addr: Placeholder for MAC addresses * * Find the MAC address to be set to the VF's VI. The requested MAC address * is from the host OS via callback in the PF driver. */ -int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, +int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int port, unsigned int *naddr, u8 *addr) { struct fw_acl_mac_cmd cmd; @@ -2215,7 +2213,7 @@ int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, if (cmd.nmac < *naddr) *naddr = cmd.nmac; - switch (pf) { + switch (port) { case 3: memcpy(addr, cmd.macaddr3, sizeof(cmd.macaddr3)); break; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/Kconfig b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig new file mode 100644 index 000000000000..521955e1f894 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Chelsio inline crypto configuration +# + +config CHELSIO_INLINE_CRYPTO + bool "Chelsio Inline Crypto support" + depends on CHELSIO_T4 + default y + help + Enable support for inline crypto. + Allows enabling or disabling individual inline crypto drivers from the list below. + +if CHELSIO_INLINE_CRYPTO + +config CRYPTO_DEV_CHELSIO_TLS + tristate "Chelsio Crypto Inline TLS Driver" + depends on CHELSIO_T4 + depends on TLS + depends on TLS_TOE + help + Support Chelsio Inline TLS with Chelsio crypto accelerator. + Enable inline TLS support for Tx and Rx.
+ + To compile this driver as a module, choose M here: the module + will be called chtls. + +config CHELSIO_IPSEC_INLINE + tristate "Chelsio IPSec XFRM Tx crypto offload" + depends on CHELSIO_T4 + depends on XFRM_OFFLOAD + depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + help + Support Chelsio Inline IPsec with Chelsio crypto accelerator. + Enable inline IPsec support for Tx. + + To compile this driver as a module, choose M here: the module + will be called ch_ipsec. + +config CHELSIO_TLS_DEVICE + tristate "Chelsio Inline KTLS Offload" + depends on CHELSIO_T4 + depends on TLS + depends on TLS_DEVICE + select CRYPTO_LIB_AES + help + This flag enables support for kernel tls offload over Chelsio T6 + crypto accelerator. CONFIG_CHELSIO_TLS_DEVICE flag can be enabled + only if CONFIG_TLS and CONFIG_TLS_DEVICE flags are enabled. + + To compile this driver as a module, choose M here: the module + will be called ch_ktls. + +endif # CHELSIO_INLINE_CRYPTO diff --git a/drivers/net/ethernet/chelsio/inline_crypto/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/Makefile new file mode 100644 index 000000000000..27e6d7e2f1eb --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls/ +obj-$(CONFIG_CHELSIO_IPSEC_INLINE) += ch_ipsec/ +obj-$(CONFIG_CHELSIO_TLS_DEVICE) += ch_ktls/ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile new file mode 100644 index 000000000000..efdcaaebc455 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 \ + -I $(srctree)/drivers/crypto/chelsio + +obj-$(CONFIG_CHELSIO_IPSEC_INLINE) += ch_ipsec.o +ch_ipsec-objs := chcr_ipsec.o + + diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c new file mode 100644 index 000000000000..585590520076 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -0,0 +1,823 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Written and Maintained by: + * Atul Gupta (atul.gupta@chelsio.com) + */ + +#define pr_fmt(fmt) "ch_ipsec: " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/highmem.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <net/esp.h> +#include <net/xfrm.h> +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/hash.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> +#include <crypto/authenc.h> +#include <crypto/internal/aead.h> +#include <crypto/null.h> +#include <crypto/internal/skcipher.h> +#include <crypto/aead.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/hash.h> + +#include "chcr_ipsec.h" + +/* + * Max Tx descriptor space we allow for an Ethernet packet to be inlined + * into a WR. + */ +#define MAX_IMM_TX_PKT_LEN 256 +#define GCM_ESP_IV_SIZE 8 + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); + +static bool ch_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state); +static int ch_ipsec_xmit(struct sk_buff *skb, struct net_device *dev); +static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop); +static void ch_ipsec_advance_esn_state(struct xfrm_state *x); +static void ch_ipsec_xfrm_free_state(struct xfrm_state *x); +static void ch_ipsec_xfrm_del_state(struct xfrm_state *x); +static int ch_ipsec_xfrm_add_state(struct xfrm_state *x); + +static const struct xfrmdev_ops ch_ipsec_xfrmdev_ops = { + .xdo_dev_state_add = ch_ipsec_xfrm_add_state, + .xdo_dev_state_delete = ch_ipsec_xfrm_del_state, + .xdo_dev_state_free = ch_ipsec_xfrm_free_state, + .xdo_dev_offload_ok = ch_ipsec_offload_ok, + .xdo_dev_state_advance_esn = ch_ipsec_advance_esn_state, +}; + +static struct cxgb4_uld_info ch_ipsec_uld_info = { + .name = CHIPSEC_DRV_MODULE_NAME, + .add = ch_ipsec_uld_add, + .state_change = ch_ipsec_uld_state_change, + .tx_handler = ch_ipsec_xmit, + .xfrmdev_ops = &ch_ipsec_xfrmdev_ops, +}; + +static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop) +{ + struct ipsec_uld_ctx *u_ctx; + + pr_info_once("%s - version %s\n", CHIPSEC_DRV_DESC, + CHIPSEC_DRV_VERSION); + u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL); + if (!u_ctx) { + u_ctx = ERR_PTR(-ENOMEM); + goto out; + } + u_ctx->lldi = *infop; +out: + return u_ctx; +} + +static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state) +{ + struct ipsec_uld_ctx *u_ctx = handle; + + pr_debug("new_state %u\n", new_state); + switch (new_state) { + case CXGB4_STATE_UP: + pr_info("%s: Up\n", pci_name(u_ctx->lldi.pdev)); + mutex_lock(&dev_mutex); + list_add_tail(&u_ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); + break; + case CXGB4_STATE_START_RECOVERY: + case CXGB4_STATE_DOWN: + case CXGB4_STATE_DETACH: + pr_info("%s: Down\n", pci_name(u_ctx->lldi.pdev)); + list_del(&u_ctx->entry); + break; + default: + break; + } + + return 0; +} + +static int ch_ipsec_setauthsize(struct xfrm_state *x, + struct ipsec_sa_entry *sa_entry) +{ + int hmac_ctrl; + int authsize = x->aead->alg_icv_len / 8; + + sa_entry->authsize = authsize; + + switch (authsize) { + case ICV_8: + 
hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + break; + case ICV_12: + hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + break; + case ICV_16: + hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + break; + default: + return -EINVAL; + } + return hmac_ctrl; +} + +static int ch_ipsec_setkey(struct xfrm_state *x, + struct ipsec_sa_entry *sa_entry) +{ + int keylen = (x->aead->alg_key_len + 7) / 8; + unsigned char *key = x->aead->alg_key; + int ck_size, key_ctx_size = 0; + unsigned char ghash_h[AEAD_H_SIZE]; + struct crypto_aes_ctx aes; + int ret = 0; + + if (keylen > 3) { + keylen -= 4; /* nonce/salt is present in the last 4 bytes */ + memcpy(sa_entry->salt, key + keylen, 4); + } + + if (keylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + pr_err("GCM: Invalid key length %d\n", keylen); + ret = -EINVAL; + goto out; + } + + memcpy(sa_entry->key, key, keylen); + sa_entry->enckey_len = keylen; + key_ctx_size = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keylen, 16)) << 4) + + AEAD_H_SIZE; + + sa_entry->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, + CHCR_KEYCTX_MAC_KEY_SIZE_128, + 0, 0, + key_ctx_size >> 4); + + /* Calculate the H = CIPH(K, 0 repeated 16 times). + * It will go in key context + */ + ret = aes_expandkey(&aes, key, keylen); + if (ret) { + sa_entry->enckey_len = 0; + goto out; + } + memset(ghash_h, 0, AEAD_H_SIZE); + aes_encrypt(&aes, ghash_h, ghash_h); + memzero_explicit(&aes, sizeof(aes)); + + memcpy(sa_entry->key + (DIV_ROUND_UP(sa_entry->enckey_len, 16) * + 16), ghash_h, AEAD_H_SIZE); + sa_entry->kctx_len = ((DIV_ROUND_UP(sa_entry->enckey_len, 16)) << 4) + + AEAD_H_SIZE; +out: + return ret; +} + +/* + * ch_ipsec_xfrm_add_state + * returns 0 on success, negative error if failed to send message to FPGA + * positive error if FPGA returned a bad response + */ +static int ch_ipsec_xfrm_add_state(struct xfrm_state *x) +{ + struct ipsec_sa_entry *sa_entry; + int res = 0; + + if (x->props.aalgo != SADB_AALG_NONE) { + pr_debug("Cannot offload authenticated xfrm states\n"); + return -EINVAL; + } + if (x->props.calgo != SADB_X_CALG_NONE) { + pr_debug("Cannot offload compressed xfrm states\n"); + return -EINVAL; + } + if (x->props.family != AF_INET && + x->props.family != AF_INET6) { + pr_debug("Only IPv4/6 xfrm state offloaded\n"); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_TRANSPORT && + x->props.mode != XFRM_MODE_TUNNEL) { + pr_debug("Only transport and tunnel xfrm offload\n"); + return -EINVAL; + } + if (x->id.proto != IPPROTO_ESP) { + pr_debug("Only ESP xfrm state offloaded\n"); + return -EINVAL; + } + if (x->encap) { + pr_debug("Encapsulated xfrm state not offloaded\n"); + return -EINVAL; + } + if (!x->aead) { + pr_debug("Cannot offload xfrm states without aead\n"); + return -EINVAL; + } + if (x->aead->alg_icv_len != 128 && + x->aead->alg_icv_len != 96) { + pr_debug("Cannot offload xfrm states with AEAD ICV length other than 96b & 128b\n"); + return -EINVAL; + } + if ((x->aead->alg_key_len != 128 + 32) && + (x->aead->alg_key_len != 256 + 32)) { + pr_debug("cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EINVAL; + } + if (x->tfcpad) { + pr_debug("Cannot offload xfrm states with tfc padding\n"); + return -EINVAL; + } + if (!x->geniv) { + pr_debug("Cannot offload xfrm states without geniv\n"); + return -EINVAL; + } + if (strcmp(x->geniv, "seqiv")) { + 
pr_debug("Cannot offload xfrm states with geniv other than seqiv\n"); + return -EINVAL; + } + + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!sa_entry) { + res = -ENOMEM; + goto out; + } + + sa_entry->hmac_ctrl = ch_ipsec_setauthsize(x, sa_entry); + if (x->props.flags & XFRM_STATE_ESN) + sa_entry->esn = 1; + ch_ipsec_setkey(x, sa_entry); + x->xso.offload_handle = (unsigned long)sa_entry; + try_module_get(THIS_MODULE); +out: + return res; +} + +static void ch_ipsec_xfrm_del_state(struct xfrm_state *x) +{ + /* do nothing */ + if (!x->xso.offload_handle) + return; +} + +static void ch_ipsec_xfrm_free_state(struct xfrm_state *x) +{ + struct ipsec_sa_entry *sa_entry; + + if (!x->xso.offload_handle) + return; + + sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle; + kfree(sa_entry); + module_put(THIS_MODULE); +} + +static bool ch_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + if (x->props.family == AF_INET) { + /* Offload with IP options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not support yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + return true; +} + +static void ch_ipsec_advance_esn_state(struct xfrm_state *x) +{ + /* do nothing */ + if (!x->xso.offload_handle) + return; +} + +static int is_eth_imm(const struct sk_buff *skb, + struct ipsec_sa_entry *sa_entry) +{ + unsigned int kctx_len; + int hdrlen; + + kctx_len = sa_entry->kctx_len; + hdrlen = sizeof(struct fw_ulptx_wr) + + sizeof(struct chcr_ipsec_req) + kctx_len; + + hdrlen += sizeof(struct cpl_tx_pkt); + if (sa_entry->esn) + hdrlen += (DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), 16) + << 4); + if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen) + return hdrlen; + return 0; +} + +static unsigned int calc_tx_sec_flits(const struct sk_buff *skb, + struct ipsec_sa_entry *sa_entry, + bool *immediate) +{ + unsigned int kctx_len; + unsigned int flits; + int aadivlen; + int hdrlen; + + kctx_len = sa_entry->kctx_len; + hdrlen = is_eth_imm(skb, sa_entry); + aadivlen = sa_entry->esn ? DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), + 16) : 0; + aadivlen <<= 4; + + /* If the skb is small enough, we can pump it out as a work request + * with only immediate data. In that case we just have to have the + * TX Packet header plus the skb data in the Work Request. + */ + + if (hdrlen) { + *immediate = true; + return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64)); + } + + flits = sgl_len(skb_shinfo(skb)->nr_frags + 1); + + /* Otherwise, we're going to have to construct a Scatter gather list + * of the skb body and fragments. We also include the flits necessary + * for the TX Packet Work Request and CPL. We always have a firmware + * Write Header (incorporated as part of the cpl_tx_pkt_lso and + * cpl_tx_pkt structures), followed by either a TX Packet Write CPL + * message or, if we're doing a Large Send Offload, an LSO CPL message + * with an embedded TX Packet Write CPL message. 
+ */ + flits += (sizeof(struct fw_ulptx_wr) + + sizeof(struct chcr_ipsec_req) + + kctx_len + + sizeof(struct cpl_tx_pkt_core) + + aadivlen) / sizeof(__be64); + return flits; +} + +static void *copy_esn_pktxt(struct sk_buff *skb, + struct net_device *dev, + void *pos, + struct ipsec_sa_entry *sa_entry) +{ + struct chcr_ipsec_aadiv *aadiv; + struct ulptx_idata *sc_imm; + struct ip_esp_hdr *esphdr; + struct xfrm_offload *xo; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + __be64 seqno; + u32 qidx; + u32 seqlo; + u8 *iv; + int eoq; + int len; + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + + /* end of queue, reset pos to start of queue */ + eoq = (void *)q->q.stat - pos; + if (!eoq) + pos = q->q.desc; + + len = DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), 16) << 4; + memset(pos, 0, len); + aadiv = (struct chcr_ipsec_aadiv *)pos; + esphdr = (struct ip_esp_hdr *)skb_transport_header(skb); + iv = skb_transport_header(skb) + sizeof(struct ip_esp_hdr); + xo = xfrm_offload(skb); + + aadiv->spi = (esphdr->spi); + seqlo = ntohl(esphdr->seq_no); + seqno = cpu_to_be64(seqlo + ((u64)xo->seq.hi << 32)); + memcpy(aadiv->seq_no, &seqno, 8); + iv = skb_transport_header(skb) + sizeof(struct ip_esp_hdr); + memcpy(aadiv->iv, iv, 8); + + if (is_eth_imm(skb, sa_entry) && !skb_is_nonlinear(skb)) { + sc_imm = (struct ulptx_idata *)(pos + + (DIV_ROUND_UP(sizeof(struct chcr_ipsec_aadiv), + sizeof(__be64)) << 3)); + sc_imm->cmd_more = FILL_CMD_MORE(0); + sc_imm->len = cpu_to_be32(skb->len); + } + pos += len; + return pos; +} + +static void *copy_cpltx_pktxt(struct sk_buff *skb, + struct net_device *dev, + void *pos, + struct ipsec_sa_entry *sa_entry) +{ + struct cpl_tx_pkt_core *cpl; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + u32 ctrl0, qidx; + u64 cntrl = 0; + int left; + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + + left = (void *)q->q.stat - pos; + if (!left) + pos = q->q.desc; + + cpl = (struct cpl_tx_pkt_core *)pos; + + cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; + ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_INTF_V(pi->tx_chan) | + TXPKT_PF_V(adap->pf); + if (skb_vlan_tag_present(skb)) { + q->vlan_ins++; + cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb)); + } + + cpl->ctrl0 = htonl(ctrl0); + cpl->pack = htons(0); + cpl->len = htons(skb->len); + cpl->ctrl1 = cpu_to_be64(cntrl); + + pos += sizeof(struct cpl_tx_pkt_core); + /* Copy ESN info for HW */ + if (sa_entry->esn) + pos = copy_esn_pktxt(skb, dev, pos, sa_entry); + return pos; +} + +static void *copy_key_cpltx_pktxt(struct sk_buff *skb, + struct net_device *dev, + void *pos, + struct ipsec_sa_entry *sa_entry) +{ + struct _key_ctx *key_ctx; + int left, eoq, key_len; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + unsigned int qidx; + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + key_len = sa_entry->kctx_len; + + /* end of queue, reset pos to start of queue */ + eoq = (void *)q->q.stat - pos; + left = eoq; + if (!eoq) { + pos = q->q.desc; + left = 64 * q->q.size; + } + + /* Copy the Key context header */ + key_ctx = (struct _key_ctx *)pos; + key_ctx->ctx_hdr = sa_entry->key_ctx_hdr; + memcpy(key_ctx->salt, sa_entry->salt, MAX_SALT); + pos += sizeof(struct _key_ctx); + left -= sizeof(struct _key_ctx); + + if 
(likely(key_len <= left)) { + memcpy(key_ctx->key, sa_entry->key, key_len); + pos += key_len; + } else { + memcpy(pos, sa_entry->key, left); + memcpy(q->q.desc, sa_entry->key + left, + key_len - left); + pos = (u8 *)q->q.desc + (key_len - left); + } + /* Copy CPL TX PKT XT */ + pos = copy_cpltx_pktxt(skb, dev, pos, sa_entry); + + return pos; +} + +static void *ch_ipsec_crypto_wreq(struct sk_buff *skb, + struct net_device *dev, + void *pos, + int credits, + struct ipsec_sa_entry *sa_entry) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + unsigned int ivsize = GCM_ESP_IV_SIZE; + struct chcr_ipsec_wr *wr; + bool immediate = false; + u16 immdatalen = 0; + unsigned int flits; + u32 ivinoffset; + u32 aadstart; + u32 aadstop; + u32 ciphstart; + u16 sc_more = 0; + u32 ivdrop = 0; + u32 esnlen = 0; + u32 wr_mid; + u16 ndesc; + int qidx = skb_get_queue_mapping(skb); + struct sge_eth_txq *q = &adap->sge.ethtxq[qidx + pi->first_qset]; + unsigned int kctx_len = sa_entry->kctx_len; + int qid = q->q.cntxt_id; + + atomic_inc(&adap->ch_ipsec_stats.ipsec_cnt); + + flits = calc_tx_sec_flits(skb, sa_entry, &immediate); + ndesc = DIV_ROUND_UP(flits, 2); + if (sa_entry->esn) + ivdrop = 1; + + if (immediate) + immdatalen = skb->len; + + if (sa_entry->esn) { + esnlen = sizeof(struct chcr_ipsec_aadiv); + if (!skb_is_nonlinear(skb)) + sc_more = 1; + } + + /* WR Header */ + wr = (struct chcr_ipsec_wr *)pos; + wr->wreq.op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr_mid = FW_CRYPTO_LOOKASIDE_WR_LEN16_V(ndesc); + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + netif_tx_stop_queue(q->txq); + q->q.stops++; + if (!q->dbqt) + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + wr_mid |= FW_ULPTX_WR_DATA_F; + wr->wreq.flowid_len16 = htonl(wr_mid); + + /* ULPTX */ + wr->req.ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(pi->port_id, qid); + wr->req.ulptx.len = htonl(ndesc - 1); + + /* Sub-command */ + wr->req.sc_imm.cmd_more = FILL_CMD_MORE(!immdatalen || sc_more); + wr->req.sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + + sizeof(wr->req.key_ctx) + + kctx_len + + sizeof(struct cpl_tx_pkt_core) + + esnlen + + (esnlen ? 0 : immdatalen)); + + /* CPL_SEC_PDU */ + ivinoffset = sa_entry->esn ? (ESN_IV_INSERT_OFFSET + 1) : + (skb_transport_offset(skb) + + sizeof(struct ip_esp_hdr) + 1); + wr->req.sec_cpl.op_ivinsrtofst = htonl( + CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | + CPL_TX_SEC_PDU_CPLLEN_V(2) | + CPL_TX_SEC_PDU_PLACEHOLDER_V(1) | + CPL_TX_SEC_PDU_IVINSRTOFST_V( + ivinoffset)); + + wr->req.sec_cpl.pldlen = htonl(skb->len + esnlen); + aadstart = sa_entry->esn ? 1 : (skb_transport_offset(skb) + 1); + aadstop = sa_entry->esn ? ESN_IV_INSERT_OFFSET : + (skb_transport_offset(skb) + + sizeof(struct ip_esp_hdr)); + ciphstart = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr) + + GCM_ESP_IV_SIZE + 1; + ciphstart += sa_entry->esn ? 
esnlen : 0; + + wr->req.sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( + aadstart, + aadstop, + ciphstart, 0); + + wr->req.sec_cpl.cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, ciphstart, + sa_entry->authsize, + sa_entry->authsize); + wr->req.sec_cpl.seqno_numivs = + FILL_SEC_CPL_SCMD0_SEQNO(CHCR_ENCRYPT_OP, 1, + CHCR_SCMD_CIPHER_MODE_AES_GCM, + CHCR_SCMD_AUTH_MODE_GHASH, + sa_entry->hmac_ctrl, + ivsize >> 1); + wr->req.sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, + 0, ivdrop, 0); + + pos += sizeof(struct fw_ulptx_wr) + + sizeof(struct ulp_txpkt) + + sizeof(struct ulptx_idata) + + sizeof(struct cpl_tx_sec_pdu); + + pos = copy_key_cpltx_pktxt(skb, dev, pos, sa_entry); + + return pos; +} + +/** + * flits_to_desc - returns the num of Tx descriptors for the given flits + * @n: the number of flits + * + * Returns the number of Tx descriptors needed for the supplied number + * of flits. + */ +static unsigned int flits_to_desc(unsigned int n) +{ + WARN_ON(n > SGE_MAX_WR_LEN / 8); + return DIV_ROUND_UP(n, 8); +} + +static unsigned int txq_avail(const struct sge_txq *q) +{ + return q->size - 1 - q->in_use; +} + +static void eth_txq_stop(struct sge_eth_txq *q) +{ + netif_tx_stop_queue(q->txq); + q->q.stops++; +} + +static void txq_advance(struct sge_txq *q, unsigned int n) +{ + q->in_use += n; + q->pidx += n; + if (q->pidx >= q->size) + q->pidx -= q->size; +} + +/* + * ch_ipsec_xmit called from ULD Tx handler + */ +int ch_ipsec_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct xfrm_state *x = xfrm_input_state(skb); + unsigned int last_desc, ndesc, flits = 0; + struct ipsec_sa_entry *sa_entry; + u64 *pos, *end, *before, *sgl; + struct tx_sw_desc *sgl_sdesc; + int qidx, left, credits; + bool immediate = false; + struct sge_eth_txq *q; + struct adapter *adap; + struct port_info *pi; + struct sec_path *sp; + + if (!x->xso.offload_handle) + return NETDEV_TX_BUSY; + + sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle; + + sp = skb_sec_path(skb); + if (sp->len != 1) { +out_free: dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + pi = netdev_priv(dev); + adap = pi->adapter; + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + pi->first_qset]; + + cxgb4_reclaim_completed_tx(adap, &q->q, true); + + flits = calc_tx_sec_flits(skb, sa_entry, &immediate); + ndesc = flits_to_desc(flits); + credits = txq_avail(&q->q) - ndesc; + + if (unlikely(credits < 0)) { + eth_txq_stop(q); + dev_err(adap->pdev_dev, + "%s: Tx ring %u full while queue awake! 
cred:%d %d %d flits:%d\n", + dev->name, qidx, credits, ndesc, txq_avail(&q->q), + flits); + return NETDEV_TX_BUSY; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (!immediate && + unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + goto out_free; + } + + pos = (u64 *)&q->q.desc[q->q.pidx]; + before = (u64 *)pos; + end = (u64 *)pos + flits; + /* Setup IPSec CPL */ + pos = (void *)ch_ipsec_crypto_wreq(skb, dev, (void *)pos, + credits, sa_entry); + if (before > (u64 *)pos) { + left = (u8 *)end - (u8 *)q->q.stat; + end = (void *)q->q.desc + left; + } + if (pos == (u64 *)q->q.stat) { + left = (u8 *)end - (u8 *)q->q.stat; + end = (void *)q->q.desc + left; + pos = (void *)q->q.desc; + } + + sgl = (void *)pos; + if (immediate) { + cxgb4_inline_tx_skb(skb, &q->q, sgl); + dev_consume_skb_any(skb); + } else { + cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, + 0, sgl_sdesc->addr); + skb_orphan(skb); + sgl_sdesc->skb = skb; + } + txq_advance(&q->q, ndesc); + + cxgb4_ring_tx_db(adap, &q->q, ndesc); + return NETDEV_TX_OK; +} + +static int __init ch_ipsec_init(void) +{ + cxgb4_register_uld(CXGB4_ULD_IPSEC, &ch_ipsec_uld_info); + + return 0; +} + +static void __exit ch_ipsec_exit(void) +{ + struct ipsec_uld_ctx *u_ctx, *tmp; + struct adapter *adap; + + mutex_lock(&dev_mutex); + list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) { + adap = pci_get_drvdata(u_ctx->lldi.pdev); + atomic_set(&adap->ch_ipsec_stats.ipsec_cnt, 0); + list_del(&u_ctx->entry); + kfree(u_ctx); + } + mutex_unlock(&dev_mutex); + cxgb4_unregister_uld(CXGB4_ULD_IPSEC); +} + +module_init(ch_ipsec_init); +module_exit(ch_ipsec_exit); + +MODULE_DESCRIPTION("Crypto IPSEC for Chelsio Terminator cards."); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(CHIPSEC_DRV_VERSION); + diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h new file mode 100644 index 000000000000..1d110d2edd64 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2018 Chelsio Communications, Inc. 
*/ + +#ifndef __CHCR_IPSEC_H__ +#define __CHCR_IPSEC_H__ + +#include <crypto/algapi.h> +#include "t4_hw.h" +#include "cxgb4.h" +#include "t4_msg.h" +#include "cxgb4_uld.h" + +#include "chcr_core.h" +#include "chcr_algo.h" +#include "chcr_crypto.h" + +#define CHIPSEC_DRV_MODULE_NAME "ch_ipsec" +#define CHIPSEC_DRV_VERSION "1.0.0.0-ko" +#define CHIPSEC_DRV_DESC "Chelsio T6 Crypto Ipsec offload Driver" + +struct ipsec_uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; +}; + +struct chcr_ipsec_req { + struct ulp_txpkt ulptx; + struct ulptx_idata sc_imm; + struct cpl_tx_sec_pdu sec_cpl; + struct _key_ctx key_ctx; +}; + +struct chcr_ipsec_wr { + struct fw_ulptx_wr wreq; + struct chcr_ipsec_req req; +}; + +#define ESN_IV_INSERT_OFFSET 12 +struct chcr_ipsec_aadiv { + __be32 spi; + u8 seq_no[8]; + u8 iv[8]; +}; + +struct ipsec_sa_entry { + int hmac_ctrl; + u16 esn; + u16 resv; + unsigned int enckey_len; + unsigned int kctx_len; + unsigned int authsize; + __be32 key_ctx_hdr; + char salt[MAX_SALT]; + char key[2 * AES_MAX_KEY_SIZE]; +}; + +#endif /* __CHCR_IPSEC_H__ */ + diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/Makefile new file mode 100644 index 000000000000..5e7d161c3199 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 + +obj-$(CONFIG_CHELSIO_TLS_DEVICE) += ch_ktls.o +ch_ktls-objs := chcr_ktls.o diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_common.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_common.h new file mode 100644 index 000000000000..38319f4c3121 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_common.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. 
*/ + +#ifndef __CHCR_COMMON_H__ +#define __CHCR_COMMON_H__ + +#include "cxgb4.h" + +#define CHCR_MAX_SALT 4 +#define CHCR_KEYCTX_MAC_KEY_SIZE_128 0 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_128 0 +#define CHCR_SCMD_CIPHER_MODE_AES_GCM 2 +#define CHCR_SCMD_CIPHER_MODE_AES_CTR 3 +#define CHCR_CPL_TX_SEC_PDU_LEN_64BIT 2 +#define CHCR_SCMD_SEQ_NO_CTRL_64BIT 3 +#define CHCR_SCMD_PROTO_VERSION_TLS 0 +#define CHCR_SCMD_PROTO_VERSION_GENERIC 4 +#define CHCR_SCMD_AUTH_MODE_GHASH 4 +#define AES_BLOCK_LEN 16 + +struct ktls_key_ctx { + __be32 ctx_hdr; + u8 salt[CHCR_MAX_SALT]; + __be64 iv_to_auth; + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE + + TLS_CIPHER_AES_GCM_256_TAG_SIZE]; +}; + +/* Crypto key context */ +#define KEY_CONTEXT_CTX_LEN_S 24 +#define KEY_CONTEXT_CTX_LEN_V(x) ((x) << KEY_CONTEXT_CTX_LEN_S) + +#define KEY_CONTEXT_SALT_PRESENT_S 10 +#define KEY_CONTEXT_SALT_PRESENT_V(x) ((x) << KEY_CONTEXT_SALT_PRESENT_S) +#define KEY_CONTEXT_SALT_PRESENT_F KEY_CONTEXT_SALT_PRESENT_V(1U) + +#define KEY_CONTEXT_VALID_S 0 +#define KEY_CONTEXT_VALID_V(x) ((x) << KEY_CONTEXT_VALID_S) +#define KEY_CONTEXT_VALID_F KEY_CONTEXT_VALID_V(1U) + +#define KEY_CONTEXT_CK_SIZE_S 6 +#define KEY_CONTEXT_CK_SIZE_V(x) ((x) << KEY_CONTEXT_CK_SIZE_S) + +#define KEY_CONTEXT_MK_SIZE_S 2 +#define KEY_CONTEXT_MK_SIZE_V(x) ((x) << KEY_CONTEXT_MK_SIZE_S) + +#define KEY_CONTEXT_OPAD_PRESENT_S 11 +#define KEY_CONTEXT_OPAD_PRESENT_V(x) ((x) << KEY_CONTEXT_OPAD_PRESENT_S) +#define KEY_CONTEXT_OPAD_PRESENT_F KEY_CONTEXT_OPAD_PRESENT_V(1U) + +#define FILL_KEY_CTX_HDR(ck_size, mk_size, ctx_len) \ + htonl(KEY_CONTEXT_MK_SIZE_V(mk_size) | \ + KEY_CONTEXT_CK_SIZE_V(ck_size) | \ + KEY_CONTEXT_VALID_F | \ + KEY_CONTEXT_SALT_PRESENT_F | \ + KEY_CONTEXT_CTX_LEN_V((ctx_len))) + +static inline void *chcr_copy_to_txd(const void *src, const struct sge_txq *q, + void *pos, int length) +{ + int left = (void *)q->stat - pos; + u64 *p; + + if (likely(length <= left)) { + memcpy(pos, src, length); + pos += length; + } else { + memcpy(pos, src, left); + memcpy(q->desc, src + left, length - left); + pos = (void *)q->desc + (length - left); + } + /* 0-pad to multiple of 16 */ + p = PTR_ALIGN(pos, 8); + if ((uintptr_t)p & 8) { + *p = 0; + return p + 1; + } + return p; +} + +static inline unsigned int chcr_txq_avail(const struct sge_txq *q) +{ + return q->size - 1 - q->in_use; +} + +static inline void chcr_txq_advance(struct sge_txq *q, unsigned int n) +{ + q->in_use += n; + q->pidx += n; + if (q->pidx >= q->size) + q->pidx -= q->size; +} + +static inline void chcr_eth_txq_stop(struct sge_eth_txq *q) +{ + netif_tx_stop_queue(q->txq); + q->q.stops++; +} + +static inline unsigned int chcr_sgl_len(unsigned int n) +{ + n--; + return (3 * n) / 2 + (n & 1) + 2; +} + +static inline unsigned int chcr_flits_to_desc(unsigned int n) +{ + WARN_ON(n > SGE_MAX_WR_LEN / 8); + return DIV_ROUND_UP(n, 8); +} +#endif /* __CHCR_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c new file mode 100644 index 000000000000..da9973b711f4 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -0,0 +1,2277 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. 
*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/skbuff.h> +#include <linux/module.h> +#include <linux/highmem.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <linux/netdevice.h> +#include <crypto/aes.h> +#include "chcr_ktls.h" + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); + +/* chcr_get_nfrags_to_send: get the remaining nfrags after start offset + * @skb: skb + * @start: start offset. + * @len: how much data to send after @start + */ +static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len) +{ + struct skb_shared_info *si = skb_shinfo(skb); + u32 frag_size, skb_linear_data_len = skb_headlen(skb); + u8 nfrags = 0, frag_idx = 0; + skb_frag_t *frag; + + /* if it's a linear skb then return 1 */ + if (!skb_is_nonlinear(skb)) + return 1; + + if (unlikely(start < skb_linear_data_len)) { + frag_size = min(len, skb_linear_data_len - start); + } else { + start -= skb_linear_data_len; + + frag = &si->frags[frag_idx]; + frag_size = skb_frag_size(frag); + while (start >= frag_size) { + start -= frag_size; + frag_idx++; + frag = &si->frags[frag_idx]; + frag_size = skb_frag_size(frag); + } + frag_size = min(len, skb_frag_size(frag) - start); + } + len -= frag_size; + nfrags++; + + while (len) { + frag_size = min(len, skb_frag_size(&si->frags[frag_idx])); + len -= frag_size; + nfrags++; + frag_idx++; + } + return nfrags; +} + +static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info); +static void clear_conn_resources(struct chcr_ktls_info *tx_info); +/* + * chcr_ktls_save_keys: calculate and save crypto keys. + * @tx_info - driver specific tls info. + * @crypto_info - tls crypto information. + * @direction - TX/RX direction. + * return - SUCCESS/FAILURE. + */ +static int chcr_ktls_save_keys(struct chcr_ktls_info *tx_info, + struct tls_crypto_info *crypto_info, + enum tls_offload_ctx_dir direction) +{ + int ck_size, key_ctx_size, mac_key_size, keylen, ghash_size, ret; + unsigned char ghash_h[TLS_CIPHER_AES_GCM_256_TAG_SIZE]; + struct tls12_crypto_info_aes_gcm_128 *info_128_gcm; + struct ktls_key_ctx *kctx = &tx_info->key_ctx; + struct crypto_aes_ctx aes_ctx; + unsigned char *key, *salt; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + info_128_gcm = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + keylen = TLS_CIPHER_AES_GCM_128_KEY_SIZE; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + tx_info->salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE; + mac_key_size = CHCR_KEYCTX_MAC_KEY_SIZE_128; + tx_info->iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + tx_info->iv = be64_to_cpu(*(__be64 *)info_128_gcm->iv); + + ghash_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; + key = info_128_gcm->key; + salt = info_128_gcm->salt; + tx_info->record_no = *(u64 *)info_128_gcm->rec_seq; + + /* The SCMD fields used when encrypting a full TLS + * record. It's a one-time calculation for the + * lifetime of the connection. + */ + tx_info->scmd0_seqno_numivs = + SCMD_SEQ_NO_CTRL_V(CHCR_SCMD_SEQ_NO_CTRL_64BIT) | + SCMD_CIPH_AUTH_SEQ_CTRL_F | + SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_TLS) | + SCMD_CIPH_MODE_V(CHCR_SCMD_CIPHER_MODE_AES_GCM) | + SCMD_AUTH_MODE_V(CHCR_SCMD_AUTH_MODE_GHASH) | + SCMD_IV_SIZE_V(TLS_CIPHER_AES_GCM_128_IV_SIZE >> 1) | + SCMD_NUM_IVS_V(1); + + /* keys will be sent inline. */ + tx_info->scmd0_ivgen_hdrlen = SCMD_KEY_CTX_INLINE_F; + + /* The SCMD fields used when encrypting a partial TLS + * record (no trailer and possibly a truncated payload).
+ */ + tx_info->scmd0_short_seqno_numivs = + SCMD_CIPH_AUTH_SEQ_CTRL_F | + SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_GENERIC) | + SCMD_CIPH_MODE_V(CHCR_SCMD_CIPHER_MODE_AES_CTR) | + SCMD_IV_SIZE_V(AES_BLOCK_LEN >> 1); + + tx_info->scmd0_short_ivgen_hdrlen = + tx_info->scmd0_ivgen_hdrlen | SCMD_AADIVDROP_F; + + break; + + default: + pr_err("GCM: cipher type 0x%x not supported\n", + crypto_info->cipher_type); + ret = -EINVAL; + goto out; + } + + key_ctx_size = CHCR_KTLS_KEY_CTX_LEN + + roundup(keylen, 16) + ghash_size; + /* Calculate the H = CIPH(K, 0 repeated 16 times). + * It will go in key context + */ + + ret = aes_expandkey(&aes_ctx, key, keylen); + if (ret) + goto out; + + memset(ghash_h, 0, ghash_size); + aes_encrypt(&aes_ctx, ghash_h, ghash_h); + memzero_explicit(&aes_ctx, sizeof(aes_ctx)); + + /* fill the Key context */ + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + kctx->ctx_hdr = FILL_KEY_CTX_HDR(ck_size, + mac_key_size, + key_ctx_size >> 4); + } else { + ret = -EINVAL; + goto out; + } + + memcpy(kctx->salt, salt, tx_info->salt_size); + memcpy(kctx->key, key, keylen); + memcpy(kctx->key + keylen, ghash_h, ghash_size); + tx_info->key_ctx_len = key_ctx_size; + +out: + return ret; +} + +/* + * chcr_ktls_act_open_req: creates TCB entry for ipv4 connection. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * @atid - connection active tid. + * return - send success/failure. + */ +static int chcr_ktls_act_open_req(struct sock *sk, + struct chcr_ktls_info *tx_info, + int atid) +{ + struct inet_sock *inet = inet_sk(sk); + struct cpl_t6_act_open_req *cpl6; + struct cpl_act_open_req *cpl; + struct sk_buff *skb; + unsigned int len; + int qid_atid; + u64 options; + + len = sizeof(*cpl6); + skb = alloc_skb(len, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + /* mark it a control pkt */ + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + + cpl6 = __skb_put_zero(skb, len); + cpl = (struct cpl_act_open_req *)cpl6; + INIT_TP_WR(cpl6, 0); + qid_atid = TID_QID_V(tx_info->rx_qid) | + TID_TID_V(atid); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); + cpl->local_port = inet->inet_sport; + cpl->peer_port = inet->inet_dport; + cpl->local_ip = inet->inet_rcv_saddr; + cpl->peer_ip = inet->inet_daddr; + + /* fill first 64 bit option field. */ + options = TCAM_BYPASS_F | ULP_MODE_V(ULP_MODE_NONE) | NON_OFFLOAD_F | + SMAC_SEL_V(tx_info->smt_idx) | TX_CHAN_V(tx_info->tx_chan); + cpl->opt0 = cpu_to_be64(options); + + /* next 64 bit option field. */ + options = + TX_QUEUE_V(tx_info->adap->params.tp.tx_modq[tx_info->tx_chan]); + cpl->opt2 = htonl(options); + + return cxgb4_l2t_send(tx_info->netdev, skb, tx_info->l2te); +} + +#if IS_ENABLED(CONFIG_IPV6) +/* + * chcr_ktls_act_open_req6: creates TCB entry for ipv6 connection. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * @atid - connection active tid. + * return - send success/failure. 
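As an aside on the H = CIPH(K, 0 repeated 16 times) step in chcr_ktls_save_keys() above (ch_ipsec_setkey() earlier in this patch does the same): GCM's GHASH subkey is just the block cipher applied to an all-zero block, and both drivers derive it with the library AES interface so that H can be carried inline in the key context. A self-contained sketch of only that derivation (example_derive_ghash_h() is hypothetical):

	#include <crypto/aes.h>		/* aes_expandkey(), aes_encrypt() */
	#include <linux/string.h>	/* memzero_explicit() */

	/* Derive H = AES_K(0^16); @ghash_h receives the 16-byte subkey. */
	static int example_derive_ghash_h(const u8 *key, unsigned int keylen,
					  u8 ghash_h[AES_BLOCK_SIZE])
	{
		struct crypto_aes_ctx aes;
		int ret;

		ret = aes_expandkey(&aes, key, keylen);
		if (ret)
			return ret;

		memset(ghash_h, 0, AES_BLOCK_SIZE);
		aes_encrypt(&aes, ghash_h, ghash_h);	/* encrypt zeros in place */
		memzero_explicit(&aes, sizeof(aes));	/* scrub the round keys */
		return 0;
	}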
+ */ +static int chcr_ktls_act_open_req6(struct sock *sk, + struct chcr_ktls_info *tx_info, + int atid) +{ + struct inet_sock *inet = inet_sk(sk); + struct cpl_t6_act_open_req6 *cpl6; + struct cpl_act_open_req6 *cpl; + struct sk_buff *skb; + unsigned int len; + int qid_atid; + u64 options; + + len = sizeof(*cpl6); + skb = alloc_skb(len, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + /* mark it a control pkt */ + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + + cpl6 = __skb_put_zero(skb, len); + cpl = (struct cpl_act_open_req6 *)cpl6; + INIT_TP_WR(cpl6, 0); + qid_atid = TID_QID_V(tx_info->rx_qid) | TID_TID_V(atid); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); + cpl->local_port = inet->inet_sport; + cpl->peer_port = inet->inet_dport; + cpl->local_ip_hi = *(__be64 *)&sk->sk_v6_rcv_saddr.in6_u.u6_addr8[0]; + cpl->local_ip_lo = *(__be64 *)&sk->sk_v6_rcv_saddr.in6_u.u6_addr8[8]; + cpl->peer_ip_hi = *(__be64 *)&sk->sk_v6_daddr.in6_u.u6_addr8[0]; + cpl->peer_ip_lo = *(__be64 *)&sk->sk_v6_daddr.in6_u.u6_addr8[8]; + + /* first 64 bit option field. */ + options = TCAM_BYPASS_F | ULP_MODE_V(ULP_MODE_NONE) | NON_OFFLOAD_F | + SMAC_SEL_V(tx_info->smt_idx) | TX_CHAN_V(tx_info->tx_chan); + cpl->opt0 = cpu_to_be64(options); + /* next 64 bit option field. */ + options = + TX_QUEUE_V(tx_info->adap->params.tp.tx_modq[tx_info->tx_chan]); + cpl->opt2 = htonl(options); + + return cxgb4_l2t_send(tx_info->netdev, skb, tx_info->l2te); +} +#endif /* #if IS_ENABLED(CONFIG_IPV6) */ + +/* + * chcr_setup_connection: create a TCB entry so that TP will form tcp packets. + * @sk - tcp socket. + * @tx_info - driver specific tls info. + * return: NET_TX_OK/NET_XMIT_DROP + */ +static int chcr_setup_connection(struct sock *sk, + struct chcr_ktls_info *tx_info) +{ + struct tid_info *t = &tx_info->adap->tids; + int atid, ret = 0; + + atid = cxgb4_alloc_atid(t, tx_info); + if (atid == -1) + return -EINVAL; + + tx_info->atid = atid; + + if (tx_info->ip_family == AF_INET) { + ret = chcr_ktls_act_open_req(sk, tx_info, atid); +#if IS_ENABLED(CONFIG_IPV6) + } else { + ret = cxgb4_clip_get(tx_info->netdev, (const u32 *) + &sk->sk_v6_rcv_saddr, + 1); + if (ret) + return ret; + ret = chcr_ktls_act_open_req6(sk, tx_info, atid); +#endif + } + + /* if return type is NET_XMIT_CN, msg will be sent but delayed, mark ret + * success, if any other return type clear atid and return that failure. + */ + if (ret) { + if (ret == NET_XMIT_CN) { + ret = 0; + } else { +#if IS_ENABLED(CONFIG_IPV6) + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(tx_info->netdev, + (const u32 *) + &sk->sk_v6_rcv_saddr, + 1); +#endif + cxgb4_free_atid(t, atid); + } + } + + return ret; +} + +/* + * chcr_set_tcb_field: update tcb fields. + * @tx_info - driver specific tls info. + * @word - TCB word. + * @mask - TCB word related mask. + * @val - TCB word related value. + * @no_reply - set 1 if not looking for TP response. 
*/ +static int chcr_set_tcb_field(struct chcr_ktls_info *tx_info, u16 word, + u64 mask, u64 val, int no_reply) +{ + struct cpl_set_tcb_field *req; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + req = (struct cpl_set_tcb_field *)__skb_put_zero(skb, sizeof(*req)); + INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, tx_info->tid); + req->reply_ctrl = htons(QUEUENO_V(tx_info->rx_qid) | + NO_REPLY_V(no_reply)); + req->word_cookie = htons(TCB_WORD_V(word)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + + set_wr_txq(skb, CPL_PRIORITY_CONTROL, tx_info->port_id); + return cxgb4_ofld_send(tx_info->netdev, skb); +} + +/* + * chcr_ktls_dev_del: call back for tls_dev_del. + * Remove the tid and l2t entry and close the connection. + * This is done on a per-connection basis. + * @netdev - net device. + * @tls_ctx - tls context. + * @direction - TX/RX crypto direction + */ +static void chcr_ktls_dev_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct chcr_ktls_ofld_ctx_tx *tx_ctx = + chcr_get_ktls_tx_context(tls_ctx); + struct chcr_ktls_info *tx_info = tx_ctx->chcr_info; + struct ch_ktls_port_stats_debug *port_stats; + struct chcr_ktls_uld_ctx *u_ctx; + + if (!tx_info) + return; + + u_ctx = tx_info->adap->uld[CXGB4_ULD_KTLS].handle; + if (u_ctx && u_ctx->detach) + return; + /* clear l2t entry */ + if (tx_info->l2te) + cxgb4_l2t_release(tx_info->l2te); + +#if IS_ENABLED(CONFIG_IPV6) + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(netdev, (const u32 *) + &tx_info->sk->sk_v6_rcv_saddr, + 1); +#endif + + /* clear tid */ + if (tx_info->tid != -1) { + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tx_info->tid, tx_info->ip_family); + + xa_erase(&u_ctx->tid_list, tx_info->tid); + } + + port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; + atomic64_inc(&port_stats->ktls_tx_connection_close); + kvfree(tx_info); + tx_ctx->chcr_info = NULL; + /* release module refcount */ + module_put(THIS_MODULE); +} + +/* + * chcr_ktls_dev_add: call back for tls_dev_add. + * Create a tcb entry for TP. Also add an l2t entry for the connection, and + * generate and save the crypto keys locally. + * @netdev - net device. + * @sk - tcp socket. + * @direction - TX/RX crypto direction + * return: SUCCESS/FAILURE.
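As an aside on the lifetime handling in these callbacks: every offloaded connection pins the module (try_module_get() in chcr_ktls_dev_add() below, module_put() in chcr_ktls_dev_del() above), so ch_ktls cannot be unloaded while hardware state still references driver memory. The bare pattern, sketched with hypothetical names:

	#include <linux/module.h>

	static int example_conn_open(void)
	{
		/* Pin the module for the lifetime of this connection. */
		if (!try_module_get(THIS_MODULE))
			return -ENODEV;	/* module is already on its way out */

		/* ... allocate and register per-connection state ... */
		return 0;
	}

	static void example_conn_close(void)
	{
		/* ... tear down per-connection state ... */
		module_put(THIS_MODULE);	/* drop the reference from open */
	}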
*/ +static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct ch_ktls_port_stats_debug *port_stats; + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct chcr_ktls_uld_ctx *u_ctx; + struct chcr_ktls_info *tx_info; + struct dst_entry *dst; + struct adapter *adap; + struct port_info *pi; + struct neighbour *n; + u8 daaddr[16]; + int ret = -1; + + tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + + pi = netdev_priv(netdev); + adap = pi->adapter; + port_stats = &adap->ch_ktls_stats.ktls_port[pi->port_id]; + atomic64_inc(&port_stats->ktls_tx_connection_open); + u_ctx = adap->uld[CXGB4_ULD_KTLS].handle; + + if (direction == TLS_OFFLOAD_CTX_DIR_RX) { + pr_err("not expecting for RX direction\n"); + goto out; + } + + if (tx_ctx->chcr_info) + goto out; + + if (u_ctx && u_ctx->detach) + goto out; + + tx_info = kvzalloc(sizeof(*tx_info), GFP_KERNEL); + if (!tx_info) + goto out; + + tx_info->sk = sk; + spin_lock_init(&tx_info->lock); + /* initialize tid and atid to -1, 0 is also a valid id. */ + tx_info->tid = -1; + tx_info->atid = -1; + + tx_info->adap = adap; + tx_info->netdev = netdev; + tx_info->first_qset = pi->first_qset; + tx_info->tx_chan = pi->tx_chan; + tx_info->smt_idx = pi->smt_idx; + tx_info->port_id = pi->port_id; + tx_info->prev_ack = 0; + tx_info->prev_win = 0; + + tx_info->rx_qid = chcr_get_first_rx_qid(adap); + if (unlikely(tx_info->rx_qid < 0)) + goto free_tx_info; + + tx_info->prev_seq = start_offload_tcp_sn; + tx_info->tcp_start_seq_number = start_offload_tcp_sn; + + /* save crypto keys */ + ret = chcr_ktls_save_keys(tx_info, crypto_info, direction); + if (ret < 0) + goto free_tx_info; + + /* get peer ip */ + if (sk->sk_family == AF_INET) { + memcpy(daaddr, &sk->sk_daddr, 4); + tx_info->ip_family = AF_INET; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (!ipv6_only_sock(sk) && + ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { + memcpy(daaddr, &sk->sk_daddr, 4); + tx_info->ip_family = AF_INET; + } else { + memcpy(daaddr, sk->sk_v6_daddr.in6_u.u6_addr8, 16); + tx_info->ip_family = AF_INET6; + } +#endif + } + + /* get the l2t index */ + dst = sk_dst_get(sk); + if (!dst) { + pr_err("DST entry not found\n"); + goto free_tx_info; + } + n = dst_neigh_lookup(dst, daaddr); + if (!n || !n->dev) { + pr_err("neighbour not found\n"); + dst_release(dst); + goto free_tx_info; + } + tx_info->l2te = cxgb4_l2t_get(adap->l2t, n, n->dev, 0); + + neigh_release(n); + dst_release(dst); + + if (!tx_info->l2te) { + pr_err("l2t entry not found\n"); + goto free_tx_info; + } + + /* Driver shouldn't be removed while any connection exists */ + if (!try_module_get(THIS_MODULE)) + goto free_l2t; + + init_completion(&tx_info->completion); + /* create a filter and call cxgb4_l2t_send to send the packet out, which + * will take care of updating l2t entry in hw if not already done. + */ + tx_info->open_state = CH_KTLS_OPEN_PENDING; + + if (chcr_setup_connection(sk, tx_info)) + goto put_module; + + /* Wait for reply */ + wait_for_completion_timeout(&tx_info->completion, 30 * HZ); + spin_lock_bh(&tx_info->lock); + if (tx_info->open_state) { + /* need to wait for hw response, can't free tx_info yet.
*/ + if (tx_info->open_state == CH_KTLS_OPEN_PENDING) + tx_info->pending_close = true; + else + spin_unlock_bh(&tx_info->lock); + /* if in pending close, free the lock after the cleanup */ + goto put_module; + } + spin_unlock_bh(&tx_info->lock); + + /* initialize tcb */ + reinit_completion(&tx_info->completion); + /* mark it pending for hw response */ + tx_info->open_state = CH_KTLS_OPEN_PENDING; + + if (chcr_init_tcb_fields(tx_info)) + goto free_tid; + + /* Wait for reply */ + wait_for_completion_timeout(&tx_info->completion, 30 * HZ); + spin_lock_bh(&tx_info->lock); + if (tx_info->open_state) { + /* need to wait for hw response, can't free tx_info yet. */ + tx_info->pending_close = true; + /* free the lock after cleanup */ + goto free_tid; + } + spin_unlock_bh(&tx_info->lock); + + if (!cxgb4_check_l2t_valid(tx_info->l2te)) + goto free_tid; + + atomic64_inc(&port_stats->ktls_tx_ctx); + tx_ctx->chcr_info = tx_info; + + return 0; + +free_tid: +#if IS_ENABLED(CONFIG_IPV6) + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(netdev, (const u32 *) + &sk->sk_v6_rcv_saddr, + 1); +#endif + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tx_info->tid, tx_info->ip_family); + + xa_erase(&u_ctx->tid_list, tx_info->tid); + +put_module: + /* release module refcount */ + module_put(THIS_MODULE); +free_l2t: + cxgb4_l2t_release(tx_info->l2te); +free_tx_info: + if (tx_info->pending_close) + spin_unlock_bh(&tx_info->lock); + else + kvfree(tx_info); +out: + atomic64_inc(&port_stats->ktls_tx_connection_fail); + return -1; +} + +/* + * chcr_init_tcb_fields: Initialize tcb fields to handle TCP seq number + * handling. + * @tx_info - driver specific tls info. + * return: NET_TX_OK/NET_XMIT_DROP + */ +static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info) +{ + int ret = 0; + + /* set tcb in offload and bypass */ + ret = + chcr_set_tcb_field(tx_info, TCB_T_FLAGS_W, + TCB_T_FLAGS_V(TF_CORE_BYPASS_F | TF_NON_OFFLOAD_F), + TCB_T_FLAGS_V(TF_CORE_BYPASS_F), 1); + if (ret) + return ret; + /* reset snd_una and snd_next fields in tcb */ + ret = chcr_set_tcb_field(tx_info, TCB_SND_UNA_RAW_W, + TCB_SND_NXT_RAW_V(TCB_SND_NXT_RAW_M) | + TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M), + 0, 1); + if (ret) + return ret; + + /* reset send max */ + ret = chcr_set_tcb_field(tx_info, TCB_SND_MAX_RAW_W, + TCB_SND_MAX_RAW_V(TCB_SND_MAX_RAW_M), + 0, 1); + if (ret) + return ret; + + /* update l2t index and request for tp reply to confirm tcb is + * initialised to handle tx traffic. + */ + ret = chcr_set_tcb_field(tx_info, TCB_L2T_IX_W, + TCB_L2T_IX_V(TCB_L2T_IX_M), + TCB_L2T_IX_V(tx_info->l2te->idx), 0); + return ret; +} + +/* + * chcr_ktls_cpl_act_open_rpl: connection reply received from TP. 
+ */ +static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, + unsigned char *input) +{ + const struct cpl_act_open_rpl *p = (void *)input; + struct chcr_ktls_info *tx_info = NULL; + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct chcr_ktls_uld_ctx *u_ctx; + unsigned int atid, tid, status; + struct tls_context *tls_ctx; + struct tid_info *t; + int ret = 0; + + tid = GET_TID(p); + status = AOPEN_STATUS_G(ntohl(p->atid_status)); + atid = TID_TID_G(AOPEN_ATID_G(ntohl(p->atid_status))); + + t = &adap->tids; + tx_info = lookup_atid(t, atid); + + if (!tx_info || tx_info->atid != atid) { + pr_err("%s: incorrect tx_info or atid\n", __func__); + return -1; + } + + cxgb4_free_atid(t, atid); + tx_info->atid = -1; + + spin_lock(&tx_info->lock); + /* HW response is very close, finish pending cleanup */ + if (tx_info->pending_close) { + spin_unlock(&tx_info->lock); + if (!status) { + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tid, tx_info->ip_family); + } + kvfree(tx_info); + return 0; + } + + if (!status) { + tx_info->tid = tid; + cxgb4_insert_tid(t, tx_info, tx_info->tid, tx_info->ip_family); + /* Adding tid */ + tls_ctx = tls_get_ctx(tx_info->sk); + tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + u_ctx = adap->uld[CXGB4_ULD_KTLS].handle; + if (u_ctx) { + ret = xa_insert_bh(&u_ctx->tid_list, tid, tx_ctx, + GFP_NOWAIT); + if (ret < 0) { + pr_err("%s: Failed to allocate tid XA entry = %d\n", + __func__, tx_info->tid); + tx_info->open_state = CH_KTLS_OPEN_FAILURE; + goto out; + } + } + tx_info->open_state = CH_KTLS_OPEN_SUCCESS; + } else { + tx_info->open_state = CH_KTLS_OPEN_FAILURE; + } +out: + spin_unlock(&tx_info->lock); + + complete(&tx_info->completion); + return ret; +} + +/* + * chcr_ktls_cpl_set_tcb_rpl: TCB reply received from TP. + */ +static int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input) +{ + const struct cpl_set_tcb_rpl *p = (void *)input; + struct chcr_ktls_info *tx_info = NULL; + struct tid_info *t; + u32 tid; + + tid = GET_TID(p); + + t = &adap->tids; + tx_info = lookup_tid(t, tid); + + if (!tx_info || tx_info->tid != tid) { + pr_err("%s: incorrect tx_info or tid\n", __func__); + return -1; + } + + spin_lock(&tx_info->lock); + if (tx_info->pending_close) { + spin_unlock(&tx_info->lock); + kvfree(tx_info); + return 0; + } + tx_info->open_state = CH_KTLS_OPEN_SUCCESS; + spin_unlock(&tx_info->lock); + + complete(&tx_info->completion); + return 0; +} + +static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, + u32 tid, void *pos, u16 word, + struct sge_eth_txq *q, u64 mask, + u64 val, u32 reply) +{ + struct cpl_set_tcb_field_core *cpl; + struct ulptx_idata *idata; + struct ulp_txpkt *txpkt; + + /* ULP_TXPKT */ + txpkt = pos; + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | + ULP_TXPKT_RO_F); + txpkt->len = htonl(DIV_ROUND_UP(CHCR_SET_TCB_FIELD_LEN, 16)); + + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(txpkt + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + idata->len = htonl(sizeof(*cpl)); + pos = idata + 1; + + cpl = pos; + /* CPL_SET_TCB_FIELD */ + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + cpl->reply_ctrl = htons(QUEUENO_V(tx_info->rx_qid) | + NO_REPLY_V(!reply)); + cpl->word_cookie = htons(TCB_WORD_V(word)); + cpl->mask = cpu_to_be64(mask); + cpl->val = cpu_to_be64(val); + + /* ULPTX_NOOP */ + idata = (struct ulptx_idata *)(cpl + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); + 
idata->len = htonl(0); + pos = idata + 1; + + return pos; +} + + +/* + * chcr_write_cpl_set_tcb_ulp: update tcb values. + * TCB is responsible to create tcp headers, so all the related values + * should be correctly updated. + * @tx_info - driver specific tls info. + * @q - tx queue on which packet is going out. + * @tid - TCB identifier. + * @pos - current index where should we start writing. + * @word - TCB word. + * @mask - TCB word related mask. + * @val - TCB word related value. + * @reply - set 1 if looking for TP response. + * return - next position to write. + */ +static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u32 tid, + void *pos, u16 word, u64 mask, + u64 val, u32 reply) +{ + int left = (void *)q->q.stat - pos; + + if (unlikely(left < CHCR_SET_TCB_FIELD_LEN)) { + if (!left) { + pos = q->q.desc; + } else { + u8 buf[48] = {0}; + + __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word, q, + mask, val, reply); + + return chcr_copy_to_txd(buf, &q->q, pos, + CHCR_SET_TCB_FIELD_LEN); + } + } + + pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word, q, + mask, val, reply); + + /* check again if we are at the end of the queue */ + if (left == CHCR_SET_TCB_FIELD_LEN) + pos = q->q.desc; + + return pos; +} + +/* + * chcr_ktls_xmit_tcb_cpls: update tcb entry so that TP will create the header + * with updated values like tcp seq, ack, window etc. + * @tx_info - driver specific tls info. + * @q - TX queue. + * @tcp_seq + * @tcp_ack + * @tcp_win + * return: NETDEV_TX_BUSY/NET_TX_OK. + */ +static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u64 tcp_seq, + u64 tcp_ack, u64 tcp_win, bool offset) +{ + bool first_wr = ((tx_info->prev_ack == 0) && (tx_info->prev_win == 0)); + struct ch_ktls_port_stats_debug *port_stats; + u32 len, cpl = 0, ndesc, wr_len, wr_mid = 0; + struct fw_ulptx_wr *wr; + int credits; + void *pos; + + wr_len = sizeof(*wr); + /* there can be max 4 cpls, check if we have enough credits */ + len = wr_len + 4 * roundup(CHCR_SET_TCB_FIELD_LEN, 16); + ndesc = DIV_ROUND_UP(len, 64); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + pos = &q->q.desc[q->q.pidx]; + /* make space for WR, we'll fill it later when we know all the cpls + * being sent out and have complete length. 
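
Editor's note: chcr_write_cpl_set_tcb_ulp() above has to respect the TX ring's wrap point: a CPL that would straddle q->q.stat is first built in a stack buffer and copied in two pieces, and a write landing exactly on the end restarts at q->q.desc. A generic userspace model of that wrap-aware copy (demo_ring is invented; the driver's equivalent is chcr_copy_to_txd()):

#include <stdint.h>
#include <string.h>

struct demo_ring {
	uint8_t *base;	/* plays the role of q->q.desc */
	uint8_t *stat;	/* plays the role of q->q.stat (end of usable area) */
};

/* copy len bytes at pos, splitting across the wrap when needed;
 * returns the next write position */
static void *ring_copy(struct demo_ring *r, void *pos, const void *src,
		       size_t len)
{
	size_t left = (size_t)(r->stat - (uint8_t *)pos);

	if (len <= left) {
		memcpy(pos, src, len);
		pos = (uint8_t *)pos + len;
		return pos == (void *)r->stat ? r->base : pos;
	}
	memcpy(pos, src, left);
	memcpy(r->base, (const uint8_t *)src + left, len - left);
	return r->base + (len - left);
}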
+ */ + wr = pos; + pos += wr_len; + /* update tx_max if its a re-transmit or the first wr */ + if (first_wr || tcp_seq != tx_info->prev_seq) { + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_TX_MAX_W, + TCB_TX_MAX_V(TCB_TX_MAX_M), + TCB_TX_MAX_V(tcp_seq), 0); + cpl++; + } + /* reset snd una if it's a re-transmit pkt */ + if (tcp_seq != tx_info->prev_seq || offset) { + /* reset snd_una */ + port_stats = + &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_SND_UNA_RAW_W, + TCB_SND_UNA_RAW_V + (TCB_SND_UNA_RAW_M), + TCB_SND_UNA_RAW_V(0), 0); + if (tcp_seq != tx_info->prev_seq) + atomic64_inc(&port_stats->ktls_tx_ooo); + cpl++; + } + /* update ack */ + if (first_wr || tx_info->prev_ack != tcp_ack) { + pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_RCV_NXT_W, + TCB_RCV_NXT_V(TCB_RCV_NXT_M), + TCB_RCV_NXT_V(tcp_ack), 0); + tx_info->prev_ack = tcp_ack; + cpl++; + } + /* update receive window */ + if (first_wr || tx_info->prev_win != tcp_win) { + chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_RCV_WND_W, + TCB_RCV_WND_V(TCB_RCV_WND_M), + TCB_RCV_WND_V(tcp_win), 0); + tx_info->prev_win = tcp_win; + cpl++; + } + + if (cpl) { + /* get the actual length */ + len = wr_len + cpl * roundup(CHCR_SET_TCB_FIELD_LEN, 16); + /* ULPTX wr */ + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->cookie = 0; + /* fill len in wr field */ + wr->flowid_len16 = htonl(wr_mid | + FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); + + ndesc = DIV_ROUND_UP(len, 64); + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + } + return 0; +} + +/* + * chcr_ktls_get_tx_flits + * returns number of flits to be sent out, it includes key context length, WR + * size and skb fragments. + */ +static unsigned int +chcr_ktls_get_tx_flits(u32 nr_frags, unsigned int key_ctx_len) +{ + return chcr_sgl_len(nr_frags) + + DIV_ROUND_UP(key_ctx_len + CHCR_KTLS_WR_SIZE, 8); +} + +/* + * chcr_ktls_check_tcp_options: To check if there is any TCP option available + * other than timestamp. + * @skb - skb contains partial record.. + * return: 1 / 0 + */ +static int +chcr_ktls_check_tcp_options(struct tcphdr *tcp) +{ + int cnt, opt, optlen; + u_char *cp; + + cp = (u_char *)(tcp + 1); + cnt = (tcp->doff << 2) - sizeof(struct tcphdr); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) { + optlen = 1; + } else { + if (cnt < 2) + break; + optlen = cp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_NOP: + break; + default: + return 1; + } + } + return 0; +} + +/* + * chcr_ktls_write_tcp_options : TP can't send out all the options, we need to + * send out separately. + * @tx_info - driver specific tls info. + * @skb - skb contains partial record.. + * @q - TX queue. + * @tx_chan - channel number. + * return: NETDEV_TX_OK/NETDEV_TX_BUSY. 
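
Editor's note: the option walk in chcr_ktls_check_tcp_options() is plain pointer arithmetic and can be exercised on the host. A small harness running the same loop (constants inlined; note that, as written, the walk reports any multi-byte option, timestamps included):

#include <stdint.h>
#include <stdio.h>

#define TCPOPT_EOL	0
#define TCPOPT_NOP	1

static int has_real_tcp_options(const uint8_t *cp, int cnt)
{
	int opt, optlen;

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP) {
			optlen = 1;
			continue;
		}
		if (cnt < 2)
			break;
		optlen = cp[1];
		if (optlen < 2 || optlen > cnt)
			break;
		return 1;	/* a real option is present */
	}
	return 0;
}

int main(void)
{
	uint8_t pad_only[4] = { TCPOPT_NOP, TCPOPT_NOP, TCPOPT_NOP, TCPOPT_EOL };
	uint8_t with_ts[12] = { TCPOPT_NOP, TCPOPT_NOP, 8, 10 }; /* kind 8 = timestamp */

	printf("%d %d\n", has_real_tcp_options(pad_only, 4),
	       has_real_tcp_options(with_ts, 12));	/* prints "0 1" */
	return 0;
}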
+ */ +static int +chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, + struct sge_eth_txq *q, uint32_t tx_chan) +{ + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + u32 ctrl, iplen, maclen; + struct ipv6hdr *ip6; + unsigned int ndesc; + struct tcphdr *tcp; + int len16, pktlen; + struct iphdr *ip; + u32 wr_mid = 0; + int credits; + u8 buf[150]; + u64 cntrl1; + void *pos; + + iplen = skb_network_header_len(skb); + maclen = skb_mac_header_len(skb); + + /* packet length = eth hdr len + ip hdr len + tcp hdr len + * (including options). + */ + pktlen = skb_tcp_all_headers(skb); + + ctrl = sizeof(*cpl) + pktlen; + len16 = DIV_ROUND_UP(sizeof(*wr) + ctrl, 16); + /* check how many descriptors needed */ + ndesc = DIV_ROUND_UP(len16, 4); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + pos = &q->q.desc[q->q.pidx]; + wr = pos; + + /* Firmware work request header */ + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(ctrl)); + + wr->equiq_to_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->r3 = 0; + + cpl = (void *)(wr + 1); + + /* CPL header */ + cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT) | TXPKT_INTF_V(tx_chan) | + TXPKT_PF_V(tx_info->adap->pf)); + cpl->pack = 0; + cpl->len = htons(pktlen); + + memcpy(buf, skb->data, pktlen); + if (!IS_ENABLED(CONFIG_IPV6) || tx_info->ip_family == AF_INET) { + /* we need to correct ip header len */ + ip = (struct iphdr *)(buf + maclen); + ip->tot_len = htons(pktlen - maclen); + cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP); + } else { + ip6 = (struct ipv6hdr *)(buf + maclen); + ip6->payload_len = htons(pktlen - maclen - iplen); + cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP6); + } + + cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) | + TXPKT_IPHDR_LEN_V(iplen); + /* checksum offload */ + cpl->ctrl1 = cpu_to_be64(cntrl1); + + pos = cpl + 1; + + /* now take care of the tcp header, if fin is not set then clear push + * bit as well, and if fin is set, it will be sent at the last so we + * need to update the tcp sequence number as per the last packet. + */ + tcp = (struct tcphdr *)(buf + maclen + iplen); + + if (!tcp->fin) + tcp->psh = 0; + else + tcp->seq = htonl(tx_info->prev_seq); + + chcr_copy_to_txd(buf, &q->q, pos, pktlen); + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + return 0; +} + +/* + * chcr_ktls_xmit_wr_complete: This sends out the complete record. If an skb + * received has partial end part of the record, send out the complete record, so + * that crypto block will be able to generate TAG/HASH. + * @skb - segment which has complete or partial end part. + * @tx_info - driver specific tls info. + * @q - TX queue. + * @tcp_seq + * @tcp_push - tcp push bit. + * @mss - segment size. + * return: NETDEV_TX_BUSY/NET_TX_OK. 
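
Editor's note: the sizing logic above juggles three SGE units: an 8-byte flit, a 16-byte len16 chunk, and a 64-byte descriptor (8 flits, or 4 len16 units). A quick arithmetic check with assumed header sizes:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int wr_hdr = 16;	/* assumed sizeof(fw_eth_tx_pkt_wr) */
	unsigned int immdlen = 16 + 66;	/* assumed CPL + eth/ip/tcp headers */
	unsigned int len16 = DIV_ROUND_UP(wr_hdr + immdlen, 16);
	unsigned int ndesc = DIV_ROUND_UP(len16, 4);

	printf("len16=%u ndesc=%u\n", len16, ndesc);	/* len16=7 ndesc=2 */
	return 0;
}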
+ */ +static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, + struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, u32 tcp_seq, + bool is_last_wr, u32 data_len, + u32 skb_offset, u32 nfrags, + bool tcp_push, u32 mss) +{ + u32 len16, wr_mid = 0, flits = 0, ndesc, cipher_start; + struct adapter *adap = tx_info->adap; + int credits, left, last_desc; + struct tx_sw_desc *sgl_sdesc; + struct cpl_tx_data *tx_data; + struct cpl_tx_sec_pdu *cpl; + struct ulptx_idata *idata; + struct ulp_txpkt *ulptx; + struct fw_ulptx_wr *wr; + void *pos; + u64 *end; + + /* get the number of flits required */ + flits = chcr_ktls_get_tx_flits(nfrags, tx_info->key_ctx_len); + /* number of descriptors */ + ndesc = chcr_flits_to_desc(flits); + /* check if enough credits available */ + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + /* Credits are below the threshold values, stop the queue after + * injecting the Work Request for this packet. + */ + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + if (!is_last_wr) + skb_get(skb); + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + pos += sizeof(*wr); + /* ULP_TXPKT */ + ulptx = pos; + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | + ULP_TXPKT_RO_F); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + /* idata length will include cpl_tx_sec_pdu + key context size + + * cpl_tx_data header. + */ + idata->len = htonl(sizeof(*cpl) + tx_info->key_ctx_len + + sizeof(*tx_data)); + /* SEC CPL */ + cpl = (struct cpl_tx_sec_pdu *)(idata + 1); + cpl->op_ivinsrtofst = + htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | + CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) | + CPL_TX_SEC_PDU_PLACEHOLDER_V(1) | + CPL_TX_SEC_PDU_IVINSRTOFST_V(TLS_HEADER_SIZE + 1)); + cpl->pldlen = htonl(data_len); + + /* encryption should start after tls header size + iv size */ + cipher_start = TLS_HEADER_SIZE + tx_info->iv_size + 1; + + cpl->aadstart_cipherstop_hi = + htonl(CPL_TX_SEC_PDU_AADSTART_V(1) | + CPL_TX_SEC_PDU_AADSTOP_V(TLS_HEADER_SIZE) | + CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start)); + + /* authentication will also start after tls header + iv size */ + cpl->cipherstop_lo_authinsert = + htonl(CPL_TX_SEC_PDU_AUTHSTART_V(cipher_start) | + CPL_TX_SEC_PDU_AUTHSTOP_V(TLS_CIPHER_AES_GCM_128_TAG_SIZE) | + CPL_TX_SEC_PDU_AUTHINSERT_V(TLS_CIPHER_AES_GCM_128_TAG_SIZE)); + + /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. 
*/ + cpl->seqno_numivs = htonl(tx_info->scmd0_seqno_numivs); + cpl->ivgen_hdrlen = htonl(tx_info->scmd0_ivgen_hdrlen); + cpl->scmd1 = cpu_to_be64(tx_info->record_no); + + pos = cpl + 1; + /* check if space left to fill the keys */ + left = (void *)q->q.stat - pos; + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + + pos = chcr_copy_to_txd(&tx_info->key_ctx, &q->q, pos, + tx_info->key_ctx_len); + left = (void *)q->q.stat - pos; + + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* CPL_TX_DATA */ + tx_data = (void *)pos; + OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); + tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(data_len)); + + tx_data->rsvd = htonl(tcp_seq); + + tx_data->flags = htonl(TX_BYPASS_F); + if (tcp_push) + tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F); + + /* check left again, it might go beyond queue limit */ + pos = tx_data + 1; + left = (void *)q->q.stat - pos; + + /* check the position again */ + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + + /* send the complete packet except the header */ + cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr, + skb_offset, data_len); + sgl_sdesc->skb = skb; + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(adap, &q->q, ndesc); + atomic64_inc(&adap->ch_ktls_stats.ktls_tx_send_records); + + return 0; +} + +/* + * chcr_ktls_xmit_wr_short: This is to send out partial records. If its + * a middle part of a record, fetch the prior data to make it 16 byte aligned + * and then only send it out. + * + * @skb - skb contains partial record.. + * @tx_info - driver specific tls info. + * @q - TX queue. + * @tcp_seq + * @tcp_push - tcp push bit. + * @mss - segment size. + * @tls_rec_offset - offset from start of the tls record. + * @perior_data - data before the current segment, required to make this record + * 16 byte aligned. + * @prior_data_len - prior_data length (less than 16) + * return: NETDEV_TX_BUSY/NET_TX_OK. + */ +static int chcr_ktls_xmit_wr_short(struct sk_buff *skb, + struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q, + u32 tcp_seq, bool tcp_push, u32 mss, + u32 tls_rec_offset, u8 *prior_data, + u32 prior_data_len, u32 data_len, + u32 skb_offset) +{ + u32 len16, wr_mid = 0, cipher_start, nfrags; + struct adapter *adap = tx_info->adap; + unsigned int flits = 0, ndesc; + int credits, left, last_desc; + struct tx_sw_desc *sgl_sdesc; + struct cpl_tx_data *tx_data; + struct cpl_tx_sec_pdu *cpl; + struct ulptx_idata *idata; + struct ulp_txpkt *ulptx; + struct fw_ulptx_wr *wr; + __be64 iv_record; + void *pos; + u64 *end; + + nfrags = chcr_get_nfrags_to_send(skb, skb_offset, data_len); + /* get the number of flits required, it's a partial record so 2 flits + * (AES_BLOCK_SIZE) will be added. + */ + flits = chcr_ktls_get_tx_flits(nfrags, tx_info->key_ctx_len) + 2; + /* get the correct 8 byte IV of this record */ + iv_record = cpu_to_be64(tx_info->iv + tx_info->record_no); + /* If it's a middle record and not 16 byte aligned to run AES CTR, need + * to make it 16 byte aligned. So atleadt 2 extra flits of immediate + * data will be added. 
+ */ + if (prior_data_len) + flits += 2; + /* number of descriptors */ + ndesc = chcr_flits_to_desc(flits); + /* check if enough credits available */ + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + pos += sizeof(*wr); + /* ULP_TXPKT */ + ulptx = pos; + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | + ULP_TXPKT_RO_F); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + /* idata length will include cpl_tx_sec_pdu + key context size + + * cpl_tx_data header. + */ + idata->len = htonl(sizeof(*cpl) + tx_info->key_ctx_len + + sizeof(*tx_data) + AES_BLOCK_LEN + prior_data_len); + /* SEC CPL */ + cpl = (struct cpl_tx_sec_pdu *)(idata + 1); + /* cipher start will have tls header + iv size extra if its a header + * part of tls record. else only 16 byte IV will be added. + */ + cipher_start = + AES_BLOCK_LEN + 1 + + (!tls_rec_offset ? TLS_HEADER_SIZE + tx_info->iv_size : 0); + + cpl->op_ivinsrtofst = + htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | + CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) | + CPL_TX_SEC_PDU_IVINSRTOFST_V(1)); + cpl->pldlen = htonl(data_len + AES_BLOCK_LEN + prior_data_len); + cpl->aadstart_cipherstop_hi = + htonl(CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start)); + cpl->cipherstop_lo_authinsert = 0; + /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. 
*/ + cpl->seqno_numivs = htonl(tx_info->scmd0_short_seqno_numivs); + cpl->ivgen_hdrlen = htonl(tx_info->scmd0_short_ivgen_hdrlen); + cpl->scmd1 = 0; + + pos = cpl + 1; + /* check if space left to fill the keys */ + left = (void *)q->q.stat - pos; + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + + pos = chcr_copy_to_txd(&tx_info->key_ctx, &q->q, pos, + tx_info->key_ctx_len); + left = (void *)q->q.stat - pos; + + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* CPL_TX_DATA */ + tx_data = (void *)pos; + OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); + tx_data->len = htonl(TX_DATA_MSS_V(mss) | + TX_LENGTH_V(data_len + prior_data_len)); + tx_data->rsvd = htonl(tcp_seq); + tx_data->flags = htonl(TX_BYPASS_F); + if (tcp_push) + tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F); + + /* check left again, it might go beyond queue limit */ + pos = tx_data + 1; + left = (void *)q->q.stat - pos; + + /* check the position again */ + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* copy the 16 byte IV for AES-CTR, which includes 4 bytes of salt, 8 + * bytes of actual IV and 4 bytes of 16 byte-sequence. + */ + memcpy(pos, tx_info->key_ctx.salt, tx_info->salt_size); + memcpy(pos + tx_info->salt_size, &iv_record, tx_info->iv_size); + *(__be32 *)(pos + tx_info->salt_size + tx_info->iv_size) = + htonl(2 + (tls_rec_offset ? ((tls_rec_offset - + (TLS_HEADER_SIZE + tx_info->iv_size)) / AES_BLOCK_LEN) : 0)); + + pos += 16; + /* Prior_data_len will always be less than 16 bytes, fill the + * prio_data_len after AES_CTRL_BLOCK and clear the remaining length + * to 0. + */ + if (prior_data_len) + pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16); + /* send the complete packet except the header */ + cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr, + skb_offset, data_len); + sgl_sdesc->skb = skb; + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(adap, &q->q, ndesc); + + return 0; +} + +/* + * chcr_ktls_tx_plaintxt: This handler will take care of the records which has + * only plain text (only tls header and iv) + * @tx_info - driver specific tls info. + * @skb - skb contains partial record.. + * @tcp_seq + * @mss - segment size. + * @tcp_push - tcp push bit. + * @q - TX queue. + * @port_id : port number + * @perior_data - data before the current segment, required to make this record + * 16 byte aligned. + * @prior_data_len - prior_data length (less than 16) + * return: NETDEV_TX_BUSY/NET_TX_OK. 
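
Editor's note: the "2 +" in the counter written above matches the usual AES-GCM layout as I read it: counter value 1 is reserved for the tag block, so payload block 0 is encrypted with counter 2, and resuming a record N whole AES blocks in needs counter 2 + N. A worked example with an assumed offset:

#include <stdio.h>

#define TLS_HEADER_SIZE	5
#define EXPL_IV_SIZE	8	/* explicit IV for AES-GCM-128 */
#define AES_BLOCK_LEN	16

int main(void)
{
	/* assumed 16-byte-aligned resume offset into the record */
	unsigned int tls_rec_offset = 1037;
	unsigned int blocks = (tls_rec_offset -
			       (TLS_HEADER_SIZE + EXPL_IV_SIZE)) / AES_BLOCK_LEN;

	printf("ctr=%u\n", 2 + blocks);	/* (1037-13)/16 = 64 -> ctr 66 */
	return 0;
}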
+ */ +static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, u32 tcp_seq, u32 mss, + bool tcp_push, struct sge_eth_txq *q, + u32 port_id, u8 *prior_data, + u32 data_len, u32 skb_offset, + u32 prior_data_len) +{ + int credits, left, len16, last_desc; + unsigned int flits = 0, ndesc; + struct tx_sw_desc *sgl_sdesc; + struct cpl_tx_data *tx_data; + struct ulptx_idata *idata; + struct ulp_txpkt *ulptx; + struct fw_ulptx_wr *wr; + u32 wr_mid = 0, nfrags; + void *pos; + u64 *end; + + flits = DIV_ROUND_UP(CHCR_PLAIN_TX_DATA_LEN, 8); + nfrags = chcr_get_nfrags_to_send(skb, skb_offset, data_len); + flits += chcr_sgl_len(nfrags); + if (prior_data_len) + flits += 2; + + /* WR will need len16 */ + len16 = DIV_ROUND_UP(flits, 2); + /* check how many descriptors needed */ + ndesc = DIV_ROUND_UP(flits, 8); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return NETDEV_TX_BUSY; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(tx_info->adap->pdev_dev, skb, + sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return NETDEV_TX_BUSY; + } + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + /* FW_ULPTX_WR */ + wr = pos; + wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); + wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->cookie = 0; + /* ULP_TXPKT */ + ulptx = (struct ulp_txpkt *)(wr + 1); + ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_DATAMODIFY_V(0) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_DEST_V(0) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | ULP_TXPKT_RO_V(1)); + ulptx->len = htonl(len16 - 1); + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(ulptx + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) | ULP_TX_SC_MORE_F); + idata->len = htonl(sizeof(*tx_data) + prior_data_len); + /* CPL_TX_DATA */ + tx_data = (struct cpl_tx_data *)(idata + 1); + OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); + tx_data->len = htonl(TX_DATA_MSS_V(mss) | + TX_LENGTH_V(data_len + prior_data_len)); + /* set tcp seq number */ + tx_data->rsvd = htonl(tcp_seq); + tx_data->flags = htonl(TX_BYPASS_F); + if (tcp_push) + tx_data->flags |= htonl(TX_PUSH_F | TX_SHOVE_F); + + pos = tx_data + 1; + /* apart from prior_data_len, we should set remaining part of 16 bytes + * to be zero. 
+ */ + if (prior_data_len) + pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16); + + /* check left again, it might go beyond queue limit */ + left = (void *)q->q.stat - pos; + + /* check the position again */ + if (!left) { + left = (void *)end - (void *)q->q.stat; + pos = q->q.desc; + end = pos + left; + } + /* send the complete packet including the header */ + cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr, + skb_offset, data_len); + sgl_sdesc->skb = skb; + + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + return 0; +} + +static int chcr_ktls_tunnel_pkt(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, + struct sge_eth_txq *q) +{ + u32 ctrl, iplen, maclen, wr_mid = 0, len16; + struct tx_sw_desc *sgl_sdesc; + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + unsigned int flits, ndesc; + int credits, last_desc; + u64 cntrl1, *end; + void *pos; + + ctrl = sizeof(*cpl); + flits = DIV_ROUND_UP(sizeof(*wr) + ctrl, 8); + + flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags + 1); + len16 = DIV_ROUND_UP(flits, 2); + /* check how many descriptors needed */ + ndesc = DIV_ROUND_UP(flits, 8); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return -ENOMEM; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(tx_info->adap->pdev_dev, skb, + sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return -ENOMEM; + } + + iplen = skb_network_header_len(skb); + maclen = skb_mac_header_len(skb); + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + wr = pos; + + /* Firmware work request header */ + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(ctrl)); + + wr->equiq_to_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->r3 = 0; + + cpl = (void *)(wr + 1); + + /* CPL header */ + cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT) | + TXPKT_INTF_V(tx_info->tx_chan) | + TXPKT_PF_V(tx_info->adap->pf)); + cpl->pack = 0; + cntrl1 = TXPKT_CSUM_TYPE_V(tx_info->ip_family == AF_INET ? + TX_CSUM_TCPIP : TX_CSUM_TCPIP6); + cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) | + TXPKT_IPHDR_LEN_V(iplen); + /* checksum offload */ + cpl->ctrl1 = cpu_to_be64(cntrl1); + cpl->len = htons(skb->len); + + pos = cpl + 1; + + cxgb4_write_sgl(skb, &q->q, pos, end, 0, sgl_sdesc->addr); + sgl_sdesc->skb = skb; + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + return 0; +} + +/* + * chcr_ktls_copy_record_in_skb + * @nskb - new skb where the frags to be added. + * @skb - old skb, to copy socket and destructor details. + * @record - specific record which has complete 16k record in frags. 
+ */
+static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
+					 struct sk_buff *skb,
+					 struct tls_record_info *record)
+{
+	int i = 0;
+
+	for (i = 0; i < record->num_frags; i++) {
+		skb_shinfo(nskb)->frags[i] = record->frags[i];
+		/* increase the frag ref count */
+		__skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
+	}
+
+	skb_shinfo(nskb)->nr_frags = record->num_frags;
+	nskb->data_len = record->len;
+	nskb->len += record->len;
+	nskb->truesize += record->len;
+	nskb->sk = skb->sk;
+	nskb->destructor = skb->destructor;
+	refcount_add(nskb->truesize, &nskb->sk->sk_wmem_alloc);
+}
+
+/*
+ * chcr_end_part_handler: handles a record that is complete, or whose end
+ * part has been received. The T6 adapter has an issue: it can't send out a
+ * TAG for a partial record, so if this is an end part we have to send the
+ * TAG as well, for which we need to fetch the complete record and send it
+ * to the crypto module.
+ * @tx_info - driver specific tls info.
+ * @skb - skb containing the partial record.
+ * @record - complete record of 16K size.
+ * @tcp_seq
+ * @mss - segment size in which TP needs to chop a packet.
+ * @tcp_push_no_fin - tcp push if fin is not set.
+ * @q - TX queue.
+ * @tls_end_offset - number of record bytes from tcp_seq to the record end.
+ * @last_wr - true if this is the last part of the skb going out.
+ * return: NETDEV_TX_OK/NETDEV_TX_BUSY.
+ */
+static int chcr_end_part_handler(struct chcr_ktls_info *tx_info,
+				 struct sk_buff *skb,
+				 struct tls_record_info *record,
+				 u32 tcp_seq, int mss, bool tcp_push_no_fin,
+				 struct sge_eth_txq *q, u32 skb_offset,
+				 u32 tls_end_offset, bool last_wr)
+{
+	bool free_skb_if_tx_fails = false;
+	struct sk_buff *nskb = NULL;
+
+	/* check if it is a complete record */
+	if (tls_end_offset == record->len) {
+		nskb = skb;
+		atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_complete_pkts);
+	} else {
+		nskb = alloc_skb(0, GFP_ATOMIC);
+		if (!nskb) {
+			dev_kfree_skb_any(skb);
+			return NETDEV_TX_BUSY;
+		}
+
+		/* copy the complete record into the new skb */
+		chcr_ktls_copy_record_in_skb(nskb, skb, record);
+		/* packet is being sent from the beginning, update the tcp_seq
+		 * accordingly.
+		 */
+		tcp_seq = tls_record_start_seq(record);
+		/* reset skb offset */
+		skb_offset = 0;
+
+		if (last_wr)
+			dev_kfree_skb_any(skb);
+		else
+			free_skb_if_tx_fails = true;
+
+		last_wr = true;
+
+		atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_end_pkts);
+	}
+
+	if (chcr_ktls_xmit_wr_complete(nskb, tx_info, q, tcp_seq,
+				       last_wr, record->len, skb_offset,
+				       record->num_frags,
+				       (last_wr && tcp_push_no_fin),
+				       mss)) {
+		if (free_skb_if_tx_fails)
+			dev_kfree_skb_any(skb);
+		goto out;
+	}
+	tx_info->prev_seq = record->end_seq;
+	return 0;
+out:
+	dev_kfree_skb_any(nskb);
+	return NETDEV_TX_BUSY;
+}
+
+/*
+ * chcr_short_record_handler: takes care of records that don't have an end
+ * part in this segment (the first part or a middle part of a record). In
+ * such cases, AES-CTR is used in place of AES-GCM to send out the partial
+ * payload. For a middle part we must fetch prior data to make the start 16
+ * byte aligned. If the segment starts inside the TLS header or IV, back up
+ * to the start of the TLS header; if it ends inside the TAG, drop the whole
+ * TAG and send only the payload. One more possibility: the segment may hold
+ * only part of the header, in which case that portion is sent as plaintext.
+ * @tx_info - driver specific tls info.
+ * @skb - skb containing the partial record.
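
Editor's note: the choice between the two handlers hinges on tls_end_offset = record->end_seq - tcp_seq, i.e. how many record bytes lie at or after this segment's sequence number. A worked example with assumed numbers:

#include <stdio.h>

int main(void)
{
	unsigned int rec_start = 1000, rec_len = 1200;	/* assumed record */
	unsigned int rec_end = rec_start + rec_len;	/* end_seq = 2200 */
	unsigned int tcp_seq = 1800, data_len = 700;	/* segment resumes here */
	unsigned int tls_end_offset = rec_end - tcp_seq;	/* 400 */

	/* tls_end_offset == rec_len  -> segment starts at the record head
	 * tls_end_offset <= data_len -> record finishes inside the segment:
	 *                               end-part path, full record re-sent
	 * otherwise                  -> short-record (AES-CTR) path */
	printf("tls_end_offset=%u data_len=%u\n", tls_end_offset, data_len);
	return 0;
}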
+ * @record - complete record of 16K size. + * @tcp_seq + * @mss - segment size in which TP needs to chop a packet. + * @tcp_push_no_fin - tcp push if fin is not set. + * @q - TX queue. + * @tls_end_offset - offset from end of the record. + * return: NETDEV_TX_OK/NETDEV_TX_BUSY. + */ +static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, + struct tls_record_info *record, + u32 tcp_seq, int mss, bool tcp_push_no_fin, + u32 data_len, u32 skb_offset, + struct sge_eth_txq *q, u32 tls_end_offset) +{ + u32 tls_rec_offset = tcp_seq - tls_record_start_seq(record); + u8 prior_data[16] = {0}; + u32 prior_data_len = 0; + + /* check if the skb is ending in middle of tag/HASH, its a big + * trouble, send the packet before the HASH. + */ + int remaining_record = tls_end_offset - data_len; + + if (remaining_record > 0 && + remaining_record < TLS_CIPHER_AES_GCM_128_TAG_SIZE) { + int trimmed_len = 0; + + if (tls_end_offset > TLS_CIPHER_AES_GCM_128_TAG_SIZE) + trimmed_len = data_len - + (TLS_CIPHER_AES_GCM_128_TAG_SIZE - + remaining_record); + if (!trimmed_len) + return FALLBACK; + + WARN_ON(trimmed_len > data_len); + + data_len = trimmed_len; + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_trimmed_pkts); + } + + /* check if it is only the header part. */ + if (tls_rec_offset + data_len <= (TLS_HEADER_SIZE + tx_info->iv_size)) { + if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss, + tcp_push_no_fin, q, + tx_info->port_id, prior_data, + data_len, skb_offset, prior_data_len)) + goto out; + + tx_info->prev_seq = tcp_seq + data_len; + return 0; + } + + /* check if the middle record's start point is 16 byte aligned. CTR + * needs 16 byte aligned start point to start encryption. + */ + if (tls_rec_offset) { + /* there is an offset from start, means its a middle record */ + int remaining = 0; + + if (tls_rec_offset < (TLS_HEADER_SIZE + tx_info->iv_size)) { + prior_data_len = tls_rec_offset; + tls_rec_offset = 0; + remaining = 0; + } else { + prior_data_len = + (tls_rec_offset - + (TLS_HEADER_SIZE + tx_info->iv_size)) + % AES_BLOCK_LEN; + remaining = tls_rec_offset - prior_data_len; + } + + /* if prior_data_len is not zero, means we need to fetch prior + * data to make this record 16 byte aligned, or we need to reach + * to start offset. 
+ */ + if (prior_data_len) { + int i = 0; + u8 *data = NULL; + skb_frag_t *f; + u8 *vaddr; + int frag_size = 0, frag_delta = 0; + + while (remaining > 0) { + frag_size = skb_frag_size(&record->frags[i]); + if (remaining < frag_size) + break; + + remaining -= frag_size; + i++; + } + f = &record->frags[i]; + vaddr = kmap_atomic(skb_frag_page(f)); + + data = vaddr + skb_frag_off(f) + remaining; + frag_delta = skb_frag_size(f) - remaining; + + if (frag_delta >= prior_data_len) { + memcpy(prior_data, data, prior_data_len); + kunmap_atomic(vaddr); + } else { + memcpy(prior_data, data, frag_delta); + kunmap_atomic(vaddr); + /* get the next page */ + f = &record->frags[i + 1]; + vaddr = kmap_atomic(skb_frag_page(f)); + data = vaddr + skb_frag_off(f); + memcpy(prior_data + frag_delta, + data, (prior_data_len - frag_delta)); + kunmap_atomic(vaddr); + } + /* reset tcp_seq as per the prior_data_required len */ + tcp_seq -= prior_data_len; + } + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts); + } else { + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts); + } + + if (chcr_ktls_xmit_wr_short(skb, tx_info, q, tcp_seq, tcp_push_no_fin, + mss, tls_rec_offset, prior_data, + prior_data_len, data_len, skb_offset)) { + goto out; + } + + tx_info->prev_seq = tcp_seq + data_len + prior_data_len; + return 0; +out: + dev_kfree_skb_any(skb); + return NETDEV_TX_BUSY; +} + +static int chcr_ktls_sw_fallback(struct sk_buff *skb, + struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q) +{ + u32 data_len, skb_offset; + struct sk_buff *nskb; + struct tcphdr *th; + + nskb = tls_encrypt_skb(skb); + + if (!nskb) + return 0; + + th = tcp_hdr(nskb); + skb_offset = skb_tcp_all_headers(nskb); + data_len = nskb->len - skb_offset; + skb_tx_timestamp(nskb); + + if (chcr_ktls_tunnel_pkt(tx_info, nskb, q)) + goto out; + + tx_info->prev_seq = ntohl(th->seq) + data_len; + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_fallback); + return 0; +out: + dev_kfree_skb_any(nskb); + return 0; +} +/* nic tls TX handler */ +static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) +{ + u32 tls_end_offset, tcp_seq, skb_data_len, skb_offset; + struct ch_ktls_port_stats_debug *port_stats; + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct ch_ktls_stats_debug *stats; + struct tcphdr *th = tcp_hdr(skb); + int data_len, qidx, ret = 0, mss; + struct tls_record_info *record; + struct chcr_ktls_info *tx_info; + struct net_device *tls_netdev; + struct tls_context *tls_ctx; + struct sge_eth_txq *q; + struct adapter *adap; + unsigned long flags; + + tcp_seq = ntohl(th->seq); + skb_offset = skb_tcp_all_headers(skb); + skb_data_len = skb->len - skb_offset; + data_len = skb_data_len; + + mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : data_len; + + tls_ctx = tls_get_ctx(skb->sk); + tls_netdev = rcu_dereference_bh(tls_ctx->netdev); + /* Don't quit on NULL: if tls_device_down is running in parallel, + * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * true. Rather continue processing this packet. 
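
Editor's note: a worked example of the re-fetch math above (offsets assumed). AES-CTR must start on a 16-byte payload boundary, where the payload begins after the 5-byte TLS header and the 8-byte explicit IV, so up to 15 already-transmitted bytes are pulled back in as prior_data:

#include <stdio.h>

#define TLS_HEADER_SIZE	5
#define EXPL_IV_SIZE	8
#define AES_BLOCK_LEN	16

int main(void)
{
	unsigned int tls_rec_offset = 500;	/* assumed offset into the record */
	unsigned int payload_off = tls_rec_offset -
				   (TLS_HEADER_SIZE + EXPL_IV_SIZE);
	unsigned int prior_data_len = payload_off % AES_BLOCK_LEN; /* 487%16 = 7 */
	unsigned int remaining = tls_rec_offset - prior_data_len;  /* frag-walk target */

	printf("prior_data_len=%u remaining=%u\n", prior_data_len, remaining);
	return 0;
}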
+ */ + if (unlikely(tls_netdev && tls_netdev != dev)) + goto out; + + tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + tx_info = tx_ctx->chcr_info; + + if (unlikely(!tx_info)) + goto out; + + adap = tx_info->adap; + stats = &adap->ch_ktls_stats; + port_stats = &stats->ktls_port[tx_info->port_id]; + + qidx = skb->queue_mapping; + q = &adap->sge.ethtxq[qidx + tx_info->first_qset]; + cxgb4_reclaim_completed_tx(adap, &q->q, true); + /* if tcp options are set but finish is not send the options first */ + if (!th->fin && chcr_ktls_check_tcp_options(th)) { + ret = chcr_ktls_write_tcp_options(tx_info, skb, q, + tx_info->tx_chan); + if (ret) + return NETDEV_TX_BUSY; + } + + /* TCP segments can be in received either complete or partial. + * chcr_end_part_handler will handle cases if complete record or end + * part of the record is received. In case of partial end part of record, + * we will send the complete record again. + */ + + spin_lock_irqsave(&tx_ctx->base.lock, flags); + + do { + + cxgb4_reclaim_completed_tx(adap, &q->q, true); + /* fetch the tls record */ + record = tls_get_record(&tx_ctx->base, tcp_seq, + &tx_info->record_no); + /* By the time packet reached to us, ACK is received, and record + * won't be found in that case, handle it gracefully. + */ + if (unlikely(!record)) { + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + atomic64_inc(&port_stats->ktls_tx_drop_no_sync_data); + goto out; + } + + tls_end_offset = record->end_seq - tcp_seq; + + pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n", + tcp_seq, record->end_seq, tx_info->prev_seq, data_len); + /* update tcb for the skb */ + if (skb_data_len == data_len) { + u32 tx_max = tcp_seq; + + if (!tls_record_is_start_marker(record) && + tls_end_offset < TLS_CIPHER_AES_GCM_128_TAG_SIZE) + tx_max = record->end_seq - + TLS_CIPHER_AES_GCM_128_TAG_SIZE; + + ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, tx_max, + ntohl(th->ack_seq), + ntohs(th->window), + tls_end_offset != + record->len); + if (ret) { + spin_unlock_irqrestore(&tx_ctx->base.lock, + flags); + goto out; + } + + if (th->fin) + skb_get(skb); + } + + if (unlikely(tls_record_is_start_marker(record))) { + atomic64_inc(&port_stats->ktls_tx_skip_no_sync_data); + /* If tls_end_offset < data_len, means there is some + * data after start marker, which needs encryption, send + * plaintext first and take skb refcount. else send out + * complete pkt as plaintext. + */ + if (tls_end_offset < data_len) + skb_get(skb); + else + tls_end_offset = data_len; + + ret = chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss, + (!th->fin && th->psh), q, + tx_info->port_id, NULL, + tls_end_offset, skb_offset, + 0); + + if (ret) { + /* free the refcount taken earlier */ + if (tls_end_offset < data_len) + dev_kfree_skb_any(skb); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + goto out; + } + + data_len -= tls_end_offset; + tcp_seq = record->end_seq; + skb_offset += tls_end_offset; + continue; + } + + /* if a tls record is finishing in this SKB */ + if (tls_end_offset <= data_len) { + ret = chcr_end_part_handler(tx_info, skb, record, + tcp_seq, mss, + (!th->fin && th->psh), q, + skb_offset, + tls_end_offset, + skb_offset + + tls_end_offset == skb->len); + + data_len -= tls_end_offset; + /* tcp_seq increment is required to handle next record. 
+ */ + tcp_seq += tls_end_offset; + skb_offset += tls_end_offset; + } else { + ret = chcr_short_record_handler(tx_info, skb, + record, tcp_seq, mss, + (!th->fin && th->psh), + data_len, skb_offset, + q, tls_end_offset); + data_len = 0; + } + + /* if any failure, come out from the loop. */ + if (ret) { + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + if (th->fin) + dev_kfree_skb_any(skb); + + if (ret == FALLBACK) + return chcr_ktls_sw_fallback(skb, tx_info, q); + + return NETDEV_TX_OK; + } + + /* length should never be less than 0 */ + WARN_ON(data_len < 0); + + } while (data_len > 0); + + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + atomic64_inc(&port_stats->ktls_tx_encrypted_packets); + atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes); + + /* tcp finish is set, send a separate tcp msg including all the options + * as well. + */ + if (th->fin) { + chcr_ktls_write_tcp_options(tx_info, skb, q, tx_info->tx_chan); + dev_kfree_skb_any(skb); + } + + return NETDEV_TX_OK; +out: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void *chcr_ktls_uld_add(const struct cxgb4_lld_info *lldi) +{ + struct chcr_ktls_uld_ctx *u_ctx; + + pr_info_once("%s - version %s\n", CHCR_KTLS_DRV_DESC, + CHCR_KTLS_DRV_VERSION); + u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL); + if (!u_ctx) { + u_ctx = ERR_PTR(-ENOMEM); + goto out; + } + u_ctx->lldi = *lldi; + u_ctx->detach = false; + xa_init_flags(&u_ctx->tid_list, XA_FLAGS_LOCK_BH); +out: + return u_ctx; +} + +static const struct tlsdev_ops chcr_ktls_ops = { + .tls_dev_add = chcr_ktls_dev_add, + .tls_dev_del = chcr_ktls_dev_del, +}; + +static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_OPEN_RPL] = chcr_ktls_cpl_act_open_rpl, + [CPL_SET_TCB_RPL] = chcr_ktls_cpl_set_tcb_rpl, +}; + +static int chcr_ktls_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *pgl) +{ + const struct cpl_act_open_rpl *rpl = (struct cpl_act_open_rpl *)rsp; + struct chcr_ktls_uld_ctx *u_ctx = handle; + u8 opcode = rpl->ot.opcode; + struct adapter *adap; + + adap = pci_get_drvdata(u_ctx->lldi.pdev); + + if (!work_handlers[opcode]) { + pr_err("Unsupported opcode %d received\n", opcode); + return 0; + } + + work_handlers[opcode](adap, (unsigned char *)&rsp[1]); + return 0; +} + +static void clear_conn_resources(struct chcr_ktls_info *tx_info) +{ + /* clear l2t entry */ + if (tx_info->l2te) + cxgb4_l2t_release(tx_info->l2te); + +#if IS_ENABLED(CONFIG_IPV6) + /* clear clip entry */ + if (tx_info->ip_family == AF_INET6) + cxgb4_clip_release(tx_info->netdev, (const u32 *) + &tx_info->sk->sk_v6_rcv_saddr, + 1); +#endif + + /* clear tid */ + if (tx_info->tid != -1) + cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, + tx_info->tid, tx_info->ip_family); +} + +static void ch_ktls_reset_all_conn(struct chcr_ktls_uld_ctx *u_ctx) +{ + struct ch_ktls_port_stats_debug *port_stats; + struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct chcr_ktls_info *tx_info; + unsigned long index; + + xa_for_each(&u_ctx->tid_list, index, tx_ctx) { + tx_info = tx_ctx->chcr_info; + clear_conn_resources(tx_info); + port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; + atomic64_inc(&port_stats->ktls_tx_connection_close); + kvfree(tx_info); + tx_ctx->chcr_info = NULL; + /* release module refcount */ + module_put(THIS_MODULE); + } +} + +static int chcr_ktls_uld_state_change(void *handle, enum cxgb4_state new_state) +{ + struct chcr_ktls_uld_ctx *u_ctx = handle; + + switch (new_state) { + case CXGB4_STATE_UP: + pr_info("%s: Up\n", 
pci_name(u_ctx->lldi.pdev)); + mutex_lock(&dev_mutex); + list_add_tail(&u_ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); + break; + case CXGB4_STATE_START_RECOVERY: + case CXGB4_STATE_DOWN: + case CXGB4_STATE_DETACH: + pr_info("%s: Down\n", pci_name(u_ctx->lldi.pdev)); + mutex_lock(&dev_mutex); + u_ctx->detach = true; + list_del(&u_ctx->entry); + ch_ktls_reset_all_conn(u_ctx); + xa_destroy(&u_ctx->tid_list); + mutex_unlock(&dev_mutex); + break; + default: + break; + } + + return 0; +} + +static struct cxgb4_uld_info chcr_ktls_uld_info = { + .name = CHCR_KTLS_DRV_MODULE_NAME, + .nrxq = 1, + .rxq_size = 1024, + .add = chcr_ktls_uld_add, + .tx_handler = chcr_ktls_xmit, + .rx_handler = chcr_ktls_uld_rx_handler, + .state_change = chcr_ktls_uld_state_change, + .tlsdev_ops = &chcr_ktls_ops, +}; + +static int __init chcr_ktls_init(void) +{ + cxgb4_register_uld(CXGB4_ULD_KTLS, &chcr_ktls_uld_info); + return 0; +} + +static void __exit chcr_ktls_exit(void) +{ + struct chcr_ktls_uld_ctx *u_ctx, *tmp; + struct adapter *adap; + + pr_info("driver unloaded\n"); + + mutex_lock(&dev_mutex); + list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) { + adap = pci_get_drvdata(u_ctx->lldi.pdev); + memset(&adap->ch_ktls_stats, 0, sizeof(adap->ch_ktls_stats)); + list_del(&u_ctx->entry); + xa_destroy(&u_ctx->tid_list); + kfree(u_ctx); + } + mutex_unlock(&dev_mutex); + cxgb4_unregister_uld(CXGB4_ULD_KTLS); +} + +module_init(chcr_ktls_init); +module_exit(chcr_ktls_exit); + +MODULE_DESCRIPTION("Chelsio NIC TLS ULD driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(CHCR_KTLS_DRV_VERSION); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h new file mode 100644 index 000000000000..10572dc55365 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2020 Chelsio Communications. All rights reserved. 
*/ + +#ifndef __CHCR_KTLS_H__ +#define __CHCR_KTLS_H__ + +#include "cxgb4.h" +#include "t4_msg.h" +#include "t4_tcb.h" +#include "l2t.h" +#include "chcr_common.h" +#include "cxgb4_uld.h" +#include "clip_tbl.h" + +#define CHCR_KTLS_DRV_MODULE_NAME "ch_ktls" +#define CHCR_KTLS_DRV_VERSION "1.0.0.0-ko" +#define CHCR_KTLS_DRV_DESC "Chelsio NIC TLS ULD Driver" + +#define CHCR_TCB_STATE_CLOSED 0 +#define CHCR_KTLS_KEY_CTX_LEN 16 +#define CHCR_SET_TCB_FIELD_LEN sizeof(struct cpl_set_tcb_field) +#define CHCR_PLAIN_TX_DATA_LEN (sizeof(struct fw_ulptx_wr) +\ + sizeof(struct ulp_txpkt) +\ + sizeof(struct ulptx_idata) +\ + sizeof(struct cpl_tx_data)) + +#define CHCR_KTLS_WR_SIZE (CHCR_PLAIN_TX_DATA_LEN +\ + sizeof(struct cpl_tx_sec_pdu)) +#define FALLBACK 35 + +enum ch_ktls_open_state { + CH_KTLS_OPEN_SUCCESS = 0, + CH_KTLS_OPEN_PENDING = 1, + CH_KTLS_OPEN_FAILURE = 2, +}; + +struct chcr_ktls_info { + struct sock *sk; + spinlock_t lock; /* lock for pending_close */ + struct ktls_key_ctx key_ctx; + struct adapter *adap; + struct l2t_entry *l2te; + struct net_device *netdev; + struct completion completion; + u64 iv; + u64 record_no; + int tid; + int atid; + int rx_qid; + u32 iv_size; + u32 prev_seq; + u32 prev_ack; + u32 salt_size; + u32 key_ctx_len; + u32 scmd0_seqno_numivs; + u32 scmd0_ivgen_hdrlen; + u32 tcp_start_seq_number; + u32 scmd0_short_seqno_numivs; + u32 scmd0_short_ivgen_hdrlen; + u16 prev_win; + u8 tx_chan; + u8 smt_idx; + u8 port_id; + u8 ip_family; + u8 first_qset; + enum ch_ktls_open_state open_state; + bool pending_close; +}; + +struct chcr_ktls_ofld_ctx_tx { + struct tls_offload_context_tx base; + struct chcr_ktls_info *chcr_info; +}; + +struct chcr_ktls_uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct xarray tid_list; + bool detach; +}; + +static inline struct chcr_ktls_ofld_ctx_tx * +chcr_get_ktls_tx_context(struct tls_context *tls_ctx) +{ + BUILD_BUG_ON(sizeof(struct chcr_ktls_ofld_ctx_tx) > + TLS_OFFLOAD_CONTEXT_SIZE_TX); + return container_of(tls_offload_ctx_tx(tls_ctx), + struct chcr_ktls_ofld_ctx_tx, + base); +} + +static inline int chcr_get_first_rx_qid(struct adapter *adap) +{ + /* u_ctx is saved in adap, fetch it */ + struct chcr_ktls_uld_ctx *u_ctx = adap->uld[CXGB4_ULD_KTLS].handle; + + if (!u_ctx) + return -1; + return u_ctx->lldi.rxq_ids[0]; +} + +typedef int (*chcr_handler_func)(struct adapter *adap, unsigned char *input); +#endif /* __CHCR_KTLS_H__ */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/Makefile b/drivers/net/ethernet/chelsio/inline_crypto/chtls/Makefile new file mode 100644 index 000000000000..bc11495acdb3 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 \ + -I $(srctree)/drivers/crypto/chelsio + +obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls.o +chtls-objs := chtls_main.o chtls_cm.o chtls_io.o chtls_hw.o diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h new file mode 100644 index 000000000000..41714203ace8 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -0,0 +1,586 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 Chelsio Communications, Inc. 
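
Editor's note: chcr_get_ktls_tx_context() relies on the wrapper-struct idiom: chcr_ktls_ofld_ctx_tx embeds the core's tls_offload_context_tx as its first member, the BUILD_BUG_ON guarantees the wrapper fits in the space the TLS core allocates, and container_of() recovers the wrapper from the base pointer. A userspace model of the recovery step:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct base_ctx { int data; };		/* stands in for the core context */
struct wrap_ctx {
	struct base_ctx base;		/* first member, matching the driver's layout */
	void *priv;			/* driver-private state */
};

int main(void)
{
	struct wrap_ctx w = { .base = { 42 }, .priv = NULL };
	struct base_ctx *b = &w.base;	/* what the core hands back */
	struct wrap_ctx *got = container_of(b, struct wrap_ctx, base);

	printf("%d %d\n", got->base.data, got == &w);	/* prints "42 1" */
	return 0;
}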
+ */ + +#ifndef __CHTLS_H__ +#define __CHTLS_H__ + +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/hash.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> +#include <crypto/authenc.h> +#include <crypto/ctr.h> +#include <crypto/gf128mul.h> +#include <crypto/internal/aead.h> +#include <crypto/null.h> +#include <crypto/internal/skcipher.h> +#include <crypto/aead.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/hash.h> +#include <linux/tls.h> +#include <net/tls.h> +#include <net/tls_toe.h> + +#include "t4fw_api.h" +#include "t4_msg.h" +#include "cxgb4.h" +#include "cxgb4_uld.h" +#include "l2t.h" +#include "chcr_algo.h" +#include "chcr_core.h" +#include "chcr_crypto.h" + +#define CHTLS_DRV_VERSION "1.0.0.0-ko" + +#define TLS_KEYCTX_RXFLIT_CNT_S 24 +#define TLS_KEYCTX_RXFLIT_CNT_V(x) ((x) << TLS_KEYCTX_RXFLIT_CNT_S) + +#define TLS_KEYCTX_RXPROT_VER_S 20 +#define TLS_KEYCTX_RXPROT_VER_M 0xf +#define TLS_KEYCTX_RXPROT_VER_V(x) ((x) << TLS_KEYCTX_RXPROT_VER_S) + +#define TLS_KEYCTX_RXCIPH_MODE_S 16 +#define TLS_KEYCTX_RXCIPH_MODE_M 0xf +#define TLS_KEYCTX_RXCIPH_MODE_V(x) ((x) << TLS_KEYCTX_RXCIPH_MODE_S) + +#define TLS_KEYCTX_RXAUTH_MODE_S 12 +#define TLS_KEYCTX_RXAUTH_MODE_M 0xf +#define TLS_KEYCTX_RXAUTH_MODE_V(x) ((x) << TLS_KEYCTX_RXAUTH_MODE_S) + +#define TLS_KEYCTX_RXCIAU_CTRL_S 11 +#define TLS_KEYCTX_RXCIAU_CTRL_V(x) ((x) << TLS_KEYCTX_RXCIAU_CTRL_S) + +#define TLS_KEYCTX_RX_SEQCTR_S 9 +#define TLS_KEYCTX_RX_SEQCTR_M 0x3 +#define TLS_KEYCTX_RX_SEQCTR_V(x) ((x) << TLS_KEYCTX_RX_SEQCTR_S) + +#define TLS_KEYCTX_RX_VALID_S 8 +#define TLS_KEYCTX_RX_VALID_V(x) ((x) << TLS_KEYCTX_RX_VALID_S) + +#define TLS_KEYCTX_RXCK_SIZE_S 3 +#define TLS_KEYCTX_RXCK_SIZE_M 0x7 +#define TLS_KEYCTX_RXCK_SIZE_V(x) ((x) << TLS_KEYCTX_RXCK_SIZE_S) + +#define TLS_KEYCTX_RXMK_SIZE_S 0 +#define TLS_KEYCTX_RXMK_SIZE_M 0x7 +#define TLS_KEYCTX_RXMK_SIZE_V(x) ((x) << TLS_KEYCTX_RXMK_SIZE_S) + +#define KEYCTX_TX_WR_IV_S 55 +#define KEYCTX_TX_WR_IV_M 0x1ffULL +#define KEYCTX_TX_WR_IV_V(x) ((x) << KEYCTX_TX_WR_IV_S) +#define KEYCTX_TX_WR_IV_G(x) \ + (((x) >> KEYCTX_TX_WR_IV_S) & KEYCTX_TX_WR_IV_M) + +#define KEYCTX_TX_WR_AAD_S 47 +#define KEYCTX_TX_WR_AAD_M 0xffULL +#define KEYCTX_TX_WR_AAD_V(x) ((x) << KEYCTX_TX_WR_AAD_S) +#define KEYCTX_TX_WR_AAD_G(x) (((x) >> KEYCTX_TX_WR_AAD_S) & \ + KEYCTX_TX_WR_AAD_M) + +#define KEYCTX_TX_WR_AADST_S 39 +#define KEYCTX_TX_WR_AADST_M 0xffULL +#define KEYCTX_TX_WR_AADST_V(x) ((x) << KEYCTX_TX_WR_AADST_S) +#define KEYCTX_TX_WR_AADST_G(x) \ + (((x) >> KEYCTX_TX_WR_AADST_S) & KEYCTX_TX_WR_AADST_M) + +#define KEYCTX_TX_WR_CIPHER_S 30 +#define KEYCTX_TX_WR_CIPHER_M 0x1ffULL +#define KEYCTX_TX_WR_CIPHER_V(x) ((x) << KEYCTX_TX_WR_CIPHER_S) +#define KEYCTX_TX_WR_CIPHER_G(x) \ + (((x) >> KEYCTX_TX_WR_CIPHER_S) & KEYCTX_TX_WR_CIPHER_M) + +#define KEYCTX_TX_WR_CIPHERST_S 23 +#define KEYCTX_TX_WR_CIPHERST_M 0x7f +#define KEYCTX_TX_WR_CIPHERST_V(x) ((x) << KEYCTX_TX_WR_CIPHERST_S) +#define KEYCTX_TX_WR_CIPHERST_G(x) \ + (((x) >> KEYCTX_TX_WR_CIPHERST_S) & KEYCTX_TX_WR_CIPHERST_M) + +#define KEYCTX_TX_WR_AUTH_S 14 +#define KEYCTX_TX_WR_AUTH_M 0x1ff +#define KEYCTX_TX_WR_AUTH_V(x) ((x) << KEYCTX_TX_WR_AUTH_S) +#define KEYCTX_TX_WR_AUTH_G(x) \ + (((x) >> KEYCTX_TX_WR_AUTH_S) & KEYCTX_TX_WR_AUTH_M) + +#define KEYCTX_TX_WR_AUTHST_S 7 +#define KEYCTX_TX_WR_AUTHST_M 0x7f +#define KEYCTX_TX_WR_AUTHST_V(x) ((x) << KEYCTX_TX_WR_AUTHST_S) +#define KEYCTX_TX_WR_AUTHST_G(x) \ + (((x) >> KEYCTX_TX_WR_AUTHST_S) & KEYCTX_TX_WR_AUTHST_M) + +#define KEYCTX_TX_WR_AUTHIN_S 0 
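
Editor's note: the wall of key-context macros above (and below) all follow the cxgb4 _S/_M/_V/_G convention: _S is the bit offset, _M the unshifted mask, _V packs a value, _G extracts one. A host-side demonstration with one invented field:

#include <stdint.h>
#include <stdio.h>

#define DEMO_RXCK_SIZE_S	3
#define DEMO_RXCK_SIZE_M	0x7U
#define DEMO_RXCK_SIZE_V(x)	((x) << DEMO_RXCK_SIZE_S)
#define DEMO_RXCK_SIZE_G(x)	(((x) >> DEMO_RXCK_SIZE_S) & DEMO_RXCK_SIZE_M)

int main(void)
{
	uint32_t w = DEMO_RXCK_SIZE_V(5u) | 1u;	/* pack 5, plus an unrelated bit */

	printf("%u\n", DEMO_RXCK_SIZE_G(w));	/* extracts 5 */
	return 0;
}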
+#define KEYCTX_TX_WR_AUTHIN_M 0x7f +#define KEYCTX_TX_WR_AUTHIN_V(x) ((x) << KEYCTX_TX_WR_AUTHIN_S) +#define KEYCTX_TX_WR_AUTHIN_G(x) \ + (((x) >> KEYCTX_TX_WR_AUTHIN_S) & KEYCTX_TX_WR_AUTHIN_M) + +struct sge_opaque_hdr { + void *dev; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; +}; + +#define MAX_IVS_PAGE 256 +#define TLS_KEY_CONTEXT_SZ 64 +#define CIPHER_BLOCK_SIZE 16 +#define GCM_TAG_SIZE 16 +#define KEY_ON_MEM_SZ 16 +#define AEAD_EXPLICIT_DATA_SIZE 8 +#define TLS_HEADER_LENGTH 5 +#define SCMD_CIPH_MODE_AES_GCM 2 +/* Any MFS size should work and come from openssl */ +#define TLS_MFS 16384 + +#define RSS_HDR sizeof(struct rss_header) +#define TLS_WR_CPL_LEN \ + (sizeof(struct fw_tlstx_data_wr) + sizeof(struct cpl_tx_tls_sfo)) + +enum { + CHTLS_KEY_CONTEXT_DSGL, + CHTLS_KEY_CONTEXT_IMM, + CHTLS_KEY_CONTEXT_DDR, +}; + +enum { + CHTLS_LISTEN_START, + CHTLS_LISTEN_STOP, +}; + +/* Flags for return value of CPL message handlers */ +enum { + CPL_RET_BUF_DONE = 1, /* buffer processing done */ + CPL_RET_BAD_MSG = 2, /* bad CPL message */ + CPL_RET_UNKNOWN_TID = 4 /* unexpected unknown TID */ +}; + +#define LISTEN_INFO_HASH_SIZE 32 +#define RSPQ_HASH_BITS 5 +struct listen_info { + struct listen_info *next; /* Link to next entry */ + struct sock *sk; /* The listening socket */ + unsigned int stid; /* The server TID */ +}; + +enum { + T4_LISTEN_START_PENDING, + T4_LISTEN_STARTED +}; + +enum csk_flags { + CSK_CALLBACKS_CHKD, /* socket callbacks have been sanitized */ + CSK_ABORT_REQ_RCVD, /* received one ABORT_REQ_RSS message */ + CSK_TX_MORE_DATA, /* sending ULP data; don't set SHOVE bit */ + CSK_TX_WAIT_IDLE, /* suspend Tx until in-flight data is ACKed */ + CSK_ABORT_SHUTDOWN, /* shouldn't send more abort requests */ + CSK_ABORT_RPL_PENDING, /* expecting an abort reply */ + CSK_CLOSE_CON_REQUESTED,/* we've sent a close_conn_req */ + CSK_TX_DATA_SENT, /* sent a TX_DATA WR on this connection */ + CSK_TX_FAILOVER, /* Tx traffic failing over */ + CSK_UPDATE_RCV_WND, /* Need to update rcv window */ + CSK_RST_ABORTED, /* outgoing RST was aborted */ + CSK_TLS_HANDSHK, /* TLS Handshake */ + CSK_CONN_INLINE, /* Connection on HW */ +}; + +enum chtls_cdev_state { + CHTLS_CDEV_STATE_UP = 1 +}; + +struct listen_ctx { + struct sock *lsk; + struct chtls_dev *cdev; + struct sk_buff_head synq; + u32 state; +}; + +struct key_map { + unsigned long *addr; + unsigned int start; + unsigned int available; + unsigned int size; + spinlock_t lock; /* lock for key id request from map */ +} __packed; + +struct tls_scmd { + u32 seqno_numivs; + u32 ivgen_hdrlen; +}; + +struct chtls_dev { + struct tls_toe_device tlsdev; + struct list_head list; + struct cxgb4_lld_info *lldi; + struct pci_dev *pdev; + struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE]; + spinlock_t listen_lock; /* lock for listen list */ + struct net_device **ports; + struct tid_info *tids; + unsigned int pfvf; + const unsigned short *mtus; + + struct idr hwtid_idr; + struct idr stid_idr; + + spinlock_t idr_lock ____cacheline_aligned_in_smp; + + struct net_device *egr_dev[NCHAN * 2]; + struct sk_buff *rspq_skb_cache[1 << RSPQ_HASH_BITS]; + struct sk_buff *askb; + + struct sk_buff_head deferq; + struct work_struct deferq_task; + + struct list_head list_node; + struct list_head rcu_node; + struct list_head na_node; + unsigned int send_page_order; + int max_host_sndbuf; + u32 round_robin_cnt; + struct key_map kmap; + unsigned int cdev_state; +}; + +struct chtls_listen { + struct chtls_dev *cdev; + struct sock *sk; +}; + +struct chtls_hws { + struct sk_buff_head 
sk_recv_queue;
+	u8 txqid;
+	u8 ofld;
+	u16 type;
+	u16 rstate;
+	u16 keyrpl;
+	u16 pldlen;
+	u16 rcvpld;
+	u16 compute;
+	u16 expansion;
+	u16 keylen;
+	u16 pdus;
+	u16 adjustlen;
+	u16 ivsize;
+	u16 txleft;
+	u32 mfs;
+	s32 txkey;
+	s32 rxkey;
+	u32 fcplenmax;
+	u32 copied_seq;
+	u64 tx_seq_no;
+	struct tls_scmd scmd;
+	union {
+		struct tls12_crypto_info_aes_gcm_128 aes_gcm_128;
+		struct tls12_crypto_info_aes_gcm_256 aes_gcm_256;
+	} crypto_info;
+};
+
+struct chtls_sock {
+	struct sock *sk;
+	struct chtls_dev *cdev;
+	struct l2t_entry *l2t_entry;    /* pointer to the L2T entry */
+	struct net_device *egress_dev;  /* TX_CHAN for act open retry */
+
+	struct sk_buff_head txq;
+	struct sk_buff *wr_skb_head;
+	struct sk_buff *wr_skb_tail;
+	struct sk_buff *ctrl_skb_cache;
+	struct sk_buff *txdata_skb_cache; /* abort path messages */
+	struct kref kref;
+	unsigned long flags;
+	u32 opt2;
+	u32 wr_credits;
+	u32 wr_unacked;
+	u32 wr_max_credits;
+	u32 wr_nondata;
+	u32 hwtid;               /* TCP Control Block ID */
+	u32 txq_idx;
+	u32 rss_qid;
+	u32 tid;
+	u32 idr;
+	u32 mss;
+	u32 ulp_mode;
+	u32 tx_chan;
+	u32 rx_chan;
+	u32 sndbuf;
+	u32 txplen_max;
+	u32 mtu_idx;           /* MTU table index */
+	u32 smac_idx;
+	u8 port_id;
+	u8 tos;
+	u16 resv2;
+	u32 delack_mode;
+	u32 delack_seq;
+	u32 snd_win;
+	u32 rcv_win;
+
+	void *passive_reap_next;        /* placeholder for passive */
+	struct chtls_hws tlshws;
+	struct synq {
+		struct sk_buff *next;
+		struct sk_buff *prev;
+	} synq;
+	struct listen_ctx *listen_ctx;
+};
+
+struct tls_hdr {
+	u8  type;
+	u16 version;
+	u16 length;
+} __packed;
+
+struct tlsrx_cmp_hdr {
+	u8  type;
+	u16 version;
+	u16 length;
+
+	u64 tls_seq;
+	u16 reserved1;
+	u8  res_to_mac_error;
+} __packed;
+
+/* res_to_mac_error fields */
+#define TLSRX_HDR_PKT_INT_ERROR_S   4
+#define TLSRX_HDR_PKT_INT_ERROR_M   0x1
+#define TLSRX_HDR_PKT_INT_ERROR_V(x) \
+	((x) << TLSRX_HDR_PKT_INT_ERROR_S)
+#define TLSRX_HDR_PKT_INT_ERROR_G(x) \
+	(((x) >> TLSRX_HDR_PKT_INT_ERROR_S) & TLSRX_HDR_PKT_INT_ERROR_M)
+#define TLSRX_HDR_PKT_INT_ERROR_F   TLSRX_HDR_PKT_INT_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_SPP_ERROR_S        3
+#define TLSRX_HDR_PKT_SPP_ERROR_M        0x1
+#define TLSRX_HDR_PKT_SPP_ERROR_V(x)     ((x) << TLSRX_HDR_PKT_SPP_ERROR_S)
+#define TLSRX_HDR_PKT_SPP_ERROR_G(x)     \
+	(((x) >> TLSRX_HDR_PKT_SPP_ERROR_S) & TLSRX_HDR_PKT_SPP_ERROR_M)
+#define TLSRX_HDR_PKT_SPP_ERROR_F        TLSRX_HDR_PKT_SPP_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_CCDX_ERROR_S       2
+#define TLSRX_HDR_PKT_CCDX_ERROR_M       0x1
+#define TLSRX_HDR_PKT_CCDX_ERROR_V(x)    ((x) << TLSRX_HDR_PKT_CCDX_ERROR_S)
+#define TLSRX_HDR_PKT_CCDX_ERROR_G(x)    \
+	(((x) >> TLSRX_HDR_PKT_CCDX_ERROR_S) & TLSRX_HDR_PKT_CCDX_ERROR_M)
+#define TLSRX_HDR_PKT_CCDX_ERROR_F       TLSRX_HDR_PKT_CCDX_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_PAD_ERROR_S        1
+#define TLSRX_HDR_PKT_PAD_ERROR_M        0x1
+#define TLSRX_HDR_PKT_PAD_ERROR_V(x)     ((x) << TLSRX_HDR_PKT_PAD_ERROR_S)
+#define TLSRX_HDR_PKT_PAD_ERROR_G(x)     \
+	(((x) >> TLSRX_HDR_PKT_PAD_ERROR_S) & TLSRX_HDR_PKT_PAD_ERROR_M)
+#define TLSRX_HDR_PKT_PAD_ERROR_F        TLSRX_HDR_PKT_PAD_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_MAC_ERROR_S        0
+#define TLSRX_HDR_PKT_MAC_ERROR_M        0x1
+#define TLSRX_HDR_PKT_MAC_ERROR_V(x)     ((x) << TLSRX_HDR_PKT_MAC_ERROR_S)
+#define TLSRX_HDR_PKT_MAC_ERROR_G(x)     \
+	(((x) >> TLSRX_HDR_PKT_MAC_ERROR_S) & TLSRX_HDR_PKT_MAC_ERROR_M)
+#define TLSRX_HDR_PKT_MAC_ERROR_F        TLSRX_HDR_PKT_MAC_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_ERROR_M           0x1F
+#define CONTENT_TYPE_ERROR              0x7F
+
+struct ulp_mem_rw {
+	__be32 cmd;
+	__be32 len16;             /* command length */
+	__be32 dlen;              /* data length in
32-byte units */ + __be32 lock_addr; +}; + +struct tls_key_wr { + __be32 op_to_compl; + __be32 flowid_len16; + __be32 ftid; + u8 reneg_to_write_rx; + u8 protocol; + __be16 mfs; +}; + +struct tls_key_req { + struct tls_key_wr wr; + struct ulp_mem_rw req; + struct ulptx_idata sc_imm; +}; + +/* + * This lives in skb->cb and is used to chain WRs in a linked list. + */ +struct wr_skb_cb { + struct l2t_skb_cb l2t; /* reserve space for l2t CB */ + struct sk_buff *next_wr; /* next write request */ +}; + +/* Per-skb backlog handler. Run when a socket's backlog is processed. */ +struct blog_skb_cb { + void (*backlog_rcv)(struct sock *sk, struct sk_buff *skb); + struct chtls_dev *cdev; +}; + +/* + * Similar to tcp_skb_cb but with ULP elements added to support TLS, + * etc. + */ +struct ulp_skb_cb { + struct wr_skb_cb wr; /* reserve space for write request */ + u16 flags; /* TCP-like flags */ + u8 psh; + u8 ulp_mode; /* ULP mode/submode of sk_buff */ + u32 seq; /* TCP sequence number */ + union { /* ULP-specific fields */ + struct { + u8 type; + u8 ofld; + u8 iv; + } tls; + } ulp; +}; + +#define ULP_SKB_CB(skb) ((struct ulp_skb_cb *)&((skb)->cb[0])) +#define BLOG_SKB_CB(skb) ((struct blog_skb_cb *)(skb)->cb) + +/* + * Flags for ulp_skb_cb.flags. + */ +enum { + ULPCB_FLAG_NEED_HDR = 1 << 0, /* packet needs a TX_DATA_WR header */ + ULPCB_FLAG_NO_APPEND = 1 << 1, /* don't grow this skb */ + ULPCB_FLAG_BARRIER = 1 << 2, /* set TX_WAIT_IDLE after sending */ + ULPCB_FLAG_HOLD = 1 << 3, /* skb not ready for Tx yet */ + ULPCB_FLAG_COMPL = 1 << 4, /* request WR completion */ + ULPCB_FLAG_URG = 1 << 5, /* urgent data */ + ULPCB_FLAG_TLS_HDR = 1 << 6, /* payload with tls hdr */ + ULPCB_FLAG_NO_HDR = 1 << 7, /* not a ofld wr */ +}; + +/* The ULP mode/submode of an skbuff */ +#define skb_ulp_mode(skb) (ULP_SKB_CB(skb)->ulp_mode) +#define TCP_PAGE(sk) (sk->sk_frag.page) +#define TCP_OFF(sk) (sk->sk_frag.offset) + +static inline struct chtls_dev *to_chtls_dev(struct tls_toe_device *tlsdev) +{ + return container_of(tlsdev, struct chtls_dev, tlsdev); +} + +static inline void csk_set_flag(struct chtls_sock *csk, + enum csk_flags flag) +{ + __set_bit(flag, &csk->flags); +} + +static inline void csk_reset_flag(struct chtls_sock *csk, + enum csk_flags flag) +{ + __clear_bit(flag, &csk->flags); +} + +static inline bool csk_conn_inline(const struct chtls_sock *csk) +{ + return test_bit(CSK_CONN_INLINE, &csk->flags); +} + +static inline int csk_flag(const struct sock *sk, enum csk_flags flag) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (!csk_conn_inline(csk)) + return 0; + return test_bit(flag, &csk->flags); +} + +static inline int csk_flag_nochk(const struct chtls_sock *csk, + enum csk_flags flag) +{ + return test_bit(flag, &csk->flags); +} + +static inline void *cplhdr(struct sk_buff *skb) +{ + return skb->data; +} + +static inline int is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE; +} + +static inline void process_cpl_msg(void (*fn)(struct sock *, struct sk_buff *), + struct sock *sk, + struct sk_buff *skb) +{ + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + bh_lock_sock(sk); + if (unlikely(sock_owned_by_user(sk))) { + BLOG_SKB_CB(skb)->backlog_rcv = fn; + __sk_add_backlog(sk, skb); + } else { + fn(sk, skb); + } + bh_unlock_sock(sk); +} + +static inline void chtls_sock_free(struct kref *ref) +{ + struct chtls_sock *csk = 
container_of(ref, struct chtls_sock, + kref); + kfree(csk); +} + +static inline void __chtls_sock_put(const char *fn, struct chtls_sock *csk) +{ + kref_put(&csk->kref, chtls_sock_free); +} + +static inline void __chtls_sock_get(const char *fn, + struct chtls_sock *csk) +{ + kref_get(&csk->kref); +} + +static inline void send_or_defer(struct sock *sk, struct tcp_sock *tp, + struct sk_buff *skb, int through_l2t) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (through_l2t) { + /* send through L2T */ + cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); + } else { + /* send directly */ + cxgb4_ofld_send(csk->egress_dev, skb); + } +} + +typedef int (*chtls_handler_func)(struct chtls_dev *, struct sk_buff *); +extern chtls_handler_func chtls_handlers[NUM_CPL_CMDS]; +void chtls_install_cpl_ops(struct sock *sk); +int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi); +void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk); +int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk); +void chtls_close(struct sock *sk, long timeout); +int chtls_disconnect(struct sock *sk, int flags); +void chtls_shutdown(struct sock *sk, int how); +void chtls_destroy_sock(struct sock *sk); +int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int chtls_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags, int *addr_len); +int chtls_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); +int send_tx_flowc_wr(struct sock *sk, int compl, + u32 snd_nxt, u32 rcv_nxt); +void chtls_tcp_push(struct sock *sk, int flags); +int chtls_push_frames(struct chtls_sock *csk, int comp); +int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val); +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t); +int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode, int cipher_type); +void chtls_set_quiesce_ctrl(struct sock *sk, int val); +void skb_entail(struct sock *sk, struct sk_buff *skb, int flags); +unsigned int keyid_to_addr(int start_addr, int keyid); +void free_tls_keyid(struct sock *sk); +#endif diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c new file mode 100644 index 000000000000..c2e7037c7ba1 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -0,0 +1,2334 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018 Chelsio Communications, Inc. + * + * Written by: Atul Gupta (atul.gupta@chelsio.com) + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include <linux/timer.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/sched/signal.h> +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/if_vlan.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/transp_v6.h> +#include <net/ip6_route.h> +#include <net/inet_common.h> +#include <net/tcp.h> +#include <net/dst.h> +#include <net/tls.h> +#include <net/addrconf.h> +#include <net/secure_seq.h> + +#include "chtls.h" +#include "chtls_cm.h" +#include "clip_tbl.h" +#include "t4_tcb.h" + +/* + * State transitions and actions for close. 
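+ * Each entry packs the next state into its low bits (TCP_STATE_MASK) and
+ * may OR in TCP_ACTION_FIN; make_close_transition() below decodes it as,
+ * roughly:
+ *
+ *	next = new_state[sk->sk_state];
+ *	tcp_set_state(sk, next & TCP_STATE_MASK);
+ *	return next & TCP_ACTION_FIN;	- nonzero tells the caller to send a FIN
+ *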
Note that if we are in SYN_SENT + * we remain in that state as we cannot control a connection while it's in + * SYN_SENT; such connections are allowed to establish and are then aborted. + */ +static unsigned char new_state[16] = { + /* current state: new state: action: */ + /* (Invalid) */ TCP_CLOSE, + /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_SYN_SENT */ TCP_SYN_SENT, + /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, + /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, + /* TCP_TIME_WAIT */ TCP_CLOSE, + /* TCP_CLOSE */ TCP_CLOSE, + /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, + /* TCP_LAST_ACK */ TCP_LAST_ACK, + /* TCP_LISTEN */ TCP_CLOSE, + /* TCP_CLOSING */ TCP_CLOSING, +}; + +static struct chtls_sock *chtls_sock_create(struct chtls_dev *cdev) +{ + struct chtls_sock *csk = kzalloc(sizeof(*csk), GFP_ATOMIC); + + if (!csk) + return NULL; + + csk->txdata_skb_cache = alloc_skb(TXDATA_SKB_LEN, GFP_ATOMIC); + if (!csk->txdata_skb_cache) { + kfree(csk); + return NULL; + } + + kref_init(&csk->kref); + csk->cdev = cdev; + skb_queue_head_init(&csk->txq); + csk->wr_skb_head = NULL; + csk->wr_skb_tail = NULL; + csk->mss = MAX_MSS; + csk->tlshws.ofld = 1; + csk->tlshws.txkey = -1; + csk->tlshws.rxkey = -1; + csk->tlshws.mfs = TLS_MFS; + skb_queue_head_init(&csk->tlshws.sk_recv_queue); + return csk; +} + +static void chtls_sock_release(struct kref *ref) +{ + struct chtls_sock *csk = + container_of(ref, struct chtls_sock, kref); + + kfree(csk); +} + +static struct net_device *chtls_find_netdev(struct chtls_dev *cdev, + struct sock *sk) +{ + struct adapter *adap = pci_get_drvdata(cdev->pdev); + struct net_device *ndev = cdev->ports[0]; +#if IS_ENABLED(CONFIG_IPV6) + struct net_device *temp; + int addr_type; +#endif + int i; + + switch (sk->sk_family) { + case PF_INET: + if (likely(!inet_sk(sk)->inet_rcv_saddr)) + return ndev; + ndev = __ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr, false); + break; +#if IS_ENABLED(CONFIG_IPV6) + case PF_INET6: + addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + if (likely(addr_type == IPV6_ADDR_ANY)) + return ndev; + + for_each_netdev_rcu(&init_net, temp) { + if (ipv6_chk_addr(&init_net, (struct in6_addr *) + &sk->sk_v6_rcv_saddr, temp, 1)) { + ndev = temp; + break; + } + } + break; +#endif + default: + return NULL; + } + + if (!ndev) + return NULL; + + if (is_vlan_dev(ndev)) + ndev = vlan_dev_real_dev(ndev); + + for_each_port(adap, i) + if (cdev->ports[i] == ndev) + return ndev; + return NULL; +} + +static void assign_rxopt(struct sock *sk, unsigned int opt) +{ + const struct chtls_dev *cdev; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tp = tcp_sk(sk); + + cdev = csk->cdev; + tp->tcp_header_len = sizeof(struct tcphdr); + tp->rx_opt.mss_clamp = cdev->mtus[TCPOPT_MSS_G(opt)] - 40; + tp->mss_cache = tp->rx_opt.mss_clamp; + tp->rx_opt.tstamp_ok = TCPOPT_TSTAMP_G(opt); + tp->rx_opt.snd_wscale = TCPOPT_SACK_G(opt); + tp->rx_opt.wscale_ok = TCPOPT_WSCALE_OK_G(opt); + SND_WSCALE(tp) = TCPOPT_SND_WSCALE_G(opt); + if (!tp->rx_opt.wscale_ok) + tp->rx_opt.rcv_wscale = 0; + if (tp->rx_opt.tstamp_ok) { + tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; + tp->rx_opt.mss_clamp -= TCPOLEN_TSTAMP_ALIGNED; + } else if (csk->opt2 & TSTAMPS_EN_F) { + csk->opt2 &= ~TSTAMPS_EN_F; + csk->mtu_idx = TCPOPT_MSS_G(opt); + } +} + +static void chtls_purge_receive_queue(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + 
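+		/* a queued skb may still hold a dst reference; clear it before freeing */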
skb_dst_set(skb, (void *)NULL); + kfree_skb(skb); + } +} + +static void chtls_purge_write_queue(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&csk->txq))) { + sk->sk_wmem_queued -= skb->truesize; + __kfree_skb(skb); + } +} + +static void chtls_purge_recv_queue(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_hws *tlsk = &csk->tlshws; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) { + skb_dst_set(skb, NULL); + kfree_skb(skb); + } +} + +static void abort_arp_failure(void *handle, struct sk_buff *skb) +{ + struct cpl_abort_req *req = cplhdr(skb); + struct chtls_dev *cdev; + + cdev = (struct chtls_dev *)handle; + req->cmd = CPL_ABORT_NO_RST; + cxgb4_ofld_send(cdev->lldi->ports[0], skb); +} + +static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len) +{ + if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) { + __skb_trim(skb, 0); + refcount_inc(&skb->users); + } else { + skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); + } + return skb; +} + +static void chtls_send_abort(struct sock *sk, int mode, struct sk_buff *skb) +{ + struct cpl_abort_req *req; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tp = tcp_sk(sk); + + if (!skb) + skb = alloc_ctrl_skb(csk->txdata_skb_cache, sizeof(*req)); + + req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req)); + INIT_TP_WR_CPL(req, CPL_ABORT_REQ, csk->tid); + skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); + req->rsvd0 = htonl(tp->snd_nxt); + req->rsvd1 = !csk_flag_nochk(csk, CSK_TX_DATA_SENT); + req->cmd = mode; + t4_set_arp_err_handler(skb, csk->cdev, abort_arp_failure); + send_or_defer(sk, tp, skb, mode == CPL_ABORT_SEND_RST); +} + +static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (unlikely(csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) || + !csk->cdev)) { + if (sk->sk_state == TCP_SYN_RECV) + csk_set_flag(csk, CSK_RST_ABORTED); + goto out; + } + + if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { + struct tcp_sock *tp = tcp_sk(sk); + + if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0) + WARN_ONCE(1, "send tx flowc error"); + csk_set_flag(csk, CSK_TX_DATA_SENT); + } + + csk_set_flag(csk, CSK_ABORT_RPL_PENDING); + chtls_purge_write_queue(sk); + + csk_set_flag(csk, CSK_ABORT_SHUTDOWN); + if (sk->sk_state != TCP_SYN_RECV) + chtls_send_abort(sk, mode, skb); + else + chtls_set_tcb_field_rpl_skb(sk, TCB_T_FLAGS_W, + TCB_T_FLAGS_V(TCB_T_FLAGS_M), 0, + TCB_FIELD_COOKIE_TFLAG, 1); + + return; +out: + kfree_skb(skb); +} + +static void release_tcp_port(struct sock *sk) +{ + if (inet_csk(sk)->icsk_bind_hash) + inet_put_port(sk); +} + +static void tcp_uncork(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->nonagle & TCP_NAGLE_CORK) { + tp->nonagle &= ~TCP_NAGLE_CORK; + chtls_tcp_push(sk, 0); + } +} + +static void chtls_close_conn(struct sock *sk) +{ + struct cpl_close_con_req *req; + struct chtls_sock *csk; + struct sk_buff *skb; + unsigned int tid; + unsigned int len; + + len = roundup(sizeof(struct cpl_close_con_req), 16); + csk = rcu_dereference_sk_user_data(sk); + tid = csk->tid; + + skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); + req = (struct cpl_close_con_req *)__skb_put(skb, len); + memset(req, 0, len); + req->wr.wr_hi = htonl(FW_WR_OP_V(FW_TP_WR) | + FW_WR_IMMDLEN_V(sizeof(*req) - + 
sizeof(req->wr)));
+	req->wr.wr_mid = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)) |
+			       FW_WR_FLOWID_V(tid));
+
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
+
+	tcp_uncork(sk);
+	skb_entail(sk, skb, ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
+	if (sk->sk_state != TCP_SYN_SENT)
+		chtls_push_frames(csk, 1);
+}
+
+/*
+ * Perform a state transition during close and return the actions indicated
+ * for the transition.  Do not make this function inline; the main reason
+ * it exists at all is to avoid multiple inlining of tcp_set_state.
+ */
+static int make_close_transition(struct sock *sk)
+{
+	int next = (int)new_state[sk->sk_state];
+
+	tcp_set_state(sk, next & TCP_STATE_MASK);
+	return next & TCP_ACTION_FIN;
+}
+
+void chtls_close(struct sock *sk, long timeout)
+{
+	int data_lost, prev_state;
+	struct chtls_sock *csk;
+
+	csk = rcu_dereference_sk_user_data(sk);
+
+	lock_sock(sk);
+	sk->sk_shutdown |= SHUTDOWN_MASK;
+
+	data_lost = skb_queue_len(&sk->sk_receive_queue);
+	data_lost |= skb_queue_len(&csk->tlshws.sk_recv_queue);
+	chtls_purge_recv_queue(sk);
+	chtls_purge_receive_queue(sk);
+
+	if (sk->sk_state == TCP_CLOSE) {
+		goto wait;
+	} else if (data_lost || sk->sk_state == TCP_SYN_SENT) {
+		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
+		release_tcp_port(sk);
+		goto unlock;
+	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+		sk->sk_prot->disconnect(sk, 0);
+	} else if (make_close_transition(sk)) {
+		chtls_close_conn(sk);
+	}
+wait:
+	if (timeout)
+		sk_stream_wait_close(sk, timeout);
+
+unlock:
+	prev_state = sk->sk_state;
+	sock_hold(sk);
+	sock_orphan(sk);
+
+	release_sock(sk);
+
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	if (prev_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
+		goto out;
+
+	if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 &&
+	    !csk_flag(sk, CSK_ABORT_SHUTDOWN)) {
+		struct sk_buff *skb;
+
+		skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
+		if (skb)
+			chtls_send_reset(sk, CPL_ABORT_SEND_RST, skb);
+	}
+
+	if (sk->sk_state == TCP_CLOSE)
+		inet_csk_destroy_sock(sk);
+
+out:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	sock_put(sk);
+}
+
+/*
+ * Wait until a socket enters one of the given states.
+ */
+static int wait_for_states(struct sock *sk, unsigned int states)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct socket_wq _sk_wq;
+	long current_timeo;
+	int err = 0;
+
+	current_timeo = 200;
+
+	/*
+	 * We want this to work even when there's no associated struct socket.
+	 * In that case we provide a temporary wait_queue_head_t.
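+	 *
+	 * An illustrative caller pattern (cf. chtls_disconnect() below):
+	 *
+	 *	chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
+	 *	err = wait_for_states(sk, TCPF_CLOSE);
+	 *	if (err)
+	 *		return err;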
+ */ + if (!sk->sk_wq) { + init_waitqueue_head(&_sk_wq.wait); + _sk_wq.fasync_list = NULL; + init_rcu_head_on_stack(&_sk_wq.rcu); + RCU_INIT_POINTER(sk->sk_wq, &_sk_wq); + } + + add_wait_queue(sk_sleep(sk), &wait); + while (!sk_in_state(sk, states)) { + if (!current_timeo) { + err = -EBUSY; + break; + } + if (signal_pending(current)) { + err = sock_intr_errno(current_timeo); + break; + } + set_current_state(TASK_UNINTERRUPTIBLE); + release_sock(sk); + if (!sk_in_state(sk, states)) + current_timeo = schedule_timeout(current_timeo); + __set_current_state(TASK_RUNNING); + lock_sock(sk); + } + remove_wait_queue(sk_sleep(sk), &wait); + + if (rcu_dereference(sk->sk_wq) == &_sk_wq) + sk->sk_wq = NULL; + return err; +} + +int chtls_disconnect(struct sock *sk, int flags) +{ + struct tcp_sock *tp; + int err; + + tp = tcp_sk(sk); + chtls_purge_recv_queue(sk); + chtls_purge_receive_queue(sk); + chtls_purge_write_queue(sk); + + if (sk->sk_state != TCP_CLOSE) { + sk->sk_err = ECONNRESET; + chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL); + err = wait_for_states(sk, TCPF_CLOSE); + if (err) + return err; + } + chtls_purge_recv_queue(sk); + chtls_purge_receive_queue(sk); + tp->max_window = 0xFFFF << (tp->rx_opt.snd_wscale); + return tcp_disconnect(sk, flags); +} + +#define SHUTDOWN_ELIGIBLE_STATE (TCPF_ESTABLISHED | \ + TCPF_SYN_RECV | TCPF_CLOSE_WAIT) +void chtls_shutdown(struct sock *sk, int how) +{ + if ((how & SEND_SHUTDOWN) && + sk_in_state(sk, SHUTDOWN_ELIGIBLE_STATE) && + make_close_transition(sk)) + chtls_close_conn(sk); +} + +void chtls_destroy_sock(struct sock *sk) +{ + struct chtls_sock *csk; + + csk = rcu_dereference_sk_user_data(sk); + chtls_purge_recv_queue(sk); + csk->ulp_mode = ULP_MODE_NONE; + chtls_purge_write_queue(sk); + free_tls_keyid(sk); + kref_put(&csk->kref, chtls_sock_release); + if (sk->sk_family == AF_INET) + sk->sk_prot = &tcp_prot; +#if IS_ENABLED(CONFIG_IPV6) + else + sk->sk_prot = &tcpv6_prot; +#endif + sk->sk_prot->destroy(sk); +} + +static void reset_listen_child(struct sock *child) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(child); + struct sk_buff *skb; + + skb = alloc_ctrl_skb(csk->txdata_skb_cache, + sizeof(struct cpl_abort_req)); + + chtls_send_reset(child, CPL_ABORT_SEND_RST, skb); + sock_orphan(child); + INC_ORPHAN_COUNT(child); + if (child->sk_state == TCP_CLOSE) + inet_csk_destroy_sock(child); +} + +static void chtls_disconnect_acceptq(struct sock *listen_sk) +{ + struct request_sock **pprev; + + pprev = ACCEPT_QUEUE(listen_sk); + while (*pprev) { + struct request_sock *req = *pprev; + + if (req->rsk_ops == &chtls_rsk_ops || + req->rsk_ops == &chtls_rsk_opsv6) { + struct sock *child = req->sk; + + *pprev = req->dl_next; + sk_acceptq_removed(listen_sk); + reqsk_put(req); + sock_hold(child); + local_bh_disable(); + bh_lock_sock(child); + release_tcp_port(child); + reset_listen_child(child); + bh_unlock_sock(child); + local_bh_enable(); + sock_put(child); + } else { + pprev = &req->dl_next; + } + } +} + +static int listen_hashfn(const struct sock *sk) +{ + return ((unsigned long)sk >> 10) & (LISTEN_INFO_HASH_SIZE - 1); +} + +static struct listen_info *listen_hash_add(struct chtls_dev *cdev, + struct sock *sk, + unsigned int stid) +{ + struct listen_info *p = kmalloc(sizeof(*p), GFP_KERNEL); + + if (p) { + int key = listen_hashfn(sk); + + p->sk = sk; + p->stid = stid; + spin_lock(&cdev->listen_lock); + p->next = cdev->listen_hash_tab[key]; + cdev->listen_hash_tab[key] = p; + spin_unlock(&cdev->listen_lock); + } + return p; +} + +static int 
listen_hash_find(struct chtls_dev *cdev, + struct sock *sk) +{ + struct listen_info *p; + int stid = -1; + int key; + + key = listen_hashfn(sk); + + spin_lock(&cdev->listen_lock); + for (p = cdev->listen_hash_tab[key]; p; p = p->next) + if (p->sk == sk) { + stid = p->stid; + break; + } + spin_unlock(&cdev->listen_lock); + return stid; +} + +static int listen_hash_del(struct chtls_dev *cdev, + struct sock *sk) +{ + struct listen_info *p, **prev; + int stid = -1; + int key; + + key = listen_hashfn(sk); + prev = &cdev->listen_hash_tab[key]; + + spin_lock(&cdev->listen_lock); + for (p = *prev; p; prev = &p->next, p = p->next) + if (p->sk == sk) { + stid = p->stid; + *prev = p->next; + kfree(p); + break; + } + spin_unlock(&cdev->listen_lock); + return stid; +} + +static void cleanup_syn_rcv_conn(struct sock *child, struct sock *parent) +{ + struct request_sock *req; + struct chtls_sock *csk; + + csk = rcu_dereference_sk_user_data(child); + req = csk->passive_reap_next; + + reqsk_queue_removed(&inet_csk(parent)->icsk_accept_queue, req); + __skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq); + chtls_reqsk_free(req); + csk->passive_reap_next = NULL; +} + +static void chtls_reset_synq(struct listen_ctx *listen_ctx) +{ + struct sock *listen_sk = listen_ctx->lsk; + + while (!skb_queue_empty(&listen_ctx->synq)) { + struct chtls_sock *csk = + container_of((struct synq *)skb_peek + (&listen_ctx->synq), struct chtls_sock, synq); + struct sock *child = csk->sk; + + cleanup_syn_rcv_conn(child, listen_sk); + sock_hold(child); + local_bh_disable(); + bh_lock_sock(child); + release_tcp_port(child); + reset_listen_child(child); + bh_unlock_sock(child); + local_bh_enable(); + sock_put(child); + } +} + +int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk) +{ + struct net_device *ndev; +#if IS_ENABLED(CONFIG_IPV6) + bool clip_valid = false; +#endif + struct listen_ctx *ctx; + struct adapter *adap; + struct port_info *pi; + int ret = 0; + int stid; + + rcu_read_lock(); + ndev = chtls_find_netdev(cdev, sk); + rcu_read_unlock(); + if (!ndev) + return -EBADF; + + pi = netdev_priv(ndev); + adap = pi->adapter; + if (!(adap->flags & CXGB4_FULL_INIT_DONE)) + return -EBADF; + + if (listen_hash_find(cdev, sk) >= 0) /* already have it */ + return -EADDRINUSE; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + __module_get(THIS_MODULE); + ctx->lsk = sk; + ctx->cdev = cdev; + ctx->state = T4_LISTEN_START_PENDING; + skb_queue_head_init(&ctx->synq); + + stid = cxgb4_alloc_stid(cdev->tids, sk->sk_family, ctx); + if (stid < 0) + goto free_ctx; + + sock_hold(sk); + if (!listen_hash_add(cdev, sk, stid)) + goto free_stid; + + if (sk->sk_family == PF_INET) { + ret = cxgb4_create_server(ndev, stid, + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_sport, 0, + cdev->lldi->rxq_ids[0]); +#if IS_ENABLED(CONFIG_IPV6) + } else { + int addr_type; + + addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + if (addr_type != IPV6_ADDR_ANY) { + ret = cxgb4_clip_get(ndev, (const u32 *) + &sk->sk_v6_rcv_saddr, 1); + if (ret) + goto del_hash; + clip_valid = true; + } + ret = cxgb4_create_server6(ndev, stid, + &sk->sk_v6_rcv_saddr, + inet_sk(sk)->inet_sport, + cdev->lldi->rxq_ids[0]); +#endif + } + if (ret > 0) + ret = net_xmit_errno(ret); + if (ret) + goto del_hash; + return 0; +del_hash: +#if IS_ENABLED(CONFIG_IPV6) + if (clip_valid) + cxgb4_clip_release(ndev, (const u32 *)&sk->sk_v6_rcv_saddr, 1); +#endif + listen_hash_del(cdev, sk); +free_stid: + cxgb4_free_stid(cdev->tids, stid, sk->sk_family); 
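+	/* drop the reference taken by the earlier sock_hold() */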
+ sock_put(sk); +free_ctx: + kfree(ctx); + module_put(THIS_MODULE); + return -EBADF; +} + +void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk) +{ + struct listen_ctx *listen_ctx; + int stid; + + stid = listen_hash_del(cdev, sk); + if (stid < 0) + return; + + listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid); + chtls_reset_synq(listen_ctx); + + cxgb4_remove_server(cdev->lldi->ports[0], stid, + cdev->lldi->rxq_ids[0], sk->sk_family == PF_INET6); + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == PF_INET6) { + struct net_device *ndev = chtls_find_netdev(cdev, sk); + int addr_type = 0; + + addr_type = ipv6_addr_type((const struct in6_addr *) + &sk->sk_v6_rcv_saddr); + if (addr_type != IPV6_ADDR_ANY) + cxgb4_clip_release(ndev, (const u32 *) + &sk->sk_v6_rcv_saddr, 1); + } +#endif + chtls_disconnect_acceptq(sk); +} + +static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_pass_open_rpl *rpl = cplhdr(skb) + RSS_HDR; + unsigned int stid = GET_TID(rpl); + struct listen_ctx *listen_ctx; + + listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid); + if (!listen_ctx) + return CPL_RET_BUF_DONE; + + if (listen_ctx->state == T4_LISTEN_START_PENDING) { + listen_ctx->state = T4_LISTEN_STARTED; + return CPL_RET_BUF_DONE; + } + + if (rpl->status != CPL_ERR_NONE) { + pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n", + rpl->status, stid); + } else { + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); + } + return CPL_RET_BUF_DONE; +} + +static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_close_listsvr_rpl *rpl = cplhdr(skb) + RSS_HDR; + struct listen_ctx *listen_ctx; + unsigned int stid; + void *data; + + stid = GET_TID(rpl); + data = lookup_stid(cdev->tids, stid); + listen_ctx = (struct listen_ctx *)data; + + if (rpl->status != CPL_ERR_NONE) { + pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n", + rpl->status, stid); + } else { + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); + } + return CPL_RET_BUF_DONE; +} + +static void chtls_purge_wr_queue(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = dequeue_wr(sk)) != NULL) + kfree_skb(skb); +} + +static void chtls_release_resources(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_dev *cdev = csk->cdev; + unsigned int tid = csk->tid; + struct tid_info *tids; + + if (!cdev) + return; + + tids = cdev->tids; + kfree_skb(csk->txdata_skb_cache); + csk->txdata_skb_cache = NULL; + + if (csk->wr_credits != csk->wr_max_credits) { + chtls_purge_wr_queue(sk); + chtls_reset_wr_list(csk); + } + + if (csk->l2t_entry) { + cxgb4_l2t_release(csk->l2t_entry); + csk->l2t_entry = NULL; + } + + if (sk->sk_state != TCP_SYN_SENT) { + cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family); + sock_put(sk); + } +} + +static void chtls_conn_done(struct sock *sk) +{ + if (sock_flag(sk, SOCK_DEAD)) + chtls_purge_receive_queue(sk); + sk_wakeup_sleepers(sk, 0); + tcp_done(sk); +} + +static void do_abort_syn_rcv(struct sock *child, struct sock *parent) +{ + /* + * If the server is still open we clean up the child connection, + * otherwise the server already did the clean up as it was purging + * its SYN queue and the skb was just sitting in its backlog. 
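+ *
+ * Roughly:
+ *
+ *	if the parent is still TCP_LISTEN:
+ *		unlink the child from the SYN queue, orphan it, then release
+ *		its offload resources and finish it via chtls_conn_done()
+ *	else if the child was marked CSK_RST_ABORTED:
+ *		just release the resources and finish the connection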
+ */ + if (likely(parent->sk_state == TCP_LISTEN)) { + cleanup_syn_rcv_conn(child, parent); + /* Without the below call to sock_orphan, + * we leak the socket resource with syn_flood test + * as inet_csk_destroy_sock will not be called + * in tcp_done since SOCK_DEAD flag is not set. + * Kernel handles this differently where new socket is + * created only after 3 way handshake is done. + */ + sock_orphan(child); + INC_ORPHAN_COUNT(child); + chtls_release_resources(child); + chtls_conn_done(child); + } else { + if (csk_flag(child, CSK_RST_ABORTED)) { + chtls_release_resources(child); + chtls_conn_done(child); + } + } +} + +static void pass_open_abort(struct sock *child, struct sock *parent, + struct sk_buff *skb) +{ + do_abort_syn_rcv(child, parent); + kfree_skb(skb); +} + +static void bl_pass_open_abort(struct sock *lsk, struct sk_buff *skb) +{ + pass_open_abort(skb->sk, lsk, skb); +} + +static void chtls_pass_open_arp_failure(struct sock *sk, + struct sk_buff *skb) +{ + const struct request_sock *oreq; + struct chtls_sock *csk; + struct chtls_dev *cdev; + struct sock *parent; + void *data; + + csk = rcu_dereference_sk_user_data(sk); + cdev = csk->cdev; + + /* + * If the connection is being aborted due to the parent listening + * socket going away there's nothing to do, the ABORT_REQ will close + * the connection. + */ + if (csk_flag(sk, CSK_ABORT_RPL_PENDING)) { + kfree_skb(skb); + return; + } + + oreq = csk->passive_reap_next; + data = lookup_stid(cdev->tids, oreq->ts_recent); + parent = ((struct listen_ctx *)data)->lsk; + + bh_lock_sock(parent); + if (!sock_owned_by_user(parent)) { + pass_open_abort(sk, parent, skb); + } else { + BLOG_SKB_CB(skb)->backlog_rcv = bl_pass_open_abort; + __sk_add_backlog(parent, skb); + } + bh_unlock_sock(parent); +} + +static void chtls_accept_rpl_arp_failure(void *handle, + struct sk_buff *skb) +{ + struct sock *sk = (struct sock *)handle; + + sock_hold(sk); + process_cpl_msg(chtls_pass_open_arp_failure, sk, skb); + sock_put(sk); +} + +static unsigned int chtls_select_mss(const struct chtls_sock *csk, + unsigned int pmtu, + struct cpl_pass_accept_req *req) +{ + struct chtls_dev *cdev; + struct dst_entry *dst; + unsigned int tcpoptsz; + unsigned int iphdrsz; + unsigned int mtu_idx; + struct tcp_sock *tp; + unsigned int mss; + struct sock *sk; + + mss = ntohs(req->tcpopt.mss); + sk = csk->sk; + dst = __sk_dst_get(sk); + cdev = csk->cdev; + tp = tcp_sk(sk); + tcpoptsz = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + iphdrsz = sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + else +#endif + iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr); + if (req->tcpopt.tstamp) + tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4); + + tp->advmss = dst_metric_advmss(dst); + if (USER_MSS(tp) && tp->advmss > USER_MSS(tp)) + tp->advmss = USER_MSS(tp); + if (tp->advmss > pmtu - iphdrsz) + tp->advmss = pmtu - iphdrsz; + if (mss && tp->advmss > mss) + tp->advmss = mss; + + tp->advmss = cxgb4_best_aligned_mtu(cdev->lldi->mtus, + iphdrsz + tcpoptsz, + tp->advmss - tcpoptsz, + 8, &mtu_idx); + tp->advmss -= iphdrsz; + + inet_csk(sk)->icsk_pmtu_cookie = pmtu; + return mtu_idx; +} + +static unsigned int select_rcv_wscale(int space, int wscale_ok, int win_clamp) +{ + int wscale = 0; + + if (space > MAX_RCV_WND) + space = MAX_RCV_WND; + if (win_clamp && win_clamp < space) + space = win_clamp; + + if (wscale_ok) { + while (wscale < 14 && (65535 << wscale) < space) + wscale++; + } + return wscale; +} + +static void chtls_pass_accept_rpl(struct sk_buff *skb, + struct 
cpl_pass_accept_req *req, + unsigned int tid) + +{ + struct cpl_t5_pass_accept_rpl *rpl5; + struct cxgb4_lld_info *lldi; + const struct tcphdr *tcph; + const struct tcp_sock *tp; + struct chtls_sock *csk; + unsigned int len; + struct sock *sk; + u32 opt2, hlen; + u64 opt0; + + sk = skb->sk; + tp = tcp_sk(sk); + csk = sk->sk_user_data; + csk->tid = tid; + lldi = csk->cdev->lldi; + len = roundup(sizeof(*rpl5), 16); + + rpl5 = __skb_put_zero(skb, len); + INIT_TP_WR(rpl5, tid); + + OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + csk->tid)); + csk->mtu_idx = chtls_select_mss(csk, dst_mtu(__sk_dst_get(sk)), + req); + opt0 = TCAM_BYPASS_F | + WND_SCALE_V(RCV_WSCALE(tp)) | + MSS_IDX_V(csk->mtu_idx) | + L2T_IDX_V(csk->l2t_entry->idx) | + NAGLE_V(!(tp->nonagle & TCP_NAGLE_OFF)) | + TX_CHAN_V(csk->tx_chan) | + SMAC_SEL_V(csk->smac_idx) | + DSCP_V(csk->tos >> 2) | + ULP_MODE_V(ULP_MODE_TLS) | + RCV_BUFSIZ_V(min(tp->rcv_wnd >> 10, RCV_BUFSIZ_M)); + + opt2 = RX_CHANNEL_V(0) | + RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid); + + if (!is_t5(lldi->adapter_type)) + opt2 |= RX_FC_DISABLE_F; + if (req->tcpopt.tstamp) + opt2 |= TSTAMPS_EN_F; + if (req->tcpopt.sack) + opt2 |= SACK_EN_F; + hlen = ntohl(req->hdr_len); + + tcph = (struct tcphdr *)((u8 *)(req + 1) + + T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen)); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN_V(1); + opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO); + opt2 |= T5_ISS_F; + opt2 |= T5_OPT_2_VALID_F; + opt2 |= WND_SCALE_EN_V(WSCALE_OK(tp)); + rpl5->opt0 = cpu_to_be64(opt0); + rpl5->opt2 = cpu_to_be32(opt2); + rpl5->iss = cpu_to_be32((get_random_u32() & ~7UL) - 1); + set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->port_id); + t4_set_arp_err_handler(skb, sk, chtls_accept_rpl_arp_failure); + cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); +} + +static void inet_inherit_port(struct sock *lsk, struct sock *newsk) +{ + local_bh_disable(); + __inet_inherit_port(lsk, newsk); + local_bh_enable(); +} + +static int chtls_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + if (skb->protocol) { + kfree_skb(skb); + return 0; + } + BLOG_SKB_CB(skb)->backlog_rcv(sk, skb); + return 0; +} + +static void chtls_set_tcp_window(struct chtls_sock *csk) +{ + struct net_device *ndev = csk->egress_dev; + struct port_info *pi = netdev_priv(ndev); + unsigned int linkspeed; + u8 scale; + + linkspeed = pi->link_cfg.speed; + scale = linkspeed / SPEED_10000; +#define CHTLS_10G_RCVWIN (256 * 1024) + csk->rcv_win = CHTLS_10G_RCVWIN; + if (scale) + csk->rcv_win *= scale; +#define CHTLS_10G_SNDWIN (256 * 1024) + csk->snd_win = CHTLS_10G_SNDWIN; + if (scale) + csk->snd_win *= scale; +} + +static struct sock *chtls_recv_sock(struct sock *lsk, + struct request_sock *oreq, + void *network_hdr, + const struct cpl_pass_accept_req *req, + struct chtls_dev *cdev) +{ + struct adapter *adap = pci_get_drvdata(cdev->pdev); + struct neighbour *n = NULL; + struct inet_sock *newinet; + const struct iphdr *iph; + struct tls_context *ctx; + struct net_device *ndev; + struct chtls_sock *csk; + struct dst_entry *dst; + struct tcp_sock *tp; + struct sock *newsk; + bool found = false; + u16 port_id; + int rxq_idx; + int step, i; + + iph = (const struct iphdr *)network_hdr; + newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb); + if (!newsk) + goto free_oreq; + + if (lsk->sk_family == AF_INET) { + dst = inet_csk_route_child_sock(lsk, newsk, oreq); + if (!dst) + goto free_sk; + + n = dst_neigh_lookup(dst, &iph->saddr); +#if IS_ENABLED(CONFIG_IPV6) + } else { + const struct ipv6hdr *ip6h; 
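+		/*
+		 * The flow key below swaps the SYN's addresses and ports so
+		 * the route lookup points back at the peer.
+		 */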
+		struct flowi6 fl6;
+
+		ip6h = (const struct ipv6hdr *)network_hdr;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_TCP;
+		fl6.saddr = ip6h->daddr;
+		fl6.daddr = ip6h->saddr;
+		fl6.fl6_dport = inet_rsk(oreq)->ir_rmt_port;
+		fl6.fl6_sport = htons(inet_rsk(oreq)->ir_num);
+		security_req_classify_flow(oreq, flowi6_to_flowi_common(&fl6));
+		dst = ip6_dst_lookup_flow(sock_net(lsk), lsk, &fl6, NULL);
+		if (IS_ERR(dst))
+			goto free_sk;
+		n = dst_neigh_lookup(dst, &ip6h->saddr);
+#endif
+	}
+	if (!n || !n->dev)
+		goto free_dst;
+
+	ndev = n->dev;
+	if (is_vlan_dev(ndev))
+		ndev = vlan_dev_real_dev(ndev);
+
+	for_each_port(adap, i)
+		if (cdev->ports[i] == ndev)
+			found = true;
+
+	if (!found)
+		goto free_dst;
+
+	port_id = cxgb4_port_idx(ndev);
+
+	csk = chtls_sock_create(cdev);
+	if (!csk)
+		goto free_dst;
+
+	csk->l2t_entry = cxgb4_l2t_get(cdev->lldi->l2t, n, ndev, 0);
+	if (!csk->l2t_entry)
+		goto free_csk;
+
+	newsk->sk_user_data = csk;
+	newsk->sk_backlog_rcv = chtls_backlog_rcv;
+
+	tp = tcp_sk(newsk);
+	newinet = inet_sk(newsk);
+
+	if (iph->version == 0x4) {
+		newinet->inet_daddr = iph->saddr;
+		newinet->inet_rcv_saddr = iph->daddr;
+		newinet->inet_saddr = iph->daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		struct tcp6_sock *newtcp6sk = (struct tcp6_sock *)newsk;
+		struct inet_request_sock *treq = inet_rsk(oreq);
+		struct ipv6_pinfo *newnp = inet6_sk(newsk);
+		struct ipv6_pinfo *np = inet6_sk(lsk);
+
+		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+		newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
+		newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
+		inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr;
+		newnp->ipv6_fl_list = NULL;
+		newnp->pktoptions = NULL;
+		newsk->sk_bound_dev_if = treq->ir_iif;
+		newinet->inet_opt = NULL;
+		newinet->inet_daddr = LOOPBACK4_IPV6;
+		newinet->inet_saddr = LOOPBACK4_IPV6;
+#endif
+	}
+
+	oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid));
+	sk_setup_caps(newsk, dst);
+	ctx = tls_get_ctx(lsk);
+	newsk->sk_destruct = ctx->sk_destruct;
+	newsk->sk_prot_creator = lsk->sk_prot_creator;
+	csk->sk = newsk;
+	csk->passive_reap_next = oreq;
+	csk->tx_chan = cxgb4_port_chan(ndev);
+	csk->port_id = port_id;
+	csk->egress_dev = ndev;
+	csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+	chtls_set_tcp_window(csk);
+	tp->rcv_wnd = csk->rcv_win;
+	csk->sndbuf = csk->snd_win;
+	csk->ulp_mode = ULP_MODE_TLS;
+	step = cdev->lldi->nrxq / cdev->lldi->nchan;
+	rxq_idx = port_id * step;
+	rxq_idx += cdev->round_robin_cnt++ % step;
+	csk->rss_qid = cdev->lldi->rxq_ids[rxq_idx];
+	csk->txq_idx = (rxq_idx < cdev->lldi->ntxq) ? rxq_idx :
+			port_id * step;
+	csk->sndbuf = newsk->sk_sndbuf;
+	csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
+	RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
+					   READ_ONCE(sock_net(newsk)->
+						     ipv4.sysctl_tcp_window_scaling),
+					   tp->window_clamp);
+	neigh_release(n);
+	inet_inherit_port(lsk, newsk);
+	csk_set_flag(csk, CSK_CONN_INLINE);
+	bh_unlock_sock(newsk); /* tcp_create_openreq_child ->sk_clone_lock */
+
+	return newsk;
+free_csk:
+	chtls_sock_release(&csk->kref);
+free_dst:
+	if (n)
+		neigh_release(n);
+	dst_release(dst);
+free_sk:
+	inet_csk_prepare_forced_close(newsk);
+	tcp_done(newsk);
+free_oreq:
+	chtls_reqsk_free(oreq);
+	return NULL;
+}
+
+/*
+ * Populate a TID_RELEASE WR.  The skb must be already properly sized.
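+ *
+ * Typical usage on a reject path, as in chtls_pass_accept_request() below:
+ *
+ *	mk_tid_release(reply_skb, 0, tid);
+ *	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);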
+ */ +static void mk_tid_release(struct sk_buff *skb, + unsigned int chan, unsigned int tid) +{ + struct cpl_tid_release *req; + unsigned int len; + + len = roundup(sizeof(struct cpl_tid_release), 16); + req = (struct cpl_tid_release *)__skb_put(skb, len); + memset(req, 0, len); + set_wr_txq(skb, CPL_PRIORITY_SETUP, chan); + INIT_TP_WR_CPL(req, CPL_TID_RELEASE, tid); +} + +static int chtls_get_module(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (!try_module_get(icsk->icsk_ulp_ops->owner)) + return -1; + + return 0; +} + +static void chtls_pass_accept_request(struct sock *sk, + struct sk_buff *skb) +{ + struct cpl_t5_pass_accept_rpl *rpl; + struct cpl_pass_accept_req *req; + struct listen_ctx *listen_ctx; + struct vlan_ethhdr *vlan_eh; + struct request_sock *oreq; + struct sk_buff *reply_skb; + struct chtls_sock *csk; + struct chtls_dev *cdev; + struct ipv6hdr *ip6h; + struct tcphdr *tcph; + struct sock *newsk; + struct ethhdr *eh; + struct iphdr *iph; + void *network_hdr; + unsigned int stid; + unsigned int len; + unsigned int tid; + bool th_ecn, ect; + __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ + u16 eth_hdr_len; + bool ecn_ok; + + req = cplhdr(skb) + RSS_HDR; + tid = GET_TID(req); + cdev = BLOG_SKB_CB(skb)->cdev; + newsk = lookup_tid(cdev->tids, tid); + stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); + if (newsk) { + pr_info("tid (%d) already in use\n", tid); + return; + } + + len = roundup(sizeof(*rpl), 16); + reply_skb = alloc_skb(len, GFP_ATOMIC); + if (!reply_skb) { + cxgb4_remove_tid(cdev->tids, 0, tid, sk->sk_family); + kfree_skb(skb); + return; + } + + if (sk->sk_state != TCP_LISTEN) + goto reject; + + if (inet_csk_reqsk_queue_is_full(sk)) + goto reject; + + if (sk_acceptq_is_full(sk)) + goto reject; + + + eth_hdr_len = T6_ETH_HDR_LEN_G(ntohl(req->hdr_len)); + if (eth_hdr_len == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + ip6h = (struct ipv6hdr *)(eh + 1); + network_hdr = (void *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + ip6h = (struct ipv6hdr *)(vlan_eh + 1); + network_hdr = (void *)(vlan_eh + 1); + } + + if (iph->version == 0x4) { + tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)req); + oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true); + } else { + tcph = (struct tcphdr *)(ip6h + 1); + skb_set_network_header(skb, (void *)ip6h - (void *)req); + oreq = inet_reqsk_alloc(&chtls_rsk_opsv6, sk, false); + } + + if (!oreq) + goto reject; + + oreq->rsk_rcv_wnd = 0; + oreq->rsk_window_clamp = 0; + oreq->syncookie = 0; + oreq->mss = 0; + oreq->ts_recent = 0; + + tcp_rsk(oreq)->tfo_listener = false; + tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq); + chtls_set_req_port(oreq, tcph->source, tcph->dest); + if (iph->version == 0x4) { + chtls_set_req_addr(oreq, iph->daddr, iph->saddr); + ip_dsfield = ipv4_get_dsfield(iph); +#if IS_ENABLED(CONFIG_IPV6) + } else { + inet_rsk(oreq)->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + inet_rsk(oreq)->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ip_dsfield = ipv6_get_dsfield(ipv6_hdr(skb)); +#endif + } + if (req->tcpopt.wsf <= 14 && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { + inet_rsk(oreq)->wscale_ok = 1; + inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; + } + inet_rsk(oreq)->ir_iif = sk->sk_bound_dev_if; + th_ecn = tcph->ece && tcph->cwr; + if (th_ecn) { + ect = !INET_ECN_is_not_ect(ip_dsfield); + ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); + if ((!ect && 
ecn_ok) || tcp_ca_needs_ecn(sk)) + inet_rsk(oreq)->ecn_ok = 1; + } + + newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev); + if (!newsk) + goto reject; + + if (chtls_get_module(newsk)) + goto reject; + inet_csk_reqsk_queue_added(sk); + reply_skb->sk = newsk; + chtls_install_cpl_ops(newsk); + cxgb4_insert_tid(cdev->tids, newsk, tid, newsk->sk_family); + csk = rcu_dereference_sk_user_data(newsk); + listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid); + csk->listen_ctx = listen_ctx; + __skb_queue_tail(&listen_ctx->synq, (struct sk_buff *)&csk->synq); + chtls_pass_accept_rpl(reply_skb, req, tid); + kfree_skb(skb); + return; + +reject: + mk_tid_release(reply_skb, 0, tid); + cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); + kfree_skb(skb); +} + +/* + * Handle a CPL_PASS_ACCEPT_REQ message. + */ +static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_pass_accept_req *req = cplhdr(skb) + RSS_HDR; + struct listen_ctx *ctx; + unsigned int stid; + unsigned int tid; + struct sock *lsk; + void *data; + + stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); + tid = GET_TID(req); + + data = lookup_stid(cdev->tids, stid); + if (!data) + return 1; + + ctx = (struct listen_ctx *)data; + lsk = ctx->lsk; + + if (unlikely(tid_out_of_range(cdev->tids, tid))) { + pr_info("passive open TID %u too large\n", tid); + return 1; + } + + BLOG_SKB_CB(skb)->cdev = cdev; + process_cpl_msg(chtls_pass_accept_request, lsk, skb); + return 0; +} + +/* + * Completes some final bits of initialization for just established connections + * and changes their state to TCP_ESTABLISHED. + * + * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1. + */ +static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->pushed_seq = snd_isn; + tp->write_seq = snd_isn; + tp->snd_nxt = snd_isn; + tp->snd_una = snd_isn; + inet_sk(sk)->inet_id = get_random_u16(); + assign_rxopt(sk, opt); + + if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10)) + tp->rcv_wup -= tp->rcv_wnd - (RCV_BUFSIZ_M << 10); + + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); +} + +static void chtls_abort_conn(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *abort_skb; + + abort_skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC); + if (abort_skb) + chtls_send_reset(sk, CPL_ABORT_SEND_RST, abort_skb); +} + +static struct sock *reap_list; +static DEFINE_SPINLOCK(reap_list_lock); + +/* + * Process the reap list. 
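+ *
+ * The walk holds reap_list_lock, but drops it around the per-socket
+ * teardown so the socket lock is never taken inside the list lock; the
+ * list head is re-read once the lock is re-acquired.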
+ */ +DECLARE_TASK_FUNC(process_reap_list, task_param) +{ + spin_lock_bh(&reap_list_lock); + while (reap_list) { + struct sock *sk = reap_list; + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + reap_list = csk->passive_reap_next; + csk->passive_reap_next = NULL; + spin_unlock(&reap_list_lock); + sock_hold(sk); + + bh_lock_sock(sk); + chtls_abort_conn(sk, NULL); + sock_orphan(sk); + if (sk->sk_state == TCP_CLOSE) + inet_csk_destroy_sock(sk); + bh_unlock_sock(sk); + sock_put(sk); + spin_lock(&reap_list_lock); + } + spin_unlock_bh(&reap_list_lock); +} + +static DECLARE_WORK(reap_task, process_reap_list); + +static void add_to_reap_list(struct sock *sk) +{ + struct chtls_sock *csk = sk->sk_user_data; + + local_bh_disable(); + release_tcp_port(sk); /* release the port immediately */ + + spin_lock(&reap_list_lock); + csk->passive_reap_next = reap_list; + reap_list = sk; + if (!csk->passive_reap_next) + schedule_work(&reap_task); + spin_unlock(&reap_list_lock); + local_bh_enable(); +} + +static void add_pass_open_to_parent(struct sock *child, struct sock *lsk, + struct chtls_dev *cdev) +{ + struct request_sock *oreq; + struct chtls_sock *csk; + + if (lsk->sk_state != TCP_LISTEN) + return; + + csk = child->sk_user_data; + oreq = csk->passive_reap_next; + csk->passive_reap_next = NULL; + + reqsk_queue_removed(&inet_csk(lsk)->icsk_accept_queue, oreq); + __skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq); + + if (sk_acceptq_is_full(lsk)) { + chtls_reqsk_free(oreq); + add_to_reap_list(child); + } else { + refcount_set(&oreq->rsk_refcnt, 1); + inet_csk_reqsk_queue_add(lsk, oreq, child); + lsk->sk_data_ready(lsk); + } +} + +static void bl_add_pass_open_to_parent(struct sock *lsk, struct sk_buff *skb) +{ + struct sock *child = skb->sk; + + skb->sk = NULL; + add_pass_open_to_parent(child, lsk, BLOG_SKB_CB(skb)->cdev); + kfree_skb(skb); +} + +static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_pass_establish *req = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk; + struct sock *lsk, *sk; + unsigned int hwtid; + + hwtid = GET_TID(req); + sk = lookup_tid(cdev->tids, hwtid); + if (!sk) + return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE); + + bh_lock_sock(sk); + if (unlikely(sock_owned_by_user(sk))) { + kfree_skb(skb); + } else { + unsigned int stid; + void *data; + + csk = sk->sk_user_data; + csk->wr_max_credits = 64; + csk->wr_credits = 64; + csk->wr_unacked = 0; + make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt)); + stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); + sk->sk_state_change(sk); + if (unlikely(sk->sk_socket)) + sk_wake_async(sk, 0, POLL_OUT); + + data = lookup_stid(cdev->tids, stid); + if (!data) { + /* listening server close */ + kfree_skb(skb); + goto unlock; + } + lsk = ((struct listen_ctx *)data)->lsk; + + bh_lock_sock(lsk); + if (unlikely(skb_queue_empty(&csk->listen_ctx->synq))) { + /* removed from synq */ + bh_unlock_sock(lsk); + kfree_skb(skb); + goto unlock; + } + + if (likely(!sock_owned_by_user(lsk))) { + kfree_skb(skb); + add_pass_open_to_parent(sk, lsk, cdev); + } else { + skb->sk = sk; + BLOG_SKB_CB(skb)->cdev = cdev; + BLOG_SKB_CB(skb)->backlog_rcv = + bl_add_pass_open_to_parent; + __sk_add_backlog(lsk, skb); + } + bh_unlock_sock(lsk); + } +unlock: + bh_unlock_sock(sk); + return 0; +} + +/* + * Handle receipt of an urgent pointer. 
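+ *
+ * The caller passes the sequence number one past the urgent byte (see
+ * chtls_recv_data() below), hence the urg_seq-- that follows; stale or
+ * duplicate pointers are ignored.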
+ */ +static void handle_urg_ptr(struct sock *sk, u32 urg_seq) +{ + struct tcp_sock *tp = tcp_sk(sk); + + urg_seq--; + if (tp->urg_data && !after(urg_seq, tp->urg_seq)) + return; /* duplicate pointer */ + + sk_send_sigurg(sk); + if (tp->urg_seq == tp->copied_seq && tp->urg_data && + !sock_flag(sk, SOCK_URGINLINE) && + tp->copied_seq != tp->rcv_nxt) { + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + + tp->copied_seq++; + if (skb && tp->copied_seq - ULP_SKB_CB(skb)->seq >= skb->len) + chtls_free_skb(sk, skb); + } + + tp->urg_data = TCP_URG_NOTYET; + tp->urg_seq = urg_seq; +} + +static void check_sk_callbacks(struct chtls_sock *csk) +{ + struct sock *sk = csk->sk; + + if (unlikely(sk->sk_user_data && + !csk_flag_nochk(csk, CSK_CALLBACKS_CHKD))) + csk_set_flag(csk, CSK_CALLBACKS_CHKD); +} + +/* + * Handles Rx data that arrives in a state where the socket isn't accepting + * new data. + */ +static void handle_excess_rx(struct sock *sk, struct sk_buff *skb) +{ + if (!csk_flag(sk, CSK_ABORT_SHUTDOWN)) + chtls_abort_conn(sk, skb); + + kfree_skb(skb); +} + +static void chtls_recv_data(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_rx_data *hdr = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tp = tcp_sk(sk); + + if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) { + handle_excess_rx(sk, skb); + return; + } + + ULP_SKB_CB(skb)->seq = ntohl(hdr->seq); + ULP_SKB_CB(skb)->psh = hdr->psh; + skb_ulp_mode(skb) = ULP_MODE_NONE; + + skb_reset_transport_header(skb); + __skb_pull(skb, sizeof(*hdr) + RSS_HDR); + if (!skb->data_len) + __skb_trim(skb, ntohs(hdr->len)); + + if (unlikely(hdr->urg)) + handle_urg_ptr(sk, tp->rcv_nxt + ntohs(hdr->urg)); + if (unlikely(tp->urg_data == TCP_URG_NOTYET && + tp->urg_seq - tp->rcv_nxt < skb->len)) + tp->urg_data = TCP_URG_VALID | + skb->data[tp->urg_seq - tp->rcv_nxt]; + + if (unlikely(hdr->dack_mode != csk->delack_mode)) { + csk->delack_mode = hdr->dack_mode; + csk->delack_seq = tp->rcv_nxt; + } + + tcp_hdr(skb)->fin = 0; + tp->rcv_nxt += skb->len; + + __skb_queue_tail(&sk->sk_receive_queue, skb); + + if (!sock_flag(sk, SOCK_DEAD)) { + check_sk_callbacks(csk); + sk->sk_data_ready(sk); + } +} + +static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_rx_data *req = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(req); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + if (unlikely(!sk)) { + pr_err("can't find conn. 
for hwtid %u.\n", hwtid); + return -EINVAL; + } + skb_dst_set(skb, NULL); + process_cpl_msg(chtls_recv_data, sk, skb); + return 0; +} + +static void chtls_recv_pdu(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_tls_data *hdr = cplhdr(skb); + struct chtls_sock *csk; + struct chtls_hws *tlsk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tlsk = &csk->tlshws; + tp = tcp_sk(sk); + + if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) { + handle_excess_rx(sk, skb); + return; + } + + ULP_SKB_CB(skb)->seq = ntohl(hdr->seq); + ULP_SKB_CB(skb)->flags = 0; + skb_ulp_mode(skb) = ULP_MODE_TLS; + + skb_reset_transport_header(skb); + __skb_pull(skb, sizeof(*hdr)); + if (!skb->data_len) + __skb_trim(skb, + CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd))); + + if (unlikely(tp->urg_data == TCP_URG_NOTYET && tp->urg_seq - + tp->rcv_nxt < skb->len)) + tp->urg_data = TCP_URG_VALID | + skb->data[tp->urg_seq - tp->rcv_nxt]; + + tcp_hdr(skb)->fin = 0; + tlsk->pldlen = CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd)); + __skb_queue_tail(&tlsk->sk_recv_queue, skb); +} + +static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_tls_data *req = cplhdr(skb); + unsigned int hwtid = GET_TID(req); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + if (unlikely(!sk)) { + pr_err("can't find conn. for hwtid %u.\n", hwtid); + return -EINVAL; + } + skb_dst_set(skb, NULL); + process_cpl_msg(chtls_recv_pdu, sk, skb); + return 0; +} + +static void chtls_set_hdrlen(struct sk_buff *skb, unsigned int nlen) +{ + struct tlsrx_cmp_hdr *tls_cmp_hdr = cplhdr(skb); + + skb->hdr_len = ntohs((__force __be16)tls_cmp_hdr->length); + tls_cmp_hdr->length = ntohs((__force __be16)nlen); +} + +static void chtls_rx_hdr(struct sock *sk, struct sk_buff *skb) +{ + struct tlsrx_cmp_hdr *tls_hdr_pkt; + struct cpl_rx_tls_cmp *cmp_cpl; + struct sk_buff *skb_rec; + struct chtls_sock *csk; + struct chtls_hws *tlsk; + struct tcp_sock *tp; + + cmp_cpl = cplhdr(skb); + csk = rcu_dereference_sk_user_data(sk); + tlsk = &csk->tlshws; + tp = tcp_sk(sk); + + ULP_SKB_CB(skb)->seq = ntohl(cmp_cpl->seq); + ULP_SKB_CB(skb)->flags = 0; + + skb_reset_transport_header(skb); + __skb_pull(skb, sizeof(*cmp_cpl)); + tls_hdr_pkt = (struct tlsrx_cmp_hdr *)skb->data; + if (tls_hdr_pkt->res_to_mac_error & TLSRX_HDR_PKT_ERROR_M) + tls_hdr_pkt->type = CONTENT_TYPE_ERROR; + if (!skb->data_len) + __skb_trim(skb, TLS_HEADER_LENGTH); + + tp->rcv_nxt += + CPL_RX_TLS_CMP_PDULENGTH_G(ntohl(cmp_cpl->pdulength_length)); + + ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_HDR; + skb_rec = __skb_dequeue(&tlsk->sk_recv_queue); + if (!skb_rec) { + __skb_queue_tail(&sk->sk_receive_queue, skb); + } else { + chtls_set_hdrlen(skb, tlsk->pldlen); + tlsk->pldlen = 0; + __skb_queue_tail(&sk->sk_receive_queue, skb); + __skb_queue_tail(&sk->sk_receive_queue, skb_rec); + } + + if (!sock_flag(sk, SOCK_DEAD)) { + check_sk_callbacks(csk); + sk->sk_data_ready(sk); + } +} + +static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_rx_tls_cmp *req = cplhdr(skb); + unsigned int hwtid = GET_TID(req); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + if (unlikely(!sk)) { + pr_err("can't find conn. 
for hwtid %u.\n", hwtid); + return -EINVAL; + } + skb_dst_set(skb, NULL); + process_cpl_msg(chtls_rx_hdr, sk, skb); + + return 0; +} + +static void chtls_timewait(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->rcv_nxt++; + tp->rx_opt.ts_recent_stamp = ktime_get_seconds(); + tp->srtt_us = 0; + tcp_time_wait(sk, TCP_TIME_WAIT, 0); +} + +static void chtls_peer_close(struct sock *sk, struct sk_buff *skb) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + goto out; + + sk->sk_shutdown |= RCV_SHUTDOWN; + sock_set_flag(sk, SOCK_DONE); + + switch (sk->sk_state) { + case TCP_SYN_RECV: + case TCP_ESTABLISHED: + tcp_set_state(sk, TCP_CLOSE_WAIT); + break; + case TCP_FIN_WAIT1: + tcp_set_state(sk, TCP_CLOSING); + break; + case TCP_FIN_WAIT2: + chtls_release_resources(sk); + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + chtls_conn_done(sk); + else + chtls_timewait(sk); + break; + default: + pr_info("cpl_peer_close in bad state %d\n", sk->sk_state); + } + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + /* Do not send POLL_HUP for half duplex close. */ + + if ((sk->sk_shutdown & SEND_SHUTDOWN) || + sk->sk_state == TCP_CLOSE) + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); + else + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + } +out: + kfree_skb(skb); +} + +static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_close_con_rpl *rpl = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + goto out; + + tp = tcp_sk(sk); + + tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ + + switch (sk->sk_state) { + case TCP_CLOSING: + chtls_release_resources(sk); + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) + chtls_conn_done(sk); + else + chtls_timewait(sk); + break; + case TCP_LAST_ACK: + chtls_release_resources(sk); + chtls_conn_done(sk); + break; + case TCP_FIN_WAIT1: + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + else if (tcp_sk(sk)->linger2 < 0 && + !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN)) + chtls_abort_conn(sk, skb); + else if (csk_flag_nochk(csk, CSK_TX_DATA_SENT)) + chtls_set_quiesce_ctrl(sk, 0); + break; + default: + pr_info("close_con_rpl in bad state %d\n", sk->sk_state); + } +out: + kfree_skb(skb); +} + +static struct sk_buff *get_cpl_skb(struct sk_buff *skb, + size_t len, gfp_t gfp) +{ + if (likely(!skb_is_nonlinear(skb) && !skb_cloned(skb))) { + WARN_ONCE(skb->len < len, "skb alloc error"); + __skb_trim(skb, len); + skb_get(skb); + } else { + skb = alloc_skb(len, gfp); + if (skb) + __skb_put(skb, len); + } + return skb; +} + +static void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid, + int cmd) +{ + struct cpl_abort_rpl *rpl = cplhdr(skb); + + INIT_TP_WR_CPL(rpl, CPL_ABORT_RPL, tid); + rpl->cmd = cmd; +} + +static void send_defer_abort_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct sk_buff *reply_skb; + + reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl), + GFP_KERNEL | __GFP_NOFAIL); + __skb_put(reply_skb, sizeof(struct cpl_abort_rpl)); + set_abort_rpl_wr(reply_skb, GET_TID(req), + (req->status & CPL_ABORT_NO_RST)); + set_wr_txq(reply_skb, CPL_PRIORITY_DATA, req->status >> 1); + cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); + kfree_skb(skb); +} + +/* + * Add an skb to the deferred skb 
queue for processing from process context. + */ +static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev, + defer_handler_t handler) +{ + DEFERRED_SKB_CB(skb)->handler = handler; + spin_lock_bh(&cdev->deferq.lock); + __skb_queue_tail(&cdev->deferq, skb); + if (skb_queue_len(&cdev->deferq) == 1) + schedule_work(&cdev->deferq_task); + spin_unlock_bh(&cdev->deferq.lock); +} + +static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb, + struct chtls_dev *cdev, + int status, int queue) +{ + struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR; + struct sk_buff *reply_skb; + struct chtls_sock *csk; + unsigned int tid; + + csk = rcu_dereference_sk_user_data(sk); + tid = GET_TID(req); + + reply_skb = get_cpl_skb(skb, sizeof(struct cpl_abort_rpl), gfp_any()); + if (!reply_skb) { + req->status = (queue << 1) | status; + t4_defer_reply(skb, cdev, send_defer_abort_rpl); + return; + } + + set_abort_rpl_wr(reply_skb, tid, status); + kfree_skb(skb); + set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue); + if (csk_conn_inline(csk)) { + struct l2t_entry *e = csk->l2t_entry; + + if (e && sk->sk_state != TCP_SYN_RECV) { + cxgb4_l2t_send(csk->egress_dev, reply_skb, e); + return; + } + } + cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); +} + +/* + * This is run from a listener's backlog to abort a child connection in + * SYN_RCV state (i.e., one on the listener's SYN queue). + */ +static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb) +{ + struct chtls_sock *csk; + struct sock *child; + int queue; + + child = skb->sk; + csk = rcu_dereference_sk_user_data(child); + queue = csk->txq_idx; + + skb->sk = NULL; + chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, + CPL_ABORT_NO_RST, queue); + do_abort_syn_rcv(child, lsk); +} + +static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) +{ + const struct request_sock *oreq; + struct listen_ctx *listen_ctx; + struct chtls_sock *csk; + struct chtls_dev *cdev; + struct sock *psk; + void *ctx; + + csk = sk->sk_user_data; + oreq = csk->passive_reap_next; + cdev = csk->cdev; + + if (!oreq) + return -1; + + ctx = lookup_stid(cdev->tids, oreq->ts_recent); + if (!ctx) + return -1; + + listen_ctx = (struct listen_ctx *)ctx; + psk = listen_ctx->lsk; + + bh_lock_sock(psk); + if (!sock_owned_by_user(psk)) { + int queue = csk->txq_idx; + + chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); + do_abort_syn_rcv(sk, psk); + } else { + skb->sk = sk; + BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv; + __sk_add_backlog(psk, skb); + } + bh_unlock_sock(psk); + return 0; +} + +static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) +{ + const struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk = sk->sk_user_data; + int rst_status = CPL_ABORT_NO_RST; + int queue = csk->txq_idx; + + if (is_neg_adv(req->status)) { + kfree_skb(skb); + return; + } + + csk_reset_flag(csk, CSK_ABORT_REQ_RCVD); + + if (!csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) && + !csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { + struct tcp_sock *tp = tcp_sk(sk); + + if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0) + WARN_ONCE(1, "send_tx_flowc error"); + csk_set_flag(csk, CSK_TX_DATA_SENT); + } + + csk_set_flag(csk, CSK_ABORT_SHUTDOWN); + + if (!csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) { + sk->sk_err = ETIMEDOUT; + + if (!sock_flag(sk, SOCK_DEAD)) + sk_error_report(sk); + + if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb)) + return; + + } + + chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev, + rst_status, 
queue); + chtls_release_resources(sk); + chtls_conn_done(sk); +} + +static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_abort_rpl_rss *rpl = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk; + struct chtls_dev *cdev; + + csk = rcu_dereference_sk_user_data(sk); + cdev = csk->cdev; + + if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) { + csk_reset_flag(csk, CSK_ABORT_RPL_PENDING); + if (!csk_flag_nochk(csk, CSK_ABORT_REQ_RCVD)) { + if (sk->sk_state == TCP_SYN_SENT) { + cxgb4_remove_tid(cdev->tids, + csk->port_id, + GET_TID(rpl), + sk->sk_family); + sock_put(sk); + } + chtls_release_resources(sk); + chtls_conn_done(sk); + } + } + kfree_skb(skb); +} + +static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_peer_close *req = cplhdr(skb) + RSS_HDR; + void (*fn)(struct sock *sk, struct sk_buff *skb); + unsigned int hwtid = GET_TID(req); + struct chtls_sock *csk; + struct sock *sk; + u8 opcode; + + opcode = ((const struct rss_header *)cplhdr(skb))->opcode; + + sk = lookup_tid(cdev->tids, hwtid); + if (!sk) + goto rel_skb; + + csk = sk->sk_user_data; + + switch (opcode) { + case CPL_PEER_CLOSE: + fn = chtls_peer_close; + break; + case CPL_CLOSE_CON_RPL: + fn = chtls_close_con_rpl; + break; + case CPL_ABORT_REQ_RSS: + /* + * Save the offload device in the skb, we may process this + * message after the socket has closed. + */ + BLOG_SKB_CB(skb)->cdev = csk->cdev; + fn = chtls_abort_req_rss; + break; + case CPL_ABORT_RPL_RSS: + fn = chtls_abort_rpl_rss; + break; + default: + goto rel_skb; + } + + process_cpl_msg(fn, sk, skb); + return 0; + +rel_skb: + kfree_skb(skb); + return 0; +} + +static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb) +{ + struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR; + struct chtls_sock *csk = sk->sk_user_data; + struct tcp_sock *tp = tcp_sk(sk); + u32 credits = hdr->credits; + u32 snd_una; + + snd_una = ntohl(hdr->snd_una); + csk->wr_credits += credits; + + if (csk->wr_unacked > csk->wr_max_credits - csk->wr_credits) + csk->wr_unacked = csk->wr_max_credits - csk->wr_credits; + + while (credits) { + struct sk_buff *pskb = csk->wr_skb_head; + u32 csum; + + if (unlikely(!pskb)) { + if (csk->wr_nondata) + csk->wr_nondata -= credits; + break; + } + csum = (__force u32)pskb->csum; + if (unlikely(credits < csum)) { + pskb->csum = (__force __wsum)(csum - credits); + break; + } + dequeue_wr(sk); + credits -= csum; + kfree_skb(pskb); + } + if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) { + if (unlikely(before(snd_una, tp->snd_una))) { + kfree_skb(skb); + return; + } + + if (tp->snd_una != snd_una) { + tp->snd_una = snd_una; + tp->rcv_tstamp = tcp_time_stamp(tp); + if (tp->snd_una == tp->snd_nxt && + !csk_flag_nochk(csk, CSK_TX_FAILOVER)) + csk_reset_flag(csk, CSK_TX_WAIT_IDLE); + } + } + + if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_CH) { + unsigned int fclen16 = roundup(failover_flowc_wr_len, 16); + + csk->wr_credits -= fclen16; + csk_reset_flag(csk, CSK_TX_WAIT_IDLE); + csk_reset_flag(csk, CSK_TX_FAILOVER); + } + if (skb_queue_len(&csk->txq) && chtls_push_frames(csk, 0)) + sk->sk_write_space(sk); + + kfree_skb(skb); +} + +static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_fw4_ack *rpl = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(rpl); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + if (unlikely(!sk)) { + pr_err("can't find conn. 
for hwtid %u.\n", hwtid); + return -EINVAL; + } + process_cpl_msg(chtls_rx_ack, sk, skb); + + return 0; +} + +static int chtls_set_tcb_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(rpl); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + + /* return EINVAL if socket doesn't exist */ + if (!sk) + return -EINVAL; + + /* Reusing the skb as size of cpl_set_tcb_field structure + * is greater than cpl_abort_req + */ + if (TCB_COOKIE_G(rpl->cookie) == TCB_FIELD_COOKIE_TFLAG) + chtls_send_abort(sk, CPL_ABORT_SEND_RST, NULL); + + kfree_skb(skb); + return 0; +} + +chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { + [CPL_PASS_OPEN_RPL] = chtls_pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = chtls_pass_accept_req, + [CPL_PASS_ESTABLISH] = chtls_pass_establish, + [CPL_RX_DATA] = chtls_rx_data, + [CPL_TLS_DATA] = chtls_rx_pdu, + [CPL_RX_TLS_CMP] = chtls_rx_cmp, + [CPL_PEER_CLOSE] = chtls_conn_cpl, + [CPL_CLOSE_CON_RPL] = chtls_conn_cpl, + [CPL_ABORT_REQ_RSS] = chtls_conn_cpl, + [CPL_ABORT_RPL_RSS] = chtls_conn_cpl, + [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_SET_TCB_RPL] = chtls_set_tcb_rpl, +}; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h new file mode 100644 index 000000000000..f61ca657601c --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 Chelsio Communications, Inc. + */ + +#ifndef __CHTLS_CM_H__ +#define __CHTLS_CM_H__ + +/* + * TCB settings + */ +/* 3:0 */ +#define TCB_ULP_TYPE_W 0 +#define TCB_ULP_TYPE_S 0 +#define TCB_ULP_TYPE_M 0xfULL +#define TCB_ULP_TYPE_V(x) ((x) << TCB_ULP_TYPE_S) + +/* 11:4 */ +#define TCB_ULP_RAW_W 0 +#define TCB_ULP_RAW_S 4 +#define TCB_ULP_RAW_M 0xffULL +#define TCB_ULP_RAW_V(x) ((x) << TCB_ULP_RAW_S) + +#define TF_TLS_KEY_SIZE_S 7 +#define TF_TLS_KEY_SIZE_V(x) ((x) << TF_TLS_KEY_SIZE_S) + +#define TF_TLS_CONTROL_S 2 +#define TF_TLS_CONTROL_V(x) ((x) << TF_TLS_CONTROL_S) + +#define TF_TLS_ACTIVE_S 1 +#define TF_TLS_ACTIVE_V(x) ((x) << TF_TLS_ACTIVE_S) + +#define TF_TLS_ENABLE_S 0 +#define TF_TLS_ENABLE_V(x) ((x) << TF_TLS_ENABLE_S) + +#define TF_RX_QUIESCE_S 15 +#define TF_RX_QUIESCE_V(x) ((x) << TF_RX_QUIESCE_S) + +/* + * Max receive window supported by HW in bytes. Only a small part of it can + * be set through option0, the rest needs to be set through RX_DATA_ACK. + */ +#define MAX_RCV_WND ((1U << 27) - 1) +#define MAX_MSS 65536 + +/* + * Min receive window. We want it to be large enough to accommodate receive + * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 
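+ * + * As a rough sketch of how these two bounds end up being applied (this + * mirrors chtls_select_window() in chtls_io.c later in this patch): + * + * wnd = min(max(tcp_full_space(sk), MIN_RCV_WND), MAX_RCV_WND);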
+ */ +#define MIN_RCV_WND (24 * 1024U) +#define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000)) + +/* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */ +#define TX_HEADER_LEN \ + (sizeof(struct fw_ofld_tx_data_wr) + sizeof(struct sge_opaque_hdr)) +#define TX_TLSHDR_LEN \ + (sizeof(struct fw_tlstx_data_wr) + sizeof(struct cpl_tx_tls_sfo) + \ + sizeof(struct sge_opaque_hdr)) +#define TXDATA_SKB_LEN 128 + +enum { + CPL_TX_TLS_SFO_TYPE_CCS, + CPL_TX_TLS_SFO_TYPE_ALERT, + CPL_TX_TLS_SFO_TYPE_HANDSHAKE, + CPL_TX_TLS_SFO_TYPE_DATA, + CPL_TX_TLS_SFO_TYPE_HEARTBEAT, +}; + +enum { + TLS_HDR_TYPE_CCS = 20, + TLS_HDR_TYPE_ALERT, + TLS_HDR_TYPE_HANDSHAKE, + TLS_HDR_TYPE_RECORD, + TLS_HDR_TYPE_HEARTBEAT, +}; + +typedef void (*defer_handler_t)(struct chtls_dev *dev, struct sk_buff *skb); +extern struct request_sock_ops chtls_rsk_ops; +extern struct request_sock_ops chtls_rsk_opsv6; + +struct deferred_skb_cb { + defer_handler_t handler; + struct chtls_dev *dev; +}; + +#define DEFERRED_SKB_CB(skb) ((struct deferred_skb_cb *)(skb)->cb) +#define failover_flowc_wr_len offsetof(struct fw_flowc_wr, mnemval[3]) +#define WR_SKB_CB(skb) ((struct wr_skb_cb *)(skb)->cb) +#define ACCEPT_QUEUE(sk) (&inet_csk(sk)->icsk_accept_queue.rskq_accept_head) + +#define SND_WSCALE(tp) ((tp)->rx_opt.snd_wscale) +#define RCV_WSCALE(tp) ((tp)->rx_opt.rcv_wscale) +#define USER_MSS(tp) ((tp)->rx_opt.user_mss) +#define TS_RECENT_STAMP(tp) ((tp)->rx_opt.ts_recent_stamp) +#define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok) +#define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok) +#define SACK_OK(tp) ((tp)->rx_opt.sack_ok) +#define INC_ORPHAN_COUNT(sk) this_cpu_inc(*(sk)->sk_prot->orphan_count) + +/* TLS SKB */ +#define skb_ulp_tls_inline(skb) (ULP_SKB_CB(skb)->ulp.tls.ofld) +#define skb_ulp_tls_iv_imm(skb) (ULP_SKB_CB(skb)->ulp.tls.iv) + +void chtls_defer_reply(struct sk_buff *skb, struct chtls_dev *dev, + defer_handler_t handler); + +/* + * Returns true if the socket is in one of the supplied states. 
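+ * For example, sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) is + * non-zero exactly in those two states; chtls_sendmsg() in chtls_io.c + * uses this mask to check that a send is still possible.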
+ */ +static inline unsigned int sk_in_state(const struct sock *sk, + unsigned int states) +{ + return states & (1 << sk->sk_state); +} + +static void chtls_rsk_destructor(struct request_sock *req) +{ + /* do nothing */ +} + +static inline void chtls_init_rsk_ops(struct proto *chtls_tcp_prot, + struct request_sock_ops *chtls_tcp_ops, + struct proto *tcp_prot, int family) +{ + memset(chtls_tcp_ops, 0, sizeof(*chtls_tcp_ops)); + chtls_tcp_ops->family = family; + chtls_tcp_ops->obj_size = sizeof(struct tcp_request_sock); + chtls_tcp_ops->destructor = chtls_rsk_destructor; + chtls_tcp_ops->slab = tcp_prot->rsk_prot->slab; + chtls_tcp_prot->rsk_prot = chtls_tcp_ops; +} + +static inline void chtls_reqsk_free(struct request_sock *req) +{ + if (req->rsk_listener) + sock_put(req->rsk_listener); + kmem_cache_free(req->rsk_ops->slab, req); +} + +#define DECLARE_TASK_FUNC(task, task_param) \ + static void task(struct work_struct *task_param) + +static inline void sk_wakeup_sleepers(struct sock *sk, bool interruptable) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (skwq_has_sleeper(wq)) { + if (interruptable) + wake_up_interruptible(sk_sleep(sk)); + else + wake_up_all(sk_sleep(sk)); + } + rcu_read_unlock(); +} + +static inline void chtls_set_req_port(struct request_sock *oreq, + __be16 source, __be16 dest) +{ + inet_rsk(oreq)->ir_rmt_port = source; + inet_rsk(oreq)->ir_num = ntohs(dest); +} + +static inline void chtls_set_req_addr(struct request_sock *oreq, + __be32 local_ip, __be32 peer_ip) +{ + inet_rsk(oreq)->ir_loc_addr = local_ip; + inet_rsk(oreq)->ir_rmt_addr = peer_ip; +} + +static inline void chtls_free_skb(struct sock *sk, struct sk_buff *skb) +{ + skb_dst_set(skb, NULL); + __skb_unlink(skb, &sk->sk_receive_queue); + __kfree_skb(skb); +} + +static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb) +{ + skb_dst_set(skb, NULL); + __skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); +} + +static inline void chtls_reset_wr_list(struct chtls_sock *csk) +{ + csk->wr_skb_head = NULL; + csk->wr_skb_tail = NULL; +} + +static inline void enqueue_wr(struct chtls_sock *csk, struct sk_buff *skb) +{ + WR_SKB_CB(skb)->next_wr = NULL; + + skb_get(skb); + + if (!csk->wr_skb_head) + csk->wr_skb_head = skb; + else + WR_SKB_CB(csk->wr_skb_tail)->next_wr = skb; + csk->wr_skb_tail = skb; +} + +static inline struct sk_buff *dequeue_wr(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb = NULL; + + skb = csk->wr_skb_head; + + if (likely(skb)) { + /* Don't bother clearing the tail */ + csk->wr_skb_head = WR_SKB_CB(skb)->next_wr; + WR_SKB_CB(skb)->next_wr = NULL; + } + return skb; +} +#endif diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c new file mode 100644 index 000000000000..1e67140b0f80 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018 Chelsio Communications, Inc. 
+ * + * Written by: Atul Gupta (atul.gupta@chelsio.com) + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include <linux/timer.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/tls.h> +#include <net/tls.h> + +#include "chtls.h" +#include "chtls_cm.h" + +static void __set_tcb_field_direct(struct chtls_sock *csk, + struct cpl_set_tcb_field *req, u16 word, + u64 mask, u64 val, u8 cookie, int no_reply) +{ + struct ulptx_idata *sc; + + INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, csk->tid); + req->wr.wr_mid |= htonl(FW_WR_FLOWID_V(csk->tid)); + req->reply_ctrl = htons(NO_REPLY_V(no_reply) | + QUEUENO_V(csk->rss_qid)); + req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(cookie)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + sc = (struct ulptx_idata *)(req + 1); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); + sc->len = htonl(0); +} + +static void __set_tcb_field(struct sock *sk, struct sk_buff *skb, u16 word, + u64 mask, u64 val, u8 cookie, int no_reply) +{ + struct cpl_set_tcb_field *req; + struct chtls_sock *csk; + struct ulptx_idata *sc; + unsigned int wrlen; + + wrlen = roundup(sizeof(*req) + sizeof(*sc), 16); + csk = rcu_dereference_sk_user_data(sk); + + req = (struct cpl_set_tcb_field *)__skb_put(skb, wrlen); + __set_tcb_field_direct(csk, req, word, mask, val, cookie, no_reply); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id); +} + +/* + * Send a control message to the HW; the message goes as immediate data + * and the packet is freed immediately. + */ +static int chtls_set_tcb_field(struct sock *sk, u16 word, u64 mask, u64 val) +{ + struct cpl_set_tcb_field *req; + unsigned int credits_needed; + struct chtls_sock *csk; + struct ulptx_idata *sc; + struct sk_buff *skb; + unsigned int wrlen; + int ret; + + wrlen = roundup(sizeof(*req) + sizeof(*sc), 16); + + skb = alloc_skb(wrlen, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + credits_needed = DIV_ROUND_UP(wrlen, 16); + csk = rcu_dereference_sk_user_data(sk); + + __set_tcb_field(sk, skb, word, mask, val, 0, 1); + skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); + csk->wr_credits -= credits_needed; + csk->wr_unacked += credits_needed; + enqueue_wr(csk, skb); + ret = cxgb4_ofld_send(csk->egress_dev, skb); + if (ret < 0) + kfree_skb(skb); + return ret < 0 ? ret : 0; +} + +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t) +{ + struct sk_buff *skb; + unsigned int wrlen; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return; + + __set_tcb_field(sk, skb, word, mask, val, cookie, 0); + send_or_defer(sk, tcp_sk(sk), skb, through_l2t); +} + +/* + * Set one of the t_flags bits in the TCB. 
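+ * + * For example, chtls_set_tcb_tflag(sk, TF_RX_QUIESCE_S, 1) builds the + * mask 1ULL << TF_RX_QUIESCE_S for TCB word 1 and hands it to + * chtls_set_tcb_field() above.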
+ */ +int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val) +{ + return chtls_set_tcb_field(sk, 1, 1ULL << bit_pos, + (u64)val << bit_pos); +} + +static int chtls_set_tcb_keyid(struct sock *sk, int keyid) +{ + return chtls_set_tcb_field(sk, 31, 0xFFFFFFFFULL, keyid); +} + +static int chtls_set_tcb_seqno(struct sock *sk) +{ + return chtls_set_tcb_field(sk, 28, ~0ULL, 0); +} + +static int chtls_set_tcb_quiesce(struct sock *sk, int val) +{ + return chtls_set_tcb_field(sk, 1, (1ULL << TF_RX_QUIESCE_S), + TF_RX_QUIESCE_V(val)); +} + +void chtls_set_quiesce_ctrl(struct sock *sk, int val) +{ + struct chtls_sock *csk; + struct sk_buff *skb; + unsigned int wrlen; + int ret; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_ATOMIC); + if (!skb) + return; + + csk = rcu_dereference_sk_user_data(sk); + + __set_tcb_field(sk, skb, 1, TF_RX_QUIESCE_V(1), 0, 0, 1); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id); + ret = cxgb4_ofld_send(csk->egress_dev, skb); + if (ret < 0) + kfree_skb(skb); +} + +/* TLS Key bitmap processing */ +int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi) +{ + unsigned int num_key_ctx, bsize; + int ksize; + + num_key_ctx = (lldi->vr->key.size / TLS_KEY_CONTEXT_SZ); + bsize = BITS_TO_LONGS(num_key_ctx); + + cdev->kmap.size = num_key_ctx; + cdev->kmap.available = bsize; + ksize = sizeof(*cdev->kmap.addr) * bsize; + cdev->kmap.addr = kvzalloc(ksize, GFP_KERNEL); + if (!cdev->kmap.addr) + return -ENOMEM; + + cdev->kmap.start = lldi->vr->key.start; + spin_lock_init(&cdev->kmap.lock); + return 0; +} + +static int get_new_keyid(struct chtls_sock *csk, u32 optname) +{ + struct net_device *dev = csk->egress_dev; + struct chtls_dev *cdev = csk->cdev; + struct chtls_hws *hws; + struct adapter *adap; + int keyid; + + adap = netdev2adap(dev); + hws = &csk->tlshws; + + spin_lock_bh(&cdev->kmap.lock); + keyid = find_first_zero_bit(cdev->kmap.addr, cdev->kmap.size); + if (keyid < cdev->kmap.size) { + __set_bit(keyid, cdev->kmap.addr); + if (optname == TLS_RX) + hws->rxkey = keyid; + else + hws->txkey = keyid; + atomic_inc(&adap->chcr_stats.tls_key); + } else { + keyid = -1; + } + spin_unlock_bh(&cdev->kmap.lock); + return keyid; +} + +void free_tls_keyid(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct net_device *dev = csk->egress_dev; + struct chtls_dev *cdev = csk->cdev; + struct chtls_hws *hws; + struct adapter *adap; + + if (!cdev->kmap.addr) + return; + + adap = netdev2adap(dev); + hws = &csk->tlshws; + + spin_lock_bh(&cdev->kmap.lock); + if (hws->rxkey >= 0) { + __clear_bit(hws->rxkey, cdev->kmap.addr); + atomic_dec(&adap->chcr_stats.tls_key); + hws->rxkey = -1; + } + if (hws->txkey >= 0) { + __clear_bit(hws->txkey, cdev->kmap.addr); + atomic_dec(&adap->chcr_stats.tls_key); + hws->txkey = -1; + } + spin_unlock_bh(&cdev->kmap.lock); +} + +unsigned int keyid_to_addr(int start_addr, int keyid) +{ + return (start_addr + (keyid * TLS_KEY_CONTEXT_SZ)) >> 5; +} + +static void chtls_rxkey_ivauth(struct _key_ctx *kctx) +{ + kctx->iv_to_auth = cpu_to_be64(KEYCTX_TX_WR_IV_V(6ULL) | + KEYCTX_TX_WR_AAD_V(1ULL) | + KEYCTX_TX_WR_AADST_V(5ULL) | + KEYCTX_TX_WR_CIPHER_V(14ULL) | + KEYCTX_TX_WR_CIPHERST_V(0ULL) | + KEYCTX_TX_WR_AUTH_V(14ULL) | + KEYCTX_TX_WR_AUTHST_V(16ULL) | + KEYCTX_TX_WR_AUTHIN_V(16ULL)); +} + +static int chtls_key_info(struct chtls_sock *csk, + struct _key_ctx *kctx, + u32 keylen, u32 optname, + int cipher_type) +{ + 
unsigned char key[AES_MAX_KEY_SIZE]; + unsigned char *key_p, *salt; + unsigned char ghash_h[AEAD_H_SIZE]; + int ck_size, key_ctx_size, kctx_mackey_size, salt_size; + struct crypto_aes_ctx aes; + int ret; + + key_ctx_size = sizeof(struct _key_ctx) + + roundup(keylen, 16) + AEAD_H_SIZE; + + /* GCM mode of AES supports 128 and 256 bit encryption, so + * prepare the key context based on the GCM cipher type + */ + switch (cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *gcm_ctx_128 = + (struct tls12_crypto_info_aes_gcm_128 *) + &csk->tlshws.crypto_info; + memcpy(key, gcm_ctx_128->key, keylen); + + key_p = gcm_ctx_128->key; + salt = gcm_ctx_128->salt; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE; + kctx_mackey_size = CHCR_KEYCTX_MAC_KEY_SIZE_128; + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *gcm_ctx_256 = + (struct tls12_crypto_info_aes_gcm_256 *) + &csk->tlshws.crypto_info; + memcpy(key, gcm_ctx_256->key, keylen); + + key_p = gcm_ctx_256->key; + salt = gcm_ctx_256->salt; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE; + kctx_mackey_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; + break; + } + default: + pr_err("GCM: Invalid key length %d\n", keylen); + return -EINVAL; + } + + /* Calculate the H = CIPH(K, 0 repeated 16 times). + * It will go in the key context. + */ + ret = aes_expandkey(&aes, key, keylen); + if (ret) + return ret; + + memset(ghash_h, 0, AEAD_H_SIZE); + aes_encrypt(&aes, ghash_h, ghash_h); + memzero_explicit(&aes, sizeof(aes)); + csk->tlshws.keylen = key_ctx_size; + + /* Copy the Key context */ + if (optname == TLS_RX) { + int key_ctx; + + key_ctx = ((key_ctx_size >> 4) << 3); + kctx->ctx_hdr = FILL_KEY_CRX_HDR(ck_size, + kctx_mackey_size, + 0, 0, key_ctx); + chtls_rxkey_ivauth(kctx); + } else { + kctx->ctx_hdr = FILL_KEY_CTX_HDR(ck_size, + kctx_mackey_size, + 0, 0, key_ctx_size >> 4); + } + + memcpy(kctx->salt, salt, salt_size); + memcpy(kctx->key, key_p, keylen); + memcpy(kctx->key + keylen, ghash_h, AEAD_H_SIZE); + /* erase key info from driver */ + memset(key_p, 0, keylen); + + return 0; +} + +static void chtls_set_scmd(struct chtls_sock *csk) +{ + struct chtls_hws *hws = &csk->tlshws; + + hws->scmd.seqno_numivs = + SCMD_SEQ_NO_CTRL_V(3) | + SCMD_PROTO_VERSION_V(0) | + SCMD_ENC_DEC_CTRL_V(0) | + SCMD_CIPH_AUTH_SEQ_CTRL_V(1) | + SCMD_CIPH_MODE_V(2) | + SCMD_AUTH_MODE_V(4) | + SCMD_HMAC_CTRL_V(0) | + SCMD_IV_SIZE_V(4) | + SCMD_NUM_IVS_V(1); + + hws->scmd.ivgen_hdrlen = + SCMD_IV_GEN_CTRL_V(1) | + SCMD_KEY_CTX_INLINE_V(0) | + SCMD_TLS_FRAG_ENABLE_V(1); +} + +int chtls_setkey(struct chtls_sock *csk, u32 keylen, + u32 optname, int cipher_type) +{ + struct tls_key_req *kwr; + struct chtls_dev *cdev; + struct _key_ctx *kctx; + int wrlen, klen, len; + struct sk_buff *skb; + struct sock *sk; + int keyid; + int kaddr; + int ret; + + cdev = csk->cdev; + sk = csk->sk; + + klen = roundup((keylen + AEAD_H_SIZE) + sizeof(*kctx), 32); + wrlen = roundup(sizeof(*kwr), 16); + len = klen + wrlen; + + /* Flush outstanding data before the new key takes effect */ + if (optname == TLS_TX) { + lock_sock(sk); + if (skb_queue_len(&csk->txq)) + chtls_push_frames(csk, 0); + release_sock(sk); + } + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + keyid = get_new_keyid(csk, optname); + if (keyid < 0) { + ret = -ENOSPC; + goto out_nokey; + } + + kaddr = keyid_to_addr(cdev->kmap.start, keyid); + kwr = (struct tls_key_req *)__skb_put_zero(skb, 
len); + kwr->wr.op_to_compl = + cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F | + FW_WR_ATOMIC_V(1U)); + kwr->wr.flowid_len16 = + cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16) | + FW_WR_FLOWID_V(csk->tid))); + kwr->wr.protocol = 0; + kwr->wr.mfs = htons(TLS_MFS); + kwr->wr.reneg_to_write_rx = optname; + + /* ulptx command */ + kwr->req.cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) | + T5_ULP_MEMIO_ORDER_V(1) | + T5_ULP_MEMIO_IMM_V(1)); + kwr->req.len16 = cpu_to_be32((csk->tid << 8) | + DIV_ROUND_UP(len - sizeof(kwr->wr), 16)); + kwr->req.dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(klen >> 5)); + kwr->req.lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(kaddr)); + + /* sub command */ + kwr->sc_imm.cmd_more = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_IMM)); + kwr->sc_imm.len = cpu_to_be32(klen); + + lock_sock(sk); + /* key info */ + kctx = (struct _key_ctx *)(kwr + 1); + ret = chtls_key_info(csk, kctx, keylen, optname, cipher_type); + if (ret) + goto out_notcb; + + if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) + goto out_notcb; + + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->tlshws.txqid); + csk->wr_credits -= DIV_ROUND_UP(len, 16); + csk->wr_unacked += DIV_ROUND_UP(len, 16); + enqueue_wr(csk, skb); + cxgb4_ofld_send(csk->egress_dev, skb); + skb = NULL; + + chtls_set_scmd(csk); + /* Clear quiesce for Rx key */ + if (optname == TLS_RX) { + ret = chtls_set_tcb_keyid(sk, keyid); + if (ret) + goto out_notcb; + ret = chtls_set_tcb_field(sk, 0, + TCB_ULP_RAW_V(TCB_ULP_RAW_M), + TCB_ULP_RAW_V((TF_TLS_KEY_SIZE_V(1) | + TF_TLS_CONTROL_V(1) | + TF_TLS_ACTIVE_V(1) | + TF_TLS_ENABLE_V(1)))); + if (ret) + goto out_notcb; + ret = chtls_set_tcb_seqno(sk); + if (ret) + goto out_notcb; + ret = chtls_set_tcb_quiesce(sk, 0); + if (ret) + goto out_notcb; + csk->tlshws.rxkey = keyid; + } else { + csk->tlshws.tx_seq_no = 0; + csk->tlshws.txkey = keyid; + } + + release_sock(sk); + return ret; +out_notcb: + release_sock(sk); + free_tls_keyid(sk); +out_nokey: + kfree_skb(skb); + return ret; +} diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c new file mode 100644 index 000000000000..a4256087ac82 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -0,0 +1,1911 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018 Chelsio Communications, Inc. 
+ * + * Written by: Atul Gupta (atul.gupta@chelsio.com) + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include <linux/timer.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/sched/signal.h> +#include <net/tcp.h> +#include <net/busy_poll.h> +#include <crypto/aes.h> + +#include "chtls.h" +#include "chtls_cm.h" + +static bool is_tls_tx(struct chtls_sock *csk) +{ + return csk->tlshws.txkey >= 0; +} + +static bool is_tls_rx(struct chtls_sock *csk) +{ + return csk->tlshws.rxkey >= 0; +} + +static int data_sgl_len(const struct sk_buff *skb) +{ + unsigned int cnt; + + cnt = skb_shinfo(skb)->nr_frags; + return sgl_len(cnt) * 8; +} + +static int nos_ivs(struct sock *sk, unsigned int size) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + return DIV_ROUND_UP(size, csk->tlshws.mfs); +} + +static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb) +{ + int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE; + int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb); + + if ((hlen + KEY_ON_MEM_SZ + ivs_size) < + MAX_IMM_OFLD_TX_DATA_WR_LEN) { + ULP_SKB_CB(skb)->ulp.tls.iv = 1; + return 1; + } + ULP_SKB_CB(skb)->ulp.tls.iv = 0; + return 0; +} + +static int max_ivs_size(struct sock *sk, int size) +{ + return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE; +} + +static int ivs_size(struct sock *sk, const struct sk_buff *skb) +{ + return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) * + CIPHER_BLOCK_SIZE) : 0; +} + +static int flowc_wr_credits(int nparams, int *flowclenp) +{ + int flowclen16, flowclen; + + flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); + flowclen16 = DIV_ROUND_UP(flowclen, 16); + flowclen = flowclen16 * 16; + + if (flowclenp) + *flowclenp = flowclen; + + return flowclen16; +} + +static struct sk_buff *create_flowc_wr_skb(struct sock *sk, + struct fw_flowc_wr *flowc, + int flowclen) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb; + + skb = alloc_skb(flowclen, GFP_ATOMIC); + if (!skb) + return NULL; + + __skb_put_data(skb, flowc, flowclen); + skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); + + return skb; +} + +static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc, + int flowclen) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int flowclen16; + int ret; + + flowclen16 = flowclen / 16; + + if (csk_flag(sk, CSK_TX_DATA_SENT)) { + skb = create_flowc_wr_skb(sk, flowc, flowclen); + if (!skb) + return -ENOMEM; + + skb_entail(sk, skb, + ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND); + return 0; + } + + ret = cxgb4_immdata_send(csk->egress_dev, + csk->txq_idx, + flowc, flowclen); + if (!ret) + return flowclen16; + skb = create_flowc_wr_skb(sk, flowc, flowclen); + if (!skb) + return -ENOMEM; + send_or_defer(sk, tp, skb, 0); + return flowclen16; +} + +static u8 tcp_state_to_flowc_state(u8 state) +{ + switch (state) { + case TCP_ESTABLISHED: + return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; + case TCP_CLOSE_WAIT: + return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT; + case TCP_FIN_WAIT1: + return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1; + case TCP_CLOSING: + return FW_FLOWC_MNEM_TCPSTATE_CLOSING; + case TCP_LAST_ACK: + return FW_FLOWC_MNEM_TCPSTATE_LASTACK; + case TCP_FIN_WAIT2: + return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2; + } + + return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; +} + +int send_tx_flowc_wr(struct sock *sk, int 
compl, + u32 snd_nxt, u32 rcv_nxt) +{ + struct flowc_packed { + struct fw_flowc_wr fc; + struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX]; + } __packed sflowc; + int nparams, paramidx, flowclen16, flowclen; + struct fw_flowc_wr *flowc; + struct chtls_sock *csk; + struct tcp_sock *tp; + + csk = rcu_dereference_sk_user_data(sk); + tp = tcp_sk(sk); + memset(&sflowc, 0, sizeof(sflowc)); + flowc = &sflowc.fc; + +#define FLOWC_PARAM(__m, __v) \ + do { \ + flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ + flowc->mnemval[paramidx].val = cpu_to_be32(__v); \ + paramidx++; \ + } while (0) + + paramidx = 0; + + FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf)); + FLOWC_PARAM(CH, csk->tx_chan); + FLOWC_PARAM(PORT, csk->tx_chan); + FLOWC_PARAM(IQID, csk->rss_qid); + FLOWC_PARAM(SNDNXT, tp->snd_nxt); + FLOWC_PARAM(RCVNXT, tp->rcv_nxt); + FLOWC_PARAM(SNDBUF, csk->sndbuf); + FLOWC_PARAM(MSS, tp->mss_cache); + FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state)); + + if (SND_WSCALE(tp)) + FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp)); + + if (csk->ulp_mode == ULP_MODE_TLS) + FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS); + + if (csk->tlshws.fcplenmax) + FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax); + + nparams = paramidx; +#undef FLOWC_PARAM + + flowclen16 = flowc_wr_credits(nparams, &flowclen); + flowc->op_to_nparams = + cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | + FW_WR_COMPL_V(compl) | + FW_FLOWC_WR_NPARAMS_V(nparams)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | + FW_WR_FLOWID_V(csk->tid)); + + return send_flowc_wr(sk, flowc, flowclen); +} + +/* Copy IVs to WR */ +static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb) + +{ + struct chtls_sock *csk; + unsigned char *iv_loc; + struct chtls_hws *hws; + unsigned char *ivs; + u16 number_of_ivs; + struct page *page; + int err = 0; + + csk = rcu_dereference_sk_user_data(sk); + hws = &csk->tlshws; + number_of_ivs = nos_ivs(sk, skb->len); + + if (number_of_ivs > MAX_IVS_PAGE) { + pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs); + return -ENOMEM; + } + + /* generate the IVs */ + ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC); + if (!ivs) + return -ENOMEM; + get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE); + + if (skb_ulp_tls_iv_imm(skb)) { + /* send the IVs as immediate data in the WR */ + iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs * + CIPHER_BLOCK_SIZE); + if (iv_loc) + memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE); + + hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE; + } else { + /* Send the IVs as sgls */ + /* Already accounted IV DSGL for credits */ + skb_shinfo(skb)->nr_frags--; + page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0); + if (!page) { + pr_info("%s : Page allocation for IVs failed\n", + __func__); + err = -ENOMEM; + goto out; + } + memcpy(page_address(page), ivs, number_of_ivs * + CIPHER_BLOCK_SIZE); + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0, + number_of_ivs * CIPHER_BLOCK_SIZE); + hws->ivsize = 0; + } +out: + kfree(ivs); + return err; +} + +/* Copy Key to WR */ +static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb) +{ + struct ulptx_sc_memrd *sc_memrd; + struct chtls_sock *csk; + struct chtls_dev *cdev; + struct ulptx_idata *sc; + struct chtls_hws *hws; + u32 immdlen; + int kaddr; + + csk = rcu_dereference_sk_user_data(sk); + hws = &csk->tlshws; + cdev = csk->cdev; + + immdlen = sizeof(*sc) + sizeof(*sc_memrd); + kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey); + sc = (struct ulptx_idata *)__skb_push(skb, 
immdlen); + if (sc) { + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); + sc->len = htonl(0); + sc_memrd = (struct ulptx_sc_memrd *)(sc + 1); + sc_memrd->cmd_to_len = + htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) | + ULP_TX_SC_MORE_V(1) | + ULPTX_LEN16_V(hws->keylen >> 4)); + sc_memrd->addr = htonl(kaddr); + } +} + +static u64 tlstx_incr_seqnum(struct chtls_hws *hws) +{ + return hws->tx_seq_no++; +} + +static bool is_sg_request(const struct sk_buff *skb) +{ + return skb->peeked || + (skb->len > MAX_IMM_ULPTX_WR_LEN); +} + +/* + * Returns true if an sk_buff carries urgent data. + */ +static bool skb_urgent(struct sk_buff *skb) +{ + return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG; +} + +/* TLS content type for CPL SFO */ +static unsigned char tls_content_type(unsigned char content_type) +{ + switch (content_type) { + case TLS_HDR_TYPE_CCS: + return CPL_TX_TLS_SFO_TYPE_CCS; + case TLS_HDR_TYPE_ALERT: + return CPL_TX_TLS_SFO_TYPE_ALERT; + case TLS_HDR_TYPE_HANDSHAKE: + return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; + case TLS_HDR_TYPE_HEARTBEAT: + return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; + } + return CPL_TX_TLS_SFO_TYPE_DATA; +} + +static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb, + int dlen, int tls_immd, u32 credits, + int expn, int pdus) +{ + struct fw_tlstx_data_wr *req_wr; + struct cpl_tx_tls_sfo *req_cpl; + unsigned int wr_ulp_mode_force; + struct tls_scmd *updated_scmd; + unsigned char data_type; + struct chtls_sock *csk; + struct net_device *dev; + struct chtls_hws *hws; + struct tls_scmd *scmd; + struct adapter *adap; + unsigned char *req; + int immd_len; + int iv_imm; + int len; + + csk = rcu_dereference_sk_user_data(sk); + iv_imm = skb_ulp_tls_iv_imm(skb); + dev = csk->egress_dev; + adap = netdev2adap(dev); + hws = &csk->tlshws; + scmd = &hws->scmd; + len = dlen + expn; + + dlen = (dlen < hws->mfs) ? dlen : hws->mfs; + atomic_inc(&adap->chcr_stats.tls_pdu_tx); + + updated_scmd = scmd; + updated_scmd->seqno_numivs &= 0xffffff80; + updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus); + hws->scmd = *updated_scmd; + + req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo)); + req_cpl = (struct cpl_tx_tls_sfo *)req; + req = (unsigned char *)__skb_push(skb, (sizeof(struct + fw_tlstx_data_wr))); + + req_wr = (struct fw_tlstx_data_wr *)req; + immd_len = (tls_immd ? dlen : 0); + req_wr->op_to_immdlen = + htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) | + FW_TLSTX_DATA_WR_COMPL_V(1) | + FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len)); + req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) | + FW_TLSTX_DATA_WR_LEN16_V(credits)); + wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS); + + if (is_sg_request(skb)) + wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | + ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 
0 : + FW_OFLD_TX_DATA_WR_SHOVE_F); + + req_wr->lsodisable_to_flags = + htonl(TX_ULP_MODE_V(ULP_MODE_TLS) | + TX_URG_V(skb_urgent(skb)) | + T6_TX_FORCE_F | wr_ulp_mode_force | + TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && + skb_queue_empty(&csk->txq))); + + req_wr->ctxloc_to_exp = + htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) | + FW_TLSTX_DATA_WR_EXP_V(expn) | + FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) | + FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) | + FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4)); + + /* Fill in the length */ + req_wr->plen = htonl(len); + req_wr->mfs = htons(hws->mfs); + req_wr->adjustedplen_pkd = + htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen)); + req_wr->expinplenmax_pkd = + htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion)); + req_wr->pdusinplenmax_pkd = + FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus); + req_wr->r10 = 0; + + data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type); + req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) | + CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) | + CPL_TX_TLS_SFO_CPL_LEN_V(2) | + CPL_TX_TLS_SFO_SEG_LEN_V(dlen)); + req_cpl->pld_len = htonl(len - expn); + + req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V + ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ? + TLS_HDR_TYPE_HEARTBEAT : 0) | + CPL_TX_TLS_SFO_PROTOVER_V(0)); + + /* create the s-command */ + req_cpl->r1_lo = 0; + req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs); + req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen); + req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws)); +} + +/* + * Calculate the TLS data expansion size + */ +static int chtls_expansion_size(struct sock *sk, int data_len, + int fullpdu, + unsigned short *pducnt) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_hws *hws = &csk->tlshws; + struct tls_scmd *scmd = &hws->scmd; + int fragsize = hws->mfs; + int expnsize = 0; + int fragleft; + int fragcnt; + int expppdu; + + if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) == + SCMD_CIPH_MODE_AES_GCM) { + expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE + + TLS_HEADER_LENGTH; + + if (fullpdu) { + *pducnt = data_len / (expppdu + fragsize); + if (*pducnt > 32) + *pducnt = 32; + else if (!*pducnt) + *pducnt = 1; + expnsize = (*pducnt) * expppdu; + return expnsize; + } + fragcnt = (data_len / fragsize); + expnsize = fragcnt * expppdu; + fragleft = data_len % fragsize; + if (fragleft > 0) + expnsize += expppdu; + } + return expnsize; +} + +/* WR with IV, KEY and CPL SFO added */ +static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb, + int tls_tx_imm, int tls_len, u32 credits) +{ + unsigned short pdus_per_ulp = 0; + struct chtls_sock *csk; + struct chtls_hws *hws; + int expn_sz; + int pdus; + + csk = rcu_dereference_sk_user_data(sk); + hws = &csk->tlshws; + pdus = DIV_ROUND_UP(tls_len, hws->mfs); + expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL); + if (!hws->compute) { + hws->expansion = chtls_expansion_size(sk, + hws->fcplenmax, + 1, &pdus_per_ulp); + hws->pdus = pdus_per_ulp; + hws->adjustlen = hws->pdus * + ((hws->expansion / hws->pdus) + hws->mfs); + hws->compute = 1; + } + if (tls_copy_ivs(sk, skb)) + return; + tls_copy_tx_key(sk, skb); + tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus); + hws->tx_seq_no += (pdus - 1); +} + +static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, + unsigned int immdlen, int len, + u32 credits, u32 compl) +{ + struct fw_ofld_tx_data_wr *req; + unsigned int wr_ulp_mode_force; + struct chtls_sock *csk; + unsigned int 
opcode; + + csk = rcu_dereference_sk_user_data(sk); + opcode = FW_OFLD_TX_DATA_WR; + + req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req)); + req->op_to_immdlen = htonl(WR_OP_V(opcode) | + FW_WR_COMPL_V(compl) | + FW_WR_IMMDLEN_V(immdlen)); + req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) | + FW_WR_LEN16_V(credits)); + + wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode); + if (is_sg_request(skb)) + wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | + ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : + FW_OFLD_TX_DATA_WR_SHOVE_F); + + req->tunnel_to_proxy = htonl(wr_ulp_mode_force | + TX_URG_V(skb_urgent(skb)) | + TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && + skb_queue_empty(&csk->txq))); + req->plen = htonl(len); +} + +static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb, + bool size) +{ + int wr_size; + + wr_size = TLS_WR_CPL_LEN; + wr_size += KEY_ON_MEM_SZ; + wr_size += ivs_size(csk->sk, skb); + + if (size) + return wr_size; + + /* frags counted for IV dsgl */ + if (!skb_ulp_tls_iv_imm(skb)) + skb_shinfo(skb)->nr_frags++; + + return wr_size; +} + +static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb) +{ + int length = skb->len; + + if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN) + return false; + + if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { + /* Check TLS header len for Immediate */ + if (csk->ulp_mode == ULP_MODE_TLS && + skb_ulp_tls_inline(skb)) + length += chtls_wr_size(csk, skb, true); + else + length += sizeof(struct fw_ofld_tx_data_wr); + + return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; + } + return true; +} + +static unsigned int calc_tx_flits(const struct sk_buff *skb, + unsigned int immdlen) +{ + unsigned int flits, cnt; + + flits = immdlen / 8; /* headers */ + cnt = skb_shinfo(skb)->nr_frags; + if (skb_tail_pointer(skb) != skb_transport_header(skb)) + cnt++; + return flits + sgl_len(cnt); +} + +static void arp_failure_discard(void *handle, struct sk_buff *skb) +{ + kfree_skb(skb); +} + +int chtls_push_frames(struct chtls_sock *csk, int comp) +{ + struct chtls_hws *hws = &csk->tlshws; + struct tcp_sock *tp; + struct sk_buff *skb; + int total_size = 0; + struct sock *sk; + int wr_size; + + wr_size = sizeof(struct fw_ofld_tx_data_wr); + sk = csk->sk; + tp = tcp_sk(sk); + + if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) + return 0; + + if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) + return 0; + + while (csk->wr_credits && (skb = skb_peek(&csk->txq)) && + (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) || + skb_queue_len(&csk->txq) > 1)) { + unsigned int credit_len = skb->len; + unsigned int credits_needed; + unsigned int completion = 0; + int tls_len = skb->len;/* TLS data len before IV/key */ + unsigned int immdlen; + int len = skb->len; /* length [ulp bytes] inserted by hw */ + int flowclen16 = 0; + int tls_tx_imm = 0; + + immdlen = skb->len; + if (!is_ofld_imm(csk, skb)) { + immdlen = skb_transport_offset(skb); + if (skb_ulp_tls_inline(skb)) + wr_size = chtls_wr_size(csk, skb, false); + credit_len = 8 * calc_tx_flits(skb, immdlen); + } else { + if (skb_ulp_tls_inline(skb)) { + wr_size = chtls_wr_size(csk, skb, false); + tls_tx_imm = 1; + } + } + if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) + credit_len += wr_size; + credits_needed = DIV_ROUND_UP(credit_len, 16); + if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { + flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt, + tp->rcv_nxt); + if (flowclen16 <= 0) + break; + csk->wr_credits -= flowclen16; + csk->wr_unacked += flowclen16; + 
csk->wr_nondata += flowclen16; + csk_set_flag(csk, CSK_TX_DATA_SENT); + } + + if (csk->wr_credits < credits_needed) { + if (skb_ulp_tls_inline(skb) && + !skb_ulp_tls_iv_imm(skb)) + skb_shinfo(skb)->nr_frags--; + break; + } + + __skb_unlink(skb, &csk->txq); + skb_set_queue_mapping(skb, (csk->txq_idx << 1) | + CPL_PRIORITY_DATA); + if (hws->ofld) + hws->txqid = (skb->queue_mapping >> 1); + skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata); + csk->wr_credits -= credits_needed; + csk->wr_unacked += credits_needed; + csk->wr_nondata = 0; + enqueue_wr(csk, skb); + + if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { + if ((comp && csk->wr_unacked == credits_needed) || + (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) || + csk->wr_unacked >= csk->wr_max_credits / 2) { + completion = 1; + csk->wr_unacked = 0; + } + if (skb_ulp_tls_inline(skb)) + make_tlstx_data_wr(sk, skb, tls_tx_imm, + tls_len, credits_needed); + else + make_tx_data_wr(sk, skb, immdlen, len, + credits_needed, completion); + tp->snd_nxt += len; + tp->lsndtime = tcp_jiffies32; + if (completion) + ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR; + } else { + struct cpl_close_con_req *req = cplhdr(skb); + unsigned int cmd = CPL_OPCODE_G(ntohl + (OPCODE_TID(req))); + + if (cmd == CPL_CLOSE_CON_REQ) + csk_set_flag(csk, + CSK_CLOSE_CON_REQUESTED); + + if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) && + (csk->wr_unacked >= csk->wr_max_credits / 2)) { + req->wr.wr_hi |= htonl(FW_WR_COMPL_F); + csk->wr_unacked = 0; + } + } + total_size += skb->truesize; + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER) + csk_set_flag(csk, CSK_TX_WAIT_IDLE); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); + } + sk->sk_wmem_queued -= total_size; + return total_size; +} + +static void mark_urg(struct tcp_sock *tp, int flags, + struct sk_buff *skb) +{ + if (unlikely(flags & MSG_OOB)) { + tp->snd_up = tp->write_seq; + ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG | + ULPCB_FLAG_BARRIER | + ULPCB_FLAG_NO_APPEND | + ULPCB_FLAG_NEED_HDR; + } +} + +/* + * Returns true if a connection should send more data to TCP engine + */ +static bool should_push(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_dev *cdev = csk->cdev; + struct tcp_sock *tp = tcp_sk(sk); + + /* + * If we've released our offload resources there's nothing to do ... + */ + if (!cdev) + return false; + + /* + * If there aren't any work requests in flight, or there isn't enough + * data in flight, or Nagle is off then send the current TX_DATA + * otherwise hold it and wait to accumulate more data. + */ + return csk->wr_credits == csk->wr_max_credits || + (tp->nonagle & TCP_NAGLE_OFF); +} + +/* + * Returns true if a TCP socket is corked. + */ +static bool corked(const struct tcp_sock *tp, int flags) +{ + return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK); +} + +/* + * Returns true if a send should try to push new data. 
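+ * That is, no work requests are in flight (or Nagle is off) and the + * sender is not corked by MSG_MORE or TCP_NAGLE_CORK.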
+ */ +static bool send_should_push(struct sock *sk, int flags) +{ + return should_push(sk) && !corked(tcp_sk(sk), flags); +} + +void chtls_tcp_push(struct sock *sk, int flags) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + int qlen = skb_queue_len(&csk->txq); + + if (likely(qlen)) { + struct sk_buff *skb = skb_peek_tail(&csk->txq); + struct tcp_sock *tp = tcp_sk(sk); + + mark_urg(tp, flags, skb); + + if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) && + corked(tp, flags)) { + ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD; + return; + } + + ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD; + if (qlen == 1 && + ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || + should_push(sk))) + chtls_push_frames(csk, 1); + } +} + +/* + * Calculate the size for a new send sk_buff. It's maximum size so we can + * pack lots of data into it, unless we plan to send it immediately, in which + * case we size it more tightly. + * + * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't + * arise in normal cases and when it does we are just wasting memory. + */ +static int select_size(struct sock *sk, int io_len, int flags, int len) +{ + const int pgbreak = SKB_MAX_HEAD(len); + + /* + * If the data wouldn't fit in the main body anyway, put only the + * header in the main body so it can use immediate data and place all + * the payload in page fragments. + */ + if (io_len > pgbreak) + return 0; + + /* + * If we will be accumulating payload get a large main body. + */ + if (!send_should_push(sk, flags)) + return pgbreak; + + return io_len; +} + +void skb_entail(struct sock *sk, struct sk_buff *skb, int flags) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct tcp_sock *tp = tcp_sk(sk); + + ULP_SKB_CB(skb)->seq = tp->write_seq; + ULP_SKB_CB(skb)->flags = flags; + __skb_queue_tail(&csk->txq, skb); + sk->sk_wmem_queued += skb->truesize; + + if (TCP_PAGE(sk) && TCP_OFF(sk)) { + put_page(TCP_PAGE(sk)); + TCP_PAGE(sk) = NULL; + TCP_OFF(sk) = 0; + } +} + +static struct sk_buff *get_tx_skb(struct sock *sk, int size) +{ + struct sk_buff *skb; + + skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation); + if (likely(skb)) { + skb_reserve(skb, TX_HEADER_LEN); + skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); + skb_reset_transport_header(skb); + } + return skb; +} + +static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb; + + skb = alloc_skb(((zcopy ? 
0 : size) + TX_TLSHDR_LEN + + KEY_ON_MEM_SZ + max_ivs_size(sk, size)), + sk->sk_allocation); + if (likely(skb)) { + skb_reserve(skb, (TX_TLSHDR_LEN + + KEY_ON_MEM_SZ + max_ivs_size(sk, size))); + skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); + skb_reset_transport_header(skb); + ULP_SKB_CB(skb)->ulp.tls.ofld = 1; + ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type; + } + return skb; +} + +static void tx_skb_finalize(struct sk_buff *skb) +{ + struct ulp_skb_cb *cb = ULP_SKB_CB(skb); + + if (!(cb->flags & ULPCB_FLAG_NO_HDR)) + cb->flags = ULPCB_FLAG_NEED_HDR; + cb->flags |= ULPCB_FLAG_NO_APPEND; +} + +static void push_frames_if_head(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + + if (skb_queue_len(&csk->txq) == 1) + chtls_push_frames(csk, 1); +} + +static int chtls_skb_copy_to_page_nocache(struct sock *sk, + struct iov_iter *from, + struct sk_buff *skb, + struct page *page, + int off, int copy) +{ + int err; + + err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + + off, copy, skb->len); + if (err) + return err; + + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; + sk->sk_wmem_queued += copy; + return 0; +} + +static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk) +{ + return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0); +} + +static int csk_wait_memory(struct chtls_dev *cdev, + struct sock *sk, long *timeo_p) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int err = 0; + long current_timeo; + long vm_wait = 0; + bool noblock; + + current_timeo = *timeo_p; + noblock = (*timeo_p ? false : true); + if (csk_mem_free(cdev, sk)) { + current_timeo = prandom_u32_max(HZ / 5) + 2; + vm_wait = prandom_u32_max(HZ / 5) + 2; + } + + add_wait_queue(sk_sleep(sk), &wait); + while (1) { + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + goto do_error; + if (!*timeo_p) { + if (noblock) + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + goto do_nonblock; + } + if (signal_pending(current)) + goto do_interrupted; + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + if (csk_mem_free(cdev, sk) && !vm_wait) + break; + + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_pending++; + sk_wait_event(sk, &current_timeo, sk->sk_err || + (sk->sk_shutdown & SEND_SHUTDOWN) || + (csk_mem_free(cdev, sk) && !vm_wait), &wait); + sk->sk_write_pending--; + + if (vm_wait) { + vm_wait -= current_timeo; + current_timeo = *timeo_p; + if (current_timeo != MAX_SCHEDULE_TIMEOUT) { + current_timeo -= vm_wait; + if (current_timeo < 0) + current_timeo = 0; + } + vm_wait = 0; + } + *timeo_p = current_timeo; + } +do_rm_wq: + remove_wait_queue(sk_sleep(sk), &wait); + return err; +do_error: + err = -EPIPE; + goto do_rm_wq; +do_nonblock: + err = -EAGAIN; + goto do_rm_wq; +do_interrupted: + err = sock_intr_errno(*timeo_p); + goto do_rm_wq; +} + +static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg, + unsigned char *record_type) +{ + struct cmsghdr *cmsg; + int rc = -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_TLS) + continue; + + switch (cmsg->cmsg_type) { + case TLS_SET_RECORD_TYPE: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type))) + return -EINVAL; + + if (msg->msg_flags & MSG_MORE) + return -EINVAL; + + *record_type = *(unsigned char *)CMSG_DATA(cmsg); + rc = 0; + break; + default: + return -EINVAL; + } + } + + return rc; +} + +int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + struct chtls_sock *csk = 
rcu_dereference_sk_user_data(sk); + struct chtls_dev *cdev = csk->cdev; + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int mss, flags, err; + int recordsz = 0; + int copied = 0; + long timeo; + + lock_sock(sk); + flags = msg->msg_flags; + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { + err = sk_stream_wait_connect(sk, &timeo); + if (err) + goto out_err; + } + + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + err = -EPIPE; + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + goto out_err; + + mss = csk->mss; + csk_set_flag(csk, CSK_TX_MORE_DATA); + + while (msg_data_left(msg)) { + int copy = 0; + + skb = skb_peek_tail(&csk->txq); + if (skb) { + copy = mss - skb->len; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + if (!csk_mem_free(cdev, sk)) + goto wait_for_sndbuf; + + if (is_tls_tx(csk) && !csk->tlshws.txleft) { + unsigned char record_type = TLS_RECORD_TYPE_DATA; + + if (unlikely(msg->msg_controllen)) { + err = chtls_proccess_cmsg(sk, msg, + &record_type); + if (err) + goto out_err; + + /* Avoid appending tls handshake, alert to tls data */ + if (skb) + tx_skb_finalize(skb); + } + + recordsz = size; + csk->tlshws.txleft = recordsz; + csk->tlshws.type = record_type; + } + + if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || + copy <= 0) { +new_buf: + if (skb) { + tx_skb_finalize(skb); + push_frames_if_head(sk); + } + + if (is_tls_tx(csk)) { + skb = get_record_skb(sk, + select_size(sk, + recordsz, + flags, + TX_TLSHDR_LEN), + false); + } else { + skb = get_tx_skb(sk, + select_size(sk, size, flags, + TX_HEADER_LEN)); + } + if (unlikely(!skb)) + goto wait_for_memory; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + copy = mss; + } + if (copy > size) + copy = size; + + if (skb_tailroom(skb) > 0) { + copy = min(copy, skb_tailroom(skb)); + if (is_tls_tx(csk)) + copy = min_t(int, copy, csk->tlshws.txleft); + err = skb_add_data_nocache(sk, skb, + &msg->msg_iter, copy); + if (err) + goto do_fault; + } else { + int i = skb_shinfo(skb)->nr_frags; + struct page *page = TCP_PAGE(sk); + int pg_size = PAGE_SIZE; + int off = TCP_OFF(sk); + bool merge; + + if (page) + pg_size = page_size(page); + if (off < pg_size && + skb_can_coalesce(skb, i, page, off)) { + merge = true; + goto copy; + } + merge = false; + if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : + MAX_SKB_FRAGS)) + goto new_buf; + + if (page && off == pg_size) { + put_page(page); + TCP_PAGE(sk) = page = NULL; + pg_size = PAGE_SIZE; + } + + if (!page) { + gfp_t gfp = sk->sk_allocation; + int order = cdev->send_page_order; + + if (order) { + page = alloc_pages(gfp | __GFP_COMP | + __GFP_NOWARN | + __GFP_NORETRY, + order); + if (page) + pg_size <<= order; + } + if (!page) { + page = alloc_page(gfp); + pg_size = PAGE_SIZE; + } + if (!page) + goto wait_for_memory; + off = 0; + } +copy: + if (copy > pg_size - off) + copy = pg_size - off; + if (is_tls_tx(csk)) + copy = min_t(int, copy, csk->tlshws.txleft); + + err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter, + skb, page, + off, copy); + if (unlikely(err)) { + if (!TCP_PAGE(sk)) { + TCP_PAGE(sk) = page; + TCP_OFF(sk) = 0; + } + goto do_fault; + } + /* Update the skb. 
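On a merge the copied bytes grow the last page fragment in place; otherwise the page becomes a new fragment and, if it still has room, is kept in TCP_PAGE(sk) for the next copy.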
*/ + if (merge) { + skb_frag_size_add( + &skb_shinfo(skb)->frags[i - 1], + copy); + } else { + skb_fill_page_desc(skb, i, page, off, copy); + if (off + copy < pg_size) { + /* space left keep page */ + get_page(page); + TCP_PAGE(sk) = page; + } else { + TCP_PAGE(sk) = NULL; + } + } + TCP_OFF(sk) = off + copy; + } + if (unlikely(skb->len == mss)) + tx_skb_finalize(skb); + tp->write_seq += copy; + copied += copy; + size -= copy; + + if (is_tls_tx(csk)) + csk->tlshws.txleft -= copy; + + if (corked(tp, flags) && + (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) + ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; + + if (size == 0) + goto out; + + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) + push_frames_if_head(sk); + continue; +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + err = csk_wait_memory(cdev, sk, &timeo); + if (err) + goto do_error; + } +out: + csk_reset_flag(csk, CSK_TX_MORE_DATA); + if (copied) + chtls_tcp_push(sk, flags); +done: + release_sock(sk); + return copied; +do_fault: + if (!skb->len) { + __skb_unlink(skb, &csk->txq); + sk->sk_wmem_queued -= skb->truesize; + __kfree_skb(skb); + } +do_error: + if (copied) + goto out; +out_err: + if (csk_conn_inline(csk)) + csk_reset_flag(csk, CSK_TX_MORE_DATA); + copied = sk_stream_error(sk, flags, err); + goto done; +} + +int chtls_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + struct chtls_sock *csk; + struct chtls_dev *cdev; + int mss, err, copied; + struct tcp_sock *tp; + long timeo; + + tp = tcp_sk(sk); + copied = 0; + csk = rcu_dereference_sk_user_data(sk); + cdev = csk->cdev; + lock_sock(sk); + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + err = sk_stream_wait_connect(sk, &timeo); + if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + err != 0) + goto out_err; + + mss = csk->mss; + csk_set_flag(csk, CSK_TX_MORE_DATA); + + while (size > 0) { + struct sk_buff *skb = skb_peek_tail(&csk->txq); + int copy, i; + + if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || + (copy = mss - skb->len) <= 0) { +new_buf: + if (!csk_mem_free(cdev, sk)) + goto wait_for_sndbuf; + + if (is_tls_tx(csk)) { + skb = get_record_skb(sk, + select_size(sk, size, + flags, + TX_TLSHDR_LEN), + true); + } else { + skb = get_tx_skb(sk, 0); + } + if (!skb) + goto wait_for_memory; + copy = mss; + } + if (copy > size) + copy = size; + + i = skb_shinfo(skb)->nr_frags; + if (skb_can_coalesce(skb, i, page, offset)) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else if (i < MAX_SKB_FRAGS) { + get_page(page); + skb_fill_page_desc(skb, i, page, offset, copy); + } else { + tx_skb_finalize(skb); + push_frames_if_head(sk); + goto new_buf; + } + + skb->len += copy; + if (skb->len == mss) + tx_skb_finalize(skb); + skb->data_len += copy; + skb->truesize += copy; + sk->sk_wmem_queued += copy; + tp->write_seq += copy; + copied += copy; + offset += copy; + size -= copy; + + if (corked(tp, flags) && + (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) + ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; + + if (!size) + break; + + if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)) + push_frames_if_head(sk); + continue; +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + err = csk_wait_memory(cdev, sk, &timeo); + if (err) + goto do_error; + } +out: + csk_reset_flag(csk, CSK_TX_MORE_DATA); + if (copied) + chtls_tcp_push(sk, flags); +done: + release_sock(sk); + return copied; + +do_error: + if (copied) + goto out; + 
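+ /* no data was copied, return only the error */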
+out_err: + if (csk_conn_inline(csk)) + csk_reset_flag(csk, CSK_TX_MORE_DATA); + copied = sk_stream_error(sk, flags, err); + goto done; +} + +static void chtls_select_window(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct tcp_sock *tp = tcp_sk(sk); + unsigned int wnd = tp->rcv_wnd; + + wnd = max_t(unsigned int, wnd, tcp_full_space(sk)); + wnd = max_t(unsigned int, MIN_RCV_WND, wnd); + + if (wnd > MAX_RCV_WND) + wnd = MAX_RCV_WND; + +/* + * Check if we need to grow the receive window in response to an increase in + * the socket's receive buffer size. Some applications increase the buffer + * size dynamically and rely on the window to grow accordingly. + */ + + if (wnd > tp->rcv_wnd) { + tp->rcv_wup -= wnd - tp->rcv_wnd; + tp->rcv_wnd = wnd; + /* Mark the receive window as updated */ + csk_reset_flag(csk, CSK_UPDATE_RCV_WND); + } +} + +/* + * Send RX credits through an RX_DATA_ACK CPL message. We are permitted + * to return without sending the message in case we cannot allocate + * an sk_buff. Returns the number of credits sent. + */ +static u32 send_rx_credits(struct chtls_sock *csk, u32 credits) +{ + struct cpl_rx_data_ack *req; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(*req), GFP_ATOMIC); + if (!skb) + return 0; + __skb_put(skb, sizeof(*req)); + req = (struct cpl_rx_data_ack *)skb->head; + + set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id); + INIT_TP_WR(req, csk->tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, + csk->tid)); + req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) | + RX_FORCE_ACK_F); + cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); + return credits; +} + +#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \ + TCPF_FIN_WAIT1 | \ + TCPF_FIN_WAIT2) + +/* + * Called after some received data has been read. It returns RX credits + * to the HW for the amount of data processed. + */ +static void chtls_cleanup_rbuf(struct sock *sk, int copied) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct tcp_sock *tp; + int must_send; + u32 credits; + u32 thres; + + thres = 15 * 1024; + + if (!sk_in_state(sk, CREDIT_RETURN_STATE)) + return; + + chtls_select_window(sk); + tp = tcp_sk(sk); + credits = tp->copied_seq - tp->rcv_wup; + if (unlikely(!credits)) + return; + +/* + * For coalescing to work effectively ensure the receive window has + * at least 16KB left. + */ + must_send = credits + 16384 >= tp->rcv_wnd; + + if (must_send || credits >= thres) + tp->rcv_wup += send_rx_credits(csk, credits); +} + +static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int flags, int *addr_len) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct chtls_hws *hws = &csk->tlshws; + struct net_device *dev = csk->egress_dev; + struct adapter *adap = netdev2adap(dev); + struct tcp_sock *tp = tcp_sk(sk); + unsigned long avail; + int buffers_freed; + int copied = 0; + int target; + long timeo; + + buffers_freed = 0; + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) + chtls_cleanup_rbuf(sk, copied); + + do { + struct sk_buff *skb; + u32 offset = 0; + + if (unlikely(tp->urg_data && + tp->urg_seq == tp->copied_seq)) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? 
sock_intr_errno(timeo) : + -EAGAIN; + break; + } + } + skb = skb_peek(&sk->sk_receive_queue); + if (skb) + goto found_ok_skb; + if (csk->wr_credits && + skb_queue_len(&csk->txq) && + chtls_push_frames(csk, csk->wr_credits == + csk->wr_max_credits)) + sk->sk_write_space(sk); + + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) + break; + + if (copied) { + if (sk->sk_err || sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current)) + break; + + if (!timeo) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + if (sk->sk_state == TCP_CLOSE) { + copied = -ENOTCONN; + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + } + if (READ_ONCE(sk->sk_backlog.tail)) { + release_sock(sk); + lock_sock(sk); + chtls_cleanup_rbuf(sk, copied); + continue; + } + + if (copied >= target) + break; + chtls_cleanup_rbuf(sk, copied); + sk_wait_data(sk, &timeo, NULL); + continue; +found_ok_skb: + if (!skb->len) { + skb_dst_set(skb, NULL); + __skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); + + if (!copied && !timeo) { + copied = -EAGAIN; + break; + } + + if (copied < target) { + release_sock(sk); + lock_sock(sk); + continue; + } + break; + } + offset = hws->copied_seq; + avail = skb->len - offset; + if (len < avail) + avail = len; + + if (unlikely(tp->urg_data)) { + u32 urg_offset = tp->urg_seq - tp->copied_seq; + + if (urg_offset < avail) { + if (urg_offset) { + avail = urg_offset; + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + /* First byte is urgent, skip */ + tp->copied_seq++; + offset++; + avail--; + if (!avail) + goto skip_copy; + } + } + } + /* Set record type if not already done. For a non-data record, + * do not proceed if record type could not be copied. + */ + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { + struct tls_hdr *thdr = (struct tls_hdr *)skb->data; + int cerr = 0; + + cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(thdr->type), &thdr->type); + + if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) { + copied = -EIO; + break; + } + /* don't send tls header, skip copy */ + goto skip_copy; + } + + if (skb_copy_datagram_msg(skb, offset, msg, avail)) { + if (!copied) { + copied = -EFAULT; + break; + } + } + + copied += avail; + len -= avail; + hws->copied_seq += avail; +skip_copy: + if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) + tp->urg_data = 0; + + if ((avail + offset) >= skb->len) { + struct sk_buff *next_skb; + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { + tp->copied_seq += skb->len; + hws->rcvpld = skb->hdr_len; + } else { + atomic_inc(&adap->chcr_stats.tls_pdu_rx); + tp->copied_seq += hws->rcvpld; + } + chtls_free_skb(sk, skb); + buffers_freed++; + hws->copied_seq = 0; + next_skb = skb_peek(&sk->sk_receive_queue); + if (copied >= target && !next_skb) + break; + if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR) + break; + } + } while (len > 0); + + if (buffers_freed) + chtls_cleanup_rbuf(sk, copied); + release_sock(sk); + return copied; +} + +/* + * Peek at data in a socket's receive buffer. 
+ */ +static int peekmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 peek_seq, offset; + struct sk_buff *skb; + int copied = 0; + size_t avail; /* amount of available data in current skb */ + long timeo; + + lock_sock(sk); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + peek_seq = tp->copied_seq; + + do { + if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? sock_intr_errno(timeo) : + -EAGAIN; + break; + } + } + + skb_queue_walk(&sk->sk_receive_queue, skb) { + offset = peek_seq - ULP_SKB_CB(skb)->seq; + if (offset < skb->len) + goto found_ok_skb; + } + + /* empty receive queue */ + if (copied) + break; + if (sock_flag(sk, SOCK_DONE)) + break; + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + if (sk->sk_state == TCP_CLOSE) { + copied = -ENOTCONN; + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + + if (READ_ONCE(sk->sk_backlog.tail)) { + /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else { + sk_wait_data(sk, &timeo, NULL); + } + + if (unlikely(peek_seq != tp->copied_seq)) { + if (net_ratelimit()) + pr_info("TCP(%s:%d), race in MSG_PEEK.\n", + current->comm, current->pid); + peek_seq = tp->copied_seq; + } + continue; + +found_ok_skb: + avail = skb->len - offset; + if (len < avail) + avail = len; + /* + * Do we have urgent data here? We need to skip over the + * urgent byte. + */ + if (unlikely(tp->urg_data)) { + u32 urg_offset = tp->urg_seq - peek_seq; + + if (urg_offset < avail) { + /* + * The amount of data we are preparing to copy + * contains urgent data. + */ + if (!urg_offset) { /* First byte is urgent */ + if (!sock_flag(sk, SOCK_URGINLINE)) { + peek_seq++; + offset++; + avail--; + } + if (!avail) + continue; + } else { + /* stop short of the urgent data */ + avail = urg_offset; + } + } + } + + /* + * If MSG_TRUNC is specified the data is discarded. 
+ */ + if (likely(!(flags & MSG_TRUNC))) + if (skb_copy_datagram_msg(skb, offset, msg, len)) { + if (!copied) { + copied = -EFAULT; + break; + } + } + peek_seq += avail; + copied += avail; + len -= avail; + } while (len > 0); + + release_sock(sk); + return copied; +} + +int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int flags, int *addr_len) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct chtls_sock *csk; + unsigned long avail; /* amount of available data in current skb */ + int buffers_freed; + int copied = 0; + long timeo; + int target; /* Read at least this many bytes */ + + buffers_freed = 0; + + if (unlikely(flags & MSG_OOB)) + return tcp_prot.recvmsg(sk, msg, len, flags, addr_len); + + if (unlikely(flags & MSG_PEEK)) + return peekmsg(sk, msg, len, flags); + + if (sk_can_busy_loop(sk) && + skb_queue_empty_lockless(&sk->sk_receive_queue) && + sk->sk_state == TCP_ESTABLISHED) + sk_busy_loop(sk, flags & MSG_DONTWAIT); + + lock_sock(sk); + csk = rcu_dereference_sk_user_data(sk); + + if (is_tls_rx(csk)) + return chtls_pt_recvmsg(sk, msg, len, flags, addr_len); + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) + chtls_cleanup_rbuf(sk, copied); + + do { + struct sk_buff *skb; + u32 offset; + + if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? sock_intr_errno(timeo) : + -EAGAIN; + break; + } + } + + skb = skb_peek(&sk->sk_receive_queue); + if (skb) + goto found_ok_skb; + + if (csk->wr_credits && + skb_queue_len(&csk->txq) && + chtls_push_frames(csk, csk->wr_credits == + csk->wr_max_credits)) + sk->sk_write_space(sk); + + if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) + break; + + if (copied) { + if (sk->sk_err || sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current)) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + if (sk->sk_state == TCP_CLOSE) { + copied = -ENOTCONN; + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + } + + if (READ_ONCE(sk->sk_backlog.tail)) { + release_sock(sk); + lock_sock(sk); + chtls_cleanup_rbuf(sk, copied); + continue; + } + + if (copied >= target) + break; + chtls_cleanup_rbuf(sk, copied); + sk_wait_data(sk, &timeo, NULL); + continue; + +found_ok_skb: + if (!skb->len) { + chtls_kfree_skb(sk, skb); + if (!copied && !timeo) { + copied = -EAGAIN; + break; + } + + if (copied < target) + continue; + + break; + } + + offset = tp->copied_seq - ULP_SKB_CB(skb)->seq; + avail = skb->len - offset; + if (len < avail) + avail = len; + + if (unlikely(tp->urg_data)) { + u32 urg_offset = tp->urg_seq - tp->copied_seq; + + if (urg_offset < avail) { + if (urg_offset) { + avail = urg_offset; + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + tp->copied_seq++; + offset++; + avail--; + if (!avail) + goto skip_copy; + } + } + } + + if (likely(!(flags & MSG_TRUNC))) { + if (skb_copy_datagram_msg(skb, offset, + msg, avail)) { + if (!copied) { + copied = -EFAULT; + break; + } + } + } + + tp->copied_seq += avail; + copied += avail; + len -= avail; + +skip_copy: + if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) + tp->urg_data = 0; + + if (avail + offset >= skb->len) { + chtls_free_skb(sk, skb); + buffers_freed++; 
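/*
 * The current skb has been consumed in full: free it and batch the
 * hardware read-credit return (chtls_cleanup_rbuf() after the loop)
 * rather than acknowledging each buffer individually; once the read
 * target is satisfied and the receive queue is empty there is nothing
 * further to wait for.
 */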
+ + if (copied >= target && + !skb_peek(&sk->sk_receive_queue)) + break; + } + } while (len > 0); + + if (buffers_freed) + chtls_cleanup_rbuf(sk, copied); + + release_sock(sk); + return copied; +} diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c new file mode 100644 index 000000000000..1e55b12fee51 --- /dev/null +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018 Chelsio Communications, Inc. + * + * Written by: Atul Gupta (atul.gupta@chelsio.com) + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/hash.h> +#include <linux/in.h> +#include <linux/net.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/ipv6.h> +#include <net/transp_v6.h> +#include <net/tcp.h> +#include <net/tls.h> + +#include "chtls.h" +#include "chtls_cm.h" + +#define DRV_NAME "chtls" + +/* + * chtls device management + * maintains a list of the chtls devices + */ +static LIST_HEAD(cdev_list); +static DEFINE_MUTEX(cdev_mutex); + +static DEFINE_MUTEX(notify_mutex); +static RAW_NOTIFIER_HEAD(listen_notify_list); +static struct proto chtls_cpl_prot, chtls_cpl_protv6; +struct request_sock_ops chtls_rsk_ops, chtls_rsk_opsv6; +static uint send_page_order = (14 - PAGE_SHIFT < 0) ? 0 : 14 - PAGE_SHIFT; + +static void register_listen_notifier(struct notifier_block *nb) +{ + mutex_lock(¬ify_mutex); + raw_notifier_chain_register(&listen_notify_list, nb); + mutex_unlock(¬ify_mutex); +} + +static void unregister_listen_notifier(struct notifier_block *nb) +{ + mutex_lock(¬ify_mutex); + raw_notifier_chain_unregister(&listen_notify_list, nb); + mutex_unlock(¬ify_mutex); +} + +static int listen_notify_handler(struct notifier_block *this, + unsigned long event, void *data) +{ + struct chtls_listen *clisten; + int ret = NOTIFY_DONE; + + clisten = (struct chtls_listen *)data; + + switch (event) { + case CHTLS_LISTEN_START: + ret = chtls_listen_start(clisten->cdev, clisten->sk); + kfree(clisten); + break; + case CHTLS_LISTEN_STOP: + chtls_listen_stop(clisten->cdev, clisten->sk); + kfree(clisten); + break; + } + return ret; +} + +static struct notifier_block listen_notifier = { + .notifier_call = listen_notify_handler +}; + +static int listen_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + if (likely(skb_transport_header(skb) != skb_network_header(skb))) + return tcp_v4_do_rcv(sk, skb); + BLOG_SKB_CB(skb)->backlog_rcv(sk, skb); + return 0; +} + +static int chtls_start_listen(struct chtls_dev *cdev, struct sock *sk) +{ + struct chtls_listen *clisten; + + if (sk->sk_protocol != IPPROTO_TCP) + return -EPROTONOSUPPORT; + + if (sk->sk_family == PF_INET && + LOOPBACK(inet_sk(sk)->inet_rcv_saddr)) + return -EADDRNOTAVAIL; + + sk->sk_backlog_rcv = listen_backlog_rcv; + clisten = kmalloc(sizeof(*clisten), GFP_KERNEL); + if (!clisten) + return -ENOMEM; + clisten->cdev = cdev; + clisten->sk = sk; + mutex_lock(¬ify_mutex); + raw_notifier_call_chain(&listen_notify_list, + CHTLS_LISTEN_START, clisten); + mutex_unlock(¬ify_mutex); + return 0; +} + +static void chtls_stop_listen(struct chtls_dev *cdev, struct sock *sk) +{ + struct chtls_listen *clisten; + + if (sk->sk_protocol != IPPROTO_TCP) + return; + + clisten = kmalloc(sizeof(*clisten), GFP_KERNEL); + if (!clisten) + return; + clisten->cdev = cdev; + clisten->sk = sk; + mutex_lock(¬ify_mutex); + 
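/*
 * notify_mutex serializes CHTLS_LISTEN_START/CHTLS_LISTEN_STOP events,
 * so listen_notify_handler() always observes them in order; the handler
 * owns and frees the chtls_listen structure once the event is handled.
 */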
raw_notifier_call_chain(&listen_notify_list, + CHTLS_LISTEN_STOP, clisten); + mutex_unlock(¬ify_mutex); +} + +static int chtls_inline_feature(struct tls_toe_device *dev) +{ + struct net_device *netdev; + struct chtls_dev *cdev; + int i; + + cdev = to_chtls_dev(dev); + + for (i = 0; i < cdev->lldi->nports; i++) { + netdev = cdev->ports[i]; + if (netdev->features & NETIF_F_HW_TLS_RECORD) + return 1; + } + return 0; +} + +static int chtls_create_hash(struct tls_toe_device *dev, struct sock *sk) +{ + struct chtls_dev *cdev = to_chtls_dev(dev); + + if (sk->sk_state == TCP_LISTEN) + return chtls_start_listen(cdev, sk); + return 0; +} + +static void chtls_destroy_hash(struct tls_toe_device *dev, struct sock *sk) +{ + struct chtls_dev *cdev = to_chtls_dev(dev); + + if (sk->sk_state == TCP_LISTEN) + chtls_stop_listen(cdev, sk); +} + +static void chtls_free_uld(struct chtls_dev *cdev) +{ + int i; + + tls_toe_unregister_device(&cdev->tlsdev); + kvfree(cdev->kmap.addr); + idr_destroy(&cdev->hwtid_idr); + for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) + kfree_skb(cdev->rspq_skb_cache[i]); + kfree(cdev->lldi); + kfree_skb(cdev->askb); + kfree(cdev); +} + +static inline void chtls_dev_release(struct kref *kref) +{ + struct tls_toe_device *dev; + struct chtls_dev *cdev; + struct adapter *adap; + + dev = container_of(kref, struct tls_toe_device, kref); + cdev = to_chtls_dev(dev); + + /* Reset tls rx/tx stats */ + adap = pci_get_drvdata(cdev->pdev); + atomic_set(&adap->chcr_stats.tls_pdu_tx, 0); + atomic_set(&adap->chcr_stats.tls_pdu_rx, 0); + + chtls_free_uld(cdev); +} + +static void chtls_register_dev(struct chtls_dev *cdev) +{ + struct tls_toe_device *tlsdev = &cdev->tlsdev; + + strscpy(tlsdev->name, "chtls", TLS_TOE_DEVICE_NAME_MAX); + strlcat(tlsdev->name, cdev->lldi->ports[0]->name, + TLS_TOE_DEVICE_NAME_MAX); + tlsdev->feature = chtls_inline_feature; + tlsdev->hash = chtls_create_hash; + tlsdev->unhash = chtls_destroy_hash; + tlsdev->release = chtls_dev_release; + kref_init(&tlsdev->kref); + tls_toe_register_device(tlsdev); + cdev->cdev_state = CHTLS_CDEV_STATE_UP; +} + +static void process_deferq(struct work_struct *task_param) +{ + struct chtls_dev *cdev = container_of(task_param, + struct chtls_dev, deferq_task); + struct sk_buff *skb; + + spin_lock_bh(&cdev->deferq.lock); + while ((skb = __skb_dequeue(&cdev->deferq)) != NULL) { + spin_unlock_bh(&cdev->deferq.lock); + DEFERRED_SKB_CB(skb)->handler(cdev, skb); + spin_lock_bh(&cdev->deferq.lock); + } + spin_unlock_bh(&cdev->deferq.lock); +} + +static int chtls_get_skb(struct chtls_dev *cdev) +{ + cdev->askb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL); + if (!cdev->askb) + return -ENOMEM; + + skb_put(cdev->askb, sizeof(struct tcphdr)); + skb_reset_transport_header(cdev->askb); + memset(cdev->askb->data, 0, cdev->askb->len); + return 0; +} + +static void *chtls_uld_add(const struct cxgb4_lld_info *info) +{ + struct cxgb4_lld_info *lldi; + struct chtls_dev *cdev; + int i, j; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + goto out; + + lldi = kzalloc(sizeof(*lldi), GFP_KERNEL); + if (!lldi) + goto out_lldi; + + if (chtls_get_skb(cdev)) + goto out_skb; + + *lldi = *info; + cdev->lldi = lldi; + cdev->pdev = lldi->pdev; + cdev->tids = lldi->tids; + cdev->ports = lldi->ports; + cdev->mtus = lldi->mtus; + cdev->tids = lldi->tids; + cdev->pfvf = FW_VIID_PFN_G(cxgb4_port_viid(lldi->ports[0])) + << FW_VIID_PFN_S; + + for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) { + unsigned int size = 64 - sizeof(struct rsp_ctrl) - 8; + + cdev->rspq_skb_cache[i] = 
__alloc_skb(size, + gfp_any(), 0, + lldi->nodeid); + if (unlikely(!cdev->rspq_skb_cache[i])) + goto out_rspq_skb; + } + + idr_init(&cdev->hwtid_idr); + INIT_WORK(&cdev->deferq_task, process_deferq); + spin_lock_init(&cdev->listen_lock); + spin_lock_init(&cdev->idr_lock); + cdev->send_page_order = min_t(uint, get_order(32768), + send_page_order); + cdev->max_host_sndbuf = 48 * 1024; + + if (lldi->vr->key.size) + if (chtls_init_kmap(cdev, lldi)) + goto out_rspq_skb; + + mutex_lock(&cdev_mutex); + list_add_tail(&cdev->list, &cdev_list); + mutex_unlock(&cdev_mutex); + + return cdev; +out_rspq_skb: + for (j = 0; j < i; j++) + kfree_skb(cdev->rspq_skb_cache[j]); + kfree_skb(cdev->askb); +out_skb: + kfree(lldi); +out_lldi: + kfree(cdev); +out: + return NULL; +} + +static void chtls_free_all_uld(void) +{ + struct chtls_dev *cdev, *tmp; + + mutex_lock(&cdev_mutex); + list_for_each_entry_safe(cdev, tmp, &cdev_list, list) { + if (cdev->cdev_state == CHTLS_CDEV_STATE_UP) { + list_del(&cdev->list); + kref_put(&cdev->tlsdev.kref, cdev->tlsdev.release); + } + } + mutex_unlock(&cdev_mutex); +} + +static int chtls_uld_state_change(void *handle, enum cxgb4_state new_state) +{ + struct chtls_dev *cdev = handle; + + switch (new_state) { + case CXGB4_STATE_UP: + chtls_register_dev(cdev); + break; + case CXGB4_STATE_DOWN: + break; + case CXGB4_STATE_START_RECOVERY: + break; + case CXGB4_STATE_DETACH: + mutex_lock(&cdev_mutex); + list_del(&cdev->list); + mutex_unlock(&cdev_mutex); + kref_put(&cdev->tlsdev.kref, cdev->tlsdev.release); + break; + default: + break; + } + return 0; +} + +static struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, + const __be64 *rsp, + u32 pktshift) +{ + struct sk_buff *skb; + + /* Allocate space for the cpl_pass_accept_req which will be synthesized + * by the driver. Once the driver synthesizes the cpl_pass_accept_req, + * the skb will go through the regular cpl_pass_accept_req processing in TOM. 
+ */ + skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + - pktshift, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + - pktshift); + /* For now we will copy cpl_rx_pkt in the skb */ + skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_rx_pkt)); + skb_copy_to_linear_data_offset(skb, sizeof(struct cpl_pass_accept_req) + , gl->va + pktshift, + gl->tot_len - pktshift); + + return skb; +} + +static int chtls_recv_packet(struct chtls_dev *cdev, + const struct pkt_gl *gl, const __be64 *rsp) +{ + unsigned int opcode = *(u8 *)rsp; + struct sk_buff *skb; + int ret; + + skb = copy_gl_to_skb_pkt(gl, rsp, cdev->lldi->sge_pktshift); + if (!skb) + return -ENOMEM; + + ret = chtls_handlers[opcode](cdev, skb); + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); + + return 0; +} + +static int chtls_recv_rsp(struct chtls_dev *cdev, const __be64 *rsp) +{ + unsigned long rspq_bin; + unsigned int opcode; + struct sk_buff *skb; + unsigned int len; + int ret; + + len = 64 - sizeof(struct rsp_ctrl) - 8; + opcode = *(u8 *)rsp; + + rspq_bin = hash_ptr((void *)rsp, RSPQ_HASH_BITS); + skb = cdev->rspq_skb_cache[rspq_bin]; + if (skb && !skb_is_nonlinear(skb) && + !skb_shared(skb) && !skb_cloned(skb)) { + refcount_inc(&skb->users); + if (refcount_read(&skb->users) == 2) { + __skb_trim(skb, 0); + if (skb_tailroom(skb) >= len) + goto copy_out; + } + refcount_dec(&skb->users); + } + skb = alloc_skb(len, GFP_ATOMIC); + if (unlikely(!skb)) + return -ENOMEM; + +copy_out: + __skb_put(skb, len); + skb_copy_to_linear_data(skb, rsp, len); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + ret = chtls_handlers[opcode](cdev, skb); + + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); + return 0; +} + +static void chtls_recv(struct chtls_dev *cdev, + struct sk_buff **skbs, const __be64 *rsp) +{ + struct sk_buff *skb = *skbs; + unsigned int opcode; + int ret; + + opcode = *(u8 *)rsp; + + __skb_push(skb, sizeof(struct rss_header)); + skb_copy_to_linear_data(skb, rsp, sizeof(struct rss_header)); + + ret = chtls_handlers[opcode](cdev, skb); + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); +} + +static int chtls_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *gl) +{ + struct chtls_dev *cdev = handle; + unsigned int opcode; + struct sk_buff *skb; + + opcode = *(u8 *)rsp; + + if (unlikely(opcode == CPL_RX_PKT)) { + if (chtls_recv_packet(cdev, gl, rsp) < 0) + goto nomem; + return 0; + } + + if (!gl) + return chtls_recv_rsp(cdev, rsp); + +#define RX_PULL_LEN 128 + skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN); + if (unlikely(!skb)) + goto nomem; + chtls_recv(cdev, &skb, rsp); + return 0; + +nomem: + return -ENOMEM; +} + +static int do_chtls_getsockopt(struct sock *sk, char __user *optval, + int __user *optlen) +{ + struct tls_crypto_info crypto_info = { 0 }; + + crypto_info.version = TLS_1_2_VERSION; + if (copy_to_user(optval, &crypto_info, sizeof(struct tls_crypto_info))) + return -EFAULT; + return 0; +} + +static int chtls_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (level != SOL_TLS) + return ctx->sk_proto->getsockopt(sk, level, + optname, optval, optlen); + + return do_chtls_getsockopt(sk, optval, optlen); +} + +static int do_chtls_setsockopt(struct sock *sk, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct tls_crypto_info *crypto_info, tmp_crypto_info; + struct chtls_sock *csk; + int 
keylen; + int cipher_type; + int rc = 0; + + csk = rcu_dereference_sk_user_data(sk); + + if (sockptr_is_null(optval) || optlen < sizeof(*crypto_info)) { + rc = -EINVAL; + goto out; + } + + rc = copy_from_sockptr(&tmp_crypto_info, optval, sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto out; + } + + /* check version */ + if (tmp_crypto_info.version != TLS_1_2_VERSION) { + rc = -ENOTSUPP; + goto out; + } + + crypto_info = (struct tls_crypto_info *)&csk->tlshws.crypto_info; + + /* GCM mode of AES supports 128 and 256 bit encryption, so + * copy keys from user based on GCM cipher type. + */ + switch (tmp_crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + /* Obtain version and type from previous copy */ + crypto_info[0] = tmp_crypto_info; + /* Now copy the following data */ + rc = copy_from_sockptr_offset((char *)crypto_info + + sizeof(*crypto_info), + optval, sizeof(*crypto_info), + sizeof(struct tls12_crypto_info_aes_gcm_128) + - sizeof(*crypto_info)); + + if (rc) { + rc = -EFAULT; + goto out; + } + + keylen = TLS_CIPHER_AES_GCM_128_KEY_SIZE; + cipher_type = TLS_CIPHER_AES_GCM_128; + break; + } + case TLS_CIPHER_AES_GCM_256: { + crypto_info[0] = tmp_crypto_info; + rc = copy_from_sockptr_offset((char *)crypto_info + + sizeof(*crypto_info), + optval, sizeof(*crypto_info), + sizeof(struct tls12_crypto_info_aes_gcm_256) + - sizeof(*crypto_info)); + + if (rc) { + rc = -EFAULT; + goto out; + } + + keylen = TLS_CIPHER_AES_GCM_256_KEY_SIZE; + cipher_type = TLS_CIPHER_AES_GCM_256; + break; + } + default: + rc = -EINVAL; + goto out; + } + rc = chtls_setkey(csk, keylen, optname, cipher_type); +out: + return rc; +} + +static int chtls_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (level != SOL_TLS) + return ctx->sk_proto->setsockopt(sk, level, + optname, optval, optlen); + + return do_chtls_setsockopt(sk, optname, optval, optlen); +} + +static struct cxgb4_uld_info chtls_uld_info = { + .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .ntxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .add = chtls_uld_add, + .state_change = chtls_uld_state_change, + .rx_handler = chtls_uld_rx_handler, +}; + +void chtls_install_cpl_ops(struct sock *sk) +{ + if (sk->sk_family == AF_INET) + sk->sk_prot = &chtls_cpl_prot; + else + sk->sk_prot = &chtls_cpl_protv6; +} + +static void __init chtls_init_ulp_ops(void) +{ + chtls_cpl_prot = tcp_prot; + chtls_init_rsk_ops(&chtls_cpl_prot, &chtls_rsk_ops, + &tcp_prot, PF_INET); + chtls_cpl_prot.close = chtls_close; + chtls_cpl_prot.disconnect = chtls_disconnect; + chtls_cpl_prot.destroy = chtls_destroy_sock; + chtls_cpl_prot.shutdown = chtls_shutdown; + chtls_cpl_prot.sendmsg = chtls_sendmsg; + chtls_cpl_prot.sendpage = chtls_sendpage; + chtls_cpl_prot.recvmsg = chtls_recvmsg; + chtls_cpl_prot.setsockopt = chtls_setsockopt; + chtls_cpl_prot.getsockopt = chtls_getsockopt; +#if IS_ENABLED(CONFIG_IPV6) + chtls_cpl_protv6 = chtls_cpl_prot; + chtls_init_rsk_ops(&chtls_cpl_protv6, &chtls_rsk_opsv6, + &tcpv6_prot, PF_INET6); +#endif +} + +static int __init chtls_register(void) +{ + chtls_init_ulp_ops(); + register_listen_notifier(&listen_notifier); + cxgb4_register_uld(CXGB4_ULD_TLS, &chtls_uld_info); + return 0; +} + +static void __exit chtls_unregister(void) +{ + unregister_listen_notifier(&listen_notifier); + chtls_free_all_uld(); + cxgb4_unregister_uld(CXGB4_ULD_TLS); +} + +module_init(chtls_register); +module_exit(chtls_unregister); + +MODULE_DESCRIPTION("Chelsio TLS Inline 
driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(CHTLS_DRV_VERSION); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index d04a6c163445..da8d10475a08 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -32,6 +32,7 @@ #include <linux/tcp.h> #include <linux/ipv6.h> +#include <net/inet_ecn.h> #include <net/route.h> #include <net/ip6_route.h> @@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi, rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, peer_port, local_port, IPPROTO_TCP, - tos, 0); + tos & ~INET_ECN_MASK, 0); if (IS_ERR(rt)) return NULL; n = dst_neigh_lookup(&rt->dst, &peer_ip); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c index 21034536c9c5..854d87e1125c 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c @@ -35,7 +35,6 @@ */ #define DRV_NAME "libcxgb" -#define DRV_VERSION "1.0.0-ko" #define pr_fmt(fmt) DRV_NAME ": " fmt #include <linux/kernel.h> @@ -530,5 +529,4 @@ EXPORT_SYMBOL(cxgbi_tagmask_set); MODULE_AUTHOR("Chelsio Communications"); MODULE_DESCRIPTION("Chelsio common library"); -MODULE_VERSION(DRV_VERSION); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h index 7b02c200dd1e..1b4156461ba1 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h @@ -122,7 +122,7 @@ struct cxgbi_ppm_pool { unsigned int base; /* base index */ unsigned int next; /* next possible free index */ spinlock_t lock; /* ppm pool lock */ - unsigned long bmap[0]; + unsigned long bmap[]; } ____cacheline_aligned_in_smp; struct cxgbi_ppm { @@ -145,7 +145,7 @@ struct cxgbi_ppm { unsigned int next; unsigned int max_index_in_edram; unsigned long *ppod_bmap; - struct cxgbi_ppod_data ppod_data[0]; + struct cxgbi_ppod_data ppod_data[]; }; #define DDP_THRESHOLD 512 |