332 files changed, 21316 insertions, 9011 deletions
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index e8e9c166185d..4ded81b27d0a 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -78,7 +78,6 @@ source "drivers/net/ethernet/freescale/Kconfig"
 source "drivers/net/ethernet/fujitsu/Kconfig"
 source "drivers/net/ethernet/google/Kconfig"
 source "drivers/net/ethernet/hisilicon/Kconfig"
-source "drivers/net/ethernet/hp/Kconfig"
 source "drivers/net/ethernet/huawei/Kconfig"
 source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/ibm/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 05abebc17804..f8f38dcb5f8a 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/
 obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/
 obj-$(CONFIG_NET_VENDOR_GOOGLE) += google/
 obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/
-obj-$(CONFIG_NET_VENDOR_HP) += hp/
 obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/
 obj-$(CONFIG_NET_VENDOR_IBM) += ibm/
 obj-$(CONFIG_NET_VENDOR_INTEL) += intel/
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index bb032be7fe31..4cd53fc338b5 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -730,12 +730,12 @@ static int altera_tse_phy_get_addr_mdio_create(struct net_device *dev)
 {
 	struct altera_tse_private *priv = netdev_priv(dev);
 	struct device_node *np = priv->device->of_node;
-	int ret = 0;
+	int ret;
 
-	priv->phy_iface = of_get_phy_mode(np);
+	ret = of_get_phy_mode(np, &priv->phy_iface);
 
 	/* Avoid get phy addr and create mdio if no phy is present */
-	if (!priv->phy_iface)
+	if (ret)
 		return 0;
 
 	/* try to get PHY address from device tree, use PHY autodetection if
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 16553d92fad2..a3250dcf7d53 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -133,7 +133,7 @@ static void ena_queue_stats(struct ena_adapter *adapter, u64 **data)
 	u64 *ptr;
 	int i, j;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		/* Tx stats */
 		ring = &adapter->tx_ring[i];
 
@@ -205,7 +205,7 @@ int ena_get_sset_count(struct net_device *netdev, int sset)
 	if (sset != ETH_SS_STATS)
 		return -EOPNOTSUPP;
 
-	return  adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
+	return  adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
 		+ ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM;
 }
 
@@ -214,7 +214,7 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
 	const struct ena_stats *ena_stats;
 	int i, j;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		/* Tx stats */
 		for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
 			ena_stats = &ena_stats_tx_strings[j];
@@ -333,7 +333,7 @@ static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter)
 
 	val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev);
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		adapter->tx_ring[i].smoothed_interval = val;
 }
 
@@ -344,7 +344,7 @@ static void ena_update_rx_rings_intr_moderation(struct ena_adapter *adapter)
 
 	val = ena_com_get_nonadaptive_moderation_interval_rx(adapter->ena_dev);
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		adapter->rx_ring[i].smoothed_interval = val;
 }
 
@@ -612,7 +612,7 @@ static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
 
 	switch (info->cmd) {
 	case ETHTOOL_GRXRINGS:
-		info->data = adapter->num_queues;
+		info->data = adapter->num_io_queues;
 		rc = 0;
 		break;
 	case ETHTOOL_GRXFH:
@@ -734,14 +734,20 @@ static void ena_get_channels(struct net_device *netdev,
 {
 	struct ena_adapter *adapter = netdev_priv(netdev);
 
-	channels->max_rx = adapter->num_queues;
-	channels->max_tx = adapter->num_queues;
-	channels->max_other = 0;
-	channels->max_combined = 0;
-	channels->rx_count = adapter->num_queues;
-	channels->tx_count = adapter->num_queues;
-	channels->other_count = 0;
-	channels->combined_count = 0;
+	channels->max_combined = adapter->max_num_io_queues;
+	channels->combined_count = adapter->num_io_queues;
+}
+
+static int ena_set_channels(struct net_device *netdev,
+			    struct ethtool_channels *channels)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	u32 count = channels->combined_count;
+	/* The check for max value is already done in ethtool */
+	if (count < ENA_MIN_NUM_IO_QUEUES)
+		return -EINVAL;
+
+	return ena_update_queue_count(adapter, count);
 }
 
 static int ena_get_tunable(struct net_device *netdev,
@@ -807,6 +813,7 @@ static const struct ethtool_ops ena_ethtool_ops = {
 	.get_rxfh		= ena_get_rxfh,
 	.set_rxfh		= ena_set_rxfh,
 	.get_channels		= ena_get_channels,
+	.set_channels		= ena_set_channels,
 	.get_tunable		= ena_get_tunable,
 	.set_tunable		= ena_set_tunable,
 };
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index c487d2a7d6dd..d46a912002ff 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -101,7 +101,7 @@ static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
 {
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		adapter->rx_ring[i].mtu = mtu;
 }
 
@@ -129,10 +129,10 @@ static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
 	u32 i;
 	int rc;
 
-	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
+	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
 	if (!adapter->netdev->rx_cpu_rmap)
 		return -ENOMEM;
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		int irq_idx = ENA_IO_IRQ_IDX(i);
 
 		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
@@ -172,7 +172,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
 
 	ena_dev = adapter->ena_dev;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		txr = &adapter->tx_ring[i];
 		rxr = &adapter->rx_ring[i];
 
@@ -294,7 +294,7 @@ static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
 {
 	int i, rc = 0;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		rc = ena_setup_tx_resources(adapter, i);
 		if (rc)
 			goto err_setup_tx;
@@ -322,7 +322,7 @@ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
 {
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		ena_free_tx_resources(adapter, i);
 }
 
@@ -428,7 +428,7 @@ static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
 {
 	int i, rc = 0;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		rc = ena_setup_rx_resources(adapter, i);
 		if (rc)
 			goto err_setup_rx;
@@ -456,7 +456,7 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 {
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		ena_free_rx_resources(adapter, i);
 }
 
@@ -600,7 +600,7 @@ static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
 	struct ena_ring *rx_ring;
 	int i, rc, bufs_num;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		rx_ring = &adapter->rx_ring[i];
 		bufs_num = rx_ring->ring_size - 1;
 		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
@@ -616,7 +616,7 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
 {
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		ena_free_rx_bufs(adapter, i);
 }
 
@@ -688,7 +688,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
 	struct ena_ring *tx_ring;
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		tx_ring = &adapter->tx_ring[i];
 		ena_free_tx_bufs(tx_ring);
 	}
@@ -699,7 +699,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
 	u16 ena_qid;
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		ena_qid = ENA_IO_TXQ_IDX(i);
 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 	}
@@ -710,7 +710,7 @@ static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
 	u16 ena_qid;
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		ena_qid = ENA_IO_RXQ_IDX(i);
 		cancel_work_sync(&adapter->ena_napi[i].dim.work);
 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
@@ -1331,7 +1331,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
  * the number of potential io queues is the minimum of what the device
  * supports and the number of vCPUs.
  */
-static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
+static int ena_enable_msix(struct ena_adapter *adapter)
 {
 	int msix_vecs, irq_cnt;
 
@@ -1342,7 +1342,7 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
 	}
 
 	/* Reserved the max msix vectors we might need */
-	msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
+	msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues);
 	netif_dbg(adapter, probe, adapter->netdev,
 		  "trying to enable MSI-X, vectors %d\n", msix_vecs);
 
@@ -1359,7 +1359,7 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
 		netif_notice(adapter, probe, adapter->netdev,
 			     "enable only %d MSI-X (out of %d), reduce the number of queues\n",
 			     irq_cnt, msix_vecs);
-		adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
+		adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
 	}
 
 	if (ena_init_rx_cpu_rmap(adapter))
@@ -1397,7 +1397,7 @@ static void ena_setup_io_intr(struct ena_adapter *adapter)
 
 	netdev = adapter->netdev;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		irq_idx = ENA_IO_IRQ_IDX(i);
 		cpu = i % num_online_cpus();
 
@@ -1529,7 +1529,7 @@ static void ena_del_napi(struct ena_adapter *adapter)
 {
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		netif_napi_del(&adapter->ena_napi[i].napi);
 }
 
@@ -1538,7 +1538,7 @@ static void ena_init_napi(struct ena_adapter *adapter)
 	struct ena_napi *napi;
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		napi = &adapter->ena_napi[i];
 
 		netif_napi_add(adapter->netdev,
@@ -1555,7 +1555,7 @@ static void ena_napi_disable_all(struct ena_adapter *adapter)
 {
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		napi_disable(&adapter->ena_napi[i].napi);
 }
 
@@ -1563,7 +1563,7 @@ static void ena_napi_enable_all(struct ena_adapter *adapter)
 {
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		napi_enable(&adapter->ena_napi[i].napi);
 }
 
@@ -1673,7 +1673,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 	int rc, i;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		rc = ena_create_io_tx_queue(adapter, i);
 		if (rc)
 			goto create_err;
@@ -1741,7 +1741,7 @@ static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 	int rc, i;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		rc = ena_create_io_rx_queue(adapter, i);
 		if (rc)
 			goto create_err;
@@ -1764,7 +1764,7 @@ static void set_io_rings_size(struct ena_adapter *adapter,
 {
 	int i;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		adapter->tx_ring[i].ring_size = new_tx_size;
 		adapter->rx_ring[i].ring_size = new_rx_size;
 	}
@@ -1902,14 +1902,14 @@ static int ena_up(struct ena_adapter *adapter)
 	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 
 	/* Enable completion queues interrupt */
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		ena_unmask_interrupt(&adapter->tx_ring[i],
 				     &adapter->rx_ring[i]);
 
 	/* schedule napi in case we had pending packets
 	 * from the last time we disable napi
 	 */
-	for (i = 0; i < adapter->num_queues; i++)
+	for (i = 0; i < adapter->num_io_queues; i++)
 		napi_schedule(&adapter->ena_napi[i].napi);
 
 	return rc;
@@ -1984,13 +1984,13 @@ static int ena_open(struct net_device *netdev)
 	int rc;
 
 	/* Notify the stack of the actual queue counts. */
-	rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
+	rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
 	if (rc) {
 		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
 		return rc;
 	}
 
-	rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
+	rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
 	if (rc) {
 		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
 		return rc;
@@ -2043,14 +2043,30 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
 			   u32 new_tx_size,
 			   u32 new_rx_size)
 {
-	bool dev_up;
+	bool dev_was_up;
 
-	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 	ena_close(adapter->netdev);
 	adapter->requested_tx_ring_size = new_tx_size;
 	adapter->requested_rx_ring_size = new_rx_size;
 	ena_init_io_rings(adapter);
-	return dev_up ? ena_up(adapter) : 0;
+	return dev_was_up ? ena_up(adapter) : 0;
+}
+
+int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	bool dev_was_up;
+
+	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+	ena_close(adapter->netdev);
+	adapter->num_io_queues = new_channel_count;
+	/* We need to destroy the rss table so that the indirection
+	 * table will be reinitialized by ena_up()
+	 */
+	ena_com_rss_destroy(ena_dev);
+	ena_init_io_rings(adapter);
+	return dev_was_up ? ena_open(adapter->netdev) : 0;
 }
 
 static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
@@ -2495,7 +2511,7 @@ static void ena_get_stats64(struct net_device *netdev,
 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		return;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		u64 bytes, packets;
 
 		tx_ring = &adapter->tx_ring[i];
@@ -2682,14 +2698,13 @@ err_mmio_read_less:
 	return rc;
 }
 
-static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
-						    int io_vectors)
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
 {
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 	struct device *dev = &adapter->pdev->dev;
 	int rc;
 
-	rc = ena_enable_msix(adapter, io_vectors);
+	rc = ena_enable_msix(adapter);
 	if (rc) {
 		dev_err(dev, "Can not reserve msix vectors\n");
 		return rc;
@@ -2782,8 +2797,7 @@ static int ena_restore_device(struct ena_adapter *adapter)
 		goto err_device_destroy;
 	}
 
-	rc = ena_enable_msix_and_set_admin_interrupts(adapter,
-						      adapter->num_queues);
+	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
 	if (rc) {
 		dev_err(&pdev->dev, "Enable MSI-X failed\n");
 		goto err_device_destroy;
@@ -2948,7 +2962,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
 
 	budget = ENA_MONITORED_TX_QUEUES;
 
-	for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
+	for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) {
 		tx_ring = &adapter->tx_ring[i];
 		rx_ring = &adapter->rx_ring[i];
 
@@ -2965,7 +2979,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
 			break;
 	}
 
-	adapter->last_monitored_tx_qid = i % adapter->num_queues;
+	adapter->last_monitored_tx_qid = i % adapter->num_io_queues;
 }
 
 /* trigger napi schedule after 2 consecutive detections */
@@ -2995,7 +3009,7 @@ static void check_for_empty_rx_ring(struct ena_adapter *adapter)
 	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
 		return;
 
-	for (i = 0; i < adapter->num_queues; i++) {
+	for (i = 0; i < adapter->num_io_queues; i++) {
 		rx_ring = &adapter->rx_ring[i];
 
 		refill_required =
@@ -3137,16 +3151,16 @@ static void ena_timer_service(struct timer_list *t)
 	mod_timer(&adapter->timer_service, jiffies + HZ);
 }
 
-static int ena_calc_io_queue_num(struct pci_dev *pdev,
-				 struct ena_com_dev *ena_dev,
-				 struct ena_com_dev_get_features_ctx *get_feat_ctx)
+static int ena_calc_max_io_queue_num(struct pci_dev *pdev,
+				     struct ena_com_dev *ena_dev,
+				     struct ena_com_dev_get_features_ctx *get_feat_ctx)
 {
-	int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num;
+	int io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
 
 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
 			&get_feat_ctx->max_queue_ext.max_queue_ext;
-		io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
+		io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
 				  max_queue_ext->max_rx_cq_num);
 
 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
@@ -3156,25 +3170,25 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev,
 			&get_feat_ctx->max_queues;
 		io_tx_sq_num = max_queues->max_sq_num;
 		io_tx_cq_num = max_queues->max_cq_num;
-		io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
+		io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
 	}
 
 	/* In case of LLQ use the llq fields for the tx SQ/CQ */
 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
 
-	io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
-	io_queue_num = min_t(int, io_queue_num, io_rx_num);
-	io_queue_num = min_t(int, io_queue_num, io_tx_sq_num);
-	io_queue_num = min_t(int, io_queue_num, io_tx_cq_num);
+	max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
+	max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
+	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
+	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
 	/* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */
-	io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
-	if (unlikely(!io_queue_num)) {
+	max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
+	if (unlikely(!max_num_io_queues)) {
 		dev_err(&pdev->dev, "The device doesn't have io queues\n");
 		return -EFAULT;
 	}
 
-	return io_queue_num;
+	return max_num_io_queues;
 }
 
 static int ena_set_queues_placement_policy(struct pci_dev *pdev,
@@ -3302,7 +3316,7 @@ static int ena_rss_init_default(struct ena_adapter *adapter)
 	}
 
 	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
-		val = ethtool_rxfh_indir_default(i, adapter->num_queues);
+		val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
 		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
 						       ENA_IO_RXQ_IDX(val));
 		if (unlikely(rc && (rc != -EOPNOTSUPP))) {
@@ -3349,7 +3363,7 @@ static void set_default_llq_configurations(struct ena_llq_configurations *llq_co
 	llq_config->llq_ring_entry_size_value = 128;
 }
 
-static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx)
+static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
 {
 	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
 	struct ena_com_dev *ena_dev = ctx->ena_dev;
@@ -3358,7 +3372,7 @@ static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx)
 	u32 max_tx_queue_size;
 	u32 max_rx_queue_size;
 
-	if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
 			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
 		max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
@@ -3432,11 +3446,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct ena_llq_configurations llq_config;
 	struct ena_com_dev *ena_dev = NULL;
 	struct ena_adapter *adapter;
-	int io_queue_num, bars, rc;
 	struct net_device *netdev;
 	static int adapters_found;
+	u32 max_num_io_queues;
 	char *queue_type_str;
 	bool wd_state;
+	int bars, rc;
 
 	dev_dbg(&pdev->dev, "%s\n", __func__);
 
@@ -3497,27 +3512,20 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	calc_queue_ctx.pdev = pdev;
 
 	/* Initial Tx and RX interrupt delay. Assumes 1 usec granularity.
-	* Updated during device initialization with the real granularity
-	*/
+	 * Updated during device initialization with the real granularity
+	 */
 	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
 	ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
 	ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
-	io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
-	rc = ena_calc_queue_size(&calc_queue_ctx);
-	if (rc || io_queue_num <= 0) {
+	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
+	rc = ena_calc_io_queue_size(&calc_queue_ctx);
+	if (rc || !max_num_io_queues) {
 		rc = -EFAULT;
 		goto err_device_destroy;
 	}
 
-	dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size. %d LLQ is %s\n",
-		 io_queue_num,
-		 calc_queue_ctx.rx_queue_size,
-		 calc_queue_ctx.tx_queue_size,
-		 (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ?
-		 "ENABLED" : "DISABLED");
-
 	/* dev zeroed in init_etherdev */
-	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
+	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues);
 	if (!netdev) {
 		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
 		rc = -ENOMEM;
@@ -3545,7 +3553,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
 	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
 
-	adapter->num_queues = io_queue_num;
+	adapter->num_io_queues = max_num_io_queues;
+	adapter->max_num_io_queues = max_num_io_queues;
+
 	adapter->last_monitored_tx_qid = 0;
 
 	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
@@ -3569,7 +3579,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	u64_stats_init(&adapter->syncp);
 
-	rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
+	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
 	if (rc) {
 		dev_err(&pdev->dev,
 			"Failed to enable and set the admin interrupts\n");
@@ -3611,9 +3621,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		queue_type_str = "Low Latency";
 
 	dev_info(&pdev->dev,
-		 "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n",
+		 "%s found at mem %lx, mac addr %pM, Placement policy: %s\n",
 		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
-		 netdev->dev_addr, io_queue_num, queue_type_str);
+		 netdev->dev_addr, queue_type_str);
 
 	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 72ee51a82ec7..bffd778f2ce3 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -82,6 +82,8 @@
 #define ENA_DEFAULT_RING_SIZE	(1024)
 #define ENA_MIN_RING_SIZE	(256)
 
+#define ENA_MIN_NUM_IO_QUEUES	(1)
+
 #define ENA_TX_WAKEUP_THRESH		(MAX_SKB_FRAGS + 2)
 #define ENA_DEFAULT_RX_COPYBREAK	(256 - NET_IP_ALIGN)
 
@@ -161,10 +163,10 @@ struct ena_calc_queue_size_ctx {
 	struct ena_com_dev_get_features_ctx *get_feat_ctx;
 	struct ena_com_dev *ena_dev;
 	struct pci_dev *pdev;
-	u16 tx_queue_size;
-	u16 rx_queue_size;
-	u16 max_tx_queue_size;
-	u16 max_rx_queue_size;
+	u32 tx_queue_size;
+	u32 rx_queue_size;
+	u32 max_tx_queue_size;
+	u32 max_rx_queue_size;
 	u16 max_tx_sgl_size;
 	u16 max_rx_sgl_size;
 };
@@ -324,7 +326,8 @@ struct ena_adapter {
 	u32 rx_copybreak;
 	u32 max_mtu;
 
-	int num_queues;
+	u32 num_io_queues;
+	u32 max_num_io_queues;
 
 	int msix_vecs;
 
@@ -387,6 +390,7 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
 int ena_update_queue_sizes(struct ena_adapter *adapter,
 			   u32 new_tx_size,
 			   u32 new_rx_size);
+int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);
 
 int ena_get_sset_count(struct net_device *netdev, int sset);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile
index 131cab855be7..6e0a6e234483 100644
--- a/drivers/net/ethernet/aquantia/atlantic/Makefile
+++ b/drivers/net/ethernet/aquantia/atlantic/Makefile
@@ -4,15 +4,8 @@
 # aQuantia Ethernet Controller AQtion Linux Driver
 # Copyright(c) 2014-2017 aQuantia Corporation.
 #
-# Contact Information: <rdc-drv@aquantia.com>
-# aQuantia Corporation, 105 E. Tasman Dr. San Jose, CA 95134, USA
-#
 ################################################################################
 
-#
-# Makefile for the AQtion(tm) Ethernet driver
-#
-
 obj-$(CONFIG_AQTION) += atlantic.o
 
 atlantic-objs := aq_main.o \
@@ -24,8 +17,11 @@ atlantic-objs := aq_main.o \
 	aq_ethtool.o \
 	aq_drvinfo.o \
 	aq_filters.o \
+	aq_phy.o \
 	hw_atl/hw_atl_a0.o \
 	hw_atl/hw_atl_b0.o \
 	hw_atl/hw_atl_utils.o \
 	hw_atl/hw_atl_utils_fw2x.o \
 	hw_atl/hw_atl_llh.o
+
+atlantic-$(CONFIG_PTP_1588_CLOCK) += aq_ptp.o
+\ No newline at end of file
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
index 02f1b70c4e25..f0c41f7408e5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_cfg.h: Definition of configuration parameters and constants. */
@@ -27,7 +27,7 @@
 
 #define AQ_CFG_INTERRUPT_MODERATION_USEC_MAX (0x1FF * 2)
 
-#define AQ_CFG_IRQ_MASK                      0x1FFU
+#define AQ_CFG_IRQ_MASK                      0x3FFU
 
 #define AQ_CFG_VECS_MAX   8U
 #define AQ_CFG_TCS_MAX    8U
@@ -70,14 +70,11 @@
 
 /*#define AQ_CFG_MAC_ADDR_PERMANENT {0x30, 0x0E, 0xE3, 0x12, 0x34, 0x56}*/
 
-#define AQ_NIC_FC_OFF    0U
-#define AQ_NIC_FC_TX     1U
-#define AQ_NIC_FC_RX     2U
-#define AQ_NIC_FC_FULL   3U
-#define AQ_NIC_FC_AUTO   4U
-
 #define AQ_CFG_FC_MODE AQ_NIC_FC_FULL
 
+/* Default WOL modes used on initialization */
+#define AQ_CFG_WOL_MODES WAKE_MAGIC
+
 #define AQ_CFG_SPEED_MSK  0xFFFFU	/* 0xFFFFU==auto_neg */
 
 #define AQ_CFG_IS_AUTONEG_DEF       1U
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 24df132384fb..6353a5c5ed27 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_ethtool.c: Definition of ethertool related functions. */
@@ -9,13 +9,18 @@
 #include "aq_ethtool.h"
 #include "aq_nic.h"
 #include "aq_vec.h"
+#include "aq_ptp.h"
 #include "aq_filters.h"
 
+#include <linux/ptp_clock_kernel.h>
+
 static void aq_ethtool_get_regs(struct net_device *ndev,
 				struct ethtool_regs *regs, void *p)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	u32 regs_count = aq_nic_get_regs_count(aq_nic);
+	u32 regs_count;
+
+	regs_count = aq_nic_get_regs_count(aq_nic);
 
 	memset(p, 0, regs_count * sizeof(u32));
 	aq_nic_get_regs(aq_nic, regs, p);
@@ -24,7 +29,9 @@ static void aq_ethtool_get_regs(struct net_device *ndev,
 static int aq_ethtool_get_regs_len(struct net_device *ndev)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	u32 regs_count = aq_nic_get_regs_count(aq_nic);
+	u32 regs_count;
+
+	regs_count = aq_nic_get_regs_count(aq_nic);
 
 	return regs_count * sizeof(u32);
 }
@@ -89,11 +96,21 @@ static const char aq_ethtool_queue_stat_names[][ETH_GSTRING_LEN] = {
 	"Queue[%d] InErrors",
 };
 
+static const char aq_ethtool_priv_flag_names[][ETH_GSTRING_LEN] = {
+	"DMASystemLoopback",
+	"PKTSystemLoopback",
+	"DMANetworkLoopback",
+	"PHYInternalLoopback",
+	"PHYExternalLoopback",
+};
+
 static void aq_ethtool_stats(struct net_device *ndev,
 			     struct ethtool_stats *stats, u64 *data)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
+
+	cfg = aq_nic_get_cfg(aq_nic);
 
 	memset(data, 0, (ARRAY_SIZE(aq_ethtool_stat_names) +
 			 ARRAY_SIZE(aq_ethtool_queue_stat_names) *
@@ -104,11 +121,15 @@ static void aq_ethtool_stats(struct net_device *ndev,
 static void aq_ethtool_get_drvinfo(struct net_device *ndev,
 				   struct ethtool_drvinfo *drvinfo)
 {
-	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
 	struct pci_dev *pdev = to_pci_dev(ndev->dev.parent);
-	u32 firmware_version = aq_nic_get_fw_version(aq_nic);
-	u32 regs_count = aq_nic_get_regs_count(aq_nic);
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct aq_nic_cfg_s *cfg;
+	u32 firmware_version;
+	u32 regs_count;
+
+	cfg = aq_nic_get_cfg(aq_nic);
+	firmware_version = aq_nic_get_fw_version(aq_nic);
+	regs_count = aq_nic_get_regs_count(aq_nic);
 
 	strlcat(drvinfo->driver, AQ_CFG_DRV_NAME, sizeof(drvinfo->driver));
 	strlcat(drvinfo->version, AQ_CFG_DRV_VERSION, sizeof(drvinfo->version));
@@ -129,12 +150,15 @@ static void aq_ethtool_get_drvinfo(struct net_device *ndev,
 static void aq_ethtool_get_strings(struct net_device *ndev,
 				   u32 stringset, u8 *data)
 {
-	int i, si;
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
 	u8 *p = data;
+	int i, si;
 
-	if (stringset == ETH_SS_STATS) {
+	cfg = aq_nic_get_cfg(aq_nic);
+
+	switch (stringset) {
+	case ETH_SS_STATS:
 		memcpy(p, aq_ethtool_stat_names,
 		       sizeof(aq_ethtool_stat_names));
 		p = p + sizeof(aq_ethtool_stat_names);
@@ -147,23 +171,63 @@ static void aq_ethtool_get_strings(struct net_device *ndev,
 				p += ETH_GSTRING_LEN;
 			}
 		}
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(p, aq_ethtool_priv_flag_names,
+		       sizeof(aq_ethtool_priv_flag_names));
+		break;
 	}
 }
 
-static int aq_ethtool_get_sset_count(struct net_device *ndev, int stringset)
+static int aq_ethtool_set_phys_id(struct net_device *ndev,
+				  enum ethtool_phys_id_state state)
 {
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct aq_hw_s *hw = aq_nic->aq_hw;
 	int ret = 0;
+
+	if (!aq_nic->aq_fw_ops->led_control)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&aq_nic->fwreq_mutex);
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		ret = aq_nic->aq_fw_ops->led_control(hw, AQ_HW_LED_BLINK |
+				 AQ_HW_LED_BLINK << 2 | AQ_HW_LED_BLINK << 4);
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		ret = aq_nic->aq_fw_ops->led_control(hw, AQ_HW_LED_DEFAULT);
+		break;
+	default:
+		break;
+	}
+
+	mutex_unlock(&aq_nic->fwreq_mutex);
+
+	return ret;
+}
+
+static int aq_ethtool_get_sset_count(struct net_device *ndev, int stringset)
+{
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
+	int ret = 0;
+
+	cfg = aq_nic_get_cfg(aq_nic);
 
 	switch (stringset) {
 	case ETH_SS_STATS:
 		ret = ARRAY_SIZE(aq_ethtool_stat_names) +
 			cfg->vecs * ARRAY_SIZE(aq_ethtool_queue_stat_names);
 		break;
+	case ETH_SS_PRIV_FLAGS:
+		ret = ARRAY_SIZE(aq_ethtool_priv_flag_names);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 	}
+
 	return ret;
 }
 
@@ -175,7 +239,9 @@ static u32 aq_ethtool_get_rss_indir_size(struct net_device *ndev)
 static u32 aq_ethtool_get_rss_key_size(struct net_device *ndev)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
+
+	cfg = aq_nic_get_cfg(aq_nic);
 
 	return sizeof(cfg->aq_rss.hash_secret_key);
 }
@@ -184,9 +250,11 @@ static int aq_ethtool_get_rss(struct net_device *ndev, u32 *indir, u8 *key,
 			      u8 *hfunc)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
 	unsigned int i = 0U;
 
+	cfg = aq_nic_get_cfg(aq_nic);
+
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
 	if (indir) {
@@ -196,6 +264,7 @@ static int aq_ethtool_get_rss(struct net_device *ndev, u32 *indir, u8 *key,
 	if (key)
 		memcpy(key, cfg->aq_rss.hash_secret_key,
 		       sizeof(cfg->aq_rss.hash_secret_key));
+
 	return 0;
 }
 
@@ -239,9 +308,11 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev,
 				u32 *rule_locs)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
 	int err = 0;
 
+	cfg = aq_nic_get_cfg(aq_nic);
+
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
 		cmd->data = cfg->vecs;
@@ -266,8 +337,8 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev,
 static int aq_ethtool_set_rxnfc(struct net_device *ndev,
 				struct ethtool_rxnfc *cmd)
 {
-	int err = 0;
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	int err = 0;
 
 	switch (cmd->cmd) {
 	case ETHTOOL_SRXCLSRLINS:
@@ -288,7 +359,9 @@ static int aq_ethtool_get_coalesce(struct net_device *ndev,
 				   struct ethtool_coalesce *coal)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
+
+	cfg = aq_nic_get_cfg(aq_nic);
 
 	if (cfg->itr == AQ_CFG_INTERRUPT_MODERATION_ON ||
 	    cfg->itr == AQ_CFG_INTERRUPT_MODERATION_AUTO) {
@@ -302,6 +375,7 @@ static int aq_ethtool_get_coalesce(struct net_device *ndev,
 		coal->rx_max_coalesced_frames = 1;
 		coal->tx_max_coalesced_frames = 1;
 	}
+
 	return 0;
 }
 
@@ -309,7 +383,9 @@ static int aq_ethtool_set_coalesce(struct net_device *ndev,
 				   struct ethtool_coalesce *coal)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
+
+	cfg = aq_nic_get_cfg(aq_nic);
 
 	/* This is not yet supported
 	 */
@@ -351,13 +427,12 @@ static void aq_ethtool_get_wol(struct net_device *ndev,
 			       struct ethtool_wolinfo *wol)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
 
-	wol->supported = WAKE_MAGIC;
-	wol->wolopts = 0;
+	cfg = aq_nic_get_cfg(aq_nic);
 
-	if (cfg->wol)
-		wol->wolopts |= WAKE_MAGIC;
+	wol->supported = AQ_NIC_WOL_MODES;
+	wol->wolopts = cfg->wol;
 }
 
 static int aq_ethtool_set_wol(struct net_device *ndev,
@@ -365,18 +440,50 @@ static int aq_ethtool_set_wol(struct net_device *ndev,
 {
 	struct pci_dev *pdev = to_pci_dev(ndev->dev.parent);
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
 	int err = 0;
 
-	if (wol->wolopts & WAKE_MAGIC)
-		cfg->wol |= AQ_NIC_WOL_ENABLED;
-	else
-		cfg->wol &= ~AQ_NIC_WOL_ENABLED;
-	err = device_set_wakeup_enable(&pdev->dev, wol->wolopts);
+	cfg = aq_nic_get_cfg(aq_nic);
+
+	if (wol->wolopts & ~AQ_NIC_WOL_MODES)
+		return -EOPNOTSUPP;
+
+	cfg->wol = wol->wolopts;
+
+	err = device_set_wakeup_enable(&pdev->dev, !!cfg->wol);
 
 	return err;
 }
 
+static int aq_ethtool_get_ts_info(struct net_device *ndev,
+				  struct ethtool_ts_info *info)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	ethtool_op_get_ts_info(ndev, info);
+
+	if (!aq_nic->aq_ptp)
+		return 0;
+
+	info->so_timestamping |=
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+			 BIT(HWTSTAMP_TX_ON);
+
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+
+	info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+	info->phc_index = ptp_clock_index(aq_ptp_get_ptp_clock(aq_nic->aq_ptp));
+
+	return 0;
+}
+
 static enum hw_atl_fw2x_rate eee_mask_to_ethtool_mask(u32 speed)
 {
 	u32 rate = 0;
@@ -481,7 +588,7 @@ static void aq_ethtool_get_pauseparam(struct net_device *ndev,
 				      struct ethtool_pauseparam *pause)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	u32 fc = aq_nic->aq_nic_cfg.flow_control;
+	u32 fc = aq_nic->aq_nic_cfg.fc.req;
 
 	pause->autoneg = 0;
 
@@ -503,14 +610,14 @@ static int aq_ethtool_set_pauseparam(struct net_device *ndev,
 		return -EOPNOTSUPP;
 
 	if (pause->rx_pause)
-		aq_nic->aq_hw->aq_nic_cfg->flow_control |= AQ_NIC_FC_RX;
+		aq_nic->aq_hw->aq_nic_cfg->fc.req |= AQ_NIC_FC_RX;
 	else
-		aq_nic->aq_hw->aq_nic_cfg->flow_control &= ~AQ_NIC_FC_RX;
+		aq_nic->aq_hw->aq_nic_cfg->fc.req &= ~AQ_NIC_FC_RX;
 
 	if (pause->tx_pause)
-		aq_nic->aq_hw->aq_nic_cfg->flow_control |= AQ_NIC_FC_TX;
+		aq_nic->aq_hw->aq_nic_cfg->fc.req |= AQ_NIC_FC_TX;
 	else
-		aq_nic->aq_hw->aq_nic_cfg->flow_control &= ~AQ_NIC_FC_TX;
+		aq_nic->aq_hw->aq_nic_cfg->fc.req &= ~AQ_NIC_FC_TX;
 
 	mutex_lock(&aq_nic->fwreq_mutex);
 	err = aq_nic->aq_fw_ops->set_flow_control(aq_nic->aq_hw);
@@ -523,23 +630,28 @@ static void aq_get_ringparam(struct net_device *ndev,
 			     struct ethtool_ringparam *ring)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(aq_nic);
+	struct aq_nic_cfg_s *cfg;
 
-	ring->rx_pending = aq_nic_cfg->rxds;
-	ring->tx_pending = aq_nic_cfg->txds;
+	cfg = aq_nic_get_cfg(aq_nic);
+
+	ring->rx_pending = cfg->rxds;
+	ring->tx_pending = cfg->txds;
 
-	ring->rx_max_pending = aq_nic_cfg->aq_hw_caps->rxds_max;
-	ring->tx_max_pending = aq_nic_cfg->aq_hw_caps->txds_max;
+	ring->rx_max_pending = cfg->aq_hw_caps->rxds_max;
+	ring->tx_max_pending = cfg->aq_hw_caps->txds_max;
 }
 
 static int aq_set_ringparam(struct net_device *ndev,
 			    struct ethtool_ringparam *ring)
 {
-	int err = 0;
-	bool ndev_running = false;
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(aq_nic);
-	const struct aq_hw_caps_s *hw_caps = aq_nic_cfg->aq_hw_caps;
+	const struct aq_hw_caps_s *hw_caps;
+	bool ndev_running = false;
+	struct aq_nic_cfg_s *cfg;
+	int err = 0;
+
+	cfg = aq_nic_get_cfg(aq_nic);
+	hw_caps = cfg->aq_hw_caps;
 
 	if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
 		err = -EOPNOTSUPP;
@@ -553,18 +665,18 @@ static int aq_set_ringparam(struct net_device *ndev,
 
 	aq_nic_free_vectors(aq_nic);
 
-	aq_nic_cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min);
-	aq_nic_cfg->rxds = min(aq_nic_cfg->rxds, hw_caps->rxds_max);
-	aq_nic_cfg->rxds = ALIGN(aq_nic_cfg->rxds, AQ_HW_RXD_MULTIPLE);
+	cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min);
+	cfg->rxds = min(cfg->rxds, hw_caps->rxds_max);
+	cfg->rxds = ALIGN(cfg->rxds, AQ_HW_RXD_MULTIPLE);
 
-	aq_nic_cfg->txds = max(ring->tx_pending, hw_caps->txds_min);
-	aq_nic_cfg->txds = min(aq_nic_cfg->txds, hw_caps->txds_max);
-	aq_nic_cfg->txds = ALIGN(aq_nic_cfg->txds, AQ_HW_TXD_MULTIPLE);
+	cfg->txds = max(ring->tx_pending, hw_caps->txds_min);
+	cfg->txds = min(cfg->txds, hw_caps->txds_max);
+	cfg->txds = ALIGN(cfg->txds, AQ_HW_TXD_MULTIPLE);
 
-	for (aq_nic->aq_vecs = 0; aq_nic->aq_vecs < aq_nic_cfg->vecs;
+	for (aq_nic->aq_vecs = 0; aq_nic->aq_vecs < cfg->vecs;
 	     aq_nic->aq_vecs++) {
 		aq_nic->aq_vec[aq_nic->aq_vecs] =
-		    aq_vec_alloc(aq_nic, aq_nic->aq_vecs, aq_nic_cfg);
+		    aq_vec_alloc(aq_nic, aq_nic->aq_vecs, cfg);
 		if (unlikely(!aq_nic->aq_vec[aq_nic->aq_vecs])) {
 			err = -ENOMEM;
 			goto err_exit;
@@ -577,12 +689,61 @@ err_exit:
 	return err;
 }
 
+static u32 aq_get_msg_level(struct net_device *ndev)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	return aq_nic->msg_enable;
+}
+
+static void aq_set_msg_level(struct net_device *ndev, u32 data)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	aq_nic->msg_enable = data;
+}
+
+u32 aq_ethtool_get_priv_flags(struct net_device *ndev)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	return aq_nic->aq_nic_cfg.priv_flags;
+}
+
+int aq_ethtool_set_priv_flags(struct net_device *ndev, u32 flags)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct aq_nic_cfg_s *cfg;
+	u32 priv_flags;
+
+	cfg = aq_nic_get_cfg(aq_nic);
+	priv_flags = cfg->priv_flags;
+
+	if (flags & ~AQ_PRIV_FLAGS_MASK)
+		return -EOPNOTSUPP;
+
+	cfg->priv_flags = flags;
+
+	if ((priv_flags ^ flags) & BIT(AQ_HW_LOOPBACK_DMA_NET)) {
+		if (netif_running(ndev)) {
+			dev_close(ndev);
+
+			dev_open(ndev, NULL);
+		}
+	} else if ((priv_flags ^ flags) & AQ_HW_LOOPBACK_MASK) {
+		aq_nic_set_loopback(aq_nic);
+	}
+
+	return 0;
+}
+
 const struct ethtool_ops aq_ethtool_ops = {
 	.get_link            = aq_ethtool_get_link,
 	.get_regs_len        = aq_ethtool_get_regs_len,
 	.get_regs            = aq_ethtool_get_regs,
 	.get_drvinfo         = aq_ethtool_get_drvinfo,
 	.get_strings         = aq_ethtool_get_strings,
+	.set_phys_id         = aq_ethtool_set_phys_id,
 	.get_rxfh_indir_size = aq_ethtool_get_rss_indir_size,
 	.get_wol             = aq_ethtool_get_wol,
 	.set_wol             = aq_ethtool_set_wol,
@@ -598,10 +759,15 @@ const struct ethtool_ops aq_ethtool_ops = {
 	.set_rxfh            = aq_ethtool_set_rss,
 	.get_rxnfc           = aq_ethtool_get_rxnfc,
 	.set_rxnfc           = aq_ethtool_set_rxnfc,
+	.get_msglevel        = aq_get_msg_level,
+	.set_msglevel        = aq_set_msg_level,
 	.get_sset_count      = aq_ethtool_get_sset_count,
 	.get_ethtool_stats   = aq_ethtool_stats,
+	.get_priv_flags      = aq_ethtool_get_priv_flags,
+	.set_priv_flags      = aq_ethtool_set_priv_flags,
 	.get_link_ksettings  = aq_ethtool_get_link_ksettings,
 	.set_link_ksettings  = aq_ethtool_set_link_ksettings,
 	.get_coalesce	     = aq_ethtool_get_coalesce,
 	.set_coalesce	     = aq_ethtool_set_coalesce,
+	.get_ts_info         = aq_ethtool_get_ts_info,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h
index 632b5531db4a..6d5be5ebeb13 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.h
@@ -12,5 +12,6 @@
 #include "aq_common.h"
 
 extern const struct ethtool_ops aq_ethtool_ops;
+#define AQ_PRIV_FLAGS_MASK   (AQ_HW_LOOPBACK_MASK)
 
 #endif /* AQ_ETHTOOL_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index aee827f07c16..6102251bb909 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2014-2017 aQuantia Corporation. */
+/* Copyright (C) 2014-2019 aQuantia Corporation. */
 
 /* File aq_filters.c: RX filters related functions. */
 
@@ -89,12 +89,14 @@ static int aq_check_approve_fl3l4(struct aq_nic_s *aq_nic,
 				  struct aq_hw_rx_fltrs_s *rx_fltrs,
 				  struct ethtool_rx_flow_spec *fsp)
 {
+	u32 last_location = AQ_RX_LAST_LOC_FL3L4 -
+			    aq_nic->aq_hw_rx_fltrs.fl3l4.reserved_count;
+
 	if (fsp->location < AQ_RX_FIRST_LOC_FL3L4 ||
-	    fsp->location > AQ_RX_LAST_LOC_FL3L4) {
+	    fsp->location > last_location) {
 		netdev_err(aq_nic->ndev,
 			   "ethtool: location must be in range [%d, %d]",
-			   AQ_RX_FIRST_LOC_FL3L4,
-			   AQ_RX_LAST_LOC_FL3L4);
+			   AQ_RX_FIRST_LOC_FL3L4, last_location);
 		return -EINVAL;
 	}
 	if (rx_fltrs->fl3l4.is_ipv6 && rx_fltrs->fl3l4.active_ipv4) {
@@ -124,12 +126,15 @@ aq_check_approve_fl2(struct aq_nic_s *aq_nic,
 		     struct aq_hw_rx_fltrs_s *rx_fltrs,
 		     struct ethtool_rx_flow_spec *fsp)
 {
+	u32 last_location = AQ_RX_LAST_LOC_FETHERT -
+			    aq_nic->aq_hw_rx_fltrs.fet_reserved_count;
+
 	if (fsp->location < AQ_RX_FIRST_LOC_FETHERT ||
-	    fsp->location > AQ_RX_LAST_LOC_FETHERT) {
+	    fsp->location > last_location) {
 		netdev_err(aq_nic->ndev,
 			   "ethtool: location must be in range [%d, %d]",
 			   AQ_RX_FIRST_LOC_FETHERT,
-			   AQ_RX_LAST_LOC_FETHERT);
+			   last_location);
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 53d7478689a0..cc70c606b6ef 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_hw.h: Declaration of abstract interface for NIC hardware specific
@@ -15,6 +15,9 @@
 #include "aq_rss.h"
 #include "hw_atl/hw_atl_utils.h"
 
+#define AQ_HW_MAC_COUNTER_HZ   312500000ll
+#define AQ_HW_PHY_COUNTER_HZ   160000000ll
+
 #define AQ_RX_FIRST_LOC_FVLANID     0U
 #define AQ_RX_LAST_LOC_FVLANID	   15U
 #define AQ_RX_FIRST_LOC_FETHERT    16U
@@ -94,6 +97,7 @@ struct aq_stats_s {
 #define AQ_HW_FLAG_STOPPING    0x00000008U
 #define AQ_HW_FLAG_RESETTING   0x00000010U
 #define AQ_HW_FLAG_CLOSING     0x00000020U
+#define AQ_HW_PTP_AVAILABLE    0x01000000U
 #define AQ_HW_LINK_DOWN        0x04000000U
 #define AQ_HW_FLAG_ERR_UNPLUG  0x40000000U
 #define AQ_HW_FLAG_ERR_HW      0x80000000U
@@ -115,6 +119,23 @@ struct aq_stats_s {
 
 #define AQ_HW_MULTICAST_ADDRESS_MAX     32U
 
+#define AQ_HW_LED_BLINK    0x2U
+#define AQ_HW_LED_DEFAULT  0x0U
+
+enum aq_priv_flags {
+	AQ_HW_LOOPBACK_DMA_SYS,
+	AQ_HW_LOOPBACK_PKT_SYS,
+	AQ_HW_LOOPBACK_DMA_NET,
+	AQ_HW_LOOPBACK_PHYINT_SYS,
+	AQ_HW_LOOPBACK_PHYEXT_SYS,
+};
+
+#define AQ_HW_LOOPBACK_MASK	(BIT(AQ_HW_LOOPBACK_DMA_SYS) |\
+				 BIT(AQ_HW_LOOPBACK_PKT_SYS) |\
+				 BIT(AQ_HW_LOOPBACK_DMA_NET) |\
+				 BIT(AQ_HW_LOOPBACK_PHYINT_SYS) |\
+				 BIT(AQ_HW_LOOPBACK_PHYEXT_SYS))
+
 struct aq_hw_s {
 	atomic_t flags;
 	u8 rbl_enabled:1;
@@ -133,8 +154,11 @@ struct aq_hw_s {
 	atomic_t dpc;
 	u32 mbox_addr;
 	u32 rpc_addr;
+	u32 settings_addr;
 	u32 rpc_tid;
 	struct hw_atl_utils_fw_rpc rpc;
+	s64 ptp_clk_offset;
+	u16 phy_id;
 };
 
 struct aq_ring_s;
@@ -235,7 +259,43 @@ struct aq_hw_ops {
 	int (*hw_set_offload)(struct aq_hw_s *self,
 			      struct aq_nic_cfg_s *aq_nic_cfg);
 
+	int (*hw_tx_tc_mode_get)(struct aq_hw_s *self, u32 *tc_mode);
+
+	int (*hw_rx_tc_mode_get)(struct aq_hw_s *self, u32 *tc_mode);
+
+	int (*hw_ring_hwts_rx_fill)(struct aq_hw_s *self,
+				    struct aq_ring_s *aq_ring);
+
+	int (*hw_ring_hwts_rx_receive)(struct aq_hw_s *self,
+				       struct aq_ring_s *ring);
+
+	void (*hw_get_ptp_ts)(struct aq_hw_s *self, u64 *stamp);
+
+	int (*hw_adj_clock_freq)(struct aq_hw_s *self, s32 delta);
+
+	int (*hw_adj_sys_clock)(struct aq_hw_s *self, s64 delta);
+
+	int (*hw_set_sys_clock)(struct aq_hw_s *self, u64 time, u64 ts);
+
+	int (*hw_ts_to_sys_clock)(struct aq_hw_s *self, u64 ts, u64 *time);
+
+	int (*hw_gpio_pulse)(struct aq_hw_s *self, u32 index, u64 start,
+			     u32 period);
+
+	int (*hw_extts_gpio_enable)(struct aq_hw_s *self, u32 index,
+				    u32 enable);
+
+	int (*hw_get_sync_ts)(struct aq_hw_s *self, u64 *ts);
+
+	u16 (*rx_extract_ts)(struct aq_hw_s *self, u8 *p, unsigned int len,
+			     u64 *timestamp);
+
+	int (*extract_hwts)(struct aq_hw_s *self, u8 *p, unsigned int len,
+			    u64 *timestamp);
+
 	int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc);
+
+	int (*hw_set_loopback)(struct aq_hw_s *self, u32 mode, bool enable);
 };
 
 struct aq_fw_ops {
@@ -264,9 +324,19 @@ struct aq_fw_ops {
 
 	int (*set_flow_control)(struct aq_hw_s *self);
 
+	int (*led_control)(struct aq_hw_s *self, u32 mode);
+
+	int (*set_phyloopback)(struct aq_hw_s *self, u32 mode, bool enable);
+
 	int (*set_power)(struct aq_hw_s *self, unsigned int power_state,
 			 u8 *mac);
 
+	int (*send_fw_request)(struct aq_hw_s *self,
+			       const struct hw_fw_request_iface *fw_req,
+			       size_t size);
+
+	void (*enable_ptp)(struct aq_hw_s *self, int enable);
+
 	int (*set_eee_rate)(struct aq_hw_s *self, u32 speed);
 
 	int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
index 9c7a226d81b6..7dbf49adcea6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
@@ -59,6 +59,7 @@ u64 aq_hw_read_reg64(struct aq_hw_s *hw, u32 reg)
 	u64 value = aq_hw_read_reg(hw, reg);
 
 	value |= (u64)aq_hw_read_reg(hw, reg + 4) << 32;
+
 	return value;
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index bb65dd39f847..538f460a3da7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_main.c: Main file for aQuantia Linux driver. */
@@ -10,10 +10,13 @@
 #include "aq_nic.h"
 #include "aq_pci_func.h"
 #include "aq_ethtool.h"
+#include "aq_ptp.h"
 #include "aq_filters.h"
 
 #include <linux/netdevice.h>
 #include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
 
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION(AQ_CFG_DRV_VERSION);
@@ -50,8 +53,8 @@ struct net_device *aq_ndev_alloc(void)
 
 static int aq_ndev_open(struct net_device *ndev)
 {
-	int err = 0;
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	int err = 0;
 
 	err = aq_nic_init(aq_nic);
 	if (err < 0)
@@ -71,19 +74,20 @@ static int aq_ndev_open(struct net_device *ndev)
 
 err_exit:
 	if (err < 0)
-		aq_nic_deinit(aq_nic);
+		aq_nic_deinit(aq_nic, true);
+
 	return err;
 }
 
 static int aq_ndev_close(struct net_device *ndev)
 {
-	int err = 0;
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	int err = 0;
 
 	err = aq_nic_stop(aq_nic);
 	if (err < 0)
 		goto err_exit;
-	aq_nic_deinit(aq_nic);
+	aq_nic_deinit(aq_nic, true);
 
 err_exit:
 	return err;
@@ -93,13 +97,33 @@ static int aq_ndev_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
 
+	if (unlikely(aq_utils_obj_test(&aq_nic->flags, AQ_NIC_PTP_DPATH_UP))) {
+		/* Hardware adds the Timestamp for PTPv2 802.AS1
+		 * and PTPv2 IPv4 UDP.
+		 * We have to push even general 320 port messages to the ptp
+		 * queue explicitly. This is a limitation of current firmware
+		 * and hardware PTP design of the chip. Otherwise ptp stream
+		 * will fail to sync
+		 */
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ||
+		    unlikely((ip_hdr(skb)->version == 4) &&
+			     (ip_hdr(skb)->protocol == IPPROTO_UDP) &&
+			     ((udp_hdr(skb)->dest == htons(319)) ||
+			      (udp_hdr(skb)->dest == htons(320)))) ||
+		    unlikely(eth_hdr(skb)->h_proto == htons(ETH_P_1588)))
+			return aq_ptp_xmit(aq_nic, skb);
+	}
+
+	skb_tx_timestamp(skb);
 	return aq_nic_xmit(aq_nic, skb);
 }
 
 static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
-	int err = aq_nic_set_mtu(aq_nic, new_mtu + ETH_HLEN);
+	int err;
+
+	err = aq_nic_set_mtu(aq_nic, new_mtu + ETH_HLEN);
 
 	if (err < 0)
 		goto err_exit;
@@ -112,8 +136,8 @@ err_exit:
 static int aq_ndev_set_features(struct net_device *ndev,
 				netdev_features_t features)
 {
-	bool is_vlan_rx_strip = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
 	bool is_vlan_tx_insert = !!(features & NETIF_F_HW_VLAN_CTAG_TX);
+	bool is_vlan_rx_strip = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
 	bool need_ndev_restart = false;
 	struct aq_nic_cfg_s *aq_cfg;
@@ -197,6 +221,87 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev)
 	(void)aq_nic_set_multicast_list(aq_nic, ndev);
 }
 
+static int aq_ndev_config_hwtstamp(struct aq_nic_s *aq_nic,
+				   struct hwtstamp_config *config)
+{
+	if (config->flags)
+		return -EINVAL;
+
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return aq_ptp_hwtstamp_config_set(aq_nic->aq_ptp, config);
+}
+
+static int aq_ndev_hwtstamp_set(struct aq_nic_s *aq_nic, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int ret_val;
+
+	if (!aq_nic->aq_ptp)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	ret_val = aq_ndev_config_hwtstamp(aq_nic, &config);
+	if (ret_val)
+		return ret_val;
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+	       -EFAULT : 0;
+}
+
+static int aq_ndev_hwtstamp_get(struct aq_nic_s *aq_nic, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+
+	if (!aq_nic->aq_ptp)
+		return -EOPNOTSUPP;
+
+	aq_ptp_hwtstamp_config_get(aq_nic->aq_ptp, &config);
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+	       -EFAULT : 0;
+}
+
+static int aq_ndev_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(netdev);
+
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return aq_ndev_hwtstamp_set(aq_nic, ifr);
+
+	case SIOCGHWTSTAMP:
+		return aq_ndev_hwtstamp_get(aq_nic, ifr);
+	}
+
+	return -EOPNOTSUPP;
+}
+
 static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto,
 				  u16 vid)
 {
@@ -234,6 +339,7 @@ static const struct net_device_ops aq_ndev_ops = {
 	.ndo_change_mtu = aq_ndev_change_mtu,
 	.ndo_set_mac_address = aq_ndev_set_mac_address,
 	.ndo_set_features = aq_ndev_set_features,
+	.ndo_do_ioctl = aq_ndev_ioctl,
 	.ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 137c1de4c6ec..a17a4da7bc15 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_nic.c: Definition of common code for NIC. */
@@ -12,6 +12,9 @@
 #include "aq_hw.h"
 #include "aq_pci_func.h"
 #include "aq_main.h"
+#include "aq_phy.h"
+#include "aq_ptp.h"
+#include "aq_filters.h"
 
 #include <linux/moduleparam.h>
 #include <linux/netdevice.h>
@@ -38,10 +41,6 @@ static void aq_nic_update_ndev_stats(struct aq_nic_s *self);
 
 static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
 {
-	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
-	struct aq_rss_parameters *rss_params = &cfg->aq_rss;
-	int i = 0;
-
 	static u8 rss_key[AQ_CFG_RSS_HASHKEY_SIZE] = {
 		0x1e, 0xad, 0x71, 0x87, 0x65, 0xfc, 0x26, 0x7d,
 		0x0d, 0x45, 0x67, 0x74, 0xcd, 0x06, 0x1a, 0x18,
@@ -49,6 +48,11 @@ static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
 		0x19, 0x13, 0x4b, 0xa9, 0xd0, 0x3e, 0xfe, 0x70,
 		0x25, 0x03, 0xab, 0x50, 0x6a, 0x8b, 0x82, 0x0c
 	};
+	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+	struct aq_rss_parameters *rss_params;
+	int i = 0;
+
+	rss_params = &cfg->aq_rss;
 
 	rss_params->hash_secret_key_size = sizeof(rss_key);
 	memcpy(rss_params->hash_secret_key, rss_key, sizeof(rss_key));
@@ -75,7 +79,8 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 	cfg->is_rss = AQ_CFG_IS_RSS_DEF;
 	cfg->num_rss_queues = AQ_CFG_NUM_RSS_QUEUES_DEF;
 	cfg->aq_rss.base_cpu_number = AQ_CFG_RSS_BASE_CPU_NUM_DEF;
-	cfg->flow_control = AQ_CFG_FC_MODE;
+	cfg->fc.req = AQ_CFG_FC_MODE;
+	cfg->wol = AQ_CFG_WOL_MODES;
 
 	cfg->mtu = AQ_CFG_MTU_DEF;
 	cfg->link_speed_msk = AQ_CFG_SPEED_MSK;
@@ -139,18 +144,27 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
 	if (err)
 		return err;
 
+	if (self->aq_fw_ops->get_flow_control)
+		self->aq_fw_ops->get_flow_control(self->aq_hw, &fc);
+	self->aq_nic_cfg.fc.cur = fc;
+
 	if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps) {
-		pr_info("%s: link change old %d new %d\n",
-			AQ_CFG_DRV_NAME, self->link_status.mbps,
-			self->aq_hw->aq_link_status.mbps);
+		netdev_info(self->ndev, "%s: link change old %d new %d\n",
+			    AQ_CFG_DRV_NAME, self->link_status.mbps,
+			    self->aq_hw->aq_link_status.mbps);
 		aq_nic_update_interrupt_moderation_settings(self);
 
+		if (self->aq_ptp) {
+			aq_ptp_clock_init(self);
+			aq_ptp_tm_offset_set(self,
+					     self->aq_hw->aq_link_status.mbps);
+			aq_ptp_link_change(self);
+		}
+
 		/* Driver has to update flow control settings on RX block
 		 * on any link event.
 		 * We should query FW whether it negotiated FC.
 		 */
-		if (self->aq_fw_ops->get_flow_control)
-			self->aq_fw_ops->get_flow_control(self->aq_hw, &fc);
 		if (self->aq_hw_ops->hw_set_fc)
 			self->aq_hw_ops->hw_set_fc(self->aq_hw, fc, 0);
 	}
@@ -169,6 +183,7 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
 		netif_tx_disable(self->ndev);
 		aq_utils_obj_set(&self->flags, AQ_NIC_LINK_DOWN);
 	}
+
 	return 0;
 }
 
@@ -183,6 +198,7 @@ static irqreturn_t aq_linkstate_threaded_isr(int irq, void *private)
 
 	self->aq_hw_ops->hw_irq_enable(self->aq_hw,
 				       BIT(self->aq_nic_cfg.link_irq_vec));
+
 	return IRQ_HANDLED;
 }
 
@@ -192,6 +208,8 @@ static void aq_nic_service_task(struct work_struct *work)
 					     service_task);
 	int err;
 
+	aq_ptp_service_task(self);
+
 	if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAGS_IS_NOT_READY))
 		return;
 
@@ -211,7 +229,8 @@ static void aq_nic_service_timer_cb(struct timer_list *t)
 {
 	struct aq_nic_s *self = from_timer(self, t, service_timer);
 
-	mod_timer(&self->service_timer, jiffies + AQ_CFG_SERVICE_TIMER_INTERVAL);
+	mod_timer(&self->service_timer,
+		  jiffies + AQ_CFG_SERVICE_TIMER_INTERVAL);
 
 	aq_ndev_schedule_work(&self->service_task);
 }
@@ -290,9 +309,11 @@ void aq_nic_ndev_init(struct aq_nic_s *self)
 	self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
 				     NETIF_F_RXHASH | NETIF_F_SG |
 				     NETIF_F_LRO | NETIF_F_TSO;
+	self->ndev->gso_partial_features = NETIF_F_GSO_UDP_L4;
 	self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
 	self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
+	self->msg_enable = NETIF_MSG_DRV | NETIF_MSG_LINK;
 	self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN;
 	self->ndev->max_mtu = aq_hw_caps->mtu - ETH_FCS_LEN - ETH_HLEN;
 
@@ -312,8 +333,8 @@ struct net_device *aq_nic_get_ndev(struct aq_nic_s *self)
 int aq_nic_init(struct aq_nic_s *self)
 {
 	struct aq_vec_s *aq_vec = NULL;
-	int err = 0;
 	unsigned int i = 0U;
+	int err = 0;
 
 	self->power_state = AQ_HW_POWER_STATE_D0;
 	mutex_lock(&self->fwreq_mutex);
@@ -327,10 +348,27 @@ int aq_nic_init(struct aq_nic_s *self)
 	if (err < 0)
 		goto err_exit;
 
+	if (self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_TP) {
+		self->aq_hw->phy_id = HW_ATL_PHY_ID_MAX;
+		err = aq_phy_init(self->aq_hw);
+	}
+
 	for (i = 0U, aq_vec = self->aq_vec[0];
 		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
 		aq_vec_init(aq_vec, self->aq_hw_ops, self->aq_hw);
 
+	err = aq_ptp_init(self, self->irqvecs - 1);
+	if (err < 0)
+		goto err_exit;
+
+	err = aq_ptp_ring_alloc(self);
+	if (err < 0)
+		goto err_exit;
+
+	err = aq_ptp_ring_init(self);
+	if (err < 0)
+		goto err_exit;
+
 	netif_carrier_off(self->ndev);
 
 err_exit:
@@ -340,8 +378,8 @@ err_exit:
 int aq_nic_start(struct aq_nic_s *self)
 {
 	struct aq_vec_s *aq_vec = NULL;
-	int err = 0;
 	unsigned int i = 0U;
+	int err = 0;
 
 	err = self->aq_hw_ops->hw_multicast_list_set(self->aq_hw,
 						     self->mc_list.ar,
@@ -361,6 +399,10 @@ int aq_nic_start(struct aq_nic_s *self)
 			goto err_exit;
 	}
 
+	err = aq_ptp_ring_start(self);
+	if (err < 0)
+		goto err_exit;
+
 	err = self->aq_hw_ops->hw_start(self->aq_hw);
 	if (err < 0)
 		goto err_exit;
@@ -371,6 +413,8 @@ int aq_nic_start(struct aq_nic_s *self)
 
 	INIT_WORK(&self->service_task, aq_nic_service_task);
 
+	aq_nic_set_loopback(self);
+
 	timer_setup(&self->service_timer, aq_nic_service_timer_cb, 0);
 	aq_nic_service_timer_cb(&self->service_timer);
 
@@ -388,6 +432,10 @@ int aq_nic_start(struct aq_nic_s *self)
 				goto err_exit;
 		}
 
+		err = aq_ptp_irq_alloc(self);
+		if (err < 0)
+			goto err_exit;
+
 		if (self->aq_nic_cfg.link_irq_vec) {
 			int irqvec = pci_irq_vector(self->pdev,
 						   self->aq_nic_cfg.link_irq_vec);
@@ -420,30 +468,48 @@ err_exit:
 	return err;
 }
 
-static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
-				   struct sk_buff *skb,
-				   struct aq_ring_s *ring)
+unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
+			    struct aq_ring_s *ring)
 {
-	unsigned int ret = 0U;
 	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
-	unsigned int frag_count = 0U;
-	unsigned int dx = ring->sw_tail;
 	struct aq_ring_buff_s *first = NULL;
-	struct aq_ring_buff_s *dx_buff = &ring->buff_ring[dx];
+	u8 ipver = ip_hdr(skb)->version;
+	struct aq_ring_buff_s *dx_buff;
 	bool need_context_tag = false;
+	unsigned int frag_count = 0U;
+	unsigned int ret = 0U;
+	unsigned int dx;
+	u8 l4proto = 0;
+
+	if (ipver == 4)
+		l4proto = ip_hdr(skb)->protocol;
+	else if (ipver == 6)
+		l4proto = ipv6_hdr(skb)->nexthdr;
 
+	dx = ring->sw_tail;
+	dx_buff = &ring->buff_ring[dx];
 	dx_buff->flags = 0U;
 
 	if (unlikely(skb_is_gso(skb))) {
 		dx_buff->mss = skb_shinfo(skb)->gso_size;
-		dx_buff->is_gso = 1U;
+		if (l4proto == IPPROTO_TCP) {
+			dx_buff->is_gso_tcp = 1U;
+			dx_buff->len_l4 = tcp_hdrlen(skb);
+		} else if (l4proto == IPPROTO_UDP) {
+			dx_buff->is_gso_udp = 1U;
+			dx_buff->len_l4 = sizeof(struct udphdr);
+			/* UDP GSO Hardware does not replace packet length. */
+			udp_hdr(skb)->len = htons(dx_buff->mss +
+						  dx_buff->len_l4);
+		} else {
+			WARN_ONCE(true, "Bad GSO mode");
+			goto exit;
+		}
 		dx_buff->len_pkt = skb->len;
 		dx_buff->len_l2 = ETH_HLEN;
-		dx_buff->len_l3 = ip_hdrlen(skb);
-		dx_buff->len_l4 = tcp_hdrlen(skb);
+		dx_buff->len_l3 = skb_network_header_len(skb);
 		dx_buff->eop_index = 0xffffU;
-		dx_buff->is_ipv6 =
-			(ip_hdr(skb)->version == 6) ? 1U : 0U;
+		dx_buff->is_ipv6 = (ipver == 6);
 		need_context_tag = true;
 	}
 
@@ -477,24 +543,9 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
 	++ret;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol) ?
-			1U : 0U;
-
-		if (ip_hdr(skb)->version == 4) {
-			dx_buff->is_tcp_cso =
-				(ip_hdr(skb)->protocol == IPPROTO_TCP) ?
-					1U : 0U;
-			dx_buff->is_udp_cso =
-				(ip_hdr(skb)->protocol == IPPROTO_UDP) ?
-					1U : 0U;
-		} else if (ip_hdr(skb)->version == 6) {
-			dx_buff->is_tcp_cso =
-				(ipv6_hdr(skb)->nexthdr == NEXTHDR_TCP) ?
-					1U : 0U;
-			dx_buff->is_udp_cso =
-				(ipv6_hdr(skb)->nexthdr == NEXTHDR_UDP) ?
-					1U : 0U;
-		}
+		dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol);
+		dx_buff->is_tcp_cso = (l4proto == IPPROTO_TCP);
+		dx_buff->is_udp_cso = (l4proto == IPPROTO_UDP);
 	}
 
 	for (; nr_frags--; ++frag_count) {
@@ -549,7 +600,8 @@ mapping_error:
 	     --ret, dx = aq_ring_next_dx(ring, dx)) {
 		dx_buff = &ring->buff_ring[dx];
 
-		if (!dx_buff->is_gso && !dx_buff->is_vlan && dx_buff->pa) {
+		if (!(dx_buff->is_gso_tcp || dx_buff->is_gso_udp) &&
+		    !dx_buff->is_vlan && dx_buff->pa) {
 			if (unlikely(dx_buff->is_sop)) {
 				dma_unmap_single(aq_nic_get_dev(self),
 						 dx_buff->pa,
@@ -570,11 +622,11 @@ exit:
 
 int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
 {
+	unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
 	struct aq_ring_s *ring = NULL;
 	unsigned int frags = 0U;
-	unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
-	unsigned int tc = 0U;
 	int err = NETDEV_TX_OK;
+	unsigned int tc = 0U;
 
 	frags = skb_shinfo(skb)->nr_frags + 1;
 
@@ -587,6 +639,11 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
 
 	aq_ring_update_queue_state(ring);
 
+	if (self->aq_nic_cfg.priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)) {
+		err = NETDEV_TX_BUSY;
+		goto err_exit;
+	}
+
 	/* Above status update may stop the queue. Check this. */
 	if (__netif_subqueue_stopped(self->ndev, ring->idx)) {
 		err = NETDEV_TX_BUSY;
@@ -667,6 +724,7 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
 		if (err < 0)
 			return err;
 	}
+
 	return aq_nic_set_packet_filter(self, packet_filter);
 }
 
@@ -711,10 +769,10 @@ int aq_nic_get_regs_count(struct aq_nic_s *self)
 
 void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
 {
-	unsigned int i = 0U;
-	unsigned int count = 0U;
 	struct aq_vec_s *aq_vec = NULL;
 	struct aq_stats_s *stats;
+	unsigned int count = 0U;
+	unsigned int i = 0U;
 
 	if (self->aq_fw_ops->update_stats) {
 		mutex_lock(&self->fwreq_mutex);
@@ -764,8 +822,8 @@ err_exit:;
 
 static void aq_nic_update_ndev_stats(struct aq_nic_s *self)
 {
-	struct net_device *ndev = self->ndev;
 	struct aq_stats_s *stats = self->aq_hw_ops->hw_get_hw_stats(self->aq_hw);
+	struct net_device *ndev = self->ndev;
 
 	ndev->stats.rx_packets = stats->dma_pkt_rc;
 	ndev->stats.rx_bytes = stats->dma_oct_rc;
@@ -810,9 +868,12 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
 		ethtool_link_ksettings_add_link_mode(cmd, supported,
 						     100baseT_Full);
 
-	if (self->aq_nic_cfg.aq_hw_caps->flow_control)
+	if (self->aq_nic_cfg.aq_hw_caps->flow_control) {
 		ethtool_link_ksettings_add_link_mode(cmd, supported,
 						     Pause);
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     Asym_Pause);
+	}
 
 	ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
 
@@ -846,13 +907,13 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     100baseT_Full);
 
-	if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_RX)
+	if (self->aq_nic_cfg.fc.cur & AQ_NIC_FC_RX)
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     Pause);
 
 	/* Asym is when either RX or TX, but not both */
-	if (!!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX) ^
-	    !!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_RX))
+	if (!!(self->aq_nic_cfg.fc.cur & AQ_NIC_FC_TX) ^
+	    !!(self->aq_nic_cfg.fc.cur & AQ_NIC_FC_RX))
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     Asym_Pause);
 
@@ -935,6 +996,44 @@ u32 aq_nic_get_fw_version(struct aq_nic_s *self)
 	return fw_version;
 }
 
+int aq_nic_set_loopback(struct aq_nic_s *self)
+{
+	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+
+	if (!self->aq_hw_ops->hw_set_loopback ||
+	    !self->aq_fw_ops->set_phyloopback)
+		return -ENOTSUPP;
+
+	mutex_lock(&self->fwreq_mutex);
+	self->aq_hw_ops->hw_set_loopback(self->aq_hw,
+					 AQ_HW_LOOPBACK_DMA_SYS,
+					 !!(cfg->priv_flags &
+					    BIT(AQ_HW_LOOPBACK_DMA_SYS)));
+
+	self->aq_hw_ops->hw_set_loopback(self->aq_hw,
+					 AQ_HW_LOOPBACK_PKT_SYS,
+					 !!(cfg->priv_flags &
+					    BIT(AQ_HW_LOOPBACK_PKT_SYS)));
+
+	self->aq_hw_ops->hw_set_loopback(self->aq_hw,
+					 AQ_HW_LOOPBACK_DMA_NET,
+					 !!(cfg->priv_flags &
+					    BIT(AQ_HW_LOOPBACK_DMA_NET)));
+
+	self->aq_fw_ops->set_phyloopback(self->aq_hw,
+					 AQ_HW_LOOPBACK_PHYINT_SYS,
+					 !!(cfg->priv_flags &
+					    BIT(AQ_HW_LOOPBACK_PHYINT_SYS)));
+
+	self->aq_fw_ops->set_phyloopback(self->aq_hw,
+					 AQ_HW_LOOPBACK_PHYEXT_SYS,
+					 !!(cfg->priv_flags &
+					    BIT(AQ_HW_LOOPBACK_PHYEXT_SYS)));
+	mutex_unlock(&self->fwreq_mutex);
+
+	return 0;
+}
+
 int aq_nic_stop(struct aq_nic_s *self)
 {
 	struct aq_vec_s *aq_vec = NULL;
@@ -953,14 +1052,31 @@ int aq_nic_stop(struct aq_nic_s *self)
 	else
 		aq_pci_func_free_irqs(self);
 
+	aq_ptp_irq_free(self);
+
 	for (i = 0U, aq_vec = self->aq_vec[0];
 		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
 		aq_vec_stop(aq_vec);
 
+	aq_ptp_ring_stop(self);
+
 	return self->aq_hw_ops->hw_stop(self->aq_hw);
 }
 
-void aq_nic_deinit(struct aq_nic_s *self)
+void aq_nic_set_power(struct aq_nic_s *self)
+{
+	if (self->power_state != AQ_HW_POWER_STATE_D0 ||
+	    self->aq_hw->aq_nic_cfg->wol)
+		if (likely(self->aq_fw_ops->set_power)) {
+			mutex_lock(&self->fwreq_mutex);
+			self->aq_fw_ops->set_power(self->aq_hw,
+						   self->power_state,
+						   self->ndev->dev_addr);
+			mutex_unlock(&self->fwreq_mutex);
+		}
+}
+
+void aq_nic_deinit(struct aq_nic_s *self, bool link_down)
 {
 	struct aq_vec_s *aq_vec = NULL;
 	unsigned int i = 0U;
@@ -972,23 +1088,17 @@ void aq_nic_deinit(struct aq_nic_s *self)
 		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
 		aq_vec_deinit(aq_vec);
 
-	if (likely(self->aq_fw_ops->deinit)) {
+	aq_ptp_unregister(self);
+	aq_ptp_ring_deinit(self);
+	aq_ptp_ring_free(self);
+	aq_ptp_free(self);
+
+	if (likely(self->aq_fw_ops->deinit) && link_down) {
 		mutex_lock(&self->fwreq_mutex);
 		self->aq_fw_ops->deinit(self->aq_hw);
 		mutex_unlock(&self->fwreq_mutex);
 	}
 
-	if (self->power_state != AQ_HW_POWER_STATE_D0 ||
-	    self->aq_hw->aq_nic_cfg->wol)
-		if (likely(self->aq_fw_ops->set_power)) {
-			mutex_lock(&self->fwreq_mutex);
-			self->aq_fw_ops->set_power(self->aq_hw,
-						   self->power_state,
-						   self->ndev->dev_addr);
-			mutex_unlock(&self->fwreq_mutex);
-		}
-
-
 err_exit:;
 }
 
@@ -1009,44 +1119,6 @@ void aq_nic_free_vectors(struct aq_nic_s *self)
 err_exit:;
 }
 
-int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg)
-{
-	int err = 0;
-
-	if (!netif_running(self->ndev)) {
-		err = 0;
-		goto out;
-	}
-	rtnl_lock();
-	if (pm_msg->event & PM_EVENT_SLEEP || pm_msg->event & PM_EVENT_FREEZE) {
-		self->power_state = AQ_HW_POWER_STATE_D3;
-		netif_device_detach(self->ndev);
-		netif_tx_stop_all_queues(self->ndev);
-
-		err = aq_nic_stop(self);
-		if (err < 0)
-			goto err_exit;
-
-		aq_nic_deinit(self);
-	} else {
-		err = aq_nic_init(self);
-		if (err < 0)
-			goto err_exit;
-
-		err = aq_nic_start(self);
-		if (err < 0)
-			goto err_exit;
-
-		netif_device_attach(self->ndev);
-		netif_tx_start_all_queues(self->ndev);
-	}
-
-err_exit:
-	rtnl_unlock();
-out:
-	return err;
-}
-
 void aq_nic_shutdown(struct aq_nic_s *self)
 {
 	int err = 0;
@@ -1063,8 +1135,52 @@ void aq_nic_shutdown(struct aq_nic_s *self)
 		if (err < 0)
 			goto err_exit;
 	}
-	aq_nic_deinit(self);
+	aq_nic_deinit(self, !self->aq_hw->aq_nic_cfg->wol);
+	aq_nic_set_power(self);
 
 err_exit:
 	rtnl_unlock();
 }
+
+u8 aq_nic_reserve_filter(struct aq_nic_s *self, enum aq_rx_filter_type type)
+{
+	u8 location = 0xFF;
+	u32 fltr_cnt;
+	u32 n_bit;
+
+	switch (type) {
+	case aq_rx_filter_ethertype:
+		location = AQ_RX_LAST_LOC_FETHERT - AQ_RX_FIRST_LOC_FETHERT -
+			   self->aq_hw_rx_fltrs.fet_reserved_count;
+		self->aq_hw_rx_fltrs.fet_reserved_count++;
+		break;
+	case aq_rx_filter_l3l4:
+		fltr_cnt = AQ_RX_LAST_LOC_FL3L4 - AQ_RX_FIRST_LOC_FL3L4;
+		n_bit = fltr_cnt - self->aq_hw_rx_fltrs.fl3l4.reserved_count;
+
+		self->aq_hw_rx_fltrs.fl3l4.active_ipv4 |= BIT(n_bit);
+		self->aq_hw_rx_fltrs.fl3l4.reserved_count++;
+		location = n_bit;
+		break;
+	default:
+		break;
+	}
+
+	return location;
+}
+
+void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
+			   u32 location)
+{
+	switch (type) {
+	case aq_rx_filter_ethertype:
+		self->aq_hw_rx_fltrs.fet_reserved_count--;
+		break;
+	case aq_rx_filter_l3l4:
+		self->aq_hw_rx_fltrs.fl3l4.reserved_count--;
+		self->aq_hw_rx_fltrs.fl3l4.active_ipv4 &= ~BIT(location);
+		break;
+	default:
+		break;
+	}
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 255b54a6ae07..a752f8bb4b08 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_nic.h: Declaration of common code for NIC. */
@@ -17,6 +17,20 @@ struct aq_ring_s;
 struct aq_hw_ops;
 struct aq_fw_s;
 struct aq_vec_s;
+struct aq_ptp_s;
+enum aq_rx_filter_type;
+
+enum aq_fc_mode {
+	AQ_NIC_FC_OFF = 0,
+	AQ_NIC_FC_TX,
+	AQ_NIC_FC_RX,
+	AQ_NIC_FC_FULL,
+};
+
+struct aq_fc_info {
+	enum aq_fc_mode req;
+	enum aq_fc_mode cur;
+};
 
 struct aq_nic_cfg_s {
 	const struct aq_hw_caps_s *aq_hw_caps;
@@ -32,7 +46,7 @@ struct aq_nic_cfg_s {
 	u32 rxpageorder;
 	u32 num_rss_queues;
 	u32 mtu;
-	u32 flow_control;
+	struct aq_fc_info fc;
 	u32 link_speed_msk;
 	u32 wol;
 	u8 is_vlan_rx_strip;
@@ -44,6 +58,7 @@ struct aq_nic_cfg_s {
 	bool is_polling;
 	bool is_rss;
 	bool is_lro;
+	u32 priv_flags;
 	u8  tcs;
 	struct aq_rss_parameters aq_rss;
 	u32 eee_speeds;
@@ -53,11 +68,13 @@ struct aq_nic_cfg_s {
 #define AQ_NIC_FLAG_STOPPING    0x00000008U
 #define AQ_NIC_FLAG_RESETTING   0x00000010U
 #define AQ_NIC_FLAG_CLOSING     0x00000020U
+#define AQ_NIC_PTP_DPATH_UP     0x02000000U
 #define AQ_NIC_LINK_DOWN        0x04000000U
 #define AQ_NIC_FLAG_ERR_UNPLUG  0x40000000U
 #define AQ_NIC_FLAG_ERR_HW      0x80000000U
 
-#define AQ_NIC_WOL_ENABLED	BIT(0)
+#define AQ_NIC_WOL_MODES        (WAKE_MAGIC |\
+				 WAKE_PHY)
 
 #define AQ_NIC_TCVEC2RING(_NIC_, _TC_, _VEC_) \
 	((_TC_) * AQ_CFG_TCS_MAX + (_VEC_))
@@ -67,9 +84,10 @@ struct aq_hw_rx_fl2 {
 };
 
 struct aq_hw_rx_fl3l4 {
-	u8   active_ipv4;
-	u8   active_ipv6:2;
+	u8 active_ipv4;
+	u8 active_ipv6:2;
 	u8 is_ipv6;
+	u8 reserved_count;
 };
 
 struct aq_hw_rx_fltrs_s {
@@ -77,10 +95,13 @@ struct aq_hw_rx_fltrs_s {
 	u16                   active_filters;
 	struct aq_hw_rx_fl2   fl2;
 	struct aq_hw_rx_fl3l4 fl3l4;
+	/*filter ether type */
+	u8 fet_reserved_count;
 };
 
 struct aq_nic_s {
 	atomic_t flags;
+	u32 msg_enable;
 	struct aq_vec_s *aq_vec[AQ_CFG_VECS_MAX];
 	struct aq_ring_s *aq_ring_tx[AQ_CFG_VECS_MAX * AQ_CFG_TCS_MAX];
 	struct aq_hw_s *aq_hw;
@@ -108,6 +129,8 @@ struct aq_nic_s {
 	u32 irqvecs;
 	/* mutex to serialize FW interface access operations */
 	struct mutex fwreq_mutex;
+	/* PTP support */
+	struct aq_ptp_s *aq_ptp;
 	struct aq_hw_rx_fltrs_s aq_hw_rx_fltrs;
 };
 
@@ -126,12 +149,15 @@ void aq_nic_cfg_start(struct aq_nic_s *self);
 int aq_nic_ndev_register(struct aq_nic_s *self);
 void aq_nic_ndev_free(struct aq_nic_s *self);
 int aq_nic_start(struct aq_nic_s *self);
+unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
+			    struct aq_ring_s *ring);
 int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb);
 int aq_nic_get_regs(struct aq_nic_s *self, struct ethtool_regs *regs, void *p);
 int aq_nic_get_regs_count(struct aq_nic_s *self);
 void aq_nic_get_stats(struct aq_nic_s *self, u64 *data);
 int aq_nic_stop(struct aq_nic_s *self);
-void aq_nic_deinit(struct aq_nic_s *self);
+void aq_nic_deinit(struct aq_nic_s *self, bool link_down);
+void aq_nic_set_power(struct aq_nic_s *self);
 void aq_nic_free_hot_resources(struct aq_nic_s *self);
 void aq_nic_free_vectors(struct aq_nic_s *self);
 int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu);
@@ -145,8 +171,10 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self,
 			      const struct ethtool_link_ksettings *cmd);
 struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self);
 u32 aq_nic_get_fw_version(struct aq_nic_s *self);
-int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg);
+int aq_nic_set_loopback(struct aq_nic_s *self);
 int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self);
 void aq_nic_shutdown(struct aq_nic_s *self);
-
+u8 aq_nic_reserve_filter(struct aq_nic_s *self, enum aq_rx_filter_type type);
+void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
+			   u32 location);
 #endif /* AQ_NIC_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 74b9f3f1da81..a161026cfbfd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_pci_func.c: Definition of PCI functions. */
@@ -185,6 +185,7 @@ unsigned int aq_pci_func_get_irq_type(struct aq_nic_s *self)
 		return AQ_HW_IRQ_MSIX;
 	if (self->pdev->msi_enabled)
 		return AQ_HW_IRQ_MSI;
+
 	return AQ_HW_IRQ_LEGACY;
 }
 
@@ -196,12 +197,12 @@ static void aq_pci_free_irq_vectors(struct aq_nic_s *self)
 static int aq_pci_probe(struct pci_dev *pdev,
 			const struct pci_device_id *pci_id)
 {
-	struct aq_nic_s *self;
-	int err;
 	struct net_device *ndev;
 	resource_size_t mmio_pa;
-	u32 bar;
+	struct aq_nic_s *self;
 	u32 numvecs;
+	u32 bar;
+	int err;
 
 	err = pci_enable_device(pdev);
 	if (err)
@@ -269,6 +270,9 @@ static int aq_pci_probe(struct pci_dev *pdev,
 	numvecs = min((u8)AQ_CFG_VECS_DEF,
 		      aq_nic_get_cfg(self)->aq_hw_caps->msix_irqs);
 	numvecs = min(numvecs, num_online_cpus());
+	/* Request IRQ vector for PTP */
+	numvecs += 1;
+
 	numvecs += AQ_HW_SERVICE_IRQS;
 	/*enable interrupts */
 #if !AQ_CFG_FORCE_LEGACY_INT
@@ -308,6 +312,7 @@ err_ndev:
 	pci_release_regions(pdev);
 err_pci_func:
 	pci_disable_device(pdev);
+
 	return err;
 }
 
@@ -344,29 +349,98 @@ static void aq_pci_shutdown(struct pci_dev *pdev)
 	}
 }
 
-static int aq_pci_suspend(struct pci_dev *pdev, pm_message_t pm_msg)
+static int aq_suspend_common(struct device *dev, bool deep)
 {
-	struct aq_nic_s *self = pci_get_drvdata(pdev);
+	struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev));
+
+	rtnl_lock();
 
-	return aq_nic_change_pm_state(self, &pm_msg);
+	nic->power_state = AQ_HW_POWER_STATE_D3;
+	netif_device_detach(nic->ndev);
+	netif_tx_stop_all_queues(nic->ndev);
+
+	aq_nic_stop(nic);
+
+	if (deep) {
+		aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
+		aq_nic_set_power(nic);
+	}
+
+	rtnl_unlock();
+
+	return 0;
 }
 
-static int aq_pci_resume(struct pci_dev *pdev)
+static int atl_resume_common(struct device *dev, bool deep)
 {
-	struct aq_nic_s *self = pci_get_drvdata(pdev);
-	pm_message_t pm_msg = PMSG_RESTORE;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct aq_nic_s *nic;
+	int ret;
+
+	nic = pci_get_drvdata(pdev);
+
+	rtnl_lock();
 
-	return aq_nic_change_pm_state(self, &pm_msg);
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+
+	if (deep) {
+		ret = aq_nic_init(nic);
+		if (ret)
+			goto err_exit;
+	}
+
+	ret = aq_nic_start(nic);
+	if (ret)
+		goto err_exit;
+
+	netif_device_attach(nic->ndev);
+	netif_tx_start_all_queues(nic->ndev);
+
+err_exit:
+	rtnl_unlock();
+
+	return ret;
 }
 
+static int aq_pm_freeze(struct device *dev)
+{
+	return aq_suspend_common(dev, false);
+}
+
+static int aq_pm_suspend_poweroff(struct device *dev)
+{
+	return aq_suspend_common(dev, true);
+}
+
+static int aq_pm_thaw(struct device *dev)
+{
+	return atl_resume_common(dev, false);
+}
+
+static int aq_pm_resume_restore(struct device *dev)
+{
+	return atl_resume_common(dev, true);
+}
+
+const struct dev_pm_ops aq_pm_ops = {
+	.suspend = aq_pm_suspend_poweroff,
+	.poweroff = aq_pm_suspend_poweroff,
+	.freeze = aq_pm_freeze,
+	.resume = aq_pm_resume_restore,
+	.restore = aq_pm_resume_restore,
+	.thaw = aq_pm_thaw,
+};
+
 static struct pci_driver aq_pci_ops = {
 	.name = AQ_CFG_DRV_NAME,
 	.id_table = aq_pci_tbl,
 	.probe = aq_pci_probe,
 	.remove = aq_pci_remove,
-	.suspend = aq_pci_suspend,
-	.resume = aq_pci_resume,
 	.shutdown = aq_pci_shutdown,
+#ifdef CONFIG_PM
+	.driver.pm = &aq_pm_ops,
+#endif
 };
 
 int aq_pci_func_register_driver(void)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_phy.c b/drivers/net/ethernet/aquantia/atlantic/aq_phy.c
new file mode 100644
index 000000000000..51ae921e3e1f
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_phy.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* aQuantia Corporation Network Driver
+ * Copyright (C) 2018-2019 aQuantia Corporation. All rights reserved
+ */
+
+#include "aq_phy.h"
+
+bool aq_mdio_busy_wait(struct aq_hw_s *aq_hw)
+{
+	int err = 0;
+	u32 val;
+
+	err = readx_poll_timeout_atomic(hw_atl_mdio_busy_get, aq_hw,
+					val, val == 0U, 10U, 100000U);
+
+	if (err < 0)
+		return false;
+
+	return true;
+}
+
+u16 aq_mdio_read_word(struct aq_hw_s *aq_hw, u16 mmd, u16 addr)
+{
+	u16 phy_addr = aq_hw->phy_id << 5 | mmd;
+
+	/* Set Address register. */
+	hw_atl_glb_mdio_iface4_set(aq_hw, (addr & HW_ATL_MDIO_ADDRESS_MSK) <<
+				   HW_ATL_MDIO_ADDRESS_SHIFT);
+	/* Send Address command. */
+	hw_atl_glb_mdio_iface2_set(aq_hw, HW_ATL_MDIO_EXECUTE_OPERATION_MSK |
+				   (3 << HW_ATL_MDIO_OP_MODE_SHIFT) |
+				   ((phy_addr & HW_ATL_MDIO_PHY_ADDRESS_MSK) <<
+				    HW_ATL_MDIO_PHY_ADDRESS_SHIFT));
+
+	aq_mdio_busy_wait(aq_hw);
+
+	/* Send Read command. */
+	hw_atl_glb_mdio_iface2_set(aq_hw, HW_ATL_MDIO_EXECUTE_OPERATION_MSK |
+				   (1 << HW_ATL_MDIO_OP_MODE_SHIFT) |
+				   ((phy_addr & HW_ATL_MDIO_PHY_ADDRESS_MSK) <<
+				    HW_ATL_MDIO_PHY_ADDRESS_SHIFT));
+	/* Read result. */
+	aq_mdio_busy_wait(aq_hw);
+
+	return (u16)hw_atl_glb_mdio_iface5_get(aq_hw);
+}
+
+void aq_mdio_write_word(struct aq_hw_s *aq_hw, u16 mmd, u16 addr, u16 data)
+{
+	u16 phy_addr = aq_hw->phy_id << 5 | mmd;
+
+	/* Set Address register. */
+	hw_atl_glb_mdio_iface4_set(aq_hw, (addr & HW_ATL_MDIO_ADDRESS_MSK) <<
+				   HW_ATL_MDIO_ADDRESS_SHIFT);
+	/* Send Address command. */
+	hw_atl_glb_mdio_iface2_set(aq_hw, HW_ATL_MDIO_EXECUTE_OPERATION_MSK |
+				   (3 << HW_ATL_MDIO_OP_MODE_SHIFT) |
+				   ((phy_addr & HW_ATL_MDIO_PHY_ADDRESS_MSK) <<
+				    HW_ATL_MDIO_PHY_ADDRESS_SHIFT));
+
+	aq_mdio_busy_wait(aq_hw);
+
+	hw_atl_glb_mdio_iface3_set(aq_hw, (data & HW_ATL_MDIO_WRITE_DATA_MSK) <<
+				   HW_ATL_MDIO_WRITE_DATA_SHIFT);
+	/* Send Write command. */
+	hw_atl_glb_mdio_iface2_set(aq_hw, HW_ATL_MDIO_EXECUTE_OPERATION_MSK |
+				   (2 << HW_ATL_MDIO_OP_MODE_SHIFT) |
+				   ((phy_addr & HW_ATL_MDIO_PHY_ADDRESS_MSK) <<
+				    HW_ATL_MDIO_PHY_ADDRESS_SHIFT));
+
+	aq_mdio_busy_wait(aq_hw);
+}
+
+u16 aq_phy_read_reg(struct aq_hw_s *aq_hw, u16 mmd, u16 address)
+{
+	int err = 0;
+	u32 val;
+
+	err = readx_poll_timeout_atomic(hw_atl_sem_mdio_get, aq_hw,
+					val, val == 1U, 10U, 100000U);
+
+	if (err < 0) {
+		err = 0xffff;
+		goto err_exit;
+	}
+
+	err = aq_mdio_read_word(aq_hw, mmd, address);
+
+	hw_atl_reg_glb_cpu_sem_set(aq_hw, 1U, HW_ATL_FW_SM_MDIO);
+
+err_exit:
+	return err;
+}
+
+void aq_phy_write_reg(struct aq_hw_s *aq_hw, u16 mmd, u16 address, u16 data)
+{
+	int err = 0;
+	u32 val;
+
+	err = readx_poll_timeout_atomic(hw_atl_sem_mdio_get, aq_hw,
+					val, val == 1U, 10U, 100000U);
+	if (err < 0)
+		return;
+
+	aq_mdio_write_word(aq_hw, mmd, address, data);
+	hw_atl_reg_glb_cpu_sem_set(aq_hw, 1U, HW_ATL_FW_SM_MDIO);
+}
+
+bool aq_phy_init_phy_id(struct aq_hw_s *aq_hw)
+{
+	u16 val;
+
+	for (aq_hw->phy_id = 0; aq_hw->phy_id < HW_ATL_PHY_ID_MAX;
+	     ++aq_hw->phy_id) {
+		/* PMA Standard Device Identifier 2: Address 1.3 */
+		val = aq_phy_read_reg(aq_hw, MDIO_MMD_PMAPMD, 3);
+
+		if (val != 0xffff)
+			return true;
+	}
+
+	return false;
+}
+
+bool aq_phy_init(struct aq_hw_s *aq_hw)
+{
+	u32 dev_id;
+
+	if (aq_hw->phy_id == HW_ATL_PHY_ID_MAX)
+		if (!aq_phy_init_phy_id(aq_hw))
+			return false;
+
+	/* PMA Standard Device Identifier:
+	 * Address 1.2 = MSW,
+	 * Address 1.3 = LSW
+	 */
+	dev_id = aq_phy_read_reg(aq_hw, MDIO_MMD_PMAPMD, 2);
+	dev_id <<= 16;
+	dev_id |= aq_phy_read_reg(aq_hw, MDIO_MMD_PMAPMD, 3);
+
+	if (dev_id == 0xffffffff) {
+		aq_hw->phy_id = HW_ATL_PHY_ID_MAX;
+		return false;
+	}
+
+	return true;
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_phy.h b/drivers/net/ethernet/aquantia/atlantic/aq_phy.h
new file mode 100644
index 000000000000..84b72ad04a4a
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_phy.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* aQuantia Corporation Network Driver
+ * Copyright (C) 2018-2019 aQuantia Corporation. All rights reserved
+ */
+
+#ifndef AQ_PHY_H
+#define AQ_PHY_H
+
+#include <linux/mdio.h>
+
+#include "hw_atl/hw_atl_llh.h"
+#include "hw_atl/hw_atl_llh_internal.h"
+#include "aq_hw_utils.h"
+#include "aq_hw.h"
+
+#define HW_ATL_PHY_ID_MAX 32U
+
+bool aq_mdio_busy_wait(struct aq_hw_s *aq_hw);
+
+u16 aq_mdio_read_word(struct aq_hw_s *aq_hw, u16 mmd, u16 addr);
+
+void aq_mdio_write_word(struct aq_hw_s *aq_hw, u16 mmd, u16 addr, u16 data);
+
+u16 aq_phy_read_reg(struct aq_hw_s *aq_hw, u16 mmd, u16 address);
+
+void aq_phy_write_reg(struct aq_hw_s *aq_hw, u16 mmd, u16 address, u16 data);
+
+bool aq_phy_init_phy_id(struct aq_hw_s *aq_hw);
+
+bool aq_phy_init(struct aq_hw_s *aq_hw);
+
+#endif /* AQ_PHY_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
new file mode 100644
index 000000000000..58e8c641e8b3
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
@@ -0,0 +1,1392 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Aquantia Corporation Network Driver
+ * Copyright (C) 2014-2019 Aquantia Corporation. All rights reserved
+ */
+
+/* File aq_ptp.c:
+ * Definition of functions for Linux PTP support.
+ */
+
+#include <linux/ptp_clock_kernel.h>
+#include <linux/ptp_classify.h>
+#include <linux/interrupt.h>
+#include <linux/clocksource.h>
+
+#include "aq_nic.h"
+#include "aq_ptp.h"
+#include "aq_ring.h"
+#include "aq_phy.h"
+#include "aq_filters.h"
+
+#define AQ_PTP_TX_TIMEOUT        (HZ *  10)
+
+#define POLL_SYNC_TIMER_MS 15
+
+enum ptp_speed_offsets {
+	ptp_offset_idx_10 = 0,
+	ptp_offset_idx_100,
+	ptp_offset_idx_1000,
+	ptp_offset_idx_2500,
+	ptp_offset_idx_5000,
+	ptp_offset_idx_10000,
+};
+
+struct ptp_skb_ring {
+	struct sk_buff **buff;
+	spinlock_t lock;
+	unsigned int size;
+	unsigned int head;
+	unsigned int tail;
+};
+
+struct ptp_tx_timeout {
+	spinlock_t lock;
+	bool active;
+	unsigned long tx_start;
+};
+
+struct aq_ptp_s {
+	struct aq_nic_s *aq_nic;
+	struct hwtstamp_config hwtstamp_config;
+	spinlock_t ptp_lock;
+	spinlock_t ptp_ring_lock;
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_info;
+
+	atomic_t offset_egress;
+	atomic_t offset_ingress;
+
+	struct aq_ring_param_s ptp_ring_param;
+
+	struct ptp_tx_timeout ptp_tx_timeout;
+
+	unsigned int idx_vector;
+	struct napi_struct napi;
+
+	struct aq_ring_s ptp_tx;
+	struct aq_ring_s ptp_rx;
+	struct aq_ring_s hwts_rx;
+
+	struct ptp_skb_ring skb_ring;
+
+	struct aq_rx_filter_l3l4 udp_filter;
+	struct aq_rx_filter_l2 eth_type_filter;
+
+	struct delayed_work poll_sync;
+	u32 poll_timeout_ms;
+
+	bool extts_pin_enabled;
+	u64 last_sync1588_ts;
+};
+
+struct ptp_tm_offset {
+	unsigned int mbps;
+	int egress;
+	int ingress;
+};
+
+static struct ptp_tm_offset ptp_offset[6];
+
+void aq_ptp_tm_offset_set(struct aq_nic_s *aq_nic, unsigned int mbps)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	int i, egress, ingress;
+
+	if (!aq_ptp)
+		return;
+
+	egress = 0;
+	ingress = 0;
+
+	for (i = 0; i < ARRAY_SIZE(ptp_offset); i++) {
+		if (mbps == ptp_offset[i].mbps) {
+			egress = ptp_offset[i].egress;
+			ingress = ptp_offset[i].ingress;
+			break;
+		}
+	}
+
+	atomic_set(&aq_ptp->offset_egress, egress);
+	atomic_set(&aq_ptp->offset_ingress, ingress);
+}
+
+static int __aq_ptp_skb_put(struct ptp_skb_ring *ring, struct sk_buff *skb)
+{
+	unsigned int next_head = (ring->head + 1) % ring->size;
+
+	if (next_head == ring->tail)
+		return -ENOMEM;
+
+	ring->buff[ring->head] = skb_get(skb);
+	ring->head = next_head;
+
+	return 0;
+}
+
+static int aq_ptp_skb_put(struct ptp_skb_ring *ring, struct sk_buff *skb)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&ring->lock, flags);
+	ret = __aq_ptp_skb_put(ring, skb);
+	spin_unlock_irqrestore(&ring->lock, flags);
+
+	return ret;
+}
+
+static struct sk_buff *__aq_ptp_skb_get(struct ptp_skb_ring *ring)
+{
+	struct sk_buff *skb;
+
+	if (ring->tail == ring->head)
+		return NULL;
+
+	skb = ring->buff[ring->tail];
+	ring->tail = (ring->tail + 1) % ring->size;
+
+	return skb;
+}
+
+static struct sk_buff *aq_ptp_skb_get(struct ptp_skb_ring *ring)
+{
+	unsigned long flags;
+	struct sk_buff *skb;
+
+	spin_lock_irqsave(&ring->lock, flags);
+	skb = __aq_ptp_skb_get(ring);
+	spin_unlock_irqrestore(&ring->lock, flags);
+
+	return skb;
+}
+
+static unsigned int aq_ptp_skb_buf_len(struct ptp_skb_ring *ring)
+{
+	unsigned long flags;
+	unsigned int len;
+
+	spin_lock_irqsave(&ring->lock, flags);
+	len = (ring->head >= ring->tail) ?
+	ring->head - ring->tail :
+	ring->size - ring->tail + ring->head;
+	spin_unlock_irqrestore(&ring->lock, flags);
+
+	return len;
+}
+
+static int aq_ptp_skb_ring_init(struct ptp_skb_ring *ring, unsigned int size)
+{
+	struct sk_buff **buff = kmalloc(sizeof(*buff) * size, GFP_KERNEL);
+
+	if (!buff)
+		return -ENOMEM;
+
+	spin_lock_init(&ring->lock);
+
+	ring->buff = buff;
+	ring->size = size;
+	ring->head = 0;
+	ring->tail = 0;
+
+	return 0;
+}
+
+static void aq_ptp_skb_ring_clean(struct ptp_skb_ring *ring)
+{
+	struct sk_buff *skb;
+
+	while ((skb = aq_ptp_skb_get(ring)) != NULL)
+		dev_kfree_skb_any(skb);
+}
+
+static void aq_ptp_skb_ring_release(struct ptp_skb_ring *ring)
+{
+	if (ring->buff) {
+		aq_ptp_skb_ring_clean(ring);
+		kfree(ring->buff);
+		ring->buff = NULL;
+	}
+}
+
+static void aq_ptp_tx_timeout_init(struct ptp_tx_timeout *timeout)
+{
+	spin_lock_init(&timeout->lock);
+	timeout->active = false;
+}
+
+static void aq_ptp_tx_timeout_start(struct aq_ptp_s *aq_ptp)
+{
+	struct ptp_tx_timeout *timeout = &aq_ptp->ptp_tx_timeout;
+	unsigned long flags;
+
+	spin_lock_irqsave(&timeout->lock, flags);
+	timeout->active = true;
+	timeout->tx_start = jiffies;
+	spin_unlock_irqrestore(&timeout->lock, flags);
+}
+
+static void aq_ptp_tx_timeout_update(struct aq_ptp_s *aq_ptp)
+{
+	if (!aq_ptp_skb_buf_len(&aq_ptp->skb_ring)) {
+		struct ptp_tx_timeout *timeout = &aq_ptp->ptp_tx_timeout;
+		unsigned long flags;
+
+		spin_lock_irqsave(&timeout->lock, flags);
+		timeout->active = false;
+		spin_unlock_irqrestore(&timeout->lock, flags);
+	}
+}
+
+static void aq_ptp_tx_timeout_check(struct aq_ptp_s *aq_ptp)
+{
+	struct ptp_tx_timeout *timeout = &aq_ptp->ptp_tx_timeout;
+	unsigned long flags;
+	bool timeout_flag;
+
+	timeout_flag = false;
+
+	spin_lock_irqsave(&timeout->lock, flags);
+	if (timeout->active) {
+		timeout_flag = time_is_before_jiffies(timeout->tx_start +
+						      AQ_PTP_TX_TIMEOUT);
+		/* reset active flag if timeout detected */
+		if (timeout_flag)
+			timeout->active = false;
+	}
+	spin_unlock_irqrestore(&timeout->lock, flags);
+
+	if (timeout_flag) {
+		aq_ptp_skb_ring_clean(&aq_ptp->skb_ring);
+		netdev_err(aq_ptp->aq_nic->ndev,
+			   "PTP Timeout. Clearing Tx Timestamp SKBs\n");
+	}
+}
+
+/* aq_ptp_adjfine
+ * @ptp: the ptp clock structure
+ * @ppb: parts per billion adjustment from base
+ *
+ * adjust the frequency of the ptp cycle counter by the
+ * indicated ppb from the base frequency.
+ */
+static int aq_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+
+	mutex_lock(&aq_nic->fwreq_mutex);
+	aq_nic->aq_hw_ops->hw_adj_clock_freq(aq_nic->aq_hw,
+					     scaled_ppm_to_ppb(scaled_ppm));
+	mutex_unlock(&aq_nic->fwreq_mutex);
+
+	return 0;
+}
+
+/* aq_ptp_adjtime
+ * @ptp: the ptp clock structure
+ * @delta: offset to adjust the cycle counter by
+ *
+ * adjust the timer by resetting the timecounter structure.
+ */
+static int aq_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	unsigned long flags;
+
+	spin_lock_irqsave(&aq_ptp->ptp_lock, flags);
+	aq_nic->aq_hw_ops->hw_adj_sys_clock(aq_nic->aq_hw, delta);
+	spin_unlock_irqrestore(&aq_ptp->ptp_lock, flags);
+
+	return 0;
+}
+
+/* aq_ptp_gettime
+ * @ptp: the ptp clock structure
+ * @ts: timespec structure to hold the current time value
+ *
+ * read the timecounter and return the correct value on ns,
+ * after converting it into a struct timespec.
+ */
+static int aq_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+	struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	unsigned long flags;
+	u64 ns;
+
+	spin_lock_irqsave(&aq_ptp->ptp_lock, flags);
+	aq_nic->aq_hw_ops->hw_get_ptp_ts(aq_nic->aq_hw, &ns);
+	spin_unlock_irqrestore(&aq_ptp->ptp_lock, flags);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+/* aq_ptp_settime
+ * @ptp: the ptp clock structure
+ * @ts: the timespec containing the new time for the cycle counter
+ *
+ * reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ */
+static int aq_ptp_settime(struct ptp_clock_info *ptp,
+			  const struct timespec64 *ts)
+{
+	struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	unsigned long flags;
+	u64 ns = timespec64_to_ns(ts);
+	u64 now;
+
+	spin_lock_irqsave(&aq_ptp->ptp_lock, flags);
+	aq_nic->aq_hw_ops->hw_get_ptp_ts(aq_nic->aq_hw, &now);
+	aq_nic->aq_hw_ops->hw_adj_sys_clock(aq_nic->aq_hw, (s64)ns - (s64)now);
+
+	spin_unlock_irqrestore(&aq_ptp->ptp_lock, flags);
+
+	return 0;
+}
+
+static void aq_ptp_convert_to_hwtstamp(struct aq_ptp_s *aq_ptp,
+				       struct skb_shared_hwtstamps *hwtstamp,
+				       u64 timestamp)
+{
+	memset(hwtstamp, 0, sizeof(*hwtstamp));
+	hwtstamp->hwtstamp = ns_to_ktime(timestamp);
+}
+
+static int aq_ptp_hw_pin_conf(struct aq_nic_s *aq_nic, u32 pin_index, u64 start,
+			      u64 period)
+{
+	if (period)
+		netdev_dbg(aq_nic->ndev,
+			   "Enable GPIO %d pulsing, start time %llu, period %u\n",
+			   pin_index, start, (u32)period);
+	else
+		netdev_dbg(aq_nic->ndev,
+			   "Disable GPIO %d pulsing, start time %llu, period %u\n",
+			   pin_index, start, (u32)period);
+
+	/* Notify hardware of request to being sending pulses.
+	 * If period is ZERO then pulsen is disabled.
+	 */
+	mutex_lock(&aq_nic->fwreq_mutex);
+	aq_nic->aq_hw_ops->hw_gpio_pulse(aq_nic->aq_hw, pin_index,
+					 start, (u32)period);
+	mutex_unlock(&aq_nic->fwreq_mutex);
+
+	return 0;
+}
+
+static int aq_ptp_perout_pin_configure(struct ptp_clock_info *ptp,
+				       struct ptp_clock_request *rq, int on)
+{
+	struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+	struct ptp_clock_time *t = &rq->perout.period;
+	struct ptp_clock_time *s = &rq->perout.start;
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	u64 start, period;
+	u32 pin_index = rq->perout.index;
+
+	/* verify the request channel is there */
+	if (pin_index >= ptp->n_per_out)
+		return -EINVAL;
+
+	/* we cannot support periods greater
+	 * than 4 seconds due to reg limit
+	 */
+	if (t->sec > 4 || t->sec < 0)
+		return -ERANGE;
+
+	/* convert to unsigned 64b ns,
+	 * verify we can put it in a 32b register
+	 */
+	period = on ? t->sec * NSEC_PER_SEC + t->nsec : 0;
+
+	/* verify the value is in range supported by hardware */
+	if (period > U32_MAX)
+		return -ERANGE;
+	/* convert to unsigned 64b ns */
+	/* TODO convert to AQ time */
+	start = on ? s->sec * NSEC_PER_SEC + s->nsec : 0;
+
+	aq_ptp_hw_pin_conf(aq_nic, pin_index, start, period);
+
+	return 0;
+}
+
+static int aq_ptp_pps_pin_configure(struct ptp_clock_info *ptp,
+				    struct ptp_clock_request *rq, int on)
+{
+	struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	u64 start, period;
+	u32 pin_index = 0;
+	u32 rest = 0;
+
+	/* verify the request channel is there */
+	if (pin_index >= ptp->n_per_out)
+		return -EINVAL;
+
+	aq_nic->aq_hw_ops->hw_get_ptp_ts(aq_nic->aq_hw, &start);
+	div_u64_rem(start, NSEC_PER_SEC, &rest);
+	period = on ? NSEC_PER_SEC : 0; /* PPS - pulse per second */
+	start = on ? start - rest + NSEC_PER_SEC *
+		(rest > 990000000LL ? 2 : 1) : 0;
+
+	aq_ptp_hw_pin_conf(aq_nic, pin_index, start, period);
+
+	return 0;
+}
+
+static void aq_ptp_extts_pin_ctrl(struct aq_ptp_s *aq_ptp)
+{
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	u32 enable = aq_ptp->extts_pin_enabled;
+
+	if (aq_nic->aq_hw_ops->hw_extts_gpio_enable)
+		aq_nic->aq_hw_ops->hw_extts_gpio_enable(aq_nic->aq_hw, 0,
+							enable);
+}
+
+static int aq_ptp_extts_pin_configure(struct ptp_clock_info *ptp,
+				      struct ptp_clock_request *rq, int on)
+{
+	struct aq_ptp_s *aq_ptp = container_of(ptp, struct aq_ptp_s, ptp_info);
+
+	u32 pin_index = rq->extts.index;
+
+	if (pin_index >= ptp->n_ext_ts)
+		return -EINVAL;
+
+	aq_ptp->extts_pin_enabled = !!on;
+	if (on) {
+		aq_ptp->poll_timeout_ms = POLL_SYNC_TIMER_MS;
+		cancel_delayed_work_sync(&aq_ptp->poll_sync);
+		schedule_delayed_work(&aq_ptp->poll_sync,
+				      msecs_to_jiffies(aq_ptp->poll_timeout_ms));
+	}
+
+	aq_ptp_extts_pin_ctrl(aq_ptp);
+	return 0;
+}
+
+/* aq_ptp_gpio_feature_enable
+ * @ptp: the ptp clock structure
+ * @rq: the requested feature to change
+ * @on: whether to enable or disable the feature
+ */
+static int aq_ptp_gpio_feature_enable(struct ptp_clock_info *ptp,
+				      struct ptp_clock_request *rq, int on)
+{
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		return aq_ptp_extts_pin_configure(ptp, rq, on);
+	case PTP_CLK_REQ_PEROUT:
+		return aq_ptp_perout_pin_configure(ptp, rq, on);
+	case PTP_CLK_REQ_PPS:
+		return aq_ptp_pps_pin_configure(ptp, rq, on);
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/* aq_ptp_verify
+ * @ptp: the ptp clock structure
+ * @pin: index of the pin in question
+ * @func: the desired function to use
+ * @chan: the function channel index to use
+ */
+static int aq_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+			 enum ptp_pin_function func, unsigned int chan)
+{
+	/* verify the requested pin is there */
+	if (!ptp->pin_config || pin >= ptp->n_pins)
+		return -EINVAL;
+
+	/* enforce locked channels, no changing them */
+	if (chan != ptp->pin_config[pin].chan)
+		return -EINVAL;
+
+	/* we want to keep the functions locked as well */
+	if (func != ptp->pin_config[pin].func)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* aq_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: the private adapter struct
+ *
+ * if the timestamp is valid, we convert it into the timecounter ns
+ * value, then store that result into the hwtstamps structure which
+ * is passed up the network stack
+ */
+void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	struct sk_buff *skb = aq_ptp_skb_get(&aq_ptp->skb_ring);
+	struct skb_shared_hwtstamps hwtstamp;
+
+	if (!skb) {
+		netdev_err(aq_nic->ndev, "have timestamp but tx_queues empty\n");
+		return;
+	}
+
+	timestamp += atomic_read(&aq_ptp->offset_egress);
+	aq_ptp_convert_to_hwtstamp(aq_ptp, &hwtstamp, timestamp);
+	skb_tstamp_tx(skb, &hwtstamp);
+	dev_kfree_skb_any(skb);
+
+	aq_ptp_tx_timeout_update(aq_ptp);
+}
+
+/* aq_ptp_rx_hwtstamp - utility function which checks for RX time stamp
+ * @adapter: pointer to adapter struct
+ * @skb: particular skb to send timestamp with
+ *
+ * if the timestamp is valid, we convert it into the timecounter ns
+ * value, then store that result into the hwtstamps structure which
+ * is passed up the network stack
+ */
+static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct sk_buff *skb,
+			       u64 timestamp)
+{
+	timestamp -= atomic_read(&aq_ptp->offset_ingress);
+	aq_ptp_convert_to_hwtstamp(aq_ptp, skb_hwtstamps(skb), timestamp);
+}
+
+void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp,
+				struct hwtstamp_config *config)
+{
+	*config = aq_ptp->hwtstamp_config;
+}
+
+static void aq_ptp_prepare_filters(struct aq_ptp_s *aq_ptp)
+{
+	aq_ptp->udp_filter.cmd = HW_ATL_RX_ENABLE_FLTR_L3L4 |
+			       HW_ATL_RX_ENABLE_CMP_PROT_L4 |
+			       HW_ATL_RX_UDP |
+			       HW_ATL_RX_ENABLE_CMP_DEST_PORT_L4 |
+			       HW_ATL_RX_HOST << HW_ATL_RX_ACTION_FL3F4_SHIFT |
+			       HW_ATL_RX_ENABLE_QUEUE_L3L4 |
+			       aq_ptp->ptp_rx.idx << HW_ATL_RX_QUEUE_FL3L4_SHIFT;
+	aq_ptp->udp_filter.p_dst = PTP_EV_PORT;
+
+	aq_ptp->eth_type_filter.ethertype = ETH_P_1588;
+	aq_ptp->eth_type_filter.queue = aq_ptp->ptp_rx.idx;
+}
+
+int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp,
+			       struct hwtstamp_config *config)
+{
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	const struct aq_hw_ops *hw_ops;
+	int err = 0;
+
+	hw_ops = aq_nic->aq_hw_ops;
+	if (config->tx_type == HWTSTAMP_TX_ON ||
+	    config->rx_filter == HWTSTAMP_FILTER_PTP_V2_EVENT) {
+		aq_ptp_prepare_filters(aq_ptp);
+		if (hw_ops->hw_filter_l3l4_set) {
+			err = hw_ops->hw_filter_l3l4_set(aq_nic->aq_hw,
+							 &aq_ptp->udp_filter);
+		}
+		if (!err && hw_ops->hw_filter_l2_set) {
+			err = hw_ops->hw_filter_l2_set(aq_nic->aq_hw,
+						       &aq_ptp->eth_type_filter);
+		}
+		aq_utils_obj_set(&aq_nic->flags, AQ_NIC_PTP_DPATH_UP);
+	} else {
+		aq_ptp->udp_filter.cmd &= ~HW_ATL_RX_ENABLE_FLTR_L3L4;
+		if (hw_ops->hw_filter_l3l4_set) {
+			err = hw_ops->hw_filter_l3l4_set(aq_nic->aq_hw,
+							 &aq_ptp->udp_filter);
+		}
+		if (!err && hw_ops->hw_filter_l2_clear) {
+			err = hw_ops->hw_filter_l2_clear(aq_nic->aq_hw,
+							&aq_ptp->eth_type_filter);
+		}
+		aq_utils_obj_clear(&aq_nic->flags, AQ_NIC_PTP_DPATH_UP);
+	}
+
+	if (err)
+		return -EREMOTEIO;
+
+	aq_ptp->hwtstamp_config = *config;
+
+	return 0;
+}
+
+bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+	if (!aq_ptp)
+		return false;
+
+	return &aq_ptp->ptp_tx == ring ||
+	       &aq_ptp->ptp_rx == ring || &aq_ptp->hwts_rx == ring;
+}
+
+u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p,
+		      unsigned int len)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	u64 timestamp = 0;
+	u16 ret = aq_nic->aq_hw_ops->rx_extract_ts(aq_nic->aq_hw,
+						   p, len, &timestamp);
+
+	if (ret > 0)
+		aq_ptp_rx_hwtstamp(aq_ptp, skb, timestamp);
+
+	return ret;
+}
+
+static int aq_ptp_poll(struct napi_struct *napi, int budget)
+{
+	struct aq_ptp_s *aq_ptp = container_of(napi, struct aq_ptp_s, napi);
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	bool was_cleaned = false;
+	int work_done = 0;
+	int err;
+
+	/* Processing PTP TX traffic */
+	err = aq_nic->aq_hw_ops->hw_ring_tx_head_update(aq_nic->aq_hw,
+							&aq_ptp->ptp_tx);
+	if (err < 0)
+		goto err_exit;
+
+	if (aq_ptp->ptp_tx.sw_head != aq_ptp->ptp_tx.hw_head) {
+		aq_ring_tx_clean(&aq_ptp->ptp_tx);
+
+		was_cleaned = true;
+	}
+
+	/* Processing HW_TIMESTAMP RX traffic */
+	err = aq_nic->aq_hw_ops->hw_ring_hwts_rx_receive(aq_nic->aq_hw,
+							 &aq_ptp->hwts_rx);
+	if (err < 0)
+		goto err_exit;
+
+	if (aq_ptp->hwts_rx.sw_head != aq_ptp->hwts_rx.hw_head) {
+		aq_ring_hwts_rx_clean(&aq_ptp->hwts_rx, aq_nic);
+
+		err = aq_nic->aq_hw_ops->hw_ring_hwts_rx_fill(aq_nic->aq_hw,
+							      &aq_ptp->hwts_rx);
+		if (err < 0)
+			goto err_exit;
+
+		was_cleaned = true;
+	}
+
+	/* Processing PTP RX traffic */
+	err = aq_nic->aq_hw_ops->hw_ring_rx_receive(aq_nic->aq_hw,
+						    &aq_ptp->ptp_rx);
+	if (err < 0)
+		goto err_exit;
+
+	if (aq_ptp->ptp_rx.sw_head != aq_ptp->ptp_rx.hw_head) {
+		unsigned int sw_tail_old;
+
+		err = aq_ring_rx_clean(&aq_ptp->ptp_rx, napi, &work_done, budget);
+		if (err < 0)
+			goto err_exit;
+
+		sw_tail_old = aq_ptp->ptp_rx.sw_tail;
+		err = aq_ring_rx_fill(&aq_ptp->ptp_rx);
+		if (err < 0)
+			goto err_exit;
+
+		err = aq_nic->aq_hw_ops->hw_ring_rx_fill(aq_nic->aq_hw,
+							 &aq_ptp->ptp_rx,
+							 sw_tail_old);
+		if (err < 0)
+			goto err_exit;
+	}
+
+	if (was_cleaned)
+		work_done = budget;
+
+	if (work_done < budget) {
+		napi_complete_done(napi, work_done);
+		aq_nic->aq_hw_ops->hw_irq_enable(aq_nic->aq_hw,
+					BIT_ULL(aq_ptp->ptp_ring_param.vec_idx));
+	}
+
+err_exit:
+	return work_done;
+}
+
+static irqreturn_t aq_ptp_isr(int irq, void *private)
+{
+	struct aq_ptp_s *aq_ptp = private;
+	int err = 0;
+
+	if (!aq_ptp) {
+		err = -EINVAL;
+		goto err_exit;
+	}
+	napi_schedule(&aq_ptp->napi);
+
+err_exit:
+	return err >= 0 ? IRQ_HANDLED : IRQ_NONE;
+}
+
+int aq_ptp_xmit(struct aq_nic_s *aq_nic, struct sk_buff *skb)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	struct aq_ring_s *ring = &aq_ptp->ptp_tx;
+	unsigned long irq_flags;
+	int err = NETDEV_TX_OK;
+	unsigned int frags;
+
+	if (skb->len <= 0) {
+		dev_kfree_skb_any(skb);
+		goto err_exit;
+	}
+
+	frags = skb_shinfo(skb)->nr_frags + 1;
+	/* Frags cannot be bigger 16KB
+	 * because PTP usually works
+	 * without Jumbo even in a background
+	 */
+	if (frags > AQ_CFG_SKB_FRAGS_MAX || frags > aq_ring_avail_dx(ring)) {
+		/* Drop packet because it doesn't make sence to delay it */
+		dev_kfree_skb_any(skb);
+		goto err_exit;
+	}
+
+	err = aq_ptp_skb_put(&aq_ptp->skb_ring, skb);
+	if (err) {
+		netdev_err(aq_nic->ndev, "SKB Ring is overflow (%u)!\n",
+			   ring->size);
+		return NETDEV_TX_BUSY;
+	}
+	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+	aq_ptp_tx_timeout_start(aq_ptp);
+	skb_tx_timestamp(skb);
+
+	spin_lock_irqsave(&aq_nic->aq_ptp->ptp_ring_lock, irq_flags);
+	frags = aq_nic_map_skb(aq_nic, skb, ring);
+
+	if (likely(frags)) {
+		err = aq_nic->aq_hw_ops->hw_ring_tx_xmit(aq_nic->aq_hw,
+						       ring, frags);
+		if (err >= 0) {
+			++ring->stats.tx.packets;
+			ring->stats.tx.bytes += skb->len;
+		}
+	} else {
+		err = NETDEV_TX_BUSY;
+	}
+	spin_unlock_irqrestore(&aq_nic->aq_ptp->ptp_ring_lock, irq_flags);
+
+err_exit:
+	return err;
+}
+
+void aq_ptp_service_task(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+	if (!aq_ptp)
+		return;
+
+	aq_ptp_tx_timeout_check(aq_ptp);
+}
+
+int aq_ptp_irq_alloc(struct aq_nic_s *aq_nic)
+{
+	struct pci_dev *pdev = aq_nic->pdev;
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	int err = 0;
+
+	if (!aq_ptp)
+		return 0;
+
+	if (pdev->msix_enabled || pdev->msi_enabled) {
+		err = request_irq(pci_irq_vector(pdev, aq_ptp->idx_vector),
+				  aq_ptp_isr, 0, aq_nic->ndev->name, aq_ptp);
+	} else {
+		err = -EINVAL;
+		goto err_exit;
+	}
+
+err_exit:
+	return err;
+}
+
+void aq_ptp_irq_free(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	struct pci_dev *pdev = aq_nic->pdev;
+
+	if (!aq_ptp)
+		return;
+
+	free_irq(pci_irq_vector(pdev, aq_ptp->idx_vector), aq_ptp);
+}
+
+int aq_ptp_ring_init(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	int err = 0;
+
+	if (!aq_ptp)
+		return 0;
+
+	err = aq_ring_init(&aq_ptp->ptp_tx);
+	if (err < 0)
+		goto err_exit;
+	err = aq_nic->aq_hw_ops->hw_ring_tx_init(aq_nic->aq_hw,
+						 &aq_ptp->ptp_tx,
+						 &aq_ptp->ptp_ring_param);
+	if (err < 0)
+		goto err_exit;
+
+	err = aq_ring_init(&aq_ptp->ptp_rx);
+	if (err < 0)
+		goto err_exit;
+	err = aq_nic->aq_hw_ops->hw_ring_rx_init(aq_nic->aq_hw,
+						 &aq_ptp->ptp_rx,
+						 &aq_ptp->ptp_ring_param);
+	if (err < 0)
+		goto err_exit;
+
+	err = aq_ring_rx_fill(&aq_ptp->ptp_rx);
+	if (err < 0)
+		goto err_rx_free;
+	err = aq_nic->aq_hw_ops->hw_ring_rx_fill(aq_nic->aq_hw,
+						 &aq_ptp->ptp_rx,
+						 0U);
+	if (err < 0)
+		goto err_rx_free;
+
+	err = aq_ring_init(&aq_ptp->hwts_rx);
+	if (err < 0)
+		goto err_rx_free;
+	err = aq_nic->aq_hw_ops->hw_ring_rx_init(aq_nic->aq_hw,
+						 &aq_ptp->hwts_rx,
+						 &aq_ptp->ptp_ring_param);
+	if (err < 0)
+		goto err_exit;
+	err = aq_nic->aq_hw_ops->hw_ring_hwts_rx_fill(aq_nic->aq_hw,
+						      &aq_ptp->hwts_rx);
+	if (err < 0)
+		goto err_exit;
+
+	return err;
+
+err_rx_free:
+	aq_ring_rx_deinit(&aq_ptp->ptp_rx);
+err_exit:
+	return err;
+}
+
+int aq_ptp_ring_start(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	int err = 0;
+
+	if (!aq_ptp)
+		return 0;
+
+	err = aq_nic->aq_hw_ops->hw_ring_tx_start(aq_nic->aq_hw, &aq_ptp->ptp_tx);
+	if (err < 0)
+		goto err_exit;
+
+	err = aq_nic->aq_hw_ops->hw_ring_rx_start(aq_nic->aq_hw, &aq_ptp->ptp_rx);
+	if (err < 0)
+		goto err_exit;
+
+	err = aq_nic->aq_hw_ops->hw_ring_rx_start(aq_nic->aq_hw,
+						  &aq_ptp->hwts_rx);
+	if (err < 0)
+		goto err_exit;
+
+	napi_enable(&aq_ptp->napi);
+
+err_exit:
+	return err;
+}
+
+void aq_ptp_ring_stop(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+	if (!aq_ptp)
+		return;
+
+	aq_nic->aq_hw_ops->hw_ring_tx_stop(aq_nic->aq_hw, &aq_ptp->ptp_tx);
+	aq_nic->aq_hw_ops->hw_ring_rx_stop(aq_nic->aq_hw, &aq_ptp->ptp_rx);
+
+	aq_nic->aq_hw_ops->hw_ring_rx_stop(aq_nic->aq_hw, &aq_ptp->hwts_rx);
+
+	napi_disable(&aq_ptp->napi);
+}
+
+void aq_ptp_ring_deinit(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+	if (!aq_ptp || !aq_ptp->ptp_tx.aq_nic || !aq_ptp->ptp_rx.aq_nic)
+		return;
+
+	aq_ring_tx_clean(&aq_ptp->ptp_tx);
+	aq_ring_rx_deinit(&aq_ptp->ptp_rx);
+}
+
+#define PTP_8TC_RING_IDX             8
+#define PTP_4TC_RING_IDX            16
+#define PTP_HWST_RING_IDX           31
+
+int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	unsigned int tx_ring_idx, rx_ring_idx;
+	struct aq_ring_s *hwts;
+	u32 tx_tc_mode, rx_tc_mode;
+	struct aq_ring_s *ring;
+	int err;
+
+	if (!aq_ptp)
+		return 0;
+
+	/* Index must to be 8 (8 TCs) or 16 (4 TCs).
+	 * It depends from Traffic Class mode.
+	 */
+	aq_nic->aq_hw_ops->hw_tx_tc_mode_get(aq_nic->aq_hw, &tx_tc_mode);
+	if (tx_tc_mode == 0)
+		tx_ring_idx = PTP_8TC_RING_IDX;
+	else
+		tx_ring_idx = PTP_4TC_RING_IDX;
+
+	ring = aq_ring_tx_alloc(&aq_ptp->ptp_tx, aq_nic,
+				tx_ring_idx, &aq_nic->aq_nic_cfg);
+	if (!ring) {
+		err = -ENOMEM;
+		goto err_exit;
+	}
+
+	aq_nic->aq_hw_ops->hw_rx_tc_mode_get(aq_nic->aq_hw, &rx_tc_mode);
+	if (rx_tc_mode == 0)
+		rx_ring_idx = PTP_8TC_RING_IDX;
+	else
+		rx_ring_idx = PTP_4TC_RING_IDX;
+
+	ring = aq_ring_rx_alloc(&aq_ptp->ptp_rx, aq_nic,
+				rx_ring_idx, &aq_nic->aq_nic_cfg);
+	if (!ring) {
+		err = -ENOMEM;
+		goto err_exit_ptp_tx;
+	}
+
+	hwts = aq_ring_hwts_rx_alloc(&aq_ptp->hwts_rx, aq_nic, PTP_HWST_RING_IDX,
+				     aq_nic->aq_nic_cfg.rxds,
+				     aq_nic->aq_nic_cfg.aq_hw_caps->rxd_size);
+	if (!hwts) {
+		err = -ENOMEM;
+		goto err_exit_ptp_rx;
+	}
+
+	err = aq_ptp_skb_ring_init(&aq_ptp->skb_ring, aq_nic->aq_nic_cfg.rxds);
+	if (err != 0) {
+		err = -ENOMEM;
+		goto err_exit_hwts_rx;
+	}
+
+	aq_ptp->ptp_ring_param.vec_idx = aq_ptp->idx_vector;
+	aq_ptp->ptp_ring_param.cpu = aq_ptp->ptp_ring_param.vec_idx +
+			aq_nic_get_cfg(aq_nic)->aq_rss.base_cpu_number;
+	cpumask_set_cpu(aq_ptp->ptp_ring_param.cpu,
+			&aq_ptp->ptp_ring_param.affinity_mask);
+
+	return 0;
+
+err_exit_hwts_rx:
+	aq_ring_free(&aq_ptp->hwts_rx);
+err_exit_ptp_rx:
+	aq_ring_free(&aq_ptp->ptp_rx);
+err_exit_ptp_tx:
+	aq_ring_free(&aq_ptp->ptp_tx);
+err_exit:
+	return err;
+}
+
+void aq_ptp_ring_free(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+	if (!aq_ptp)
+		return;
+
+	aq_ring_free(&aq_ptp->ptp_tx);
+	aq_ring_free(&aq_ptp->ptp_rx);
+	aq_ring_free(&aq_ptp->hwts_rx);
+
+	aq_ptp_skb_ring_release(&aq_ptp->skb_ring);
+}
+
+#define MAX_PTP_GPIO_COUNT 4
+
+static struct ptp_clock_info aq_ptp_clock = {
+	.owner		= THIS_MODULE,
+	.name		= "atlantic ptp",
+	.max_adj	= 999999999,
+	.n_ext_ts	= 0,
+	.pps		= 0,
+	.adjfine	= aq_ptp_adjfine,
+	.adjtime	= aq_ptp_adjtime,
+	.gettime64	= aq_ptp_gettime,
+	.settime64	= aq_ptp_settime,
+	.n_per_out	= 0,
+	.enable		= aq_ptp_gpio_feature_enable,
+	.n_pins		= 0,
+	.verify		= aq_ptp_verify,
+	.pin_config	= NULL,
+};
+
+#define ptp_offset_init(__idx, __mbps, __egress, __ingress)   do { \
+		ptp_offset[__idx].mbps = (__mbps); \
+		ptp_offset[__idx].egress = (__egress); \
+		ptp_offset[__idx].ingress = (__ingress); } \
+		while (0)
+
+static void aq_ptp_offset_init_from_fw(const struct hw_atl_ptp_offset *offsets)
+{
+	int i;
+
+	/* Load offsets for PTP */
+	for (i = 0; i < ARRAY_SIZE(ptp_offset); i++) {
+		switch (i) {
+		/* 100M */
+		case ptp_offset_idx_100:
+			ptp_offset_init(i, 100,
+					offsets->egress_100,
+					offsets->ingress_100);
+			break;
+		/* 1G */
+		case ptp_offset_idx_1000:
+			ptp_offset_init(i, 1000,
+					offsets->egress_1000,
+					offsets->ingress_1000);
+			break;
+		/* 2.5G */
+		case ptp_offset_idx_2500:
+			ptp_offset_init(i, 2500,
+					offsets->egress_2500,
+					offsets->ingress_2500);
+			break;
+		/* 5G */
+		case ptp_offset_idx_5000:
+			ptp_offset_init(i, 5000,
+					offsets->egress_5000,
+					offsets->ingress_5000);
+			break;
+		/* 10G */
+		case ptp_offset_idx_10000:
+			ptp_offset_init(i, 10000,
+					offsets->egress_10000,
+					offsets->ingress_10000);
+			break;
+		}
+	}
+}
+
+static void aq_ptp_offset_init(const struct hw_atl_ptp_offset *offsets)
+{
+	memset(ptp_offset, 0, sizeof(ptp_offset));
+
+	aq_ptp_offset_init_from_fw(offsets);
+}
+
+static void aq_ptp_gpio_init(struct ptp_clock_info *info,
+			     struct hw_atl_info *hw_info)
+{
+	struct ptp_pin_desc pin_desc[MAX_PTP_GPIO_COUNT];
+	u32 extts_pin_cnt = 0;
+	u32 out_pin_cnt = 0;
+	u32 i;
+
+	memset(pin_desc, 0, sizeof(pin_desc));
+
+	for (i = 0; i < MAX_PTP_GPIO_COUNT - 1; i++) {
+		if (hw_info->gpio_pin[i] ==
+		    (GPIO_PIN_FUNCTION_PTP0 + out_pin_cnt)) {
+			snprintf(pin_desc[out_pin_cnt].name,
+				 sizeof(pin_desc[out_pin_cnt].name),
+				 "AQ_GPIO%d", i);
+			pin_desc[out_pin_cnt].index = out_pin_cnt;
+			pin_desc[out_pin_cnt].chan = out_pin_cnt;
+			pin_desc[out_pin_cnt++].func = PTP_PF_PEROUT;
+		}
+	}
+
+	info->n_per_out = out_pin_cnt;
+
+	if (hw_info->caps_ex & BIT(CAPS_EX_PHY_CTRL_TS_PIN)) {
+		extts_pin_cnt += 1;
+
+		snprintf(pin_desc[out_pin_cnt].name,
+			 sizeof(pin_desc[out_pin_cnt].name),
+			  "AQ_GPIO%d", out_pin_cnt);
+		pin_desc[out_pin_cnt].index = out_pin_cnt;
+		pin_desc[out_pin_cnt].chan = 0;
+		pin_desc[out_pin_cnt].func = PTP_PF_EXTTS;
+	}
+
+	info->n_pins = out_pin_cnt + extts_pin_cnt;
+	info->n_ext_ts = extts_pin_cnt;
+
+	if (!info->n_pins)
+		return;
+
+	info->pin_config = kcalloc(info->n_pins, sizeof(struct ptp_pin_desc),
+				   GFP_KERNEL);
+
+	if (!info->pin_config)
+		return;
+
+	memcpy(info->pin_config, &pin_desc,
+	       sizeof(struct ptp_pin_desc) * info->n_pins);
+}
+
+void aq_ptp_clock_init(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+	struct timespec64 ts;
+
+	ktime_get_real_ts64(&ts);
+	aq_ptp_settime(&aq_ptp->ptp_info, &ts);
+}
+
+static void aq_ptp_poll_sync_work_cb(struct work_struct *w);
+
+int aq_ptp_init(struct aq_nic_s *aq_nic, unsigned int idx_vec)
+{
+	struct hw_atl_utils_mbox mbox;
+	struct ptp_clock *clock;
+	struct aq_ptp_s *aq_ptp;
+	int err = 0;
+
+	if (!aq_nic->aq_hw_ops->hw_get_ptp_ts) {
+		aq_nic->aq_ptp = NULL;
+		return 0;
+	}
+
+	if (!aq_nic->aq_fw_ops->enable_ptp) {
+		aq_nic->aq_ptp = NULL;
+		return 0;
+	}
+
+	hw_atl_utils_mpi_read_stats(aq_nic->aq_hw, &mbox);
+
+	if (!(mbox.info.caps_ex & BIT(CAPS_EX_PHY_PTP_EN))) {
+		aq_nic->aq_ptp = NULL;
+		return 0;
+	}
+
+	aq_ptp_offset_init(&mbox.info.ptp_offset);
+
+	aq_ptp = kzalloc(sizeof(*aq_ptp), GFP_KERNEL);
+	if (!aq_ptp) {
+		err = -ENOMEM;
+		goto err_exit;
+	}
+
+	aq_ptp->aq_nic = aq_nic;
+
+	spin_lock_init(&aq_ptp->ptp_lock);
+	spin_lock_init(&aq_ptp->ptp_ring_lock);
+
+	aq_ptp->ptp_info = aq_ptp_clock;
+	aq_ptp_gpio_init(&aq_ptp->ptp_info, &mbox.info);
+	clock = ptp_clock_register(&aq_ptp->ptp_info, &aq_nic->ndev->dev);
+	if (IS_ERR(clock)) {
+		netdev_err(aq_nic->ndev, "ptp_clock_register failed\n");
+		err = PTR_ERR(clock);
+		goto err_exit;
+	}
+	aq_ptp->ptp_clock = clock;
+	aq_ptp_tx_timeout_init(&aq_ptp->ptp_tx_timeout);
+
+	atomic_set(&aq_ptp->offset_egress, 0);
+	atomic_set(&aq_ptp->offset_ingress, 0);
+
+	netif_napi_add(aq_nic_get_ndev(aq_nic), &aq_ptp->napi,
+		       aq_ptp_poll, AQ_CFG_NAPI_WEIGHT);
+
+	aq_ptp->idx_vector = idx_vec;
+
+	aq_nic->aq_ptp = aq_ptp;
+
+	/* enable ptp counter */
+	aq_utils_obj_set(&aq_nic->aq_hw->flags, AQ_HW_PTP_AVAILABLE);
+	mutex_lock(&aq_nic->fwreq_mutex);
+	aq_nic->aq_fw_ops->enable_ptp(aq_nic->aq_hw, 1);
+	aq_ptp_clock_init(aq_nic);
+	mutex_unlock(&aq_nic->fwreq_mutex);
+
+	INIT_DELAYED_WORK(&aq_ptp->poll_sync, &aq_ptp_poll_sync_work_cb);
+	aq_ptp->eth_type_filter.location =
+			aq_nic_reserve_filter(aq_nic, aq_rx_filter_ethertype);
+	aq_ptp->udp_filter.location =
+			aq_nic_reserve_filter(aq_nic, aq_rx_filter_l3l4);
+
+	return 0;
+
+err_exit:
+	if (aq_ptp)
+		kfree(aq_ptp->ptp_info.pin_config);
+	kfree(aq_ptp);
+	aq_nic->aq_ptp = NULL;
+	return err;
+}
+
+void aq_ptp_unregister(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+	if (!aq_ptp)
+		return;
+
+	ptp_clock_unregister(aq_ptp->ptp_clock);
+}
+
+void aq_ptp_free(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+	if (!aq_ptp)
+		return;
+
+	aq_nic_release_filter(aq_nic, aq_rx_filter_ethertype,
+			      aq_ptp->eth_type_filter.location);
+	aq_nic_release_filter(aq_nic, aq_rx_filter_l3l4,
+			      aq_ptp->udp_filter.location);
+	cancel_delayed_work_sync(&aq_ptp->poll_sync);
+	/* disable ptp */
+	mutex_lock(&aq_nic->fwreq_mutex);
+	aq_nic->aq_fw_ops->enable_ptp(aq_nic->aq_hw, 0);
+	mutex_unlock(&aq_nic->fwreq_mutex);
+
+	kfree(aq_ptp->ptp_info.pin_config);
+
+	netif_napi_del(&aq_ptp->napi);
+	kfree(aq_ptp);
+	aq_nic->aq_ptp = NULL;
+}
+
+struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp)
+{
+	return aq_ptp->ptp_clock;
+}
+
+/* PTP external GPIO nanoseconds count */
+static uint64_t aq_ptp_get_sync1588_ts(struct aq_nic_s *aq_nic)
+{
+	u64 ts = 0;
+
+	if (aq_nic->aq_hw_ops->hw_get_sync_ts)
+		aq_nic->aq_hw_ops->hw_get_sync_ts(aq_nic->aq_hw, &ts);
+
+	return ts;
+}
+
+static void aq_ptp_start_work(struct aq_ptp_s *aq_ptp)
+{
+	if (aq_ptp->extts_pin_enabled) {
+		aq_ptp->poll_timeout_ms = POLL_SYNC_TIMER_MS;
+		aq_ptp->last_sync1588_ts =
+				aq_ptp_get_sync1588_ts(aq_ptp->aq_nic);
+		schedule_delayed_work(&aq_ptp->poll_sync,
+				      msecs_to_jiffies(aq_ptp->poll_timeout_ms));
+	}
+}
+
+int aq_ptp_link_change(struct aq_nic_s *aq_nic)
+{
+	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
+
+	if (!aq_ptp)
+		return 0;
+
+	if (aq_nic->aq_hw->aq_link_status.mbps)
+		aq_ptp_start_work(aq_ptp);
+	else
+		cancel_delayed_work_sync(&aq_ptp->poll_sync);
+
+	return 0;
+}
+
+static bool aq_ptp_sync_ts_updated(struct aq_ptp_s *aq_ptp, u64 *new_ts)
+{
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	u64 sync_ts2;
+	u64 sync_ts;
+
+	sync_ts = aq_ptp_get_sync1588_ts(aq_nic);
+
+	if (sync_ts != aq_ptp->last_sync1588_ts) {
+		sync_ts2 = aq_ptp_get_sync1588_ts(aq_nic);
+		if (sync_ts != sync_ts2) {
+			sync_ts = sync_ts2;
+			sync_ts2 = aq_ptp_get_sync1588_ts(aq_nic);
+			if (sync_ts != sync_ts2) {
+				netdev_err(aq_nic->ndev,
+					   "%s: Unable to get correct GPIO TS",
+					   __func__);
+				sync_ts = 0;
+			}
+		}
+
+		*new_ts = sync_ts;
+		return true;
+	}
+	return false;
+}
+
+static int aq_ptp_check_sync1588(struct aq_ptp_s *aq_ptp)
+{
+	struct aq_nic_s *aq_nic = aq_ptp->aq_nic;
+	u64 sync_ts;
+
+	 /* Sync1588 pin was triggered */
+	if (aq_ptp_sync_ts_updated(aq_ptp, &sync_ts)) {
+		if (aq_ptp->extts_pin_enabled) {
+			struct ptp_clock_event ptp_event;
+			u64 time = 0;
+
+			aq_nic->aq_hw_ops->hw_ts_to_sys_clock(aq_nic->aq_hw,
+							      sync_ts, &time);
+			ptp_event.index = aq_ptp->ptp_info.n_pins - 1;
+			ptp_event.timestamp = time;
+
+			ptp_event.type = PTP_CLOCK_EXTTS;
+			ptp_clock_event(aq_ptp->ptp_clock, &ptp_event);
+		}
+
+		aq_ptp->last_sync1588_ts = sync_ts;
+	}
+
+	return 0;
+}
+
+static void aq_ptp_poll_sync_work_cb(struct work_struct *w)
+{
+	struct delayed_work *dw = to_delayed_work(w);
+	struct aq_ptp_s *aq_ptp = container_of(dw, struct aq_ptp_s, poll_sync);
+
+	aq_ptp_check_sync1588(aq_ptp);
+
+	if (aq_ptp->extts_pin_enabled) {
+		unsigned long timeout = msecs_to_jiffies(aq_ptp->poll_timeout_ms);
+
+		schedule_delayed_work(&aq_ptp->poll_sync, timeout);
+	}
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h
new file mode 100644
index 000000000000..231906431a48
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Aquantia Corporation Network Driver
+ * Copyright (C) 2014-2019 Aquantia Corporation. All rights reserved
+ */
+
+/* File aq_ptp.h: Declaration of PTP functions.
+ */
+#ifndef AQ_PTP_H
+#define AQ_PTP_H
+
+#include <linux/net_tstamp.h>
+
+#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
+
+/* Common functions */
+int aq_ptp_init(struct aq_nic_s *aq_nic, unsigned int idx_vec);
+
+void aq_ptp_unregister(struct aq_nic_s *aq_nic);
+void aq_ptp_free(struct aq_nic_s *aq_nic);
+
+int aq_ptp_irq_alloc(struct aq_nic_s *aq_nic);
+void aq_ptp_irq_free(struct aq_nic_s *aq_nic);
+
+int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic);
+void aq_ptp_ring_free(struct aq_nic_s *aq_nic);
+
+int aq_ptp_ring_init(struct aq_nic_s *aq_nic);
+int aq_ptp_ring_start(struct aq_nic_s *aq_nic);
+void aq_ptp_ring_stop(struct aq_nic_s *aq_nic);
+void aq_ptp_ring_deinit(struct aq_nic_s *aq_nic);
+
+void aq_ptp_service_task(struct aq_nic_s *aq_nic);
+
+void aq_ptp_tm_offset_set(struct aq_nic_s *aq_nic, unsigned int mbps);
+
+void aq_ptp_clock_init(struct aq_nic_s *aq_nic);
+
+/* Traffic processing functions */
+int aq_ptp_xmit(struct aq_nic_s *aq_nic, struct sk_buff *skb);
+void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp);
+
+/* Must be to check available of PTP before call */
+void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp,
+				struct hwtstamp_config *config);
+int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp,
+			       struct hwtstamp_config *config);
+
+/* Return either ring is belong to PTP or not*/
+bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring);
+
+u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p,
+		      unsigned int len);
+
+struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp);
+
+int aq_ptp_link_change(struct aq_nic_s *aq_nic);
+
+#else
+
+static inline int aq_ptp_init(struct aq_nic_s *aq_nic, unsigned int idx_vec)
+{
+	return 0;
+}
+
+static inline void aq_ptp_unregister(struct aq_nic_s *aq_nic) {}
+
+static inline void aq_ptp_free(struct aq_nic_s *aq_nic)
+{
+}
+
+static inline int aq_ptp_irq_alloc(struct aq_nic_s *aq_nic)
+{
+	return 0;
+}
+
+static inline void aq_ptp_irq_free(struct aq_nic_s *aq_nic)
+{
+}
+
+static inline int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic)
+{
+	return 0;
+}
+
+static inline void aq_ptp_ring_free(struct aq_nic_s *aq_nic) {}
+
+static inline int aq_ptp_ring_init(struct aq_nic_s *aq_nic)
+{
+	return 0;
+}
+
+static inline int aq_ptp_ring_start(struct aq_nic_s *aq_nic)
+{
+	return 0;
+}
+
+static inline void aq_ptp_ring_stop(struct aq_nic_s *aq_nic) {}
+static inline void aq_ptp_ring_deinit(struct aq_nic_s *aq_nic) {}
+static inline void aq_ptp_service_task(struct aq_nic_s *aq_nic) {}
+static inline void aq_ptp_tm_offset_set(struct aq_nic_s *aq_nic,
+					unsigned int mbps) {}
+static inline void aq_ptp_clock_init(struct aq_nic_s *aq_nic) {}
+static inline int aq_ptp_xmit(struct aq_nic_s *aq_nic, struct sk_buff *skb)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp) {}
+static inline void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp,
+					      struct hwtstamp_config *config) {}
+static inline int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp,
+					     struct hwtstamp_config *config)
+{
+	return 0;
+}
+
+static inline bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring)
+{
+	return false;
+}
+
+static inline u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic,
+				    struct sk_buff *skb, u8 *p,
+				    unsigned int len)
+{
+	return 0;
+}
+
+static inline struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp)
+{
+	return NULL;
+}
+
+static inline int aq_ptp_link_change(struct aq_nic_s *aq_nic)
+{
+	return 0;
+}
+#endif
+
+#endif /* AQ_PTP_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 76bdbe1596d6..951d86f8b66e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_ring.c: Definition of functions for Rx/Tx rings. */
@@ -10,6 +10,7 @@
 #include "aq_nic.h"
 #include "aq_hw.h"
 #include "aq_hw_utils.h"
+#include "aq_ptp.h"
 
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -29,8 +30,8 @@ static int aq_get_rxpage(struct aq_rxpage *rxpage, unsigned int order,
 			 struct device *dev)
 {
 	struct page *page;
-	dma_addr_t daddr;
 	int ret = -ENOMEM;
+	dma_addr_t daddr;
 
 	page = dev_alloc_pages(order);
 	if (unlikely(!page))
@@ -117,6 +118,7 @@ err_exit:
 		aq_ring_free(self);
 		self = NULL;
 	}
+
 	return self;
 }
 
@@ -143,6 +145,7 @@ err_exit:
 		aq_ring_free(self);
 		self = NULL;
 	}
+
 	return self;
 }
 
@@ -174,6 +177,31 @@ err_exit:
 		aq_ring_free(self);
 		self = NULL;
 	}
+
+	return self;
+}
+
+struct aq_ring_s *
+aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic,
+		      unsigned int idx, unsigned int size, unsigned int dx_size)
+{
+	struct device *dev = aq_nic_get_dev(aq_nic);
+	size_t sz = size * dx_size + AQ_CFG_RXDS_DEF;
+
+	memset(self, 0, sizeof(*self));
+
+	self->aq_nic = aq_nic;
+	self->idx = idx;
+	self->size = size;
+	self->dx_size = dx_size;
+
+	self->dx_ring = dma_alloc_coherent(dev, sz, &self->dx_ring_pa,
+					   GFP_KERNEL);
+	if (!self->dx_ring) {
+		aq_ring_free(self);
+		return NULL;
+	}
+
 	return self;
 }
 
@@ -182,6 +210,7 @@ int aq_ring_init(struct aq_ring_s *self)
 	self->hw_head = 0;
 	self->sw_head = 0;
 	self->sw_tail = 0;
+
 	return 0;
 }
 
@@ -290,6 +319,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 		self->sw_head = aq_ring_next_dx(self, self->sw_head),
 		--budget, ++(*work_done)) {
 		struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
+		bool is_ptp_ring = aq_ptp_ring(self->aq_nic, self);
 		struct aq_ring_buff_s *buff_ = NULL;
 		struct sk_buff *skb = NULL;
 		unsigned int next_ = 0U;
@@ -354,6 +384,11 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 				err = -ENOMEM;
 				goto err_exit;
 			}
+			if (is_ptp_ring)
+				buff->len -=
+					aq_ptp_extract_ts(self->aq_nic, skb,
+						aq_buf_vaddr(&buff->rxdata),
+						buff->len);
 			skb_put(skb, buff->len);
 			page_ref_inc(buff->rxdata.page);
 		} else {
@@ -362,6 +397,11 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 				err = -ENOMEM;
 				goto err_exit;
 			}
+			if (is_ptp_ring)
+				buff->len -=
+					aq_ptp_extract_ts(self->aq_nic, skb,
+						aq_buf_vaddr(&buff->rxdata),
+						buff->len);
 
 			hdr_len = buff->len;
 			if (hdr_len > AQ_CFG_RX_HDR_SIZE)
@@ -421,8 +461,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 		skb_set_hash(skb, buff->rss_hash,
 			     buff->is_hash_l4 ? PKT_HASH_TYPE_L4 :
 			     PKT_HASH_TYPE_NONE);
-
-		skb_record_rx_queue(skb, self->idx);
+		/* Send all PTP traffic to 0 queue */
+		skb_record_rx_queue(skb, is_ptp_ring ? 0 : self->idx);
 
 		++self->stats.rx.packets;
 		self->stats.rx.bytes += skb->len;
@@ -434,6 +474,21 @@ err_exit:
 	return err;
 }
 
+void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic)
+{
+	while (self->sw_head != self->hw_head) {
+		u64 ns;
+
+		aq_nic->aq_hw_ops->extract_hwts(aq_nic->aq_hw,
+						self->dx_ring +
+						(self->sw_head * self->dx_size),
+						self->dx_size, &ns);
+		aq_ptp_tx_hwtstamp(aq_nic, ns);
+
+		self->sw_head = aq_ring_next_dx(self, self->sw_head);
+	}
+}
+
 int aq_ring_rx_fill(struct aq_ring_s *self)
 {
 	unsigned int page_order = self->page_order;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index 47abd09d06c2..991e4d31b094 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File aq_ring.h: Declaration of functions for Rx/Tx rings. */
@@ -65,19 +65,20 @@ struct __packed aq_ring_buff_s {
 	};
 	union {
 		struct {
-			u16 len;
+			u32 len:16;
 			u32 is_ip_cso:1;
 			u32 is_udp_cso:1;
 			u32 is_tcp_cso:1;
 			u32 is_cso_err:1;
 			u32 is_sop:1;
 			u32 is_eop:1;
-			u32 is_gso:1;
+			u32 is_gso_tcp:1;
+			u32 is_gso_udp:1;
 			u32 is_mapped:1;
 			u32 is_cleaned:1;
 			u32 is_error:1;
 			u32 is_vlan:1;
-			u32 rsvd3:5;
+			u32 rsvd3:4;
 			u16 eop_index;
 			u16 rsvd4;
 		};
@@ -174,4 +175,9 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 		     int budget);
 int aq_ring_rx_fill(struct aq_ring_s *self);
 
+struct aq_ring_s *aq_ring_hwts_rx_alloc(struct aq_ring_s *self,
+		struct aq_nic_s *aq_nic, unsigned int idx,
+		unsigned int size, unsigned int dx_size);
+void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic);
+
 #endif /* AQ_RING_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index a95c263a45aa..6e19e27b6200 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -103,8 +103,8 @@ err_exit:
 struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx,
 			      struct aq_nic_cfg_s *aq_nic_cfg)
 {
-	struct aq_vec_s *self = NULL;
 	struct aq_ring_s *ring = NULL;
+	struct aq_vec_s *self = NULL;
 	unsigned int i = 0U;
 	int err = 0;
 
@@ -159,6 +159,7 @@ err_exit:
 		aq_vec_free(self);
 		self = NULL;
 	}
+
 	return self;
 }
 
@@ -263,6 +264,7 @@ void aq_vec_deinit(struct aq_vec_s *self)
 		aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
 		aq_ring_rx_deinit(&ring[AQ_VEC_RX_ID]);
 	}
+
 err_exit:;
 }
 
@@ -305,8 +307,8 @@ err_exit:
 irqreturn_t aq_vec_isr_legacy(int irq, void *private)
 {
 	struct aq_vec_s *self = private;
+	irqreturn_t err = 0;
 	u64 irq_mask = 0U;
-	int err;
 
 	if (!self)
 		return IRQ_NONE;
@@ -361,9 +363,9 @@ void aq_vec_add_stats(struct aq_vec_s *self,
 
 int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data, unsigned int *p_count)
 {
-	unsigned int count = 0U;
 	struct aq_ring_stats_rx_s stats_rx;
 	struct aq_ring_stats_tx_s stats_tx;
+	unsigned int count = 0U;
 
 	memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s));
 	memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s));
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 359a4d387185..9b1062b8af64 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -119,10 +119,10 @@ err_exit:
 
 static int hw_atl_a0_hw_qos_set(struct aq_hw_s *self)
 {
-	u32 tc = 0U;
-	u32 buff_size = 0U;
-	unsigned int i_priority = 0U;
 	bool is_rx_flow_control = false;
+	unsigned int i_priority = 0U;
+	u32 buff_size = 0U;
+	u32 tc = 0U;
 
 	/* TPS Descriptor rate init */
 	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
@@ -155,7 +155,7 @@ static int hw_atl_a0_hw_qos_set(struct aq_hw_s *self)
 
 	/* QoS Rx buf size per TC */
 	tc = 0;
-	is_rx_flow_control = (AQ_NIC_FC_RX & self->aq_nic_cfg->flow_control);
+	is_rx_flow_control = (AQ_NIC_FC_RX & self->aq_nic_cfg->fc.req);
 	buff_size = HW_ATL_A0_RXBUF_MAX;
 
 	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc);
@@ -180,9 +180,9 @@ static int hw_atl_a0_hw_rss_hash_set(struct aq_hw_s *self,
 				     struct aq_rss_parameters *rss_params)
 {
 	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
-	int err = 0;
-	unsigned int i = 0U;
 	unsigned int addr = 0U;
+	unsigned int i = 0U;
+	int err = 0;
 	u32 val;
 
 	for (i = 10, addr = 0U; i--; ++addr) {
@@ -207,12 +207,12 @@ err_exit:
 static int hw_atl_a0_hw_rss_set(struct aq_hw_s *self,
 				struct aq_rss_parameters *rss_params)
 {
-	u8 *indirection_table =	rss_params->indirection_table;
-	u32 i = 0U;
 	u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues);
-	int err = 0;
+	u8 *indirection_table =	rss_params->indirection_table;
 	u16 bitary[1 + (HW_ATL_A0_RSS_REDIRECTION_MAX *
 		   HW_ATL_A0_RSS_REDIRECTION_BITS / 16U)];
+	int err = 0;
+	u32 i = 0U;
 	u32 val;
 
 	memset(bitary, 0, sizeof(bitary));
@@ -321,9 +321,9 @@ static int hw_atl_a0_hw_init_rx_path(struct aq_hw_s *self)
 
 static int hw_atl_a0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
 {
-	int err = 0;
 	unsigned int h = 0U;
 	unsigned int l = 0U;
+	int err = 0;
 
 	if (!mac_addr) {
 		err = -EINVAL;
@@ -352,10 +352,9 @@ static int hw_atl_a0_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 		[AQ_HW_IRQ_MSI]     = { 0x20000021U, 0x20000025U },
 		[AQ_HW_IRQ_MSIX]    = { 0x20000022U, 0x20000026U },
 	};
-
+	struct aq_nic_cfg_s *aq_nic_cfg = self->aq_nic_cfg;
 	int err = 0;
 
-	struct aq_nic_cfg_s *aq_nic_cfg = self->aq_nic_cfg;
 
 	hw_atl_a0_hw_init_tx_path(self);
 	hw_atl_a0_hw_init_rx_path(self);
@@ -404,6 +403,7 @@ static int hw_atl_a0_hw_ring_tx_start(struct aq_hw_s *self,
 				      struct aq_ring_s *ring)
 {
 	hw_atl_tdm_tx_desc_en_set(self, 1, ring->idx);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -411,6 +411,7 @@ static int hw_atl_a0_hw_ring_rx_start(struct aq_hw_s *self,
 				      struct aq_ring_s *ring)
 {
 	hw_atl_rdm_rx_desc_en_set(self, 1, ring->idx);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -418,6 +419,7 @@ static int hw_atl_a0_hw_start(struct aq_hw_s *self)
 {
 	hw_atl_tpb_tx_buff_en_set(self, 1);
 	hw_atl_rpb_rx_buff_en_set(self, 1);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -425,6 +427,7 @@ static int hw_atl_a0_hw_tx_ring_tail_update(struct aq_hw_s *self,
 					    struct aq_ring_s *ring)
 {
 	hw_atl_reg_tx_dma_desc_tail_ptr_set(self, ring->sw_tail, ring->idx);
+
 	return 0;
 }
 
@@ -435,8 +438,8 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
 	struct aq_ring_buff_s *buff = NULL;
 	struct hw_atl_txd_s *txd = NULL;
 	unsigned int buff_pa_len = 0U;
-	unsigned int pkt_len = 0U;
 	unsigned int frag_count = 0U;
+	unsigned int pkt_len = 0U;
 	bool is_gso = false;
 
 	buff = &ring->buff_ring[ring->sw_tail];
@@ -451,7 +454,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
 
 		buff = &ring->buff_ring[ring->sw_tail];
 
-		if (buff->is_gso) {
+		if (buff->is_gso_tcp) {
 			txd->ctl |= (buff->len_l3 << 31) |
 				(buff->len_l2 << 24) |
 				HW_ATL_A0_TXD_CTL_CMD_TCP |
@@ -500,6 +503,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
 	}
 
 	hw_atl_a0_hw_tx_ring_tail_update(self, ring);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -507,8 +511,8 @@ static int hw_atl_a0_hw_ring_rx_init(struct aq_hw_s *self,
 				     struct aq_ring_s *aq_ring,
 				     struct aq_ring_param_s *aq_ring_param)
 {
-	u32 dma_desc_addr_lsw = (u32)aq_ring->dx_ring_pa;
 	u32 dma_desc_addr_msw = (u32)(((u64)aq_ring->dx_ring_pa) >> 32);
+	u32 dma_desc_addr_lsw = (u32)aq_ring->dx_ring_pa;
 
 	hw_atl_rdm_rx_desc_en_set(self, false, aq_ring->idx);
 
@@ -549,8 +553,8 @@ static int hw_atl_a0_hw_ring_tx_init(struct aq_hw_s *self,
 				     struct aq_ring_s *aq_ring,
 				     struct aq_ring_param_s *aq_ring_param)
 {
-	u32 dma_desc_lsw_addr = (u32)aq_ring->dx_ring_pa;
 	u32 dma_desc_msw_addr = (u32)(((u64)aq_ring->dx_ring_pa) >> 32);
+	u32 dma_desc_lsw_addr = (u32)aq_ring->dx_ring_pa;
 
 	hw_atl_reg_tx_dma_desc_base_addresslswset(self, dma_desc_lsw_addr,
 						  aq_ring->idx);
@@ -599,8 +603,8 @@ static int hw_atl_a0_hw_ring_rx_fill(struct aq_hw_s *self,
 static int hw_atl_a0_hw_ring_tx_head_update(struct aq_hw_s *self,
 					    struct aq_ring_s *ring)
 {
-	int err = 0;
 	unsigned int hw_head = hw_atl_tdm_tx_desc_head_ptr_get(self, ring->idx);
+	int err = 0;
 
 	if (aq_utils_obj_test(&self->flags, AQ_HW_FLAG_ERR_UNPLUG)) {
 		err = -ENXIO;
@@ -720,6 +724,7 @@ static int hw_atl_a0_hw_irq_enable(struct aq_hw_s *self, u64 mask)
 {
 	hw_atl_itr_irq_msk_setlsw_set(self, LODWORD(mask) |
 			       (1U << HW_ATL_A0_ERR_INT));
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -737,6 +742,7 @@ static int hw_atl_a0_hw_irq_disable(struct aq_hw_s *self, u64 mask)
 static int hw_atl_a0_hw_irq_read(struct aq_hw_s *self, u64 *mask)
 {
 	*mask = hw_atl_itr_irq_statuslsw_get(self);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -859,6 +865,7 @@ static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self)
 static int hw_atl_a0_hw_stop(struct aq_hw_s *self)
 {
 	hw_atl_a0_hw_irq_disable(self, HW_ATL_A0_INT_MASK);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -866,6 +873,7 @@ static int hw_atl_a0_hw_ring_tx_stop(struct aq_hw_s *self,
 				     struct aq_ring_s *ring)
 {
 	hw_atl_tdm_tx_desc_en_set(self, 0U, ring->idx);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -873,6 +881,7 @@ static int hw_atl_a0_hw_ring_rx_stop(struct aq_hw_s *self,
 				     struct aq_ring_s *ring)
 {
 	hw_atl_rdm_rx_desc_en_set(self, 0U, ring->idx);
+
 	return aq_hw_err_from_flags(self);
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 2ad3fa6316ce..58e891af6e09 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_b0.c: Definition of Atlantic hardware specific functions. */
@@ -10,6 +10,7 @@
 #include "../aq_hw_utils.h"
 #include "../aq_ring.h"
 #include "../aq_nic.h"
+#include "../aq_phy.h"
 #include "hw_atl_b0.h"
 #include "hw_atl_utils.h"
 #include "hw_atl_llh.h"
@@ -42,13 +43,17 @@
 			NETIF_F_NTUPLE |  \
 			NETIF_F_HW_VLAN_CTAG_FILTER | \
 			NETIF_F_HW_VLAN_CTAG_RX |     \
-			NETIF_F_HW_VLAN_CTAG_TX,      \
+			NETIF_F_HW_VLAN_CTAG_TX |     \
+			NETIF_F_GSO_UDP_L4      |     \
+			NETIF_F_GSO_PARTIAL,          \
 	.hw_priv_flags = IFF_UNICAST_FLT, \
 	.flow_control = true,		  \
 	.mtu = HW_ATL_B0_MTU_JUMBO,	  \
 	.mac_regs_count = 88,		  \
 	.hw_alive_check_addr = 0x10U
 
+#define FRAC_PER_NS 0x100000000LL
+
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc100 = {
 	DEFAULT_B0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_FIBRE,
@@ -104,14 +109,15 @@ static int hw_atl_b0_hw_reset(struct aq_hw_s *self)
 static int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc)
 {
 	hw_atl_rpb_rx_xoff_en_per_tc_set(self, !!(fc & AQ_NIC_FC_RX), tc);
+
 	return 0;
 }
 
 static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 {
-	u32 tc = 0U;
-	u32 buff_size = 0U;
 	unsigned int i_priority = 0U;
+	u32 buff_size = 0U;
+	u32 tc = 0U;
 
 	/* TPS Descriptor rate init */
 	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
@@ -124,13 +130,16 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
 	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
-	hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, 0U);
-	hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, 0U);
-	hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, 0U);
-	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, 0U);
+	tc = 0;
+
+	/* TX Packet Scheduler Data TC0 */
+	hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, tc);
+	hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, tc);
+	hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
+	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
 
-	/* Tx buf size */
-	buff_size = HW_ATL_B0_TXBUF_MAX;
+	/* Tx buf size TC0 */
+	buff_size = HW_ATL_B0_TXBUF_MAX - HW_ATL_B0_PTP_TXBUF_SIZE;
 
 	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, buff_size, tc);
 	hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self,
@@ -141,10 +150,15 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 						   (buff_size *
 						   (1024 / 32U) * 50U) /
 						   100U, tc);
+	/* Init TC2 for PTP_TX */
+	tc = 2;
+
+	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_TXBUF_SIZE,
+					       tc);
 
 	/* QoS Rx buf size per TC */
 	tc = 0;
-	buff_size = HW_ATL_B0_RXBUF_MAX;
+	buff_size = HW_ATL_B0_RXBUF_MAX - HW_ATL_B0_PTP_RXBUF_SIZE;
 
 	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc);
 	hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self,
@@ -156,7 +170,15 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 						   (1024U / 32U) * 50U) /
 						   100U, tc);
 
-	hw_atl_b0_set_fc(self, self->aq_nic_cfg->flow_control, tc);
+	hw_atl_b0_set_fc(self, self->aq_nic_cfg->fc.req, tc);
+
+	/* Init TC2 for PTP_RX */
+	tc = 2;
+
+	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_RXBUF_SIZE,
+					       tc);
+	/* No flow control for PTP */
+	hw_atl_rpb_rx_xoff_en_per_tc_set(self, 0U, tc);
 
 	/* QoS 802.1p priority -> TC mapping */
 	for (i_priority = 8U; i_priority--;)
@@ -169,9 +191,9 @@ static int hw_atl_b0_hw_rss_hash_set(struct aq_hw_s *self,
 				     struct aq_rss_parameters *rss_params)
 {
 	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
-	int err = 0;
-	unsigned int i = 0U;
 	unsigned int addr = 0U;
+	unsigned int i = 0U;
+	int err = 0;
 	u32 val;
 
 	for (i = 10, addr = 0U; i--; ++addr) {
@@ -196,12 +218,12 @@ err_exit:
 static int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
 				struct aq_rss_parameters *rss_params)
 {
-	u8 *indirection_table =	rss_params->indirection_table;
-	u32 i = 0U;
 	u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues);
-	int err = 0;
+	u8 *indirection_table =	rss_params->indirection_table;
 	u16 bitary[1 + (HW_ATL_B0_RSS_REDIRECTION_MAX *
 		   HW_ATL_B0_RSS_REDIRECTION_BITS / 16U)];
+	int err = 0;
+	u32 i = 0U;
 	u32 val;
 
 	memset(bitary, 0, sizeof(bitary));
@@ -285,6 +307,7 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 
 		hw_atl_itr_rsc_delay_set(self, 1U);
 	}
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -363,9 +386,9 @@ static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self)
 
 static int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
 {
-	int err = 0;
 	unsigned int h = 0U;
 	unsigned int l = 0U;
+	int err = 0;
 
 	if (!mac_addr) {
 		err = -EINVAL;
@@ -394,11 +417,10 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 		[AQ_HW_IRQ_MSI]     = { 0x20000021U, 0x20000025U },
 		[AQ_HW_IRQ_MSIX]    = { 0x20000022U, 0x20000026U },
 	};
-
+	struct aq_nic_cfg_s *aq_nic_cfg = self->aq_nic_cfg;
 	int err = 0;
 	u32 val;
 
-	struct aq_nic_cfg_s *aq_nic_cfg = self->aq_nic_cfg;
 
 	hw_atl_b0_hw_init_tx_path(self);
 	hw_atl_b0_hw_init_rx_path(self);
@@ -441,8 +463,10 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 
 	/* Interrupts */
 	hw_atl_reg_gen_irq_map_set(self,
-				   ((HW_ATL_B0_ERR_INT << 0x18) | (1U << 0x1F)) |
-			    ((HW_ATL_B0_ERR_INT << 0x10) | (1U << 0x17)), 0U);
+				   ((HW_ATL_B0_ERR_INT << 0x18) |
+				    (1U << 0x1F)) |
+				   ((HW_ATL_B0_ERR_INT << 0x10) |
+				    (1U << 0x17)), 0U);
 
 	/* Enable link interrupt */
 	if (aq_nic_cfg->link_irq_vec)
@@ -459,6 +483,7 @@ static int hw_atl_b0_hw_ring_tx_start(struct aq_hw_s *self,
 				      struct aq_ring_s *ring)
 {
 	hw_atl_tdm_tx_desc_en_set(self, 1, ring->idx);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -466,6 +491,7 @@ static int hw_atl_b0_hw_ring_rx_start(struct aq_hw_s *self,
 				      struct aq_ring_s *ring)
 {
 	hw_atl_rdm_rx_desc_en_set(self, 1, ring->idx);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -473,6 +499,7 @@ static int hw_atl_b0_hw_start(struct aq_hw_s *self)
 {
 	hw_atl_tpb_tx_buff_en_set(self, 1);
 	hw_atl_rpb_rx_buff_en_set(self, 1);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -480,6 +507,7 @@ static int hw_atl_b0_hw_tx_ring_tail_update(struct aq_hw_s *self,
 					    struct aq_ring_s *ring)
 {
 	hw_atl_reg_tx_dma_desc_tail_ptr_set(self, ring->sw_tail, ring->idx);
+
 	return 0;
 }
 
@@ -490,8 +518,8 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 	struct aq_ring_buff_s *buff = NULL;
 	struct hw_atl_txd_s *txd = NULL;
 	unsigned int buff_pa_len = 0U;
-	unsigned int pkt_len = 0U;
 	unsigned int frag_count = 0U;
+	unsigned int pkt_len = 0U;
 	bool is_vlan = false;
 	bool is_gso = false;
 
@@ -507,8 +535,9 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 
 		buff = &ring->buff_ring[ring->sw_tail];
 
-		if (buff->is_gso) {
-			txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP;
+		if (buff->is_gso_tcp || buff->is_gso_udp) {
+			if (buff->is_gso_tcp)
+				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP;
 			txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC;
 			txd->ctl |= (buff->len_l3 << 31) |
 				    (buff->len_l2 << 24);
@@ -528,7 +557,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 			txd->ctl |= buff->vlan_tx_tag << 4;
 			is_vlan = true;
 		}
-		if (!buff->is_gso && !buff->is_vlan) {
+		if (!buff->is_gso_tcp && !buff->is_gso_udp && !buff->is_vlan) {
 			buff_pa_len = buff->len;
 
 			txd->buf_addr = buff->pa;
@@ -567,6 +596,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 	}
 
 	hw_atl_b0_hw_tx_ring_tail_update(self, ring);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -574,9 +604,9 @@ static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self,
 				     struct aq_ring_s *aq_ring,
 				     struct aq_ring_param_s *aq_ring_param)
 {
-	u32 dma_desc_addr_lsw = (u32)aq_ring->dx_ring_pa;
 	u32 dma_desc_addr_msw = (u32)(((u64)aq_ring->dx_ring_pa) >> 32);
 	u32 vlan_rx_stripping = self->aq_nic_cfg->is_vlan_rx_strip;
+	u32 dma_desc_addr_lsw = (u32)aq_ring->dx_ring_pa;
 
 	hw_atl_rdm_rx_desc_en_set(self, false, aq_ring->idx);
 
@@ -617,8 +647,8 @@ static int hw_atl_b0_hw_ring_tx_init(struct aq_hw_s *self,
 				     struct aq_ring_s *aq_ring,
 				     struct aq_ring_param_s *aq_ring_param)
 {
-	u32 dma_desc_lsw_addr = (u32)aq_ring->dx_ring_pa;
 	u32 dma_desc_msw_addr = (u32)(((u64)aq_ring->dx_ring_pa) >> 32);
+	u32 dma_desc_lsw_addr = (u32)aq_ring->dx_ring_pa;
 
 	hw_atl_reg_tx_dma_desc_base_addresslswset(self, dma_desc_lsw_addr,
 						  aq_ring->idx);
@@ -664,11 +694,53 @@ static int hw_atl_b0_hw_ring_rx_fill(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
+static int hw_atl_b0_hw_ring_hwts_rx_fill(struct aq_hw_s *self,
+					  struct aq_ring_s *ring)
+{
+	unsigned int i;
+
+	for (i = aq_ring_avail_dx(ring); i--;
+			ring->sw_tail = aq_ring_next_dx(ring, ring->sw_tail)) {
+		struct hw_atl_rxd_s *rxd =
+			(struct hw_atl_rxd_s *)
+			&ring->dx_ring[ring->sw_tail * HW_ATL_B0_RXD_SIZE];
+
+		rxd->buf_addr = ring->dx_ring_pa + ring->size * ring->dx_size;
+		rxd->hdr_addr = 0U;
+	}
+	/* Make sure descriptors are updated before bump tail*/
+	wmb();
+
+	hw_atl_reg_rx_dma_desc_tail_ptr_set(self, ring->sw_tail, ring->idx);
+
+	return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl_b0_hw_ring_hwts_rx_receive(struct aq_hw_s *self,
+					     struct aq_ring_s *ring)
+{
+	while (ring->hw_head != ring->sw_tail) {
+		struct hw_atl_rxd_hwts_wb_s *hwts_wb =
+			(struct hw_atl_rxd_hwts_wb_s *)
+			(ring->dx_ring + (ring->hw_head * HW_ATL_B0_RXD_SIZE));
+
+		/* RxD is not done */
+		if (!(hwts_wb->sec_lw0 & 0x1U))
+			break;
+
+		ring->hw_head = aq_ring_next_dx(ring, ring->hw_head);
+	}
+
+	return aq_hw_err_from_flags(self);
+}
+
 static int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
 					    struct aq_ring_s *ring)
 {
+	unsigned int hw_head_;
 	int err = 0;
-	unsigned int hw_head_ = hw_atl_tdm_tx_desc_head_ptr_get(self, ring->idx);
+
+	hw_head_ = hw_atl_tdm_tx_desc_head_ptr_get(self, ring->idx);
 
 	if (aq_utils_obj_test(&self->flags, AQ_HW_FLAG_ERR_UNPLUG)) {
 		err = -ENXIO;
@@ -784,6 +856,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
 static int hw_atl_b0_hw_irq_enable(struct aq_hw_s *self, u64 mask)
 {
 	hw_atl_itr_irq_msk_setlsw_set(self, LODWORD(mask));
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -793,12 +866,14 @@ static int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask)
 	hw_atl_itr_irq_status_clearlsw_set(self, LODWORD(mask));
 
 	atomic_inc(&self->dpc);
+
 	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask)
 {
 	*mask = hw_atl_itr_irq_statuslsw_get(self);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -807,8 +882,8 @@ static int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask)
 static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
 					  unsigned int packet_filter)
 {
-	unsigned int i = 0U;
 	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+	unsigned int i = 0U;
 
 	hw_atl_rpfl2promiscuous_mode_en_set(self,
 					    IS_FILTER_ENABLED(IFF_PROMISC));
@@ -846,29 +921,30 @@ static int hw_atl_b0_hw_multicast_list_set(struct aq_hw_s *self,
 					   u32 count)
 {
 	int err = 0;
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
 
 	if (count > (HW_ATL_B0_MAC_MAX - HW_ATL_B0_MAC_MIN)) {
 		err = -EBADRQC;
 		goto err_exit;
 	}
-	for (self->aq_nic_cfg->mc_list_count = 0U;
-			self->aq_nic_cfg->mc_list_count < count;
-			++self->aq_nic_cfg->mc_list_count) {
-		u32 i = self->aq_nic_cfg->mc_list_count;
+	for (cfg->mc_list_count = 0U;
+			cfg->mc_list_count < count;
+			++cfg->mc_list_count) {
+		u32 i = cfg->mc_list_count;
 		u32 h = (ar_mac[i][0] << 8) | (ar_mac[i][1]);
 		u32 l = (ar_mac[i][2] << 24) | (ar_mac[i][3] << 16) |
 					(ar_mac[i][4] << 8) | ar_mac[i][5];
 
 		hw_atl_rpfl2_uc_flr_en_set(self, 0U, HW_ATL_B0_MAC_MIN + i);
 
-		hw_atl_rpfl2unicast_dest_addresslsw_set(self,
-							l, HW_ATL_B0_MAC_MIN + i);
+		hw_atl_rpfl2unicast_dest_addresslsw_set(self, l,
+							HW_ATL_B0_MAC_MIN + i);
 
-		hw_atl_rpfl2unicast_dest_addressmsw_set(self,
-							h, HW_ATL_B0_MAC_MIN + i);
+		hw_atl_rpfl2unicast_dest_addressmsw_set(self, h,
+							HW_ATL_B0_MAC_MIN + i);
 
 		hw_atl_rpfl2_uc_flr_en_set(self,
-					   (self->aq_nic_cfg->is_mc_list_enabled),
+					   (cfg->is_mc_list_enabled),
 					   HW_ATL_B0_MAC_MIN + i);
 	}
 
@@ -995,6 +1071,7 @@ static int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self,
 				     struct aq_ring_s *ring)
 {
 	hw_atl_tdm_tx_desc_en_set(self, 0U, ring->idx);
+
 	return aq_hw_err_from_flags(self);
 }
 
@@ -1002,9 +1079,231 @@ static int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self,
 				     struct aq_ring_s *ring)
 {
 	hw_atl_rdm_rx_desc_en_set(self, 0U, ring->idx);
+
 	return aq_hw_err_from_flags(self);
 }
 
+static int hw_atl_b0_tx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode)
+{
+	*tc_mode = hw_atl_rpb_tps_tx_tc_mode_get(self);
+	return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl_b0_rx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode)
+{
+	*tc_mode = hw_atl_rpb_rpf_rx_traf_class_mode_get(self);
+	return aq_hw_err_from_flags(self);
+}
+
+#define get_ptp_ts_val_u64(self, indx) \
+	((u64)(hw_atl_pcs_ptp_clock_get(self, indx) & 0xffff))
+
+static void hw_atl_b0_get_ptp_ts(struct aq_hw_s *self, u64 *stamp)
+{
+	u64 ns;
+
+	hw_atl_pcs_ptp_clock_read_enable(self, 1);
+	hw_atl_pcs_ptp_clock_read_enable(self, 0);
+	ns = (get_ptp_ts_val_u64(self, 0) +
+	      (get_ptp_ts_val_u64(self, 1) << 16)) * NSEC_PER_SEC +
+	     (get_ptp_ts_val_u64(self, 3) +
+	      (get_ptp_ts_val_u64(self, 4) << 16));
+
+	*stamp = ns + self->ptp_clk_offset;
+}
+
+static void hw_atl_b0_adj_params_get(u64 freq, s64 adj, u32 *ns, u32 *fns)
+{
+	/* For accuracy, the digit is extended */
+	s64 base_ns = ((adj + NSEC_PER_SEC) * NSEC_PER_SEC);
+	u64 nsi_frac = 0;
+	u64 nsi;
+
+	base_ns = div64_s64(base_ns, freq);
+	nsi = div64_u64(base_ns, NSEC_PER_SEC);
+
+	if (base_ns != nsi * NSEC_PER_SEC) {
+		s64 divisor = div64_s64((s64)NSEC_PER_SEC * NSEC_PER_SEC,
+					base_ns - nsi * NSEC_PER_SEC);
+		nsi_frac = div64_s64(FRAC_PER_NS * NSEC_PER_SEC, divisor);
+	}
+
+	*ns = (u32)nsi;
+	*fns = (u32)nsi_frac;
+}
+
+static void
+hw_atl_b0_mac_adj_param_calc(struct hw_fw_request_ptp_adj_freq *ptp_adj_freq,
+			     u64 phyfreq, u64 macfreq)
+{
+	s64 adj_fns_val;
+	s64 fns_in_sec_phy = phyfreq * (ptp_adj_freq->fns_phy +
+					FRAC_PER_NS * ptp_adj_freq->ns_phy);
+	s64 fns_in_sec_mac = macfreq * (ptp_adj_freq->fns_mac +
+					FRAC_PER_NS * ptp_adj_freq->ns_mac);
+	s64 fault_in_sec_phy = FRAC_PER_NS * NSEC_PER_SEC - fns_in_sec_phy;
+	s64 fault_in_sec_mac = FRAC_PER_NS * NSEC_PER_SEC - fns_in_sec_mac;
+	/* MAC MCP counter freq is macfreq / 4 */
+	s64 diff_in_mcp_overflow = (fault_in_sec_mac - fault_in_sec_phy) *
+				   4 * FRAC_PER_NS;
+
+	diff_in_mcp_overflow = div64_s64(diff_in_mcp_overflow,
+					 AQ_HW_MAC_COUNTER_HZ);
+	adj_fns_val = (ptp_adj_freq->fns_mac + FRAC_PER_NS *
+		       ptp_adj_freq->ns_mac) + diff_in_mcp_overflow;
+
+	ptp_adj_freq->mac_ns_adj = div64_s64(adj_fns_val, FRAC_PER_NS);
+	ptp_adj_freq->mac_fns_adj = adj_fns_val - ptp_adj_freq->mac_ns_adj *
+				    FRAC_PER_NS;
+}
+
+static int hw_atl_b0_adj_sys_clock(struct aq_hw_s *self, s64 delta)
+{
+	self->ptp_clk_offset += delta;
+
+	return 0;
+}
+
+static int hw_atl_b0_set_sys_clock(struct aq_hw_s *self, u64 time, u64 ts)
+{
+	s64 delta = time - (self->ptp_clk_offset + ts);
+
+	return hw_atl_b0_adj_sys_clock(self, delta);
+}
+
+static int hw_atl_b0_ts_to_sys_clock(struct aq_hw_s *self, u64 ts, u64 *time)
+{
+	*time = self->ptp_clk_offset + ts;
+	return 0;
+}
+
+static int hw_atl_b0_adj_clock_freq(struct aq_hw_s *self, s32 ppb)
+{
+	struct hw_fw_request_iface fwreq;
+	size_t size;
+
+	memset(&fwreq, 0, sizeof(fwreq));
+
+	fwreq.msg_id = HW_AQ_FW_REQUEST_PTP_ADJ_FREQ;
+	hw_atl_b0_adj_params_get(AQ_HW_MAC_COUNTER_HZ, ppb,
+				 &fwreq.ptp_adj_freq.ns_mac,
+				 &fwreq.ptp_adj_freq.fns_mac);
+	hw_atl_b0_adj_params_get(AQ_HW_PHY_COUNTER_HZ, ppb,
+				 &fwreq.ptp_adj_freq.ns_phy,
+				 &fwreq.ptp_adj_freq.fns_phy);
+	hw_atl_b0_mac_adj_param_calc(&fwreq.ptp_adj_freq,
+				     AQ_HW_PHY_COUNTER_HZ,
+				     AQ_HW_MAC_COUNTER_HZ);
+
+	size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_adj_freq);
+	return self->aq_fw_ops->send_fw_request(self, &fwreq, size);
+}
+
+static int hw_atl_b0_gpio_pulse(struct aq_hw_s *self, u32 index,
+				u64 start, u32 period)
+{
+	struct hw_fw_request_iface fwreq;
+	size_t size;
+
+	memset(&fwreq, 0, sizeof(fwreq));
+
+	fwreq.msg_id = HW_AQ_FW_REQUEST_PTP_GPIO_CTRL;
+	fwreq.ptp_gpio_ctrl.index = index;
+	fwreq.ptp_gpio_ctrl.period = period;
+	/* Apply time offset */
+	fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset;
+
+	size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl);
+	return self->aq_fw_ops->send_fw_request(self, &fwreq, size);
+}
+
+static int hw_atl_b0_extts_gpio_enable(struct aq_hw_s *self, u32 index,
+				       u32 enable)
+{
+	/* Enable/disable Sync1588 GPIO Timestamping */
+	aq_phy_write_reg(self, MDIO_MMD_PCS, 0xc611, enable ? 0x71 : 0);
+
+	return 0;
+}
+
+static int hw_atl_b0_get_sync_ts(struct aq_hw_s *self, u64 *ts)
+{
+	u64 sec_l;
+	u64 sec_h;
+	u64 nsec_l;
+	u64 nsec_h;
+
+	if (!ts)
+		return -1;
+
+	/* PTP external GPIO clock seconds count 15:0 */
+	sec_l = aq_phy_read_reg(self, MDIO_MMD_PCS, 0xc914);
+	/* PTP external GPIO clock seconds count 31:16 */
+	sec_h = aq_phy_read_reg(self, MDIO_MMD_PCS, 0xc915);
+	/* PTP external GPIO clock nanoseconds count 15:0 */
+	nsec_l = aq_phy_read_reg(self, MDIO_MMD_PCS, 0xc916);
+	/* PTP external GPIO clock nanoseconds count 31:16 */
+	nsec_h = aq_phy_read_reg(self, MDIO_MMD_PCS, 0xc917);
+
+	*ts = (nsec_h << 16) + nsec_l + ((sec_h << 16) + sec_l) * NSEC_PER_SEC;
+
+	return 0;
+}
+
+static u16 hw_atl_b0_rx_extract_ts(struct aq_hw_s *self, u8 *p,
+				   unsigned int len, u64 *timestamp)
+{
+	unsigned int offset = 14;
+	struct ethhdr *eth;
+	__be64 sec;
+	__be32 ns;
+	u8 *ptr;
+
+	if (len <= offset || !timestamp)
+		return 0;
+
+	/* The TIMESTAMP in the end of package has following format:
+	 * (big-endian)
+	 *   struct {
+	 *     uint64_t sec;
+	 *     uint32_t ns;
+	 *     uint16_t stream_id;
+	 *   };
+	 */
+	ptr = p + (len - offset);
+	memcpy(&sec, ptr, sizeof(sec));
+	ptr += sizeof(sec);
+	memcpy(&ns, ptr, sizeof(ns));
+
+	*timestamp = (be64_to_cpu(sec) & 0xffffffffffffllu) * NSEC_PER_SEC +
+		     be32_to_cpu(ns) + self->ptp_clk_offset;
+
+	eth = (struct ethhdr *)p;
+
+	return (eth->h_proto == htons(ETH_P_1588)) ? 12 : 14;
+}
+
+static int hw_atl_b0_extract_hwts(struct aq_hw_s *self, u8 *p, unsigned int len,
+				  u64 *timestamp)
+{
+	struct hw_atl_rxd_hwts_wb_s *hwts_wb = (struct hw_atl_rxd_hwts_wb_s *)p;
+	u64 tmp, sec, ns;
+
+	sec = 0;
+	tmp = (hwts_wb->sec_lw0 >> 2) & 0x3ff;
+	sec += tmp;
+	tmp = (u64)((hwts_wb->sec_lw1 >> 16) & 0xffff) << 10;
+	sec += tmp;
+	tmp = (u64)(hwts_wb->sec_hw & 0xfff) << 26;
+	sec += tmp;
+	tmp = (u64)((hwts_wb->sec_hw >> 22) & 0x3ff) << 38;
+	sec += tmp;
+	ns = sec * NSEC_PER_SEC + hwts_wb->ns;
+	if (timestamp)
+		*timestamp = ns + self->ptp_clk_offset;
+	return 0;
+}
+
 static int hw_atl_b0_hw_fl3l4_clear(struct aq_hw_s *self,
 				    struct aq_rx_filter_l3l4 *data)
 {
@@ -1038,7 +1337,8 @@ static int hw_atl_b0_hw_fl3l4_set(struct aq_hw_s *self,
 
 	hw_atl_b0_hw_fl3l4_clear(self, data);
 
-	if (data->cmd) {
+	if (data->cmd & (HW_ATL_RX_ENABLE_CMP_DEST_ADDR_L3 |
+			 HW_ATL_RX_ENABLE_CMP_SRC_ADDR_L3)) {
 		if (!data->is_ipv6) {
 			hw_atl_rpfl3l4_ipv4_dest_addr_set(self,
 							  location,
@@ -1055,8 +1355,13 @@ static int hw_atl_b0_hw_fl3l4_set(struct aq_hw_s *self,
 							 data->ip_src);
 		}
 	}
-	hw_atl_rpf_l4_dpd_set(self, data->p_dst, location);
-	hw_atl_rpf_l4_spd_set(self, data->p_src, location);
+
+	if (data->cmd & (HW_ATL_RX_ENABLE_CMP_DEST_PORT_L4 |
+			 HW_ATL_RX_ENABLE_CMP_SRC_PORT_L4)) {
+		hw_atl_rpf_l4_dpd_set(self, data->p_dst, location);
+		hw_atl_rpf_l4_spd_set(self, data->p_src, location);
+	}
+
 	hw_atl_rpfl3l4_cmd_set(self, location, data->cmd);
 
 	return aq_hw_err_from_flags(self);
@@ -1141,6 +1446,31 @@ static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
 	return aq_hw_err_from_flags(self);
 }
 
+static int hw_atl_b0_set_loopback(struct aq_hw_s *self, u32 mode, bool enable)
+{
+	switch (mode) {
+	case AQ_HW_LOOPBACK_DMA_SYS:
+		hw_atl_tpb_tx_dma_sys_lbk_en_set(self, enable);
+		hw_atl_rpb_dma_sys_lbk_set(self, enable);
+		break;
+	case AQ_HW_LOOPBACK_PKT_SYS:
+		hw_atl_tpo_tx_pkt_sys_lbk_en_set(self, enable);
+		hw_atl_rpf_tpo_to_rpf_sys_lbk_set(self, enable);
+		break;
+	case AQ_HW_LOOPBACK_DMA_NET:
+		hw_atl_rpf_vlan_prom_mode_en_set(self, enable);
+		hw_atl_rpfl2promiscuous_mode_en_set(self, enable);
+		hw_atl_tpb_tx_tx_clk_gate_en_set(self, !enable);
+		hw_atl_tpb_tx_dma_net_lbk_en_set(self, enable);
+		hw_atl_rpb_dma_net_lbk_set(self, enable);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 const struct aq_hw_ops hw_atl_ops_b0 = {
 	.hw_set_mac_address   = hw_atl_b0_hw_mac_addr_set,
 	.hw_init              = hw_atl_b0_hw_init,
@@ -1177,6 +1507,27 @@ const struct aq_hw_ops hw_atl_ops_b0 = {
 	.hw_get_regs                 = hw_atl_utils_hw_get_regs,
 	.hw_get_hw_stats             = hw_atl_utils_get_hw_stats,
 	.hw_get_fw_version           = hw_atl_utils_get_fw_version,
-	.hw_set_offload              = hw_atl_b0_hw_offload_set,
-	.hw_set_fc                   = hw_atl_b0_set_fc,
+
+	.hw_tx_tc_mode_get       = hw_atl_b0_tx_tc_mode_get,
+	.hw_rx_tc_mode_get       = hw_atl_b0_rx_tc_mode_get,
+
+	.hw_ring_hwts_rx_fill        = hw_atl_b0_hw_ring_hwts_rx_fill,
+	.hw_ring_hwts_rx_receive     = hw_atl_b0_hw_ring_hwts_rx_receive,
+
+	.hw_get_ptp_ts           = hw_atl_b0_get_ptp_ts,
+	.hw_adj_sys_clock        = hw_atl_b0_adj_sys_clock,
+	.hw_set_sys_clock        = hw_atl_b0_set_sys_clock,
+	.hw_ts_to_sys_clock      = hw_atl_b0_ts_to_sys_clock,
+	.hw_adj_clock_freq       = hw_atl_b0_adj_clock_freq,
+	.hw_gpio_pulse           = hw_atl_b0_gpio_pulse,
+	.hw_extts_gpio_enable    = hw_atl_b0_extts_gpio_enable,
+	.hw_get_sync_ts          = hw_atl_b0_get_sync_ts,
+	.rx_extract_ts           = hw_atl_b0_rx_extract_ts,
+	.extract_hwts            = hw_atl_b0_extract_hwts,
+	.hw_set_offload          = hw_atl_b0_hw_offload_set,
+	.hw_get_hw_stats         = hw_atl_utils_get_hw_stats,
+	.hw_get_fw_version       = hw_atl_utils_get_fw_version,
+	.hw_set_offload          = hw_atl_b0_hw_offload_set,
+	.hw_set_loopback         = hw_atl_b0_set_loopback,
+	.hw_set_fc               = hw_atl_b0_set_fc,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index 808d8cd4252a..7ab23a1751d3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_b0_internal.h: Definition of Atlantic B0 chip specific
@@ -64,8 +64,11 @@
 #define HW_ATL_B0_MPI_SPEED_MSK         0xFFFFU
 #define HW_ATL_B0_MPI_SPEED_SHIFT       16U
 
-#define HW_ATL_B0_TXBUF_MAX  160U
-#define HW_ATL_B0_RXBUF_MAX  320U
+#define HW_ATL_B0_TXBUF_MAX              160U
+#define HW_ATL_B0_PTP_TXBUF_SIZE           8U
+
+#define HW_ATL_B0_RXBUF_MAX              320U
+#define HW_ATL_B0_PTP_RXBUF_SIZE          16U
 
 #define HW_ATL_B0_RSS_REDIRECTION_MAX 64U
 #define HW_ATL_B0_RSS_REDIRECTION_BITS 3U
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 6f340695e6bd..d1f68fc16291 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_llh.c: Definitions of bitfield and register access functions for
@@ -563,6 +563,13 @@ void hw_atl_rpb_dma_sys_lbk_set(struct aq_hw_s *aq_hw, u32 dma_sys_lbk)
 			    HW_ATL_RPB_DMA_SYS_LBK_SHIFT, dma_sys_lbk);
 }
 
+void hw_atl_rpb_dma_net_lbk_set(struct aq_hw_s *aq_hw, u32 dma_net_lbk)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_DMA_NET_LBK_ADR,
+			    HW_ATL_RPB_DMA_NET_LBK_MSK,
+			    HW_ATL_RPB_DMA_NET_LBK_SHIFT, dma_net_lbk);
+}
+
 void hw_atl_rpb_rpf_rx_traf_class_mode_set(struct aq_hw_s *aq_hw,
 					   u32 rx_traf_class_mode)
 {
@@ -572,6 +579,13 @@ void hw_atl_rpb_rpf_rx_traf_class_mode_set(struct aq_hw_s *aq_hw,
 			    rx_traf_class_mode);
 }
 
+u32 hw_atl_rpb_rpf_rx_traf_class_mode_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg_bit(aq_hw, HW_ATL_RPB_RPF_RX_TC_MODE_ADR,
+			HW_ATL_RPB_RPF_RX_TC_MODE_MSK,
+			HW_ATL_RPB_RPF_RX_TC_MODE_SHIFT);
+}
+
 void hw_atl_rpb_rx_buff_en_set(struct aq_hw_s *aq_hw, u32 rx_buff_en)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RX_BUF_EN_ADR,
@@ -636,8 +650,8 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
 			    rx_pkt_buff_size_per_tc);
 }
 
-void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
-				      u32 buffer)
+void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw,
+				      u32 rx_xoff_en_per_tc, u32 buffer)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RXBXOFF_EN_ADR(buffer),
 			    HW_ATL_RPB_RXBXOFF_EN_MSK,
@@ -1290,6 +1304,13 @@ void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en)
 			    HW_ATL_TPB_TX_BUF_EN_SHIFT, tx_buff_en);
 }
 
+u32 hw_atl_rpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
+			HW_ATL_TPB_TX_TC_MODE_MSK,
+			HW_ATL_TPB_TX_TC_MODE_SHIFT);
+}
+
 void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
 				   u32 tx_traf_class_mode)
 {
@@ -1327,7 +1348,26 @@ void hw_atl_tpb_tx_dma_sys_lbk_en_set(struct aq_hw_s *aq_hw, u32 tx_dma_sys_lbk_
 			    tx_dma_sys_lbk_en);
 }
 
+void hw_atl_tpb_tx_dma_net_lbk_en_set(struct aq_hw_s *aq_hw,
+				      u32 tx_dma_net_lbk_en)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_DMA_NET_LBK_ADR,
+			    HW_ATL_TPB_DMA_NET_LBK_MSK,
+			    HW_ATL_TPB_DMA_NET_LBK_SHIFT,
+			    tx_dma_net_lbk_en);
+}
+
+void hw_atl_tpb_tx_tx_clk_gate_en_set(struct aq_hw_s *aq_hw,
+				      u32 tx_clk_gate_en)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_CLK_GATE_EN_ADR,
+			    HW_ATL_TPB_TX_CLK_GATE_EN_MSK,
+			    HW_ATL_TPB_TX_CLK_GATE_EN_SHIFT,
+			    tx_clk_gate_en);
+}
+
 void hw_atl_tpb_tx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
+
 					    u32 tx_pkt_buff_size_per_tc, u32 buffer)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TXBBUF_SIZE_ADR(buffer),
@@ -1526,6 +1566,20 @@ void hw_atl_reg_glb_cpu_scratch_scp_set(struct aq_hw_s *aq_hw,
 			glb_cpu_scratch_scp);
 }
 
+void hw_atl_pcs_ptp_clock_read_enable(struct aq_hw_s *aq_hw,
+				      u32 ptp_clock_read_enable)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_ADR,
+			    HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_MSK,
+			    HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_SHIFT,
+			    ptp_clock_read_enable);
+}
+
+u32 hw_atl_pcs_ptp_clock_get(struct aq_hw_s *aq_hw, u32 index)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL_PCS_PTP_TS_VAL_ADDR(index));
+}
+
 void hw_atl_mcp_up_force_intr_set(struct aq_hw_s *aq_hw, u32 up_force_intr)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_MCP_UP_FORCE_INTERRUPT_ADR,
@@ -1616,6 +1670,11 @@ u32 hw_atl_sem_ram_get(struct aq_hw_s *self)
 	return hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
 }
 
+u32 hw_atl_sem_mdio_get(struct aq_hw_s *self)
+{
+	return hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_MDIO);
+}
+
 u32 hw_atl_scrpad_get(struct aq_hw_s *aq_hw, u32 scratch_scp)
 {
 	return aq_hw_read_reg(aq_hw,
@@ -1631,3 +1690,60 @@ u32 hw_atl_scrpad25_get(struct aq_hw_s *self)
 {
 	return hw_atl_scrpad_get(self, 0x18);
 }
+
+void hw_atl_glb_mdio_iface1_set(struct aq_hw_s *aq_hw, u32 value)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(1), value);
+}
+
+u32 hw_atl_glb_mdio_iface1_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(1));
+}
+
+void hw_atl_glb_mdio_iface2_set(struct aq_hw_s *aq_hw, u32 value)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(2), value);
+}
+
+u32 hw_atl_glb_mdio_iface2_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(2));
+}
+
+void hw_atl_glb_mdio_iface3_set(struct aq_hw_s *aq_hw, u32 value)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(3), value);
+}
+
+u32 hw_atl_glb_mdio_iface3_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(3));
+}
+
+void hw_atl_glb_mdio_iface4_set(struct aq_hw_s *aq_hw, u32 value)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(4), value);
+}
+
+u32 hw_atl_glb_mdio_iface4_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(4));
+}
+
+void hw_atl_glb_mdio_iface5_set(struct aq_hw_s *aq_hw, u32 value)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(5), value);
+}
+
+u32 hw_atl_glb_mdio_iface5_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MDIO_IFACE_N_ADR(5));
+}
+
+u32 hw_atl_mdio_busy_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg_bit(aq_hw, HW_ATL_MDIO_BUSY_ADR,
+				  HW_ATL_MDIO_BUSY_MSK,
+				  HW_ATL_MDIO_BUSY_SHIFT);
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index c3ee278c3747..62992b23c0e8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_llh.h: Declarations of bitfield and register access functions for
@@ -288,10 +288,16 @@ void hw_atl_reg_glb_cpu_scratch_scp_set(struct aq_hw_s *aq_hw,
 /* set dma system loopback */
 void hw_atl_rpb_dma_sys_lbk_set(struct aq_hw_s *aq_hw, u32 dma_sys_lbk);
 
+/* set dma network loopback */
+void hw_atl_rpb_dma_net_lbk_set(struct aq_hw_s *aq_hw, u32 dma_net_lbk);
+
 /* set rx traffic class mode */
 void hw_atl_rpb_rpf_rx_traf_class_mode_set(struct aq_hw_s *aq_hw,
 					   u32 rx_traf_class_mode);
 
+/* get rx traffic class mode */
+u32 hw_atl_rpb_rpf_rx_traf_class_mode_get(struct aq_hw_s *aq_hw);
+
 /* set rx buffer enable */
 void hw_atl_rpb_rx_buff_en_set(struct aq_hw_s *aq_hw, u32 rx_buff_en);
 
@@ -306,7 +312,8 @@ void hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(struct aq_hw_s *aq_hw,
 					 u32 buffer);
 
 /* set rx flow control mode */
-void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw, u32 rx_flow_ctl_mode);
+void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw,
+				     u32 rx_flow_ctl_mode);
 
 /* set rx packet buffer size (per tc) */
 void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
@@ -320,7 +327,8 @@ void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw);
 u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw);
 
 /* set rx xoff enable (per tc) */
-void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
+void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw,
+				      u32 rx_xoff_en_per_tc,
 				      u32 buffer);
 
 /* rpf */
@@ -605,6 +613,9 @@ void hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(struct aq_hw_s *aq_hw,
 void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
 				   u32 tx_traf_class_mode);
 
+/* get TX Traffic Class Mode */
+u32 hw_atl_rpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw);
+
 /* set tx buffer enable */
 void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en);
 
@@ -621,9 +632,18 @@ void hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(struct aq_hw_s *aq_hw,
 /* set tx dma system loopback enable */
 void hw_atl_tpb_tx_dma_sys_lbk_en_set(struct aq_hw_s *aq_hw, u32 tx_dma_sys_lbk_en);
 
+/* set tx dma network loopback enable */
+void hw_atl_tpb_tx_dma_net_lbk_en_set(struct aq_hw_s *aq_hw,
+				      u32 tx_dma_net_lbk_en);
+
+/* set tx clock gating enable */
+void hw_atl_tpb_tx_tx_clk_gate_en_set(struct aq_hw_s *aq_hw,
+				      u32 tx_clk_gate_en);
+
 /* set tx packet buffer size (per tc) */
 void hw_atl_tpb_tx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
-					    u32 tx_pkt_buff_size_per_tc, u32 buffer);
+					    u32 tx_pkt_buff_size_per_tc,
+					    u32 buffer);
 
 /* set tx path pad insert enable */
 void hw_atl_tpb_tx_path_scp_ins_en_set(struct aq_hw_s *aq_hw, u32 tx_path_scp_ins_en);
@@ -715,6 +735,12 @@ void hw_atl_msm_reg_wr_strobe_set(struct aq_hw_s *aq_hw, u32 reg_wr_strobe);
 /* set pci register reset disable */
 void hw_atl_pci_pci_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 pci_reg_res_dis);
 
+/* pcs */
+void hw_atl_pcs_ptp_clock_read_enable(struct aq_hw_s *aq_hw,
+				      u32 ptp_clock_read_enable);
+
+u32 hw_atl_pcs_ptp_clock_get(struct aq_hw_s *aq_hw, u32 index);
+
 /* set uP Force Interrupt */
 void hw_atl_mcp_up_force_intr_set(struct aq_hw_s *aq_hw, u32 up_force_intr);
 
@@ -752,9 +778,44 @@ void hw_atl_rpfl3l4_ipv6_src_addr_set(struct aq_hw_s *aq_hw, u8 location,
 void hw_atl_rpfl3l4_ipv6_dest_addr_set(struct aq_hw_s *aq_hw, u8 location,
 				       u32 *ipv6_dest);
 
+/* set Global MDIO Interface 1 */
+void hw_atl_glb_mdio_iface1_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 1 */
+u32 hw_atl_glb_mdio_iface1_get(struct aq_hw_s *hw);
+
+/* set Global MDIO Interface 2 */
+void hw_atl_glb_mdio_iface2_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 2 */
+u32 hw_atl_glb_mdio_iface2_get(struct aq_hw_s *hw);
+
+/* set Global MDIO Interface 3 */
+void hw_atl_glb_mdio_iface3_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 3 */
+u32 hw_atl_glb_mdio_iface3_get(struct aq_hw_s *hw);
+
+/* set Global MDIO Interface 4 */
+void hw_atl_glb_mdio_iface4_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 4 */
+u32 hw_atl_glb_mdio_iface4_get(struct aq_hw_s *hw);
+
+/* set Global MDIO Interface 5 */
+void hw_atl_glb_mdio_iface5_set(struct aq_hw_s *hw, u32 value);
+
+/* get Global MDIO Interface 5 */
+u32 hw_atl_glb_mdio_iface5_get(struct aq_hw_s *hw);
+
+u32 hw_atl_mdio_busy_get(struct aq_hw_s *aq_hw);
+
 /* get global microprocessor ram semaphore */
 u32 hw_atl_sem_ram_get(struct aq_hw_s *self);
 
+/* get global microprocessor mdio semaphore */
+u32 hw_atl_sem_mdio_get(struct aq_hw_s *self);
+
 /* get global microprocessor scratch pad register */
 u32 hw_atl_scrpad_get(struct aq_hw_s *aq_hw, u32 scratch_scp);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index 35887ad89025..18de2f7b8959 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_llh_internal.h: Preprocessor definitions
@@ -554,6 +554,24 @@
 /* default value of bitfield dma_sys_loopback */
 #define HW_ATL_RPB_DMA_SYS_LBK_DEFAULT 0x0
 
+/* rx dma_net_loopback bitfield definitions
+ * preprocessor definitions for the bitfield "dma_net_loopback".
+ * port="pif_rpb_dma_net_lbk_i"
+ */
+
+/* register address for bitfield dma_net_loopback */
+#define HW_ATL_RPB_DMA_NET_LBK_ADR 0x00005000
+/* bitmask for bitfield dma_net_loopback */
+#define HW_ATL_RPB_DMA_NET_LBK_MSK 0x00000010
+/* inverted bitmask for bitfield dma_net_loopback */
+#define HW_ATL_RPB_DMA_NET_LBK_MSKN 0xffffffef
+/* lower bit position of bitfield dma_net_loopback */
+#define HW_ATL_RPB_DMA_NET_LBK_SHIFT 4
+/* width of bitfield dma_net_loopback */
+#define HW_ATL_RPB_DMA_NET_LBK_WIDTH 1
+/* default value of bitfield dma_net_loopback */
+#define HW_ATL_RPB_DMA_NET_LBK_DEFAULT 0x0
+
 /* rx rx_tc_mode bitfield definitions
  * preprocessor definitions for the bitfield "rx_tc_mode".
  * port="pif_rpb_rx_tc_mode_i,pif_rpf_rx_tc_mode_i"
@@ -1308,6 +1326,52 @@
 /* default value of bitfield et_val{f}[f:0] */
 #define HW_ATL_RPF_ET_VALF_DEFAULT 0x0
 
+/* RX l3_l4_en{F} Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l3_l4_en{F}".
+ * Parameter: filter {F} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l3_l4_en_i[0]"
+ */
+
+#define HW_ATL_RPF_L3_REG_CTRL_ADR(filter) (0x00005380 + (filter) * 0x4)
+
+/* RX rpf_l3_sa{D}[1F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l3_sa{D}[1F:0]".
+ * Parameter: location {D} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l3_sa0_i[31:0]"
+ */
+
+/* Register address for bitfield pif_rpf_l3_sa0_i[31:0] */
+#define HW_ATL_RPF_L3_SRCA_ADR(filter) (0x000053B0 + (filter) * 0x4)
+/* Bitmask for bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_MSK 0xFFFFFFFFu
+/* Inverted bitmask for bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_MSKN 0xFFFFFFFFu
+/* Lower bit position of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_SHIFT 0
+/* Width of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_WIDTH 32
+/* Default value of bitfield l3_sa0[1F:0] */
+#define HW_ATL_RPF_L3_SRCA_DEFAULT 0x0
+
+/* RX rpf_l3_da{D}[1F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "l3_da{D}[1F:0]".
+ * Parameter: location {D} | stride size 0x4 | range [0, 7]
+ * PORT="pif_rpf_l3_da0_i[31:0]"
+ */
+
+ /* Register address for bitfield pif_rpf_l3_da0_i[31:0] */
+#define HW_ATL_RPF_L3_DSTA_ADR(filter) (0x000053B0 + (filter) * 0x4)
+/* Bitmask for bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_MSK 0xFFFFFFFFu
+/* Inverted bitmask for bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_MSKN 0xFFFFFFFFu
+/* Lower bit position of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_SHIFT 0
+/* Width of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_WIDTH 32
+/* Default value of bitfield l3_da0[1F:0] */
+#define HW_ATL_RPF_L3_DSTA_DEFAULT 0x0
+
 /* RX l4_sp{D}[F:0] Bitfield Definitions
  * Preprocessor definitions for the bitfield "l4_sp{D}[F:0]".
  * Parameter: srcport {D} | stride size 0x4 | range [0, 7]
@@ -2061,6 +2125,24 @@
 /* default value of bitfield dma_sys_loopback */
 #define HW_ATL_TPB_DMA_SYS_LBK_DEFAULT 0x0
 
+/* tx dma_net_loopback bitfield definitions
+ * preprocessor definitions for the bitfield "dma_net_loopback".
+ * port="pif_tpb_dma_net_lbk_i"
+ */
+
+/* register address for bitfield dma_net_loopback */
+#define HW_ATL_TPB_DMA_NET_LBK_ADR 0x00007000
+/* bitmask for bitfield dma_net_loopback */
+#define HW_ATL_TPB_DMA_NET_LBK_MSK 0x00000010
+/* inverted bitmask for bitfield dma_net_loopback */
+#define HW_ATL_TPB_DMA_NET_LBK_MSKN 0xffffffef
+/* lower bit position of bitfield dma_net_loopback */
+#define HW_ATL_TPB_DMA_NET_LBK_SHIFT 4
+/* width of bitfield dma_net_loopback */
+#define HW_ATL_TPB_DMA_NET_LBK_WIDTH 1
+/* default value of bitfield dma_net_loopback */
+#define HW_ATL_TPB_DMA_NET_LBK_DEFAULT 0x0
+
 /* tx tx{b}_buf_size[7:0] bitfield definitions
  * preprocessor definitions for the bitfield "tx{b}_buf_size[7:0]".
  * parameter: buffer {b} | stride size 0x10 | range [0, 7]
@@ -2098,6 +2180,24 @@
 /* default value of bitfield tx_scp_ins_en */
 #define HW_ATL_TPB_TX_SCP_INS_EN_DEFAULT 0x0
 
+/* tx tx_clk_gate_en bitfield definitions
+ * preprocessor definitions for the bitfield "tx_clk_gate_en".
+ * port="pif_tpb_clk_gate_en_i"
+ */
+
+/* register address for bitfield tx_clk_gate_en */
+#define HW_ATL_TPB_TX_CLK_GATE_EN_ADR 0x00007900
+/* bitmask for bitfield tx_clk_gate_en */
+#define HW_ATL_TPB_TX_CLK_GATE_EN_MSK 0x00000010
+/* inverted bitmask for bitfield tx_clk_gate_en */
+#define HW_ATL_TPB_TX_CLK_GATE_EN_MSKN 0xffffffef
+/* lower bit position of bitfield tx_clk_gate_en */
+#define HW_ATL_TPB_TX_CLK_GATE_EN_SHIFT 4
+/* width of bitfield tx_clk_gate_en */
+#define HW_ATL_TPB_TX_CLK_GATE_EN_WIDTH 1
+/* default value of bitfield tx_clk_gate_en */
+#define HW_ATL_TPB_TX_CLK_GATE_EN_DEFAULT 0x1
+
 /* tx ipv4_chk_en bitfield definitions
  * preprocessor definitions for the bitfield "ipv4_chk_en".
  * port="pif_tpo_ipv4_chk_en_i"
@@ -2440,6 +2540,22 @@
 /* default value of bitfield register write strobe */
 #define HW_ATL_MSM_REG_WR_STROBE_DEFAULT 0x0
 
+/* register address for bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_ADR 0x00004628
+/* bitmask for bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_MSK 0x00000010
+/* inverted bitmask for bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_MSKN 0xFFFFFFEF
+/* lower bit position of bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_SHIFT 4
+/* width of bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_WIDTH 1
+/* default value of bitfield PTP Digital Clock Read Enable */
+#define HW_ATL_PCS_PTP_CLOCK_READ_ENABLE_DEFAULT 0x0
+
+/* register address for ptp counter reading */
+#define HW_ATL_PCS_PTP_TS_VAL_ADDR(index) (0x00004900 + (index) * 0x4)
+
 /* mif soft reset bitfield definitions
  * preprocessor definitions for the bitfield "soft reset".
  * port="pif_glb_res_i"
@@ -2532,50 +2648,121 @@
 /* default value of bitfield uP Force Interrupt */
 #define HW_ATL_MCP_UP_FORCE_INTERRUPT_DEFAULT 0x0
 
-#define HW_ATL_RX_CTRL_ADDR_BEGIN_FL3L4   0x00005380
-#define HW_ATL_RX_SRCA_ADDR_BEGIN_FL3L4   0x000053B0
-#define HW_ATL_RX_DESTA_ADDR_BEGIN_FL3L4  0x000053D0
-
-#define HW_ATL_RPF_L3_REG_CTRL_ADR(location) (0x00005380 + (location) * 0x4)
-
-/* RX rpf_l3_sa{D}[1F:0] Bitfield Definitions
- * Preprocessor definitions for the bitfield "l3_sa{D}[1F:0]".
- * Parameter: location {D} | stride size 0x4 | range [0, 7]
- * PORT="pif_rpf_l3_sa0_i[31:0]"
- */
-
-/* Register address for bitfield pif_rpf_l3_sa0_i[31:0] */
-#define HW_ATL_RPF_L3_SRCA_ADR(location) (0x000053B0 + (location) * 0x4)
-/* Bitmask for bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_MSK 0xFFFFFFFFu
-/* Inverted bitmask for bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_MSKN 0xFFFFFFFFu
-/* Lower bit position of bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_SHIFT 0
-/* Width of bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_WIDTH 32
-/* Default value of bitfield l3_sa0[1F:0] */
-#define HW_ATL_RPF_L3_SRCA_DEFAULT 0x0
-
-/* RX rpf_l3_da{D}[1F:0] Bitfield Definitions
- * Preprocessor definitions for the bitfield "l3_da{D}[1F:0]".
- * Parameter: location {D} | stride size 0x4 | range [0, 7]
- * PORT="pif_rpf_l3_da0_i[31:0]"
- */
-
- /* Register address for bitfield pif_rpf_l3_da0_i[31:0] */
-#define HW_ATL_RPF_L3_DSTA_ADR(location) (0x000053B0 + (location) * 0x4)
-/* Bitmask for bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_MSK 0xFFFFFFFFu
-/* Inverted bitmask for bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_MSKN 0xFFFFFFFFu
-/* Lower bit position of bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_SHIFT 0
-/* Width of bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_WIDTH 32
-/* Default value of bitfield l3_da0[1F:0] */
-#define HW_ATL_RPF_L3_DSTA_DEFAULT 0x0
-
+/* Preprocessor definitions for Global MDIO Interfaces
+ * Address: 0x00000280 + 0x4 * Number of interface
+ */
+#define HW_ATL_GLB_MDIO_IFACE_ADDR_BEGIN   0x00000280u
+
+#define HW_ATL_GLB_MDIO_IFACE_N_ADR(number) \
+	(HW_ATL_GLB_MDIO_IFACE_ADDR_BEGIN + (((number) - 1) * 0x4))
+
+/* MIF MDIO Busy Bitfield Definitions
+ * Preprocessor definitions for the bitfield "MDIO Busy".
+ * PORT="mdio_pif_busy_o"
+ */
+
+/* Register address for bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_ADR 0x00000284
+/* Bitmask for bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_MSK 0x80000000
+/* Inverted bitmask for bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_MSKN 0x7FFFFFFF
+/* Lower bit position of bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_SHIFT 31
+/* Width of bitfield MDIO Busy */
+#define HW_ATL_MDIO_BUSY_WIDTH 1
+
+/* MIF MDIO Execute Operation Bitfield Definitions
+ * Preprocessor definitions for the bitfield "MDIO Execute Operation".
+ * PORT="pif_mdio_op_start_i"
+ */
+
+/* Register address for bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_ADR 0x00000284
+/* Bitmask for bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_MSK 0x00008000
+/* Inverted bitmask for bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_MSKN 0xFFFF7FFF
+/* Lower bit position of bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_SHIFT 15
+/* Width of bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_WIDTH 1
+/* Default value of bitfield MDIO Execute Operation */
+#define HW_ATL_MDIO_EXECUTE_OPERATION_DEFAULT 0x0
+
+/* MIF Op Mode [1:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "Op Mode [1:0]".
+ * PORT="pif_mdio_mode_i[1:0]"
+ */
+
+/* Register address for bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_ADR 0x00000284
+/* Bitmask for bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_MSK 0x00003000
+/* Inverted bitmask for bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_MSKN 0xFFFFCFFF
+/* Lower bit position of bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_SHIFT 12
+/* Width of bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_WIDTH 2
+/* Default value of bitfield Op Mode [1:0] */
+#define HW_ATL_MDIO_OP_MODE_DEFAULT 0x0
+
+/* MIF PHY address Bitfield Definitions
+ * Preprocessor definitions for the bitfield "PHY address".
+ * PORT="pif_mdio_phy_addr_i[9:0]"
+ */
+
+/* Register address for bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_ADR 0x00000284
+/* Bitmask for bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_MSK 0x000003FF
+/* Inverted bitmask for bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_MSKN 0xFFFFFC00
+/* Lower bit position of bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_SHIFT 0
+/* Width of bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_WIDTH 10
+/* Default value of bitfield PHY address */
+#define HW_ATL_MDIO_PHY_ADDRESS_DEFAULT 0x0
+
+/* MIF MDIO WriteData [F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "MDIO WriteData [F:0]".
+ * PORT="pif_mdio_wdata_i[15:0]"
+ */
+
+/* Register address for bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_ADR 0x00000288
+/* Bitmask for bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_MSK 0x0000FFFF
+/* Inverted bitmask for bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_MSKN 0xFFFF0000
+/* Lower bit position of bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_SHIFT 0
+/* Width of bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_WIDTH 16
+/* Default value of bitfield MDIO WriteData [F:0] */
+#define HW_ATL_MDIO_WRITE_DATA_DEFAULT 0x0
+
+/* MIF MDIO Address [F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "MDIO Address [F:0]".
+ * PORT="pif_mdio_addr_i[15:0]"
+ */
+
+/* Register address for bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_ADR 0x0000028C
+/* Bitmask for bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_MSK 0x0000FFFF
+/* Inverted bitmask for bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_MSKN 0xFFFF0000
+/* Lower bit position of bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_SHIFT 0
+/* Width of bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_WIDTH 16
+/* Default value of bitfield MDIO Address [F:0] */
+#define HW_ATL_MDIO_ADDRESS_DEFAULT 0x0
+
+#define HW_ATL_FW_SM_MDIO       0x0U
 #define HW_ATL_FW_SM_RAM        0x2U
 
 #endif /* HW_ATL_LLH_INTERNAL_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 52646855495e..8910b62e67ed 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_utils.c: Definition of common functions for Atlantic hardware
@@ -47,6 +47,11 @@
 
 #define FORCE_FLASHLESS 0
 
+enum mcp_area {
+	MCP_AREA_CONFIG = 0x80000000,
+	MCP_AREA_SETTINGS = 0x20000000,
+};
+
 static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
 
 static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
@@ -87,6 +92,7 @@ int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
 	}
 	self->aq_fw_ops = *fw_ops;
 	err = self->aq_fw_ops->init(self);
+
 	return err;
 }
 
@@ -237,9 +243,9 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self)
 
 int hw_atl_utils_soft_reset(struct aq_hw_s *self)
 {
-	int k;
 	u32 boot_exit_code = 0;
 	u32 val;
+	int k;
 
 	for (k = 0; k < 1000; ++k) {
 		u32 flb_status = aq_hw_read_reg(self,
@@ -327,11 +333,75 @@ err_exit:
 	return err;
 }
 
-static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p,
-					 u32 cnt)
+static int hw_atl_utils_write_b1_mbox(struct aq_hw_s *self, u32 addr,
+				      u32 *p, u32 cnt, enum mcp_area area)
 {
+	u32 data_offset = 0;
+	u32 offset = addr;
+	int err = 0;
 	u32 val;
+
+	switch (area) {
+	case MCP_AREA_CONFIG:
+		offset -= self->rpc_addr;
+		break;
+
+	case MCP_AREA_SETTINGS:
+		offset -= self->settings_addr;
+		break;
+	}
+
+	offset = offset / sizeof(u32);
+
+	for (; data_offset < cnt; ++data_offset, ++offset) {
+		aq_hw_write_reg(self, 0x328, p[data_offset]);
+		aq_hw_write_reg(self, 0x32C,
+				(area | (0xFFFF & (offset * 4))));
+		hw_atl_mcp_up_force_intr_set(self, 1);
+		/* 1000 times by 10us = 10ms */
+		err = readx_poll_timeout_atomic(hw_atl_scrpad12_get,
+						self, val,
+						(val & 0xF0000000) !=
+						area,
+						10U, 10000U);
+
+		if (err < 0)
+			break;
+	}
+
+	return err;
+}
+
+static int hw_atl_utils_write_b0_mbox(struct aq_hw_s *self, u32 addr,
+				      u32 *p, u32 cnt)
+{
+	u32 offset = 0;
 	int err = 0;
+	u32 val;
+
+	aq_hw_write_reg(self, 0x208, addr);
+
+	for (; offset < cnt; ++offset) {
+		aq_hw_write_reg(self, 0x20C, p[offset]);
+		aq_hw_write_reg(self, 0x200, 0xC000);
+
+		err = readx_poll_timeout_atomic(hw_atl_utils_mif_cmd_get,
+						self, val,
+						(val & 0x100) == 0U,
+						10U, 10000U);
+
+		if (err < 0)
+			break;
+	}
+
+	return err;
+}
+
+static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 addr, u32 *p,
+					 u32 cnt, enum mcp_area area)
+{
+	int err = 0;
+	u32 val;
 
 	err = readx_poll_timeout_atomic(hw_atl_sem_ram_get, self,
 					val, val == 1U,
@@ -339,54 +409,47 @@ static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p,
 	if (err < 0)
 		goto err_exit;
 
-	if (IS_CHIP_FEATURE(REVISION_B1)) {
-		u32 offset = 0;
-
-		for (; offset < cnt; ++offset) {
-			aq_hw_write_reg(self, 0x328, p[offset]);
-			aq_hw_write_reg(self, 0x32C,
-					(0x80000000 | (0xFFFF & (offset * 4))));
-			hw_atl_mcp_up_force_intr_set(self, 1);
-			/* 1000 times by 10us = 10ms */
-			err = readx_poll_timeout_atomic(hw_atl_scrpad12_get,
-							self, val,
-							(val & 0xF0000000) !=
-							0x80000000,
-							10U, 10000U);
-		}
-	} else {
-		u32 offset = 0;
-
-		aq_hw_write_reg(self, 0x208, a);
+	if (IS_CHIP_FEATURE(REVISION_B1))
+		err = hw_atl_utils_write_b1_mbox(self, addr, p, cnt, area);
+	else
+		err = hw_atl_utils_write_b0_mbox(self, addr, p, cnt);
 
-		for (; offset < cnt; ++offset) {
-			aq_hw_write_reg(self, 0x20C, p[offset]);
-			aq_hw_write_reg(self, 0x200, 0xC000);
+	hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
 
-			err = readx_poll_timeout_atomic(hw_atl_utils_mif_cmd_get,
-							self, val,
-							(val & 0x100) == 0,
-							1000U, 10000U);
-		}
-	}
+	if (err < 0)
+		goto err_exit;
 
-	hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
+	err = aq_hw_err_from_flags(self);
 
 err_exit:
 	return err;
 }
 
+int hw_atl_write_fwcfg_dwords(struct aq_hw_s *self, u32 *p, u32 cnt)
+{
+	return hw_atl_utils_fw_upload_dwords(self, self->rpc_addr, p,
+					     cnt, MCP_AREA_CONFIG);
+}
+
+int hw_atl_write_fwsettings_dwords(struct aq_hw_s *self, u32 offset, u32 *p,
+				   u32 cnt)
+{
+	return hw_atl_utils_fw_upload_dwords(self, self->settings_addr + offset,
+					     p, cnt, MCP_AREA_SETTINGS);
+}
+
 static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual)
 {
-	int err = 0;
 	const u32 dw_major_mask = 0xff000000U;
 	const u32 dw_minor_mask = 0x00ffffffU;
+	int err = 0;
 
 	err = (dw_major_mask & (ver_expected ^ ver_actual)) ? -EOPNOTSUPP : 0;
 	if (err < 0)
 		goto err_exit;
 	err = ((dw_minor_mask & ver_expected) > (dw_minor_mask & ver_actual)) ?
 		-EOPNOTSUPP : 0;
+
 err_exit:
 	return err;
 }
@@ -431,17 +494,16 @@ struct aq_hw_atl_utils_fw_rpc_tid_s {
 
 int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size)
 {
-	int err = 0;
 	struct aq_hw_atl_utils_fw_rpc_tid_s sw;
+	int err = 0;
 
 	if (!IS_CHIP_FEATURE(MIPS)) {
 		err = -1;
 		goto err_exit;
 	}
-	err = hw_atl_utils_fw_upload_dwords(self, self->rpc_addr,
-					    (u32 *)(void *)&self->rpc,
-					    (rpc_size + sizeof(u32) -
-					     sizeof(u8)) / sizeof(u32));
+	err = hw_atl_write_fwcfg_dwords(self, (u32 *)(void *)&self->rpc,
+					(rpc_size + sizeof(u32) -
+					 sizeof(u8)) / sizeof(u32));
 	if (err < 0)
 		goto err_exit;
 
@@ -456,9 +518,9 @@ err_exit:
 int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
 			     struct hw_atl_utils_fw_rpc **rpc)
 {
-	int err = 0;
 	struct aq_hw_atl_utils_fw_rpc_tid_s sw;
 	struct aq_hw_atl_utils_fw_rpc_tid_s fw;
+	int err = 0;
 
 	do {
 		sw.val = aq_hw_read_reg(self, HW_ATL_RPC_CONTROL_ADR);
@@ -562,10 +624,10 @@ static int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
 static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
 				      enum hal_atl_utils_fw_state_e state)
 {
-	int err = 0;
-	u32 transaction_id = 0;
-	struct hw_atl_utils_mbox_header mbox;
 	u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
+	struct hw_atl_utils_mbox_header mbox;
+	u32 transaction_id = 0;
+	int err = 0;
 
 	if (state == MPI_RESET) {
 		hw_atl_utils_mpi_read_mbox(self, &mbox);
@@ -593,20 +655,26 @@ static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
 	val |= state & HW_ATL_MPI_STATE_MSK;
 
 	aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val);
+
 err_exit:
 	return err;
 }
 
 int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
 {
-	u32 cp0x036C = hw_atl_utils_mpi_get_state(self);
-	u32 link_speed_mask = cp0x036C >> HW_ATL_MPI_SPEED_SHIFT;
 	struct aq_hw_link_status_s *link_status = &self->aq_link_status;
+	u32 mpi_state;
+	u32 speed;
 
-	if (!link_speed_mask) {
+	mpi_state = hw_atl_utils_mpi_get_state(self);
+	speed = mpi_state & (FW2X_RATE_100M | FW2X_RATE_1G |
+			     FW2X_RATE_2G5 | FW2X_RATE_5G |
+			     FW2X_RATE_10G);
+
+	if (!speed) {
 		link_status->mbps = 0U;
 	} else {
-		switch (link_speed_mask) {
+		switch (speed) {
 		case HAL_ATLANTIC_RATE_10G:
 			link_status->mbps = 10000U;
 			break;
@@ -639,14 +707,15 @@ int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
 int hw_atl_utils_get_mac_permanent(struct aq_hw_s *self,
 				   u8 *mac)
 {
+	u32 mac_addr[2];
+	u32 efuse_addr;
 	int err = 0;
 	u32 h = 0U;
 	u32 l = 0U;
-	u32 mac_addr[2];
 
 	if (!aq_hw_read_reg(self, HW_ATL_UCP_0X370_REG)) {
-		unsigned int rnd = 0;
 		unsigned int ucp_0x370 = 0;
+		unsigned int rnd = 0;
 
 		get_random_bytes(&rnd, sizeof(unsigned int));
 
@@ -654,11 +723,10 @@ int hw_atl_utils_get_mac_permanent(struct aq_hw_s *self,
 		aq_hw_write_reg(self, HW_ATL_UCP_0X370_REG, ucp_0x370);
 	}
 
-	err = hw_atl_utils_fw_downld_dwords(self,
-					    aq_hw_read_reg(self, 0x00000374U) +
-					    (40U * 4U),
-					    mac_addr,
-					    ARRAY_SIZE(mac_addr));
+	efuse_addr = aq_hw_read_reg(self, 0x00000374U);
+
+	err = hw_atl_utils_fw_downld_dwords(self, efuse_addr + (40U * 4U),
+					    mac_addr, ARRAY_SIZE(mac_addr));
 	if (err < 0) {
 		mac_addr[0] = 0U;
 		mac_addr[1] = 0U;
@@ -720,14 +788,15 @@ unsigned int hw_atl_utils_mbps_2_speed_index(unsigned int mbps)
 	default:
 		break;
 	}
+
 	return ret;
 }
 
 void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p)
 {
-	u32 chip_features = 0U;
 	u32 val = hw_atl_reg_glb_mif_id_get(self);
 	u32 mif_rev = val & 0xFFU;
+	u32 chip_features = 0U;
 
 	if ((0xFU & mif_rev) == 1U) {
 		chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 |
@@ -754,13 +823,14 @@ static int hw_atl_fw1x_deinit(struct aq_hw_s *self)
 {
 	hw_atl_utils_mpi_set_speed(self, 0);
 	hw_atl_utils_mpi_set_state(self, MPI_DEINIT);
+
 	return 0;
 }
 
 int hw_atl_utils_update_stats(struct aq_hw_s *self)
 {
-	struct hw_atl_utils_mbox mbox;
 	struct aq_stats_s *cs = &self->curr_stats;
+	struct hw_atl_utils_mbox mbox;
 
 	hw_atl_utils_mpi_read_stats(self, &mbox);
 
@@ -837,16 +907,19 @@ int hw_atl_utils_hw_get_regs(struct aq_hw_s *self,
 	for (i = 0; i < aq_hw_caps->mac_regs_count; i++)
 		regs_buff[i] = aq_hw_read_reg(self,
 					      hw_atl_utils_hw_mac_regs[i]);
+
 	return 0;
 }
 
 int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version)
 {
 	*fw_version = aq_hw_read_reg(self, 0x18U);
+
 	return 0;
 }
 
-static int aq_fw1x_set_wol(struct aq_hw_s *self, bool wol_enabled, u8 *mac)
+static int aq_fw1x_set_wake_magic(struct aq_hw_s *self, bool wol_enabled,
+				  u8 *mac)
 {
 	struct hw_atl_utils_fw_rpc *prpc = NULL;
 	unsigned int rpc_size = 0U;
@@ -859,22 +932,26 @@ static int aq_fw1x_set_wol(struct aq_hw_s *self, bool wol_enabled, u8 *mac)
 	memset(prpc, 0, sizeof(*prpc));
 
 	if (wol_enabled) {
-		rpc_size = sizeof(prpc->msg_id) + sizeof(prpc->msg_wol);
+		rpc_size = offsetof(struct hw_atl_utils_fw_rpc, msg_wol_add) +
+			   sizeof(prpc->msg_wol_add);
+
 
 		prpc->msg_id = HAL_ATLANTIC_UTILS_FW_MSG_WOL_ADD;
-		prpc->msg_wol.priority =
+		prpc->msg_wol_add.priority =
 				HAL_ATLANTIC_UTILS_FW_MSG_WOL_PRIOR;
-		prpc->msg_wol.pattern_id =
+		prpc->msg_wol_add.pattern_id =
 				HAL_ATLANTIC_UTILS_FW_MSG_WOL_PATTERN;
-		prpc->msg_wol.wol_packet_type =
+		prpc->msg_wol_add.packet_type =
 				HAL_ATLANTIC_UTILS_FW_MSG_WOL_MAG_PKT;
 
-		ether_addr_copy((u8 *)&prpc->msg_wol.wol_pattern, mac);
+		ether_addr_copy((u8 *)&prpc->msg_wol_add.magic_packet_pattern,
+				mac);
 	} else {
-		rpc_size = sizeof(prpc->msg_id) + sizeof(prpc->msg_del_id);
+		rpc_size = sizeof(prpc->msg_wol_remove) +
+			   offsetof(struct hw_atl_utils_fw_rpc, msg_wol_remove);
 
 		prpc->msg_id = HAL_ATLANTIC_UTILS_FW_MSG_WOL_DEL;
-		prpc->msg_wol.pattern_id =
+		prpc->msg_wol_add.pattern_id =
 				HAL_ATLANTIC_UTILS_FW_MSG_WOL_PATTERN;
 	}
 
@@ -891,8 +968,8 @@ static int aq_fw1x_set_power(struct aq_hw_s *self, unsigned int power_state,
 	unsigned int rpc_size = 0U;
 	int err = 0;
 
-	if (self->aq_nic_cfg->wol & AQ_NIC_WOL_ENABLED) {
-		err = aq_fw1x_set_wol(self, 1, mac);
+	if (self->aq_nic_cfg->wol & WAKE_MAGIC) {
+		err = aq_fw1x_set_wake_magic(self, 1, mac);
 
 		if (err < 0)
 			goto err_exit;
@@ -964,4 +1041,7 @@ const struct aq_fw_ops aq_fw_1x_ops = {
 	.set_eee_rate = NULL,
 	.get_eee_rate = NULL,
 	.set_flow_control = NULL,
+	.send_fw_request = NULL,
+	.enable_ptp = NULL,
+	.led_control = NULL,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index 692bed70e104..42f0c5c6ec2d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_utils.h: Declaration of common functions for Atlantic hardware
@@ -41,7 +41,15 @@ struct __packed hw_atl_rxd_wb_s {
 	u16 status;
 	u16 pkt_len;
 	u16 next_desc_ptr;
-	u16 vlan;
+	__le16 vlan;
+};
+
+/* Hardware rx HW TIMESTAMP writeback */
+struct __packed hw_atl_rxd_hwts_wb_s {
+	u32 sec_hw;
+	u32 ns;
+	u32 sec_lw0;
+	u32 sec_lw1;
 };
 
 struct __packed hw_atl_stats_s {
@@ -62,104 +70,41 @@ struct __packed hw_atl_stats_s {
 	u32 dpc;
 };
 
-union __packed ip_addr {
-	struct {
-		u8 addr[16];
-	} v6;
-	struct {
-		u8 padding[12];
-		u8 addr[4];
-	} v4;
-};
-
-struct __packed hw_atl_utils_fw_rpc {
-	u32 msg_id;
-
+struct __packed drv_msg_enable_wakeup {
 	union {
-		struct {
-			u32 pong;
-		} msg_ping;
+		u32 pattern_mask;
 
 		struct {
-			u8 mac_addr[6];
-			u32 ip_addr_cnt;
+			u32 reason_arp_v4_pkt : 1;
+			u32 reason_ipv4_ping_pkt : 1;
+			u32 reason_ipv6_ns_pkt : 1;
+			u32 reason_ipv6_ping_pkt : 1;
+			u32 reason_link_up : 1;
+			u32 reason_link_down : 1;
+			u32 reason_maximum : 1;
+		};
+	};
 
-			struct {
-				union ip_addr addr;
-				union ip_addr mask;
-			} ip[1];
-		} msg_arp;
+	union {
+		u32 offload_mask;
+	};
+};
 
-		struct {
-			u32 len;
-			u8 packet[1514U];
-		} msg_inject;
+struct __packed magic_packet_pattern_s {
+	u8 mac_addr[ETH_ALEN];
+};
 
-		struct {
-			u32 priority;
-			u32 wol_packet_type;
-			u32 pattern_id;
-			u32 next_wol_pattern_offset;
-
-			union {
-				struct {
-					u32 flags;
-					u8 ipv4_source_address[4];
-					u8 ipv4_dest_address[4];
-					u16 tcp_source_port_number;
-					u16 tcp_dest_port_number;
-				} ipv4_tcp_syn_parameters;
-
-				struct {
-					u32 flags;
-					u8 ipv6_source_address[16];
-					u8 ipv6_dest_address[16];
-					u16 tcp_source_port_number;
-					u16 tcp_dest_port_number;
-				} ipv6_tcp_syn_parameters;
-
-				struct {
-					u32 flags;
-				} eapol_request_id_message_parameters;
-
-				struct {
-					u32 flags;
-					u32 mask_offset;
-					u32 mask_size;
-					u32 pattern_offset;
-					u32 pattern_size;
-				} wol_bit_map_pattern;
-
-				struct {
-					u8 mac_addr[ETH_ALEN];
-				} wol_magic_packet_patter;
-			} wol_pattern;
-		} msg_wol;
+struct __packed drv_msg_wol_add {
+	u32 priority;
+	u32 packet_type;
+	u32 pattern_id;
+	u32 next_pattern_offset;
 
-		struct {
-			union {
-				u32 pattern_mask;
-
-				struct {
-					u32 reason_arp_v4_pkt : 1;
-					u32 reason_ipv4_ping_pkt : 1;
-					u32 reason_ipv6_ns_pkt : 1;
-					u32 reason_ipv6_ping_pkt : 1;
-					u32 reason_link_up : 1;
-					u32 reason_link_down : 1;
-					u32 reason_maximum : 1;
-				};
-			};
-
-			union {
-				u32 offload_mask;
-			};
-		} msg_enable_wakeup;
+	struct magic_packet_pattern_s magic_packet_pattern;
+};
 
-		struct {
-			u32 id;
-		} msg_del_id;
-	};
+struct __packed drv_msg_wol_remove {
+	u32 id;
 };
 
 struct __packed hw_atl_utils_mbox_header {
@@ -168,43 +113,89 @@ struct __packed hw_atl_utils_mbox_header {
 	u32 error;
 };
 
-struct __packed hw_aq_info {
+struct __packed hw_atl_ptp_offset {
+	u16 ingress_100;
+	u16 egress_100;
+	u16 ingress_1000;
+	u16 egress_1000;
+	u16 ingress_2500;
+	u16 egress_2500;
+	u16 ingress_5000;
+	u16 egress_5000;
+	u16 ingress_10000;
+	u16 egress_10000;
+};
+
+struct __packed hw_atl_cable_diag {
+	u8 fault;
+	u8 distance;
+	u8 far_distance;
+	u8 reserved;
+};
+
+enum gpio_pin_function {
+	GPIO_PIN_FUNCTION_NC,
+	GPIO_PIN_FUNCTION_VAUX_ENABLE,
+	GPIO_PIN_FUNCTION_EFUSE_BURN_ENABLE,
+	GPIO_PIN_FUNCTION_SFP_PLUS_DETECT,
+	GPIO_PIN_FUNCTION_TX_DISABLE,
+	GPIO_PIN_FUNCTION_RATE_SEL_0,
+	GPIO_PIN_FUNCTION_RATE_SEL_1,
+	GPIO_PIN_FUNCTION_TX_FAULT,
+	GPIO_PIN_FUNCTION_PTP0,
+	GPIO_PIN_FUNCTION_PTP1,
+	GPIO_PIN_FUNCTION_PTP2,
+	GPIO_PIN_FUNCTION_SIZE
+};
+
+struct __packed hw_atl_info {
 	u8 reserved[6];
 	u16 phy_fault_code;
 	u16 phy_temperature;
 	u8 cable_len;
 	u8 reserved1;
-	u32 cable_diag_data[4];
-	u8 reserved2[32];
+	struct hw_atl_cable_diag cable_diag_data[4];
+	struct hw_atl_ptp_offset ptp_offset;
+	u8 reserved2[12];
 	u32 caps_lo;
 	u32 caps_hi;
+	u32 reserved_datapath;
+	u32 reserved3[7];
+	u32 reserved_simpleresp[3];
+	u32 reserved_linkstat[7];
+	u32 reserved_wakes_count;
+	u32 reserved_eee_stat[12];
+	u32 tx_stuck_cnt;
+	u32 setting_address;
+	u32 setting_length;
+	u32 caps_ex;
+	enum gpio_pin_function gpio_pin[3];
+	u32 pcie_aer_dump[18];
+	u16 snr_margin[4];
 };
 
 struct __packed hw_atl_utils_mbox {
 	struct hw_atl_utils_mbox_header header;
 	struct hw_atl_stats_s stats;
-	struct hw_aq_info info;
+	struct hw_atl_info info;
 };
 
-/* fw2x */
-typedef u32	fw_offset_t;
-
 struct __packed offload_ip_info {
 	u8 v4_local_addr_count;
 	u8 v4_addr_count;
 	u8 v6_local_addr_count;
 	u8 v6_addr_count;
-	fw_offset_t v4_addr;
-	fw_offset_t v4_prefix;
-	fw_offset_t v6_addr;
-	fw_offset_t v6_prefix;
+	u32 v4_addr;
+	u32 v4_prefix;
+	u32 v6_addr;
+	u32 v6_prefix;
 };
 
 struct __packed offload_port_info {
 	u16 udp_port_count;
 	u16 tcp_port_count;
-	fw_offset_t udp_port;
-	fw_offset_t tcp_port;
+	u32 udp_port;
+	u32 tcp_port;
 };
 
 struct __packed offload_ka_info {
@@ -212,15 +203,15 @@ struct __packed offload_ka_info {
 	u16 v6_ka_count;
 	u32 retry_count;
 	u32 retry_interval;
-	fw_offset_t v4_ka;
-	fw_offset_t v6_ka;
+	u32 v4_ka;
+	u32 v6_ka;
 };
 
 struct __packed offload_rr_info {
 	u32 rr_count;
 	u32 rr_buf_len;
-	fw_offset_t rr_id_x;
-	fw_offset_t rr_buf;
+	u32 rr_id_x;
+	u32 rr_buf;
 };
 
 struct __packed offload_info {
@@ -237,9 +228,103 @@ struct __packed offload_info {
 	u8 buf[0];
 };
 
+struct __packed hw_atl_utils_fw_rpc {
+	u32 msg_id;
+
+	union {
+		/* fw1x structures */
+		struct drv_msg_wol_add msg_wol_add;
+		struct drv_msg_wol_remove msg_wol_remove;
+		struct drv_msg_enable_wakeup msg_enable_wakeup;
+		/* fw2x structures */
+		struct offload_info fw2x_offloads;
+	};
+};
+
+/* Mailbox FW Request interface */
+struct __packed hw_fw_request_ptp_gpio_ctrl {
+	u32 index;
+	u32 period;
+	u64 start;
+};
+
+struct __packed hw_fw_request_ptp_adj_freq {
+	u32 ns_mac;
+	u32 fns_mac;
+	u32 ns_phy;
+	u32 fns_phy;
+	u32 mac_ns_adj;
+	u32 mac_fns_adj;
+};
+
+struct __packed hw_fw_request_ptp_adj_clock {
+	u32 ns;
+	u32 sec;
+	int sign;
+};
+
+#define HW_AQ_FW_REQUEST_PTP_GPIO_CTRL	         0x11
+#define HW_AQ_FW_REQUEST_PTP_ADJ_FREQ	         0x12
+#define HW_AQ_FW_REQUEST_PTP_ADJ_CLOCK	         0x13
+
+struct __packed hw_fw_request_iface {
+	u32 msg_id;
+	union {
+		/* PTP FW Request */
+		struct hw_fw_request_ptp_gpio_ctrl ptp_gpio_ctrl;
+		struct hw_fw_request_ptp_adj_freq ptp_adj_freq;
+		struct hw_fw_request_ptp_adj_clock ptp_adj_clock;
+	};
+};
+
+struct __packed hw_atl_utils_settings {
+	u32 mtu;
+	u32 downshift_retry_count;
+	u32 link_pause_frame_quanta_100m;
+	u32 link_pause_frame_threshold_100m;
+	u32 link_pause_frame_quanta_1g;
+	u32 link_pause_frame_threshold_1g;
+	u32 link_pause_frame_quanta_2p5g;
+	u32 link_pause_frame_threshold_2p5g;
+	u32 link_pause_frame_quanta_5g;
+	u32 link_pause_frame_threshold_5g;
+	u32 link_pause_frame_quanta_10g;
+	u32 link_pause_frame_threshold_10g;
+	u32 pfc_quanta_class_0;
+	u32 pfc_threshold_class_0;
+	u32 pfc_quanta_class_1;
+	u32 pfc_threshold_class_1;
+	u32 pfc_quanta_class_2;
+	u32 pfc_threshold_class_2;
+	u32 pfc_quanta_class_3;
+	u32 pfc_threshold_class_3;
+	u32 pfc_quanta_class_4;
+	u32 pfc_threshold_class_4;
+	u32 pfc_quanta_class_5;
+	u32 pfc_threshold_class_5;
+	u32 pfc_quanta_class_6;
+	u32 pfc_threshold_class_6;
+	u32 pfc_quanta_class_7;
+	u32 pfc_threshold_class_7;
+	u32 eee_link_down_timeout;
+	u32 eee_link_up_timeout;
+	u32 eee_max_link_drops;
+	u32 eee_rates_mask;
+	u32 wake_timer;
+	u32 thermal_shutdown_off_temp;
+	u32 thermal_shutdown_warning_temp;
+	u32 thermal_shutdown_cold_temp;
+	u32 msm_options;
+	u32 dac_cable_serdes_modes;
+	u32 media_detect;
+};
+
 enum hw_atl_rx_action_with_traffic {
 	HW_ATL_RX_DISCARD,
 	HW_ATL_RX_HOST,
+	HW_ATL_RX_MNGMNT,
+	HW_ATL_RX_HOST_AND_MNGMNT,
+	HW_ATL_RX_WOL
 };
 
 struct aq_rx_filter_vlan {
@@ -321,20 +406,12 @@ enum hal_atl_utils_fw_state_e {
 #define HAL_ATLANTIC_RATE_100M       BIT(5)
 #define HAL_ATLANTIC_RATE_INVALID    BIT(6)
 
-#define HAL_ATLANTIC_UTILS_FW_MSG_PING          0x1U
-#define HAL_ATLANTIC_UTILS_FW_MSG_ARP           0x2U
-#define HAL_ATLANTIC_UTILS_FW_MSG_INJECT        0x3U
 #define HAL_ATLANTIC_UTILS_FW_MSG_WOL_ADD       0x4U
 #define HAL_ATLANTIC_UTILS_FW_MSG_WOL_PRIOR     0x10000000U
 #define HAL_ATLANTIC_UTILS_FW_MSG_WOL_PATTERN   0x1U
 #define HAL_ATLANTIC_UTILS_FW_MSG_WOL_MAG_PKT   0x2U
 #define HAL_ATLANTIC_UTILS_FW_MSG_WOL_DEL       0x5U
 #define HAL_ATLANTIC_UTILS_FW_MSG_ENABLE_WAKEUP 0x6U
-#define HAL_ATLANTIC_UTILS_FW_MSG_MSM_PFC       0x7U
-#define HAL_ATLANTIC_UTILS_FW_MSG_PROVISIONING  0x8U
-#define HAL_ATLANTIC_UTILS_FW_MSG_OFFLOAD_ADD   0x9U
-#define HAL_ATLANTIC_UTILS_FW_MSG_OFFLOAD_DEL   0xAU
-#define HAL_ATLANTIC_UTILS_FW_MSG_CABLE_DIAG    0xDU
 
 enum hw_atl_fw2x_rate {
 	FW2X_RATE_100M    = 0x20,
@@ -344,91 +421,135 @@ enum hw_atl_fw2x_rate {
 	FW2X_RATE_10G     = 0x800,
 };
 
+/* 0x370
+ * Link capabilities resolution register
+ */
 enum hw_atl_fw2x_caps_lo {
-	CAPS_LO_10BASET_HD = 0x00,
+	CAPS_LO_10BASET_HD        = 0,
 	CAPS_LO_10BASET_FD,
 	CAPS_LO_100BASETX_HD,
 	CAPS_LO_100BASET4_HD,
 	CAPS_LO_100BASET2_HD,
-	CAPS_LO_100BASETX_FD,
+	CAPS_LO_100BASETX_FD      = 5,
 	CAPS_LO_100BASET2_FD,
 	CAPS_LO_1000BASET_HD,
 	CAPS_LO_1000BASET_FD,
 	CAPS_LO_2P5GBASET_FD,
-	CAPS_LO_5GBASET_FD,
+	CAPS_LO_5GBASET_FD        = 10,
 	CAPS_LO_10GBASET_FD,
 };
 
+/* 0x374
+ * Status register
+ */
 enum hw_atl_fw2x_caps_hi {
-	CAPS_HI_RESERVED1 = 0x00,
+	CAPS_HI_RESERVED1         = 0,
 	CAPS_HI_10BASET_EEE,
 	CAPS_HI_RESERVED2,
 	CAPS_HI_PAUSE,
 	CAPS_HI_ASYMMETRIC_PAUSE,
-	CAPS_HI_100BASETX_EEE,
+	CAPS_HI_100BASETX_EEE     = 5,
 	CAPS_HI_RESERVED3,
 	CAPS_HI_RESERVED4,
 	CAPS_HI_1000BASET_FD_EEE,
 	CAPS_HI_2P5GBASET_FD_EEE,
-	CAPS_HI_5GBASET_FD_EEE,
+	CAPS_HI_5GBASET_FD_EEE    = 10,
 	CAPS_HI_10GBASET_FD_EEE,
-	CAPS_HI_RESERVED5,
+	CAPS_HI_FW_REQUEST,
 	CAPS_HI_RESERVED6,
 	CAPS_HI_RESERVED7,
-	CAPS_HI_RESERVED8,
+	CAPS_HI_RESERVED8         = 15,
 	CAPS_HI_RESERVED9,
 	CAPS_HI_CABLE_DIAG,
 	CAPS_HI_TEMPERATURE,
 	CAPS_HI_DOWNSHIFT,
-	CAPS_HI_PTP_AVB_EN,
+	CAPS_HI_PTP_AVB_EN_FW2X   = 20,
 	CAPS_HI_MEDIA_DETECT,
 	CAPS_HI_LINK_DROP,
 	CAPS_HI_SLEEP_PROXY,
 	CAPS_HI_WOL,
-	CAPS_HI_MAC_STOP,
+	CAPS_HI_MAC_STOP          = 25,
 	CAPS_HI_EXT_LOOPBACK,
 	CAPS_HI_INT_LOOPBACK,
 	CAPS_HI_EFUSE_AGENT,
 	CAPS_HI_WOL_TIMER,
-	CAPS_HI_STATISTICS,
+	CAPS_HI_STATISTICS        = 30,
 	CAPS_HI_TRANSACTION_ID,
 };
 
+/* 0x36C
+ * Control register
+ */
 enum hw_atl_fw2x_ctrl {
-	CTRL_RESERVED1 = 0x00,
+	CTRL_RESERVED1            = 0,
 	CTRL_RESERVED2,
 	CTRL_RESERVED3,
 	CTRL_PAUSE,
 	CTRL_ASYMMETRIC_PAUSE,
-	CTRL_RESERVED4,
+	CTRL_RESERVED4            = 5,
 	CTRL_RESERVED5,
 	CTRL_RESERVED6,
 	CTRL_1GBASET_FD_EEE,
 	CTRL_2P5GBASET_FD_EEE,
-	CTRL_5GBASET_FD_EEE,
+	CTRL_5GBASET_FD_EEE       = 10,
 	CTRL_10GBASET_FD_EEE,
 	CTRL_THERMAL_SHUTDOWN,
 	CTRL_PHY_LOGS,
 	CTRL_EEE_AUTO_DISABLE,
-	CTRL_PFC,
+	CTRL_PFC                  = 15,
 	CTRL_WAKE_ON_LINK,
 	CTRL_CABLE_DIAG,
 	CTRL_TEMPERATURE,
 	CTRL_DOWNSHIFT,
-	CTRL_PTP_AVB,
+	CTRL_PTP_AVB              = 20,
 	CTRL_RESERVED7,
 	CTRL_LINK_DROP,
 	CTRL_SLEEP_PROXY,
 	CTRL_WOL,
-	CTRL_MAC_STOP,
+	CTRL_MAC_STOP             = 25,
 	CTRL_EXT_LOOPBACK,
 	CTRL_INT_LOOPBACK,
 	CTRL_RESERVED8,
 	CTRL_WOL_TIMER,
-	CTRL_STATISTICS,
+	CTRL_STATISTICS           = 30,
 	CTRL_FORCE_RECONNECT,
 };
 
+enum hw_atl_caps_ex {
+	CAPS_EX_LED_CONTROL       =  0,
+	CAPS_EX_LED0_MODE_LO,
+	CAPS_EX_LED0_MODE_HI,
+	CAPS_EX_LED1_MODE_LO,
+	CAPS_EX_LED1_MODE_HI,
+	CAPS_EX_LED2_MODE_LO      =  5,
+	CAPS_EX_LED2_MODE_HI,
+	CAPS_EX_RESERVED07,
+	CAPS_EX_RESERVED08,
+	CAPS_EX_RESERVED09,
+	CAPS_EX_RESERVED10        = 10,
+	CAPS_EX_RESERVED11,
+	CAPS_EX_RESERVED12,
+	CAPS_EX_RESERVED13,
+	CAPS_EX_RESERVED14,
+	CAPS_EX_RESERVED15        = 15,
+	CAPS_EX_PHY_PTP_EN,
+	CAPS_EX_MAC_PTP_EN,
+	CAPS_EX_EXT_CLK_EN,
+	CAPS_EX_SCHED_DMA_EN,
+	CAPS_EX_PTP_GPIO_EN       = 20,
+	CAPS_EX_UPDATE_SETTINGS,
+	CAPS_EX_PHY_CTRL_TS_PIN,
+	CAPS_EX_SNR_OPERATING_MARGIN,
+	CAPS_EX_RESERVED24,
+	CAPS_EX_RESERVED25        = 25,
+	CAPS_EX_RESERVED26,
+	CAPS_EX_RESERVED27,
+	CAPS_EX_RESERVED28,
+	CAPS_EX_RESERVED29,
+	CAPS_EX_RESERVED30        = 30,
+	CAPS_EX_RESERVED31
+};
+
 struct aq_hw_s;
 struct aq_fw_ops;
 struct aq_hw_caps_s;
@@ -475,6 +596,11 @@ struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self);
 int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
 				  u32 *p, u32 cnt);
 
+int hw_atl_write_fwcfg_dwords(struct aq_hw_s *self, u32 *p, u32 cnt);
+
+int hw_atl_write_fwsettings_dwords(struct aq_hw_s *self, u32 offset, u32 *p,
+				   u32 cnt);
+
 int hw_atl_utils_fw_set_wol(struct aq_hw_s *self, bool wol_enabled, u8 *mac);
 
 int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index 7bc51f8d6f2f..97ebf849695f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+ * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
  */
 
 /* File hw_atl_utils_fw2x.c: Definition of firmware 2.x functions for
@@ -17,26 +17,34 @@
 #include "hw_atl_utils.h"
 #include "hw_atl_llh.h"
 
-#define HW_ATL_FW2X_MPI_RPC_ADDR        0x334
+#define HW_ATL_FW2X_MPI_LED_ADDR         0x31c
+#define HW_ATL_FW2X_MPI_RPC_ADDR         0x334
 
-#define HW_ATL_FW2X_MPI_MBOX_ADDR       0x360
-#define HW_ATL_FW2X_MPI_EFUSE_ADDR	0x364
-#define HW_ATL_FW2X_MPI_CONTROL_ADDR	0x368
-#define HW_ATL_FW2X_MPI_CONTROL2_ADDR	0x36C
-#define HW_ATL_FW2X_MPI_STATE_ADDR	0x370
-#define HW_ATL_FW2X_MPI_STATE2_ADDR     0x374
+#define HW_ATL_FW2X_MPI_MBOX_ADDR        0x360
+#define HW_ATL_FW2X_MPI_EFUSE_ADDR       0x364
+#define HW_ATL_FW2X_MPI_CONTROL_ADDR     0x368
+#define HW_ATL_FW2X_MPI_CONTROL2_ADDR    0x36C
+#define HW_ATL_FW2X_MPI_STATE_ADDR       0x370
+#define HW_ATL_FW2X_MPI_STATE2_ADDR      0x374
+
+#define HW_ATL_FW3X_EXT_CONTROL_ADDR     0x378
+#define HW_ATL_FW3X_EXT_STATE_ADDR       0x37c
 
 #define HW_ATL_FW2X_CAP_PAUSE            BIT(CAPS_HI_PAUSE)
 #define HW_ATL_FW2X_CAP_ASYM_PAUSE       BIT(CAPS_HI_ASYMMETRIC_PAUSE)
 #define HW_ATL_FW2X_CAP_SLEEP_PROXY      BIT(CAPS_HI_SLEEP_PROXY)
 #define HW_ATL_FW2X_CAP_WOL              BIT(CAPS_HI_WOL)
 
+#define HW_ATL_FW2X_CTRL_WAKE_ON_LINK     BIT(CTRL_WAKE_ON_LINK)
 #define HW_ATL_FW2X_CTRL_SLEEP_PROXY      BIT(CTRL_SLEEP_PROXY)
 #define HW_ATL_FW2X_CTRL_WOL              BIT(CTRL_WOL)
 #define HW_ATL_FW2X_CTRL_LINK_DROP        BIT(CTRL_LINK_DROP)
 #define HW_ATL_FW2X_CTRL_PAUSE            BIT(CTRL_PAUSE)
 #define HW_ATL_FW2X_CTRL_TEMPERATURE      BIT(CTRL_TEMPERATURE)
 #define HW_ATL_FW2X_CTRL_ASYMMETRIC_PAUSE BIT(CTRL_ASYMMETRIC_PAUSE)
+#define HW_ATL_FW2X_CTRL_INT_LOOPBACK     BIT(CTRL_INT_LOOPBACK)
+#define HW_ATL_FW2X_CTRL_EXT_LOOPBACK     BIT(CTRL_EXT_LOOPBACK)
+#define HW_ATL_FW2X_CTRL_DOWNSHIFT        BIT(CTRL_DOWNSHIFT)
 #define HW_ATL_FW2X_CTRL_FORCE_RECONNECT  BIT(CTRL_FORCE_RECONNECT)
 
 #define HW_ATL_FW2X_CAP_EEE_1G_MASK      BIT(CAPS_HI_1000BASET_FD_EEE)
@@ -47,6 +55,9 @@
 #define HAL_ATLANTIC_WOL_FILTERS_COUNT   8
 #define HAL_ATLANTIC_UTILS_FW2X_MSG_WOL  0x0E
 
+#define HW_ATL_FW_VER_LED                0x03010026U
+#define HW_ATL_FW_VER_MEDIA_CONTROL      0x0301005aU
+
 struct __packed fw2x_msg_wol_pattern {
 	u8 mask[16];
 	u32 crc;
@@ -71,6 +82,7 @@ static int aq_fw2x_set_state(struct aq_hw_s *self,
 
 static u32 aq_fw2x_mbox_get(struct aq_hw_s *self);
 static u32 aq_fw2x_rpc_get(struct aq_hw_s *self);
+static int aq_fw2x_settings_get(struct aq_hw_s *self, u32 *addr);
 static u32 aq_fw2x_state2_get(struct aq_hw_s *self);
 
 static int aq_fw2x_init(struct aq_hw_s *self)
@@ -88,6 +100,8 @@ static int aq_fw2x_init(struct aq_hw_s *self)
 					self->rpc_addr != 0U,
 					1000U, 100000U);
 
+	err = aq_fw2x_settings_get(self, &self->settings_addr);
+
 	return err;
 }
 
@@ -167,17 +181,26 @@ static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed)
 	return 0;
 }
 
-static void aq_fw2x_set_mpi_flow_control(struct aq_hw_s *self, u32 *mpi_state)
+static void aq_fw2x_upd_flow_control_bits(struct aq_hw_s *self,
+					  u32 *mpi_state, u32 fc)
 {
-	if (self->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
-		*mpi_state |= BIT(CAPS_HI_PAUSE);
-	else
-		*mpi_state &= ~BIT(CAPS_HI_PAUSE);
+	*mpi_state &= ~(HW_ATL_FW2X_CTRL_PAUSE |
+			HW_ATL_FW2X_CTRL_ASYMMETRIC_PAUSE);
 
-	if (self->aq_nic_cfg->flow_control & AQ_NIC_FC_TX)
-		*mpi_state |= BIT(CAPS_HI_ASYMMETRIC_PAUSE);
-	else
-		*mpi_state &= ~BIT(CAPS_HI_ASYMMETRIC_PAUSE);
+	switch (fc) {
+	/* There is not explicit mode of RX only pause frames,
+	 * thus, we join this mode with FC full.
+	 * FC full is either Rx, either Tx, or both.
+	 */
+	case AQ_NIC_FC_FULL:
+	case AQ_NIC_FC_RX:
+		*mpi_state |= HW_ATL_FW2X_CTRL_PAUSE |
+			      HW_ATL_FW2X_CTRL_ASYMMETRIC_PAUSE;
+		break;
+	case AQ_NIC_FC_TX:
+		*mpi_state |= HW_ATL_FW2X_CTRL_ASYMMETRIC_PAUSE;
+		break;
+	}
 }
 
 static void aq_fw2x_upd_eee_rate_bits(struct aq_hw_s *self, u32 *mpi_opts,
@@ -201,7 +224,8 @@ static int aq_fw2x_set_state(struct aq_hw_s *self,
 	case MPI_INIT:
 		mpi_state &= ~BIT(CAPS_HI_LINK_DROP);
 		aq_fw2x_upd_eee_rate_bits(self, &mpi_state, cfg->eee_speeds);
-		aq_fw2x_set_mpi_flow_control(self, &mpi_state);
+		aq_fw2x_upd_flow_control_bits(self, &mpi_state,
+					      self->aq_nic_cfg->fc.req);
 		break;
 	case MPI_DEINIT:
 		mpi_state |= BIT(CAPS_HI_LINK_DROP);
@@ -212,15 +236,20 @@ static int aq_fw2x_set_state(struct aq_hw_s *self,
 		break;
 	}
 	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_state);
+
 	return 0;
 }
 
 static int aq_fw2x_update_link_status(struct aq_hw_s *self)
 {
-	u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE_ADDR);
-	u32 speed = mpi_state & (FW2X_RATE_100M | FW2X_RATE_1G |
-				 FW2X_RATE_2G5 | FW2X_RATE_5G | FW2X_RATE_10G);
 	struct aq_hw_link_status_s *link_status = &self->aq_link_status;
+	u32 mpi_state;
+	u32 speed;
+
+	mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE_ADDR);
+	speed = mpi_state & (FW2X_RATE_100M | FW2X_RATE_1G |
+			     FW2X_RATE_2G5 | FW2X_RATE_5G |
+			     FW2X_RATE_10G);
 
 	if (speed) {
 		if (speed & FW2X_RATE_10G)
@@ -244,11 +273,11 @@ static int aq_fw2x_update_link_status(struct aq_hw_s *self)
 
 static int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
 {
+	u32 efuse_addr = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_EFUSE_ADDR);
+	u32 mac_addr[2] = { 0 };
 	int err = 0;
 	u32 h = 0U;
 	u32 l = 0U;
-	u32 mac_addr[2] = { 0 };
-	u32 efuse_addr = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_EFUSE_ADDR);
 
 	if (efuse_addr != 0) {
 		err = hw_atl_utils_fw_downld_dwords(self,
@@ -282,15 +311,16 @@ static int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
 		h >>= 8;
 		mac[0] = (u8)(0xFFU & h);
 	}
+
 	return err;
 }
 
 static int aq_fw2x_update_stats(struct aq_hw_s *self)
 {
-	int err = 0;
 	u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
 	u32 orig_stats_val = mpi_opts & BIT(CAPS_HI_STATISTICS);
 	u32 stats_val;
+	int err = 0;
 
 	/* Toggle statistics bit for FW to update */
 	mpi_opts = mpi_opts ^ BIT(CAPS_HI_STATISTICS);
@@ -317,9 +347,9 @@ static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp)
 	int err = 0;
 	u32 val;
 
-	phy_temp_offset = self->mbox_addr +
-			  offsetof(struct hw_atl_utils_mbox, info) +
-			  offsetof(struct hw_aq_info, phy_temperature);
+	phy_temp_offset = self->mbox_addr + offsetof(struct hw_atl_utils_mbox,
+						     info.phy_temperature);
+
 	/* Toggle statistics bit for FW to 0x36C.18 (CTRL_TEMPERATURE) */
 	mpi_opts = mpi_opts ^ HW_ATL_FW2X_CTRL_TEMPERATURE;
 	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
@@ -342,106 +372,117 @@ static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp)
 	return 0;
 }
 
-static int aq_fw2x_set_sleep_proxy(struct aq_hw_s *self, u8 *mac)
+static int aq_fw2x_set_wol(struct aq_hw_s *self, u8 *mac)
 {
 	struct hw_atl_utils_fw_rpc *rpc = NULL;
-	struct offload_info *cfg = NULL;
-	unsigned int rpc_size = 0U;
-	u32 mpi_opts;
+	struct offload_info *info = NULL;
+	u32 wol_bits = 0;
+	u32 rpc_size;
 	int err = 0;
 	u32 val;
 
-	rpc_size = sizeof(rpc->msg_id) + sizeof(*cfg);
-
-	err = hw_atl_utils_fw_rpc_wait(self, &rpc);
-	if (err < 0)
-		goto err_exit;
-
-	memset(rpc, 0, rpc_size);
-	cfg = (struct offload_info *)(&rpc->msg_id + 1);
+	if (self->aq_nic_cfg->wol & WAKE_PHY) {
+		aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR,
+				HW_ATL_FW2X_CTRL_LINK_DROP);
+		readx_poll_timeout_atomic(aq_fw2x_state2_get, self, val,
+					  (val &
+					   HW_ATL_FW2X_CTRL_LINK_DROP) != 0,
+					  1000, 100000);
+		wol_bits |= HW_ATL_FW2X_CTRL_WAKE_ON_LINK;
+	}
 
-	memcpy(cfg->mac_addr, mac, ETH_ALEN);
-	cfg->len = sizeof(*cfg);
+	if (self->aq_nic_cfg->wol & WAKE_MAGIC) {
+		wol_bits |= HW_ATL_FW2X_CTRL_SLEEP_PROXY |
+			    HW_ATL_FW2X_CTRL_WOL;
 
-	/* Clear bit 0x36C.23 and 0x36C.22 */
-	mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
-	mpi_opts &= ~HW_ATL_FW2X_CTRL_SLEEP_PROXY;
-	mpi_opts &= ~HW_ATL_FW2X_CTRL_LINK_DROP;
+		err = hw_atl_utils_fw_rpc_wait(self, &rpc);
+		if (err < 0)
+			goto err_exit;
 
-	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+		rpc_size = sizeof(*info) +
+			   offsetof(struct hw_atl_utils_fw_rpc, fw2x_offloads);
+		memset(rpc, 0, rpc_size);
+		info = &rpc->fw2x_offloads;
+		memcpy(info->mac_addr, mac, ETH_ALEN);
+		info->len = sizeof(*info);
 
-	err = hw_atl_utils_fw_rpc_call(self, rpc_size);
-	if (err < 0)
-		goto err_exit;
+		err = hw_atl_utils_fw_rpc_call(self, rpc_size);
+		if (err < 0)
+			goto err_exit;
+	}
 
-	/* Set bit 0x36C.23 */
-	mpi_opts |= HW_ATL_FW2X_CTRL_SLEEP_PROXY;
-	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
-
-	err = readx_poll_timeout_atomic(aq_fw2x_state2_get,
-					self, val,
-					val & HW_ATL_FW2X_CTRL_SLEEP_PROXY,
-					1U, 100000U);
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, wol_bits);
 
 err_exit:
 	return err;
 }
 
-static int aq_fw2x_set_wol_params(struct aq_hw_s *self, u8 *mac)
+static int aq_fw2x_set_power(struct aq_hw_s *self, unsigned int power_state,
+			     u8 *mac)
 {
-	struct hw_atl_utils_fw_rpc *rpc = NULL;
-	struct fw2x_msg_wol *msg = NULL;
-	u32 mpi_opts;
 	int err = 0;
-	u32 val;
-
-	err = hw_atl_utils_fw_rpc_wait(self, &rpc);
-	if (err < 0)
-		goto err_exit;
-
-	msg = (struct fw2x_msg_wol *)rpc;
-
-	memset(msg, 0, sizeof(*msg));
 
-	msg->msg_id = HAL_ATLANTIC_UTILS_FW2X_MSG_WOL;
-	msg->magic_packet_enabled = true;
-	memcpy(msg->hw_addr, mac, ETH_ALEN);
+	if (self->aq_nic_cfg->wol)
+		err = aq_fw2x_set_wol(self, mac);
 
-	mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
-	mpi_opts &= ~(HW_ATL_FW2X_CTRL_SLEEP_PROXY | HW_ATL_FW2X_CTRL_WOL);
+	return err;
+}
 
-	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+static int aq_fw2x_send_fw_request(struct aq_hw_s *self,
+				   const struct hw_fw_request_iface *fw_req,
+				   size_t size)
+{
+	u32 ctrl2, orig_ctrl2;
+	u32 dword_cnt;
+	int err = 0;
+	u32 val;
 
-	err = hw_atl_utils_fw_rpc_call(self, sizeof(*msg));
+	/* Write data to drvIface Mailbox */
+	dword_cnt = size / sizeof(u32);
+	if (size % sizeof(u32))
+		dword_cnt++;
+	err = hw_atl_write_fwcfg_dwords(self, (void *)fw_req, dword_cnt);
 	if (err < 0)
 		goto err_exit;
 
-	/* Set bit 0x36C.24 */
-	mpi_opts |= HW_ATL_FW2X_CTRL_WOL;
-	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+	/* Toggle statistics bit for FW to update */
+	ctrl2 = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+	orig_ctrl2 = ctrl2 & BIT(CAPS_HI_FW_REQUEST);
+	ctrl2 = ctrl2 ^ BIT(CAPS_HI_FW_REQUEST);
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, ctrl2);
 
-	err = readx_poll_timeout_atomic(aq_fw2x_state2_get,
-					self, val, val & HW_ATL_FW2X_CTRL_WOL,
+	/* Wait FW to report back */
+	err = readx_poll_timeout_atomic(aq_fw2x_state2_get, self, val,
+					orig_ctrl2 != (val &
+						       BIT(CAPS_HI_FW_REQUEST)),
 					1U, 10000U);
 
 err_exit:
 	return err;
 }
 
-static int aq_fw2x_set_power(struct aq_hw_s *self, unsigned int power_state,
-			     u8 *mac)
+static void aq_fw3x_enable_ptp(struct aq_hw_s *self, int enable)
 {
-	int err = 0;
+	u32 ptp_opts = aq_hw_read_reg(self, HW_ATL_FW3X_EXT_STATE_ADDR);
+	u32 all_ptp_features = BIT(CAPS_EX_PHY_PTP_EN) |
+						   BIT(CAPS_EX_PTP_GPIO_EN);
 
-	if (self->aq_nic_cfg->wol & AQ_NIC_WOL_ENABLED) {
-		err = aq_fw2x_set_sleep_proxy(self, mac);
-		if (err < 0)
-			goto err_exit;
-		err = aq_fw2x_set_wol_params(self, mac);
-	}
+	if (enable)
+		ptp_opts |= all_ptp_features;
+	else
+		ptp_opts &= ~all_ptp_features;
 
-err_exit:
-	return err;
+	aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts);
+}
+
+static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode)
+{
+	if (self->fw_ver_actual < HW_ATL_FW_VER_LED)
+		return -EOPNOTSUPP;
+
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_LED_ADDR, mode);
+
+	return 0;
 }
 
 static int aq_fw2x_set_eee_rate(struct aq_hw_s *self, u32 speed)
@@ -461,11 +502,12 @@ static int aq_fw2x_get_eee_rate(struct aq_hw_s *self, u32 *rate,
 	u32 mpi_state;
 	u32 caps_hi;
 	int err = 0;
-	u32 addr = self->mbox_addr + offsetof(struct hw_atl_utils_mbox, info) +
-		   offsetof(struct hw_aq_info, caps_hi);
+	u32 offset;
+
+	offset = self->mbox_addr + offsetof(struct hw_atl_utils_mbox,
+					    info.caps_hi);
 
-	err = hw_atl_utils_fw_downld_dwords(self, addr, &caps_hi,
-					    sizeof(caps_hi) / sizeof(u32));
+	err = hw_atl_utils_fw_downld_dwords(self, offset, &caps_hi, 1);
 
 	if (err)
 		return err;
@@ -493,7 +535,8 @@ static int aq_fw2x_set_flow_control(struct aq_hw_s *self)
 {
 	u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
 
-	aq_fw2x_set_mpi_flow_control(self, &mpi_state);
+	aq_fw2x_upd_flow_control_bits(self, &mpi_state,
+				      self->aq_nic_cfg->fc.req);
 
 	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_state);
 
@@ -503,17 +546,41 @@ static int aq_fw2x_set_flow_control(struct aq_hw_s *self)
 static u32 aq_fw2x_get_flow_control(struct aq_hw_s *self, u32 *fcmode)
 {
 	u32 mpi_state = aq_fw2x_state2_get(self);
+	*fcmode = 0;
 
 	if (mpi_state & HW_ATL_FW2X_CAP_PAUSE)
-		if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE)
-			*fcmode = AQ_NIC_FC_RX;
+		*fcmode |= AQ_NIC_FC_RX;
+
+	if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE)
+		*fcmode |= AQ_NIC_FC_TX;
+
+	return 0;
+}
+
+static int aq_fw2x_set_phyloopback(struct aq_hw_s *self, u32 mode, bool enable)
+{
+	u32 mpi_opts;
+
+	switch (mode) {
+	case AQ_HW_LOOPBACK_PHYINT_SYS:
+		mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+		if (enable)
+			mpi_opts |= HW_ATL_FW2X_CTRL_INT_LOOPBACK;
 		else
-			*fcmode = AQ_NIC_FC_RX | AQ_NIC_FC_TX;
-	else
-		if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE)
-			*fcmode = AQ_NIC_FC_TX;
+			mpi_opts &= ~HW_ATL_FW2X_CTRL_INT_LOOPBACK;
+		aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+		break;
+	case AQ_HW_LOOPBACK_PHYEXT_SYS:
+		mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+		if (enable)
+			mpi_opts |= HW_ATL_FW2X_CTRL_EXT_LOOPBACK;
 		else
-			*fcmode = 0;
+			mpi_opts &= ~HW_ATL_FW2X_CTRL_EXT_LOOPBACK;
+		aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	return 0;
 }
@@ -528,25 +595,42 @@ static u32 aq_fw2x_rpc_get(struct aq_hw_s *self)
 	return aq_hw_read_reg(self, HW_ATL_FW2X_MPI_RPC_ADDR);
 }
 
+static int aq_fw2x_settings_get(struct aq_hw_s *self, u32 *addr)
+{
+	int err = 0;
+	u32 offset;
+
+	offset = self->mbox_addr + offsetof(struct hw_atl_utils_mbox,
+					    info.setting_address);
+
+	err = hw_atl_utils_fw_downld_dwords(self, offset, addr, 1);
+
+	return err;
+}
+
 static u32 aq_fw2x_state2_get(struct aq_hw_s *self)
 {
 	return aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR);
 }
 
 const struct aq_fw_ops aq_fw_2x_ops = {
-	.init = aq_fw2x_init,
-	.deinit = aq_fw2x_deinit,
-	.reset = NULL,
-	.renegotiate = aq_fw2x_renegotiate,
-	.get_mac_permanent = aq_fw2x_get_mac_permanent,
-	.set_link_speed = aq_fw2x_set_link_speed,
-	.set_state = aq_fw2x_set_state,
+	.init               = aq_fw2x_init,
+	.deinit             = aq_fw2x_deinit,
+	.reset              = NULL,
+	.renegotiate        = aq_fw2x_renegotiate,
+	.get_mac_permanent  = aq_fw2x_get_mac_permanent,
+	.set_link_speed     = aq_fw2x_set_link_speed,
+	.set_state          = aq_fw2x_set_state,
 	.update_link_status = aq_fw2x_update_link_status,
-	.update_stats = aq_fw2x_update_stats,
-	.get_phy_temp = aq_fw2x_get_phy_temp,
-	.set_power = aq_fw2x_set_power,
-	.set_eee_rate = aq_fw2x_set_eee_rate,
-	.get_eee_rate = aq_fw2x_get_eee_rate,
-	.set_flow_control = aq_fw2x_set_flow_control,
-	.get_flow_control = aq_fw2x_get_flow_control
+	.update_stats       = aq_fw2x_update_stats,
+	.get_phy_temp       = aq_fw2x_get_phy_temp,
+	.set_power          = aq_fw2x_set_power,
+	.set_eee_rate       = aq_fw2x_set_eee_rate,
+	.get_eee_rate       = aq_fw2x_get_eee_rate,
+	.set_flow_control   = aq_fw2x_set_flow_control,
+	.get_flow_control   = aq_fw2x_get_flow_control,
+	.send_fw_request    = aq_fw2x_send_fw_request,
+	.enable_ptp         = aq_fw3x_enable_ptp,
+	.led_control        = aq_fw2x_led_control,
+	.set_phyloopback    = aq_fw2x_set_phyloopback,
 };
diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c
index 78e52d217e56..539166112993 100644
--- a/drivers/net/ethernet/arc/emac_arc.c
+++ b/drivers/net/ethernet/arc/emac_arc.c
@@ -20,9 +20,10 @@
 static int emac_arc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct net_device *ndev;
 	struct arc_emac_priv *priv;
-	int interface, err;
+	phy_interface_t interface;
+	struct net_device *ndev;
+	int err;
 
 	if (!dev->of_node)
 		return -ENODEV;
@@ -37,9 +38,13 @@ static int emac_arc_probe(struct platform_device *pdev)
 	priv->drv_name = DRV_NAME;
 	priv->drv_version = DRV_VERSION;
 
-	interface = of_get_phy_mode(dev->of_node);
-	if (interface < 0)
-		interface = PHY_INTERFACE_MODE_MII;
+	err = of_get_phy_mode(dev->of_node, &interface);
+	if (err) {
+		if (err == -ENODEV)
+			interface = PHY_INTERFACE_MODE_MII;
+		else
+			goto out_netdev;
+	}
 
 	priv->clk = devm_clk_get(dev, "hclk");
 	if (IS_ERR(priv->clk)) {
diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index 664d664e0925..aae231c5224f 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -97,8 +97,9 @@ static int emac_rockchip_probe(struct platform_device *pdev)
 	struct net_device *ndev;
 	struct rockchip_priv_data *priv;
 	const struct of_device_id *match;
+	phy_interface_t interface;
 	u32 data;
-	int err, interface;
+	int err;
 
 	if (!pdev->dev.of_node)
 		return -ENODEV;
@@ -114,7 +115,9 @@ static int emac_rockchip_probe(struct platform_device *pdev)
 	priv->emac.drv_version = DRV_VERSION;
 	priv->emac.set_mac_speed = emac_rockchip_set_mac_speed;
 
-	interface = of_get_phy_mode(dev->of_node);
+	err = of_get_phy_mode(dev->of_node, &interface);
+	if (err)
+		goto out_netdev;
 
 	/* RK3036/RK3066/RK3188 SoCs only support RMII */
 	if (interface != PHY_INTERFACE_MODE_RMII) {
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 1b1a09095c0d..8f5021091eee 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1744,10 +1744,9 @@ static int ag71xx_probe(struct platform_device *pdev)
 		eth_random_addr(ndev->dev_addr);
 	}
 
-	ag->phy_if_mode = of_get_phy_mode(np);
-	if (ag->phy_if_mode < 0) {
+	err = of_get_phy_mode(np, ag->phy_if_mode);
+	if (err) {
 		netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
-		err = ag->phy_if_mode;
 		goto err_free;
 	}
 
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index 37752d9514e7..30b455013bf3 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -1371,8 +1371,8 @@ static int nb8800_probe(struct platform_device *pdev)
 	priv = netdev_priv(dev);
 	priv->base = base;
 
-	priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-	if (priv->phy_mode < 0)
+	ret = of_get_phy_mode(pdev->dev.of_node, &priv->phy_mode);
+	if (ret)
 		priv->phy_mode = PHY_INTERFACE_MODE_RGMII;
 
 	priv->clk = devm_clk_get(&pdev->dev, NULL);
diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h
index aacc3cce2cc0..40941fb6065b 100644
--- a/drivers/net/ethernet/aurora/nb8800.h
+++ b/drivers/net/ethernet/aurora/nb8800.h
@@ -287,7 +287,7 @@ struct nb8800_priv {
 	struct device_node		*phy_node;
 
 	/* PHY connection type from DT */
-	int				phy_mode;
+	phy_interface_t			phy_mode;
 
 	/* Current link status */
 	int				speed;
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 97ab0dd25552..035dbb1b2c98 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -511,9 +511,6 @@ static void b44_stats_update(struct b44 *bp)
 		*val++ += br32(bp, reg);
 	}
 
-	/* Pad */
-	reg += 8*4UL;
-
 	for (reg = B44_RX_GOOD_O; reg <= B44_RX_NPAUSE; reg += 4UL) {
 		*val++ += br32(bp, reg);
 	}
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index a977a459bd20..825af709708e 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2479,9 +2479,9 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	priv->netdev = dev;
 	priv->pdev = pdev;
 
-	priv->phy_interface = of_get_phy_mode(dn);
+	ret = of_get_phy_mode(dn, &priv->phy_interface);
 	/* Default to GMII interface mode */
-	if ((int)priv->phy_interface < 0)
+	if (ret)
 		priv->phy_interface = PHY_INTERFACE_MODE_GMII;
 
 	/* In the case of a fixed PHY, the DT node associated
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index d10b421ed1f1..5e037a305b83 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1934,7 +1934,8 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	/* select a non-FCoE queue */
-	return netdev_pick_tx(dev, skb, NULL) % (BNX2X_NUM_ETH_QUEUES(bp));
+	return netdev_pick_tx(dev, skb, NULL) %
+			(BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
index 226ab29f4cb6..3f8435208bf4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
@@ -32,31 +32,31 @@
 	* IRO[142].m2) + ((sbId) * IRO[142].m3))
 #define CSTORM_IGU_MODE_OFFSET (IRO[161].base)
 #define CSTORM_ISCSI_CQ_SIZE_OFFSET(pfId) \
-	(IRO[323].base + ((pfId) * IRO[323].m1))
-#define CSTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \
 	(IRO[324].base + ((pfId) * IRO[324].m1))
+#define CSTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \
+	(IRO[325].base + ((pfId) * IRO[325].m1))
 #define CSTORM_ISCSI_EQ_CONS_OFFSET(pfId, iscsiEqId) \
-	(IRO[316].base + ((pfId) * IRO[316].m1) + ((iscsiEqId) * IRO[316].m2))
+	(IRO[317].base + ((pfId) * IRO[317].m1) + ((iscsiEqId) * IRO[317].m2))
 #define CSTORM_ISCSI_EQ_NEXT_EQE_ADDR_OFFSET(pfId, iscsiEqId) \
-	(IRO[318].base + ((pfId) * IRO[318].m1) + ((iscsiEqId) * IRO[318].m2))
+	(IRO[319].base + ((pfId) * IRO[319].m1) + ((iscsiEqId) * IRO[319].m2))
 #define CSTORM_ISCSI_EQ_NEXT_PAGE_ADDR_OFFSET(pfId, iscsiEqId) \
-	(IRO[317].base + ((pfId) * IRO[317].m1) + ((iscsiEqId) * IRO[317].m2))
+	(IRO[318].base + ((pfId) * IRO[318].m1) + ((iscsiEqId) * IRO[318].m2))
 #define CSTORM_ISCSI_EQ_NEXT_PAGE_ADDR_VALID_OFFSET(pfId, iscsiEqId) \
-	(IRO[319].base + ((pfId) * IRO[319].m1) + ((iscsiEqId) * IRO[319].m2))
+	(IRO[320].base + ((pfId) * IRO[320].m1) + ((iscsiEqId) * IRO[320].m2))
 #define CSTORM_ISCSI_EQ_PROD_OFFSET(pfId, iscsiEqId) \
-	(IRO[315].base + ((pfId) * IRO[315].m1) + ((iscsiEqId) * IRO[315].m2))
+	(IRO[316].base + ((pfId) * IRO[316].m1) + ((iscsiEqId) * IRO[316].m2))
 #define CSTORM_ISCSI_EQ_SB_INDEX_OFFSET(pfId, iscsiEqId) \
-	(IRO[321].base + ((pfId) * IRO[321].m1) + ((iscsiEqId) * IRO[321].m2))
+	(IRO[322].base + ((pfId) * IRO[322].m1) + ((iscsiEqId) * IRO[322].m2))
 #define CSTORM_ISCSI_EQ_SB_NUM_OFFSET(pfId, iscsiEqId) \
-	(IRO[320].base + ((pfId) * IRO[320].m1) + ((iscsiEqId) * IRO[320].m2))
+	(IRO[321].base + ((pfId) * IRO[321].m1) + ((iscsiEqId) * IRO[321].m2))
 #define CSTORM_ISCSI_HQ_SIZE_OFFSET(pfId) \
-	(IRO[322].base + ((pfId) * IRO[322].m1))
+	(IRO[323].base + ((pfId) * IRO[323].m1))
 #define CSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \
-	(IRO[314].base + ((pfId) * IRO[314].m1))
+	(IRO[315].base + ((pfId) * IRO[315].m1))
 #define CSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \
-	(IRO[313].base + ((pfId) * IRO[313].m1))
+	(IRO[314].base + ((pfId) * IRO[314].m1))
 #define CSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \
-	(IRO[312].base + ((pfId) * IRO[312].m1))
+	(IRO[313].base + ((pfId) * IRO[313].m1))
 #define CSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \
 	(IRO[155].base + ((funcId) * IRO[155].m1))
 #define CSTORM_SP_STATUS_BLOCK_DATA_OFFSET(pfId) \
@@ -99,81 +99,81 @@
 #define TSTORM_FUNC_EN_OFFSET(funcId) \
 	(IRO[107].base + ((funcId) * IRO[107].m1))
 #define TSTORM_ISCSI_ERROR_BITMAP_OFFSET(pfId) \
-	(IRO[278].base + ((pfId) * IRO[278].m1))
-#define TSTORM_ISCSI_L2_ISCSI_OOO_CID_TABLE_OFFSET(pfId) \
 	(IRO[279].base + ((pfId) * IRO[279].m1))
-#define TSTORM_ISCSI_L2_ISCSI_OOO_CLIENT_ID_TABLE_OFFSET(pfId) \
+#define TSTORM_ISCSI_L2_ISCSI_OOO_CID_TABLE_OFFSET(pfId) \
 	(IRO[280].base + ((pfId) * IRO[280].m1))
-#define TSTORM_ISCSI_L2_ISCSI_OOO_PROD_OFFSET(pfId) \
+#define TSTORM_ISCSI_L2_ISCSI_OOO_CLIENT_ID_TABLE_OFFSET(pfId) \
 	(IRO[281].base + ((pfId) * IRO[281].m1))
+#define TSTORM_ISCSI_L2_ISCSI_OOO_PROD_OFFSET(pfId) \
+	(IRO[282].base + ((pfId) * IRO[282].m1))
 #define TSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \
-	(IRO[277].base + ((pfId) * IRO[277].m1))
+	(IRO[278].base + ((pfId) * IRO[278].m1))
 #define TSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \
-	(IRO[276].base + ((pfId) * IRO[276].m1))
+	(IRO[277].base + ((pfId) * IRO[277].m1))
 #define TSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \
-	(IRO[275].base + ((pfId) * IRO[275].m1))
+	(IRO[276].base + ((pfId) * IRO[276].m1))
 #define TSTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \
-	(IRO[274].base + ((pfId) * IRO[274].m1))
+	(IRO[275].base + ((pfId) * IRO[275].m1))
 #define TSTORM_ISCSI_TCP_LOCAL_ADV_WND_OFFSET(pfId) \
-	(IRO[284].base + ((pfId) * IRO[284].m1))
+	(IRO[285].base + ((pfId) * IRO[285].m1))
 #define TSTORM_ISCSI_TCP_VARS_FLAGS_OFFSET(pfId) \
-	(IRO[270].base + ((pfId) * IRO[270].m1))
-#define TSTORM_ISCSI_TCP_VARS_LSB_LOCAL_MAC_ADDR_OFFSET(pfId) \
 	(IRO[271].base + ((pfId) * IRO[271].m1))
-#define TSTORM_ISCSI_TCP_VARS_MID_LOCAL_MAC_ADDR_OFFSET(pfId) \
+#define TSTORM_ISCSI_TCP_VARS_LSB_LOCAL_MAC_ADDR_OFFSET(pfId) \
 	(IRO[272].base + ((pfId) * IRO[272].m1))
-#define TSTORM_ISCSI_TCP_VARS_MSB_LOCAL_MAC_ADDR_OFFSET(pfId) \
+#define TSTORM_ISCSI_TCP_VARS_MID_LOCAL_MAC_ADDR_OFFSET(pfId) \
 	(IRO[273].base + ((pfId) * IRO[273].m1))
+#define TSTORM_ISCSI_TCP_VARS_MSB_LOCAL_MAC_ADDR_OFFSET(pfId) \
+	(IRO[274].base + ((pfId) * IRO[274].m1))
 #define TSTORM_MAC_FILTER_CONFIG_OFFSET(pfId) \
 	(IRO[206].base + ((pfId) * IRO[206].m1))
 #define TSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \
 	(IRO[109].base + ((funcId) * IRO[109].m1))
 #define TSTORM_TCP_MAX_CWND_OFFSET(pfId) \
-	(IRO[223].base + ((pfId) * IRO[223].m1))
+	(IRO[224].base + ((pfId) * IRO[224].m1))
 #define TSTORM_VF_TO_PF_OFFSET(funcId) \
 	(IRO[108].base + ((funcId) * IRO[108].m1))
-#define USTORM_AGG_DATA_OFFSET (IRO[212].base)
-#define USTORM_AGG_DATA_SIZE (IRO[212].size)
+#define USTORM_AGG_DATA_OFFSET (IRO[213].base)
+#define USTORM_AGG_DATA_SIZE (IRO[213].size)
 #define USTORM_ASSERT_LIST_INDEX_OFFSET	(IRO[181].base)
 #define USTORM_ASSERT_LIST_OFFSET(assertListEntry) \
 	(IRO[180].base + ((assertListEntry) * IRO[180].m1))
 #define USTORM_ETH_PAUSE_ENABLED_OFFSET(portId) \
 	(IRO[187].base + ((portId) * IRO[187].m1))
 #define USTORM_FCOE_EQ_PROD_OFFSET(pfId) \
-	(IRO[325].base + ((pfId) * IRO[325].m1))
+	(IRO[326].base + ((pfId) * IRO[326].m1))
 #define USTORM_FUNC_EN_OFFSET(funcId) \
 	(IRO[182].base + ((funcId) * IRO[182].m1))
 #define USTORM_ISCSI_CQ_SIZE_OFFSET(pfId) \
-	(IRO[289].base + ((pfId) * IRO[289].m1))
-#define USTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \
 	(IRO[290].base + ((pfId) * IRO[290].m1))
+#define USTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \
+	(IRO[291].base + ((pfId) * IRO[291].m1))
 #define USTORM_ISCSI_ERROR_BITMAP_OFFSET(pfId) \
-	(IRO[294].base + ((pfId) * IRO[294].m1))
+	(IRO[295].base + ((pfId) * IRO[295].m1))
 #define USTORM_ISCSI_GLOBAL_BUF_PHYS_ADDR_OFFSET(pfId) \
-	(IRO[291].base + ((pfId) * IRO[291].m1))
+	(IRO[292].base + ((pfId) * IRO[292].m1))
 #define USTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \
-	(IRO[287].base + ((pfId) * IRO[287].m1))
+	(IRO[288].base + ((pfId) * IRO[288].m1))
 #define USTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \
-	(IRO[286].base + ((pfId) * IRO[286].m1))
+	(IRO[287].base + ((pfId) * IRO[287].m1))
 #define USTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \
-	(IRO[285].base + ((pfId) * IRO[285].m1))
+	(IRO[286].base + ((pfId) * IRO[286].m1))
 #define USTORM_ISCSI_R2TQ_SIZE_OFFSET(pfId) \
-	(IRO[288].base + ((pfId) * IRO[288].m1))
+	(IRO[289].base + ((pfId) * IRO[289].m1))
 #define USTORM_ISCSI_RQ_BUFFER_SIZE_OFFSET(pfId) \
-	(IRO[292].base + ((pfId) * IRO[292].m1))
-#define USTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \
 	(IRO[293].base + ((pfId) * IRO[293].m1))
+#define USTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \
+	(IRO[294].base + ((pfId) * IRO[294].m1))
 #define USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(pfId) \
 	(IRO[186].base + ((pfId) * IRO[186].m1))
 #define USTORM_RECORD_SLOW_PATH_OFFSET(funcId) \
 	(IRO[184].base + ((funcId) * IRO[184].m1))
 #define USTORM_RX_PRODS_E1X_OFFSET(portId, clientId) \
-	(IRO[215].base + ((portId) * IRO[215].m1) + ((clientId) * \
-	IRO[215].m2))
+	(IRO[216].base + ((portId) * IRO[216].m1) + ((clientId) * \
+	IRO[216].m2))
 #define USTORM_RX_PRODS_E2_OFFSET(qzoneId) \
-	(IRO[216].base + ((qzoneId) * IRO[216].m1))
-#define USTORM_TPA_BTR_OFFSET (IRO[213].base)
-#define USTORM_TPA_BTR_SIZE (IRO[213].size)
+	(IRO[217].base + ((qzoneId) * IRO[217].m1))
+#define USTORM_TPA_BTR_OFFSET (IRO[214].base)
+#define USTORM_TPA_BTR_SIZE (IRO[214].size)
 #define USTORM_VF_TO_PF_OFFSET(funcId) \
 	(IRO[183].base + ((funcId) * IRO[183].m1))
 #define XSTORM_AGG_INT_FINAL_CLEANUP_COMP_TYPE (IRO[67].base)
@@ -188,39 +188,39 @@
 #define XSTORM_FUNC_EN_OFFSET(funcId) \
 	(IRO[47].base + ((funcId) * IRO[47].m1))
 #define XSTORM_ISCSI_HQ_SIZE_OFFSET(pfId) \
-	(IRO[302].base + ((pfId) * IRO[302].m1))
+	(IRO[303].base + ((pfId) * IRO[303].m1))
 #define XSTORM_ISCSI_LOCAL_MAC_ADDR0_OFFSET(pfId) \
-	(IRO[305].base + ((pfId) * IRO[305].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR1_OFFSET(pfId) \
 	(IRO[306].base + ((pfId) * IRO[306].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR2_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR1_OFFSET(pfId) \
 	(IRO[307].base + ((pfId) * IRO[307].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR3_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR2_OFFSET(pfId) \
 	(IRO[308].base + ((pfId) * IRO[308].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR4_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR3_OFFSET(pfId) \
 	(IRO[309].base + ((pfId) * IRO[309].m1))
-#define XSTORM_ISCSI_LOCAL_MAC_ADDR5_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR4_OFFSET(pfId) \
 	(IRO[310].base + ((pfId) * IRO[310].m1))
-#define XSTORM_ISCSI_LOCAL_VLAN_OFFSET(pfId) \
+#define XSTORM_ISCSI_LOCAL_MAC_ADDR5_OFFSET(pfId) \
 	(IRO[311].base + ((pfId) * IRO[311].m1))
+#define XSTORM_ISCSI_LOCAL_VLAN_OFFSET(pfId) \
+	(IRO[312].base + ((pfId) * IRO[312].m1))
 #define XSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \
-	(IRO[301].base + ((pfId) * IRO[301].m1))
+	(IRO[302].base + ((pfId) * IRO[302].m1))
 #define XSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \
-	(IRO[300].base + ((pfId) * IRO[300].m1))
+	(IRO[301].base + ((pfId) * IRO[301].m1))
 #define XSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \
-	(IRO[299].base + ((pfId) * IRO[299].m1))
+	(IRO[300].base + ((pfId) * IRO[300].m1))
 #define XSTORM_ISCSI_R2TQ_SIZE_OFFSET(pfId) \
-	(IRO[304].base + ((pfId) * IRO[304].m1))
+	(IRO[305].base + ((pfId) * IRO[305].m1))
 #define XSTORM_ISCSI_SQ_SIZE_OFFSET(pfId) \
-	(IRO[303].base + ((pfId) * IRO[303].m1))
+	(IRO[304].base + ((pfId) * IRO[304].m1))
 #define XSTORM_ISCSI_TCP_VARS_ADV_WND_SCL_OFFSET(pfId) \
-	(IRO[298].base + ((pfId) * IRO[298].m1))
+	(IRO[299].base + ((pfId) * IRO[299].m1))
 #define XSTORM_ISCSI_TCP_VARS_FLAGS_OFFSET(pfId) \
-	(IRO[297].base + ((pfId) * IRO[297].m1))
+	(IRO[298].base + ((pfId) * IRO[298].m1))
 #define XSTORM_ISCSI_TCP_VARS_TOS_OFFSET(pfId) \
-	(IRO[296].base + ((pfId) * IRO[296].m1))
+	(IRO[297].base + ((pfId) * IRO[297].m1))
 #define XSTORM_ISCSI_TCP_VARS_TTL_OFFSET(pfId) \
-	(IRO[295].base + ((pfId) * IRO[295].m1))
+	(IRO[296].base + ((pfId) * IRO[296].m1))
 #define XSTORM_RATE_SHAPING_PER_VN_VARS_OFFSET(pfId) \
 	(IRO[44].base + ((pfId) * IRO[44].m1))
 #define XSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \
@@ -233,12 +233,12 @@
 #define XSTORM_SPQ_PROD_OFFSET(funcId) \
 	(IRO[31].base + ((funcId) * IRO[31].m1))
 #define XSTORM_TCP_GLOBAL_DEL_ACK_COUNTER_ENABLED_OFFSET(portId) \
-	(IRO[217].base + ((portId) * IRO[217].m1))
-#define XSTORM_TCP_GLOBAL_DEL_ACK_COUNTER_MAX_COUNT_OFFSET(portId) \
 	(IRO[218].base + ((portId) * IRO[218].m1))
+#define XSTORM_TCP_GLOBAL_DEL_ACK_COUNTER_MAX_COUNT_OFFSET(portId) \
+	(IRO[219].base + ((portId) * IRO[219].m1))
 #define XSTORM_TCP_TX_SWS_TIMER_VAL_OFFSET(pfId) \
-	(IRO[220].base + (((pfId)>>1) * IRO[220].m1) + (((pfId)&1) * \
-	IRO[220].m2))
+	(IRO[221].base + (((pfId)>>1) * IRO[221].m1) + (((pfId)&1) * \
+	IRO[221].m2))
 #define XSTORM_VF_TO_PF_OFFSET(funcId) \
 	(IRO[48].base + ((funcId) * IRO[48].m1))
 #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 78326a6c0aba..622fadc50316 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -3024,7 +3024,7 @@ struct afex_stats {
 
 #define BCM_5710_FW_MAJOR_VERSION			7
 #define BCM_5710_FW_MINOR_VERSION			13
-#define BCM_5710_FW_REVISION_VERSION		11
+#define BCM_5710_FW_REVISION_VERSION		15
 #define BCM_5710_FW_ENGINEERING_VERSION		0
 #define BCM_5710_FW_COMPILE_FLAGS			1
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 0edbb0a76847..5097a44686b3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -2397,15 +2397,21 @@ static int bnx2x_set_pf_tx_switching(struct bnx2x *bp, bool enable)
 	/* send the ramrod on all the queues of the PF */
 	for_each_eth_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
+		int tx_idx;
 
 		/* Set the appropriate Queue object */
 		q_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj;
 
-		/* Update the Queue state */
-		rc = bnx2x_queue_state_change(bp, &q_params);
-		if (rc) {
-			BNX2X_ERR("Failed to configure Tx switching\n");
-			return rc;
+		for (tx_idx = FIRST_TX_COS_INDEX;
+		     tx_idx < fp->max_cos; tx_idx++) {
+			q_params.params.update.cid_index = tx_idx;
+
+			/* Update the Queue state */
+			rc = bnx2x_queue_state_change(bp, &q_params);
+			if (rc) {
+				BNX2X_ERR("Failed to configure Tx switching\n");
+				return rc;
+			}
 		}
 	}
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 04ec909e06df..c07172429c70 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8762,6 +8762,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 	}
 	if (resc_reinit || fw_reset) {
 		if (fw_reset) {
+			if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+				bnxt_ulp_stop(bp);
 			rc = bnxt_fw_init_one(bp);
 			if (rc) {
 				set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
@@ -9224,13 +9226,16 @@ static int bnxt_open(struct net_device *dev)
 	if (rc) {
 		bnxt_hwrm_if_change(bp, false);
 	} else {
-		if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state) &&
-		    BNXT_PF(bp)) {
-			struct bnxt_pf_info *pf = &bp->pf;
-			int n = pf->active_vfs;
+		if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) {
+			if (BNXT_PF(bp)) {
+				struct bnxt_pf_info *pf = &bp->pf;
+				int n = pf->active_vfs;
 
-			if (n)
-				bnxt_cfg_hw_sriov(bp, &n, true);
+				if (n)
+					bnxt_cfg_hw_sriov(bp, &n, true);
+			}
+			if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+				bnxt_ulp_start(bp, 0);
 		}
 		bnxt_hwmon_open(bp);
 	}
@@ -9927,12 +9932,15 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent)
 	if (netif_running(bp->dev)) {
 		int rc;
 
-		if (!silent)
+		if (silent) {
+			bnxt_close_nic(bp, false, false);
+			bnxt_open_nic(bp, false, false);
+		} else {
 			bnxt_ulp_stop(bp);
-		bnxt_close_nic(bp, false, false);
-		rc = bnxt_open_nic(bp, false, false);
-		if (!silent && !rc)
-			bnxt_ulp_start(bp);
+			bnxt_close_nic(bp, true, false);
+			rc = bnxt_open_nic(bp, true, false);
+			bnxt_ulp_start(bp, rc);
+		}
 	}
 }
 
@@ -10004,7 +10012,7 @@ static void bnxt_timer(struct timer_list *t)
 
 	if (bp->link_info.phy_retry) {
 		if (time_after(jiffies, bp->link_info.phy_retry_expires)) {
-			bp->link_info.phy_retry = 0;
+			bp->link_info.phy_retry = false;
 			netdev_warn(bp->dev, "failed to update phy settings after maximum retries.\n");
 		} else {
 			set_bit(BNXT_UPDATE_PHY_SP_EVENT, &bp->sp_event);
@@ -10048,8 +10056,8 @@ static void bnxt_reset(struct bnxt *bp, bool silent)
 
 static void bnxt_fw_reset_close(struct bnxt *bp)
 {
+	bnxt_ulp_stop(bp);
 	__bnxt_close_nic(bp, true, false);
-	bnxt_ulp_irq_stop(bp);
 	bnxt_clear_int_mode(bp);
 	bnxt_hwrm_func_drv_unrgtr(bp);
 	bnxt_free_ctx_mem(bp);
@@ -10582,14 +10590,23 @@ static void bnxt_fw_reset_writel(struct bnxt *bp, int reg_idx)
 static void bnxt_reset_all(struct bnxt *bp)
 {
 	struct bnxt_fw_health *fw_health = bp->fw_health;
-	int i;
+	int i, rc;
+
+	if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) {
+#ifdef CONFIG_TEE_BNXT_FW
+		rc = tee_bnxt_fw_load();
+		if (rc)
+			netdev_err(bp->dev, "Unable to reset FW rc=%d\n", rc);
+		bp->fw_reset_timestamp = jiffies;
+#endif
+		return;
+	}
 
 	if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_HOST) {
 		for (i = 0; i < fw_health->fw_reset_seq_cnt; i++)
 			bnxt_fw_reset_writel(bp, i);
 	} else if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) {
 		struct hwrm_fw_reset_input req = {0};
-		int rc;
 
 		bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
 		req.resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
@@ -10720,13 +10737,13 @@ static void bnxt_fw_reset_task(struct work_struct *work)
 			clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
 			dev_close(bp->dev);
 		}
-		bnxt_ulp_irq_restart(bp, rc);
-		rtnl_unlock();
 
 		bp->fw_reset_state = 0;
 		/* Make sure fw_reset_state is 0 before clearing the flag */
 		smp_mb__before_atomic();
 		clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+		bnxt_ulp_start(bp, rc);
+		rtnl_unlock();
 		break;
 	}
 	return;
@@ -10934,7 +10951,7 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 	}
 }
 
-static LIST_HEAD(bnxt_block_cb_list);
+LIST_HEAD(bnxt_block_cb_list);
 
 static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			 void *type_data)
@@ -11882,6 +11899,7 @@ static int bnxt_suspend(struct device *device)
 	int rc = 0;
 
 	rtnl_lock();
+	bnxt_ulp_stop(bp);
 	if (netif_running(dev)) {
 		netif_device_detach(dev);
 		rc = bnxt_close(dev);
@@ -11915,6 +11933,7 @@ static int bnxt_resume(struct device *device)
 	}
 
 resume_exit:
+	bnxt_ulp_start(bp, rc);
 	rtnl_unlock();
 	return rc;
 }
@@ -11994,10 +12013,9 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
 		if (!err && netif_running(netdev))
 			err = bnxt_open(netdev);
 
-		if (!err) {
+		if (!err)
 			result = PCI_ERS_RESULT_RECOVERED;
-			bnxt_ulp_start(bp);
-		}
+		bnxt_ulp_start(bp, err);
 	}
 
 	if (result != PCI_ERS_RESULT_RECOVERED && netif_running(netdev))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index d333589811a5..a3545c846bfb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -25,6 +25,11 @@
 #include <net/dst_metadata.h>
 #include <net/xdp.h>
 #include <linux/dim.h>
+#ifdef CONFIG_TEE_BNXT_FW
+#include <linux/firmware/broadcom/tee_bnxt_fw.h>
+#endif
+
+extern struct list_head bnxt_block_cb_list;
 
 struct page_pool;
 
@@ -1241,6 +1246,14 @@ struct bnxt_tc_flow_stats {
 	u64		bytes;
 };
 
+#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
+struct bnxt_flower_indr_block_cb_priv {
+	struct net_device *tunnel_netdev;
+	struct bnxt *bp;
+	struct list_head list;
+};
+#endif
+
 struct bnxt_tc_info {
 	bool				enabled;
 
@@ -1804,6 +1817,9 @@ struct bnxt {
 
 	u8			num_leds;
 	struct bnxt_led_info	leds[BNXT_MAX_LED];
+	u16			dump_flag;
+#define BNXT_DUMP_LIVE		0
+#define BNXT_DUMP_CRASH		1
 
 	struct bpf_prog		*xdp_prog;
 
@@ -1815,6 +1831,8 @@ struct bnxt {
 	u16			*cfa_code_map; /* cfa_code -> vf_idx map */
 	u8			switch_id[8];
 	struct bnxt_tc_info	*tc_info;
+	struct list_head	tc_indr_block_list;
+	struct notifier_block	tc_netdev_nb;
 	struct dentry		*debugfs_pdev;
 	struct device		*hwmon_dev;
 };
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 7151244f8c7d..ae4ddf33fe5c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -16,7 +16,8 @@
 #include "bnxt_devlink.h"
 
 static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
-				     struct devlink_fmsg *fmsg)
+				     struct devlink_fmsg *fmsg,
+				     struct netlink_ext_ack *extack)
 {
 	struct bnxt *bp = devlink_health_reporter_priv(reporter);
 	struct bnxt_fw_health *health = bp->fw_health;
@@ -61,7 +62,8 @@ static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
 };
 
 static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter,
-				 void *priv_ctx)
+				 void *priv_ctx,
+				 struct netlink_ext_ack *extack)
 {
 	struct bnxt *bp = devlink_health_reporter_priv(reporter);
 
@@ -79,7 +81,8 @@ struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = {
 };
 
 static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter,
-				 void *priv_ctx)
+				 void *priv_ctx,
+				 struct netlink_ext_ack *extack)
 {
 	struct bnxt *bp = devlink_health_reporter_priv(reporter);
 	struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 51c140476717..f2220b826d61 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -3311,6 +3311,24 @@ err:
 	return rc;
 }
 
+static int bnxt_set_dump(struct net_device *dev, struct ethtool_dump *dump)
+{
+	struct bnxt *bp = netdev_priv(dev);
+
+	if (dump->flag > BNXT_DUMP_CRASH) {
+		netdev_info(dev, "Supports only Live(0) and Crash(1) dumps.\n");
+		return -EINVAL;
+	}
+
+	if (!IS_ENABLED(CONFIG_TEE_BNXT_FW) && dump->flag == BNXT_DUMP_CRASH) {
+		netdev_info(dev, "Cannot collect crash dump as TEE_BNXT_FW config option is not enabled.\n");
+		return -EOPNOTSUPP;
+	}
+
+	bp->dump_flag = dump->flag;
+	return 0;
+}
+
 static int bnxt_get_dump_flag(struct net_device *dev, struct ethtool_dump *dump)
 {
 	struct bnxt *bp = netdev_priv(dev);
@@ -3323,7 +3341,12 @@ static int bnxt_get_dump_flag(struct net_device *dev, struct ethtool_dump *dump)
 			bp->ver_resp.hwrm_fw_bld_8b << 8 |
 			bp->ver_resp.hwrm_fw_rsvd_8b;
 
-	return bnxt_get_coredump(bp, NULL, &dump->len);
+	dump->flag = bp->dump_flag;
+	if (bp->dump_flag == BNXT_DUMP_CRASH)
+		dump->len = BNXT_CRASH_DUMP_LEN;
+	else
+		bnxt_get_coredump(bp, NULL, &dump->len);
+	return 0;
 }
 
 static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump,
@@ -3336,7 +3359,16 @@ static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump,
 
 	memset(buf, 0, dump->len);
 
-	return bnxt_get_coredump(bp, buf, &dump->len);
+	dump->flag = bp->dump_flag;
+	if (dump->flag == BNXT_DUMP_CRASH) {
+#ifdef CONFIG_TEE_BNXT_FW
+		return tee_bnxt_copy_coredump(buf, 0, dump->len);
+#endif
+	} else {
+		return bnxt_get_coredump(bp, buf, &dump->len);
+	}
+
+	return 0;
 }
 
 void bnxt_ethtool_init(struct bnxt *bp)
@@ -3446,6 +3478,7 @@ const struct ethtool_ops bnxt_ethtool_ops = {
 	.set_phys_id		= bnxt_set_phys_id,
 	.self_test		= bnxt_self_test,
 	.reset			= bnxt_reset,
+	.set_dump		= bnxt_set_dump,
 	.get_dump_flag		= bnxt_get_dump_flag,
 	.get_dump_data		= bnxt_get_dump_data,
 };
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
index b5b65b3f8534..01de7e79d14f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
@@ -59,6 +59,8 @@ struct hwrm_dbg_cmn_output {
 	#define HWRM_DBG_CMN_FLAGS_MORE	1
 };
 
+#define BNXT_CRASH_DUMP_LEN	(8 << 20)
+
 #define BNXT_LED_DFLT_ENA				\
 	(PORT_LED_CFG_REQ_ENABLES_LED0_ID |		\
 	 PORT_LED_CFG_REQ_ENABLES_LED0_STATE |		\
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index c8062d020d1e..174412a55e53 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -16,7 +16,9 @@
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_tunnel_key.h>
+#include <net/vxlan.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
@@ -36,6 +38,8 @@
 #define is_vid_exactmatch(vlan_tci_mask)	\
 	((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK)
 
+static bool is_wildcard(void *mask, int len);
+static bool is_exactmatch(void *mask, int len);
 /* Return the dst fid of the func for flow forwarding
  * For PFs: src_fid is the fid of the PF
  * For VF-reps: src_fid the fid of the VF
@@ -111,10 +115,182 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
 	return 0;
 }
 
+/* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes
+ * each(u32).
+ * This routine consolidates such multiple unaligned values into one
+ * field each for Key & Mask (for src and dst macs separately)
+ * For example,
+ *			Mask/Key	Offset	Iteration
+ *			==========	======	=========
+ *	dst mac		0xffffffff	0	1
+ *	dst mac		0x0000ffff	4	2
+ *
+ *	src mac		0xffff0000	4	1
+ *	src mac		0xffffffff	8	2
+ *
+ * The above combination coming from the stack will be consolidated as
+ *			Mask/Key
+ *			==============
+ *	src mac:	0xffffffffffff
+ *	dst mac:	0xffffffffffff
+ */
+static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
+				 u8 *actual_key, u8 *actual_mask)
+{
+	u32 key = get_unaligned((u32 *)actual_key);
+	u32 mask = get_unaligned((u32 *)actual_mask);
+
+	part_key &= part_mask;
+	part_key |= key & ~part_mask;
+
+	put_unaligned(mask | part_mask, (u32 *)actual_mask);
+	put_unaligned(part_key, (u32 *)actual_key);
+}
+
+static int
+bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
+			    u16 *eth_addr, u16 *eth_addr_mask)
+{
+	u16 *p;
+	int j;
+
+	if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
+		return -EINVAL;
+
+	if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
+		if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
+			return -EINVAL;
+		/* FW expects dmac to be in u16 array format */
+		p = eth_addr;
+		for (j = 0; j < 3; j++)
+			actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
+	}
+
+	if (!is_wildcard(&eth_addr_mask[ETH_ALEN], ETH_ALEN)) {
+		if (!is_exactmatch(&eth_addr_mask[ETH_ALEN], ETH_ALEN))
+			return -EINVAL;
+		/* FW expects smac to be in u16 array format */
+		p = &eth_addr[ETH_ALEN / 2];
+		for (j = 0; j < 3; j++)
+			actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
+	}
+
+	return 0;
+}
+
+static int
+bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
+		    struct flow_action_entry *act, int act_idx, u8 *eth_addr,
+		    u8 *eth_addr_mask)
+{
+	size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
+	size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
+	u32 mask, val, offset, idx;
+	u8 htype;
+
+	offset = act->mangle.offset;
+	htype = act->mangle.htype;
+	mask = ~act->mangle.mask;
+	val = act->mangle.val;
+
+	switch (htype) {
+	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+		if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
+			netdev_err(bp->dev,
+				   "%s: eth_hdr: Invalid pedit field\n",
+				   __func__);
+			return -EINVAL;
+		}
+		actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
+
+		bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
+				     &eth_addr_mask[offset]);
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
+		actions->nat.l3_is_ipv4 = true;
+		if (offset ==  offsetof(struct iphdr, saddr)) {
+			actions->nat.src_xlate = true;
+			actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
+		} else if (offset ==  offsetof(struct iphdr, daddr)) {
+			actions->nat.src_xlate = false;
+			actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
+		} else {
+			netdev_err(bp->dev,
+				   "%s: IPv4_hdr: Invalid pedit field\n",
+				   __func__);
+			return -EINVAL;
+		}
+
+		netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
+			   actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
+			   &actions->nat.l3.ipv4.daddr);
+		break;
+
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
+		actions->nat.l3_is_ipv4 = false;
+		if (offset >= offsetof(struct ipv6hdr, saddr) &&
+		    offset < offset_of_ip6_daddr) {
+			/* 16 byte IPv6 address comes in 4 iterations of
+			 * 4byte chunks each
+			 */
+			actions->nat.src_xlate = true;
+			idx = (offset - offset_of_ip6_saddr) / 4;
+			/* First 4bytes will be copied to idx 0 and so on */
+			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
+		} else if (offset >= offset_of_ip6_daddr &&
+			   offset < offset_of_ip6_daddr + 16) {
+			actions->nat.src_xlate = false;
+			idx = (offset - offset_of_ip6_daddr) / 4;
+			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
+		} else {
+			netdev_err(bp->dev,
+				   "%s: IPv6_hdr: Invalid pedit field\n",
+				   __func__);
+			return -EINVAL;
+		}
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+		/* HW does not support L4 rewrite alone without L3
+		 * rewrite
+		 */
+		if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
+			netdev_err(bp->dev,
+				   "Need to specify L3 rewrite as well\n");
+			return -EINVAL;
+		}
+		if (actions->nat.src_xlate)
+			actions->nat.l4.ports.sport = htons(val);
+		else
+			actions->nat.l4.ports.dport = htons(val);
+		netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
+			   actions->nat.l4.ports.sport,
+			   actions->nat.l4.ports.dport);
+		break;
+	default:
+		netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
+			   __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int bnxt_tc_parse_actions(struct bnxt *bp,
 				 struct bnxt_tc_actions *actions,
 				 struct flow_action *flow_action)
 {
+	/* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
+	 * smac (6 bytes) if rewrite of both is specified, otherwise either
+	 * dmac or smac
+	 */
+	u16 eth_addr_mask[ETH_ALEN] = { 0 };
+	/* Used to store the L2 rewrite key for dmac (6 bytes) followed by
+	 * smac (6 bytes) if rewrite of both is specified, otherwise either
+	 * dmac or smac
+	 */
+	u16 eth_addr[ETH_ALEN] = { 0 };
 	struct flow_action_entry *act;
 	int i, rc;
 
@@ -148,11 +324,26 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
 		case FLOW_ACTION_TUNNEL_DECAP:
 			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
 			break;
+		/* Packet edit: L2 rewrite, NAT, NAPT */
+		case FLOW_ACTION_MANGLE:
+			rc = bnxt_tc_parse_pedit(bp, actions, act, i,
+						 (u8 *)eth_addr,
+						 (u8 *)eth_addr_mask);
+			if (rc)
+				return rc;
+			break;
 		default:
 			break;
 		}
 	}
 
+	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
+		rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
+						 eth_addr_mask);
+		if (rc)
+			return rc;
+	}
+
 	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
 		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
 			/* dst_fid is PF's fid */
@@ -401,6 +592,76 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	req.src_fid = cpu_to_le16(flow->src_fid);
 	req.ref_flow_handle = ref_flow_handle;
 
+	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
+		memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
+		       ETH_ALEN);
+		memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
+		       ETH_ALEN);
+		action_flags |=
+			CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
+	}
+
+	if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
+		if (actions->nat.l3_is_ipv4) {
+			action_flags |=
+				CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
+
+			if (actions->nat.src_xlate) {
+				action_flags |=
+					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
+				/* L3 source rewrite */
+				req.nat_ip_address[0] =
+					actions->nat.l3.ipv4.saddr.s_addr;
+				/* L4 source port */
+				if (actions->nat.l4.ports.sport)
+					req.nat_port =
+						actions->nat.l4.ports.sport;
+			} else {
+				action_flags |=
+					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
+				/* L3 destination rewrite */
+				req.nat_ip_address[0] =
+					actions->nat.l3.ipv4.daddr.s_addr;
+				/* L4 destination port */
+				if (actions->nat.l4.ports.dport)
+					req.nat_port =
+						actions->nat.l4.ports.dport;
+			}
+			netdev_dbg(bp->dev,
+				   "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
+				   req.nat_ip_address, actions->nat.src_xlate,
+				   req.nat_port);
+		} else {
+			if (actions->nat.src_xlate) {
+				action_flags |=
+					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
+				/* L3 source rewrite */
+				memcpy(req.nat_ip_address,
+				       actions->nat.l3.ipv6.saddr.s6_addr32,
+				       sizeof(req.nat_ip_address));
+				/* L4 source port */
+				if (actions->nat.l4.ports.sport)
+					req.nat_port =
+						actions->nat.l4.ports.sport;
+			} else {
+				action_flags |=
+					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
+				/* L3 destination rewrite */
+				memcpy(req.nat_ip_address,
+				       actions->nat.l3.ipv6.daddr.s6_addr32,
+				       sizeof(req.nat_ip_address));
+				/* L4 destination port */
+				if (actions->nat.l4.ports.dport)
+					req.nat_port =
+						actions->nat.l4.ports.dport;
+			}
+			netdev_dbg(bp->dev,
+				   "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
+				   req.nat_ip_address, actions->nat.src_xlate,
+				   req.nat_port);
+		}
+	}
+
 	if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
 	    actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
 		req.tunnel_handle = tunnel_handle;
@@ -1274,7 +1535,8 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
 
 	if (!bnxt_tc_can_offload(bp, flow)) {
 		rc = -EOPNOTSUPP;
-		goto free_node;
+		kfree_rcu(new_node, rcu);
+		return rc;
 	}
 
 	/* If a flow exists with the same cookie, delete it */
@@ -1580,6 +1842,147 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
 	}
 }
 
+static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
+				       void *type_data, void *cb_priv)
+{
+	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
+	struct flow_cls_offload *flower = type_data;
+	struct bnxt *bp = priv->bp;
+
+	if (flower->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static struct bnxt_flower_indr_block_cb_priv *
+bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
+{
+	struct bnxt_flower_indr_block_cb_priv *cb_priv;
+
+	/* All callback list access should be protected by RTNL. */
+	ASSERT_RTNL();
+
+	list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
+		if (cb_priv->tunnel_netdev == netdev)
+			return cb_priv;
+
+	return NULL;
+}
+
+static void bnxt_tc_setup_indr_rel(void *cb_priv)
+{
+	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
+
+	list_del(&priv->list);
+	kfree(priv);
+}
+
+static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp,
+				    struct flow_block_offload *f)
+{
+	struct bnxt_flower_indr_block_cb_priv *cb_priv;
+	struct flow_block_cb *block_cb;
+
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
+		if (!cb_priv)
+			return -ENOMEM;
+
+		cb_priv->tunnel_netdev = netdev;
+		cb_priv->bp = bp;
+		list_add(&cb_priv->list, &bp->tc_indr_block_list);
+
+		block_cb = flow_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
+					       cb_priv, cb_priv,
+					       bnxt_tc_setup_indr_rel);
+		if (IS_ERR(block_cb)) {
+			list_del(&cb_priv->list);
+			kfree(cb_priv);
+			return PTR_ERR(block_cb);
+		}
+
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
+		if (!cb_priv)
+			return -ENOENT;
+
+		block_cb = flow_block_cb_lookup(f->block,
+						bnxt_tc_setup_indr_block_cb,
+						cb_priv);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,
+				 enum tc_setup_type type, void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
+{
+	return netif_is_vxlan(netdev);
+}
+
+static int bnxt_tc_indr_block_event(struct notifier_block *nb,
+				    unsigned long event, void *ptr)
+{
+	struct net_device *netdev;
+	struct bnxt *bp;
+	int rc;
+
+	netdev = netdev_notifier_info_to_dev(ptr);
+	if (!bnxt_is_netdev_indr_offload(netdev))
+		return NOTIFY_OK;
+
+	bp = container_of(nb, struct bnxt, tc_netdev_nb);
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		rc = __flow_indr_block_cb_register(netdev, bp,
+						   bnxt_tc_setup_indr_cb,
+						   bp);
+		if (rc)
+			netdev_info(bp->dev,
+				    "Failed to register indirect blk: dev: %s",
+				    netdev->name);
+		break;
+	case NETDEV_UNREGISTER:
+		__flow_indr_block_cb_unregister(netdev,
+						bnxt_tc_setup_indr_cb,
+						bp);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
 	.head_offset = offsetof(struct bnxt_tc_flow_node, node),
 	.key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
@@ -1663,7 +2066,15 @@ int bnxt_init_tc(struct bnxt *bp)
 	bp->dev->hw_features |= NETIF_F_HW_TC;
 	bp->dev->features |= NETIF_F_HW_TC;
 	bp->tc_info = tc_info;
-	return 0;
+
+	/* init indirect block notifications */
+	INIT_LIST_HEAD(&bp->tc_indr_block_list);
+	bp->tc_netdev_nb.notifier_call = bnxt_tc_indr_block_event;
+	rc = register_netdevice_notifier(&bp->tc_netdev_nb);
+	if (!rc)
+		return 0;
+
+	rhashtable_destroy(&tc_info->encap_table);
 
 destroy_decap_table:
 	rhashtable_destroy(&tc_info->decap_table);
@@ -1685,6 +2096,7 @@ void bnxt_shutdown_tc(struct bnxt *bp)
 	if (!bnxt_tc_flower_enabled(bp))
 		return;
 
+	unregister_netdevice_notifier(&bp->tc_netdev_nb);
 	rhashtable_destroy(&tc_info->flow_table);
 	rhashtable_destroy(&tc_info->l2_table);
 	rhashtable_destroy(&tc_info->decap_l2_table);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
index 4f05305052f2..286754903543 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
@@ -62,6 +62,12 @@ struct bnxt_tc_tunnel_key {
 	__be32			id;
 };
 
+#define bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)		\
+	((is_wildcard(&(eth_addr)[0], ETH_ALEN) &&			\
+	 is_wildcard(&(eth_addr)[ETH_ALEN], ETH_ALEN)) ||		\
+	(is_wildcard(&(eth_addr_mask)[0], ETH_ALEN) &&			\
+	 is_wildcard(&(eth_addr_mask)[ETH_ALEN], ETH_ALEN)))
+
 struct bnxt_tc_actions {
 	u32				flags;
 #define BNXT_TC_ACTION_FLAG_FWD			BIT(0)
@@ -71,6 +77,8 @@ struct bnxt_tc_actions {
 #define BNXT_TC_ACTION_FLAG_DROP		BIT(5)
 #define BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP	BIT(6)
 #define BNXT_TC_ACTION_FLAG_TUNNEL_DECAP	BIT(7)
+#define BNXT_TC_ACTION_FLAG_L2_REWRITE		BIT(8)
+#define BNXT_TC_ACTION_FLAG_NAT_XLATE		BIT(9)
 
 	u16				dst_fid;
 	struct net_device		*dst_dev;
@@ -79,6 +87,18 @@ struct bnxt_tc_actions {
 
 	/* tunnel encap */
 	struct ip_tunnel_key		tun_encap_key;
+#define	PEDIT_OFFSET_SMAC_LAST_4_BYTES		0x8
+	__be16				l2_rewrite_dmac[3];
+	__be16				l2_rewrite_smac[3];
+	struct {
+		bool src_xlate;  /* true => translate src,
+				  * false => translate dst
+				  * Mutually exclusive, i.e cannot set both
+				  */
+		bool l3_is_ipv4; /* false means L3 is ipv6 */
+		struct bnxt_tc_l3_key l3;
+		struct bnxt_tc_l4_key l4;
+	} nat;
 };
 
 struct bnxt_tc_flow {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index b2c160947fc8..077fd101be60 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -182,7 +182,7 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id)
 
 	edev->ulp_tbl[ulp_id].msix_requested = 0;
 	edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED;
-	if (netif_running(dev)) {
+	if (netif_running(dev) && !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) {
 		bnxt_close_nic(bp, true, false);
 		bnxt_open_nic(bp, true, false);
 	}
@@ -266,6 +266,7 @@ void bnxt_ulp_stop(struct bnxt *bp)
 	if (!edev)
 		return;
 
+	edev->flags |= BNXT_EN_FLAG_ULP_STOPPED;
 	for (i = 0; i < BNXT_MAX_ULP; i++) {
 		struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
 
@@ -276,7 +277,7 @@ void bnxt_ulp_stop(struct bnxt *bp)
 	}
 }
 
-void bnxt_ulp_start(struct bnxt *bp)
+void bnxt_ulp_start(struct bnxt *bp, int err)
 {
 	struct bnxt_en_dev *edev = bp->edev;
 	struct bnxt_ulp_ops *ops;
@@ -285,6 +286,11 @@ void bnxt_ulp_start(struct bnxt *bp)
 	if (!edev)
 		return;
 
+	edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED;
+
+	if (err)
+		return;
+
 	for (i = 0; i < BNXT_MAX_ULP; i++) {
 		struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index cd78453d0bf0..9895406b9830 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -64,6 +64,7 @@ struct bnxt_en_dev {
 	#define BNXT_EN_FLAG_ROCE_CAP		(BNXT_EN_FLAG_ROCEV1_CAP | \
 						 BNXT_EN_FLAG_ROCEV2_CAP)
 	#define BNXT_EN_FLAG_MSIX_REQUESTED	0x4
+	#define BNXT_EN_FLAG_ULP_STOPPED	0x8
 	const struct bnxt_en_ops	*en_ops;
 	struct bnxt_ulp			ulp_tbl[BNXT_MAX_ULP];
 };
@@ -92,7 +93,7 @@ int bnxt_get_ulp_msix_num(struct bnxt *bp);
 int bnxt_get_ulp_msix_base(struct bnxt *bp);
 int bnxt_get_ulp_stat_ctxs(struct bnxt *bp);
 void bnxt_ulp_stop(struct bnxt *bp);
-void bnxt_ulp_start(struct bnxt *bp);
+void bnxt_ulp_start(struct bnxt *bp, int err);
 void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
 void bnxt_ulp_shutdown(struct bnxt *bp);
 void bnxt_ulp_irq_stop(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 155599dcee76..61ab7d21f6bd 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -5208,6 +5208,8 @@ static void cnic_init_rings(struct cnic_dev *dev)
 		cnic_init_bnx2x_tx_ring(dev, data);
 		cnic_init_bnx2x_rx_ring(dev, data);
 
+		data->general.fp_hsi_ver =  ETH_FP_HSI_VERSION;
+
 		l5_data.phy_address.lo = udev->l2_buf_map & 0xffffffff;
 		l5_data.phy_address.hi = (u64) udev->l2_buf_map >> 32;
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 1de51811fcb4..3504f77b1a2f 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3466,16 +3466,10 @@ static int bcmgenet_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	if (dn) {
+	if (dn)
 		macaddr = of_get_mac_address(dn);
-		if (IS_ERR(macaddr)) {
-			dev_err(&pdev->dev, "can't find MAC address\n");
-			err = -EINVAL;
-			goto err;
-		}
-	} else {
+	else
 		macaddr = pd->mac_address;
-	}
 
 	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
@@ -3487,7 +3481,12 @@ static int bcmgenet_probe(struct platform_device *pdev)
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	dev_set_drvdata(&pdev->dev, dev);
-	ether_addr_copy(dev->dev_addr, macaddr);
+	if (IS_ERR_OR_NULL(macaddr) || !is_valid_ether_addr(macaddr)) {
+		dev_warn(&pdev->dev, "using random Ethernet MAC\n");
+		eth_hw_addr_random(dev);
+	} else {
+		ether_addr_copy(dev->dev_addr, macaddr);
+	}
 	dev->watchdog_timeo = 2 * HZ;
 	dev->ethtool_ops = &bcmgenet_ethtool_ops;
 	dev->netdev_ops = &bcmgenet_netdev_ops;
@@ -3602,6 +3601,11 @@ static int bcmgenet_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static void bcmgenet_shutdown(struct platform_device *pdev)
+{
+	bcmgenet_remove(pdev);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int bcmgenet_resume(struct device *d)
 {
@@ -3721,6 +3725,7 @@ static SIMPLE_DEV_PM_OPS(bcmgenet_pm_ops, bcmgenet_suspend, bcmgenet_resume);
 static struct platform_driver bcmgenet_driver = {
 	.probe	= bcmgenet_probe,
 	.remove	= bcmgenet_remove,
+	.shutdown = bcmgenet_shutdown,
 	.driver	= {
 		.name	= "bcmgenet",
 		.of_match_table = bcmgenet_match,
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index dbe18cdf6c1b..6f291ee193a8 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -479,7 +479,7 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
 	struct device_node *dn = priv->pdev->dev.of_node;
 	struct device *kdev = &priv->pdev->dev;
 	struct phy_device *phydev;
-	int phy_mode;
+	phy_interface_t phy_mode;
 	int ret;
 
 	/* Fetch the PHY phandle */
@@ -497,10 +497,10 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
 	}
 
 	/* Get the link mode */
-	phy_mode = of_get_phy_mode(dn);
-	if (phy_mode < 0) {
+	ret = of_get_phy_mode(dn, &phy_mode);
+	if (ret) {
 		dev_err(kdev, "invalid PHY mode property\n");
-		return phy_mode;
+		return ret;
 	}
 
 	priv->phy_interface = phy_mode;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 1e1b774e1953..b884cf7f339b 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4182,6 +4182,7 @@ static int macb_probe(struct platform_device *pdev)
 	unsigned int queue_mask, num_queues;
 	bool native_io;
 	struct phy_device *phydev;
+	phy_interface_t interface;
 	struct net_device *dev;
 	struct resource *regs;
 	void __iomem *mem;
@@ -4308,12 +4309,12 @@ static int macb_probe(struct platform_device *pdev)
 		macb_get_hwaddr(bp);
 	}
 
-	err = of_get_phy_mode(np);
-	if (err < 0)
+	err = of_get_phy_mode(np, &interface);
+	if (err)
 		/* not found in DT, MII by default */
 		bp->phy_interface = PHY_INTERFACE_MODE_MII;
 	else
-		bp->phy_interface = err;
+		bp->phy_interface = interface;
 
 	/* IP specific init */
 	err = init(pdev);
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index f96a42af1014..af04a2c81adb 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1914,10 +1914,10 @@ static struct platform_driver xgmac_driver = {
 	.driver = {
 		.name = "calxedaxgmac",
 		.of_match_table = xgmac_of_match,
+		.pm = &xgmac_pm_ops,
 	},
 	.probe = xgmac_probe,
 	.remove = xgmac_remove,
-	.driver.pm = &xgmac_pm_ops,
 };
 
 module_platform_driver(xgmac_driver);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index acb016834f04..1e09fdb63c4f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1007,14 +1007,14 @@ static void bgx_poll_for_link(struct work_struct *work)
 
 	if ((spu_link & SPU_STATUS1_RCV_LNK) &&
 	    !(smu_link & SMU_RX_CTL_STATUS)) {
-		lmac->link_up = 1;
+		lmac->link_up = true;
 		if (lmac->lmac_type == BGX_MODE_XLAUI)
 			lmac->last_speed = SPEED_40000;
 		else
 			lmac->last_speed = SPEED_10000;
 		lmac->last_duplex = DUPLEX_FULL;
 	} else {
-		lmac->link_up = 0;
+		lmac->link_up = false;
 		lmac->last_speed = SPEED_UNKNOWN;
 		lmac->last_duplex = DUPLEX_UNKNOWN;
 	}
@@ -1023,7 +1023,7 @@ static void bgx_poll_for_link(struct work_struct *work)
 		if (lmac->link_up) {
 			if (bgx_xaui_check_link(lmac)) {
 				/* Errors, clear link_up state */
-				lmac->link_up = 0;
+				lmac->link_up = false;
 				lmac->last_speed = SPEED_UNKNOWN;
 				lmac->last_duplex = DUPLEX_UNKNOWN;
 			}
@@ -1055,11 +1055,11 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 	if ((lmac->lmac_type == BGX_MODE_SGMII) ||
 	    (lmac->lmac_type == BGX_MODE_QSGMII) ||
 	    (lmac->lmac_type == BGX_MODE_RGMII)) {
-		lmac->is_sgmii = 1;
+		lmac->is_sgmii = true;
 		if (bgx_lmac_sgmii_init(bgx, lmac))
 			return -1;
 	} else {
-		lmac->is_sgmii = 0;
+		lmac->is_sgmii = false;
 		if (bgx_lmac_xaui_init(bgx, lmac))
 			return -1;
 	}
@@ -1304,7 +1304,7 @@ static void lmac_set_training(struct bgx *bgx, struct lmac *lmac, int lmacid)
 {
 	if ((lmac->lmac_type != BGX_MODE_10G_KR) &&
 	    (lmac->lmac_type != BGX_MODE_40G_KR)) {
-		lmac->use_training = 0;
+		lmac->use_training = false;
 		return;
 	}
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 20390f6afbb4..49a19308073b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
 	      cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
 	      cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o cxgb4_mps.o \
-	      cudbg_common.o cudbg_lib.o cudbg_zlib.o
+	      cudbg_common.o cudbg_lib.o cudbg_zlib.o cxgb4_tc_mqprio.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
index 69746696a929..f5be3ee1bdb4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -325,6 +325,9 @@ enum cudbg_qdesc_qtype {
 	CUDBG_QTYPE_CRYPTO_FLQ,
 	CUDBG_QTYPE_TLS_RXQ,
 	CUDBG_QTYPE_TLS_FLQ,
+	CUDBG_QTYPE_ETHOFLD_TXQ,
+	CUDBG_QTYPE_ETHOFLD_RXQ,
+	CUDBG_QTYPE_ETHOFLD_FLQ,
 	CUDBG_QTYPE_MAX,
 };
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index c2e92786608b..19c11568113a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/sort.h>
+#include <linux/string.h>
 
 #include "t4_regs.h"
 #include "cxgb4.h"
@@ -776,24 +777,18 @@ static int cudbg_get_mem_region(struct adapter *padap,
 				struct cudbg_mem_desc *mem_desc)
 {
 	u8 mc, found = 0;
-	u32 i, idx = 0;
-	int rc;
+	u32 idx = 0;
+	int rc, i;
 
 	rc = cudbg_meminfo_get_mem_index(padap, meminfo, mem_type, &mc);
 	if (rc)
 		return rc;
 
-	for (i = 0; i < ARRAY_SIZE(cudbg_region); i++) {
-		if (!strcmp(cudbg_region[i], region_name)) {
-			found = 1;
-			idx = i;
-			break;
-		}
-	}
-	if (!found)
+	i = match_string(cudbg_region, ARRAY_SIZE(cudbg_region), region_name);
+	if (i < 0)
 		return -EINVAL;
 
-	found = 0;
+	idx = i;
 	for (i = 0; i < meminfo->mem_c; i++) {
 		if (meminfo->mem[i].idx >= ARRAY_SIZE(cudbg_region))
 			continue; /* Skip holes */
@@ -2930,6 +2925,10 @@ void cudbg_fill_qdesc_num_and_size(const struct adapter *padap,
 	tot_size += CXGB4_ULD_MAX * MAX_ULD_QSETS * SGE_MAX_IQ_SIZE *
 		    MAX_RXQ_DESC_SIZE;
 
+	/* ETHOFLD TXQ, RXQ, and FLQ */
+	tot_entries += MAX_OFLD_QSETS * 3;
+	tot_size += MAX_OFLD_QSETS * MAX_TXQ_ENTRIES * MAX_TXQ_DESC_SIZE;
+
 	tot_size += sizeof(struct cudbg_ver_hdr) +
 		    sizeof(struct cudbg_qdesc_info) +
 		    sizeof(struct cudbg_qdesc_entry) * tot_entries;
@@ -3087,6 +3086,23 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
 		}
 	}
 
+	/* ETHOFLD TXQ */
+	if (s->eohw_txq)
+		for (i = 0; i < s->eoqsets; i++)
+			QDESC_GET_TXQ(&s->eohw_txq[i].q,
+				      CUDBG_QTYPE_ETHOFLD_TXQ, out);
+
+	/* ETHOFLD RXQ and FLQ */
+	if (s->eohw_rxq) {
+		for (i = 0; i < s->eoqsets; i++)
+			QDESC_GET_RXQ(&s->eohw_rxq[i].rspq,
+				      CUDBG_QTYPE_ETHOFLD_RXQ, out);
+
+		for (i = 0; i < s->eoqsets; i++)
+			QDESC_GET_FLQ(&s->eohw_rxq[i].fl,
+				      CUDBG_QTYPE_ETHOFLD_FLQ, out);
+	}
+
 out_unlock:
 	mutex_unlock(&uld_mutex);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 1fbb640e896a..1fb273b294a1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -392,6 +392,7 @@ struct adapter_params {
 	struct arch_specific_params arch;  /* chip specific params */
 	unsigned char offload;
 	unsigned char crypto;		/* HW capability for crypto */
+	unsigned char ethofld;		/* QoS support */
 
 	unsigned char bypass;
 	unsigned char hash_filter;
@@ -711,6 +712,7 @@ struct sge_eth_rxq {                /* SW Ethernet Rx queue */
 	struct sge_rspq rspq;
 	struct sge_fl fl;
 	struct sge_eth_stats stats;
+	struct msix_info *msix;
 } ____cacheline_aligned_in_smp;
 
 struct sge_ofld_stats {             /* offload queue statistics */
@@ -724,6 +726,7 @@ struct sge_ofld_rxq {               /* SW offload Rx queue */
 	struct sge_rspq rspq;
 	struct sge_fl fl;
 	struct sge_ofld_stats stats;
+	struct msix_info *msix;
 } ____cacheline_aligned_in_smp;
 
 struct tx_desc {
@@ -788,7 +791,6 @@ struct sge_ctrl_txq {               /* state for an SGE control Tx queue */
 struct sge_uld_rxq_info {
 	char name[IFNAMSIZ];	/* name of ULD driver */
 	struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */
-	u16 *msix_tbl;		/* msix_tbl for uld */
 	u16 *rspq_id;		/* response queue id's of rxq */
 	u16 nrxq;		/* # of ingress uld queues */
 	u16 nciq;		/* # of completion queues */
@@ -801,6 +803,55 @@ struct sge_uld_txq_info {
 	u16 ntxq;		/* # of egress uld queues */
 };
 
+enum sge_eosw_state {
+	CXGB4_EO_STATE_CLOSED = 0, /* Not ready to accept traffic */
+	CXGB4_EO_STATE_FLOWC_OPEN_SEND, /* Send FLOWC open request */
+	CXGB4_EO_STATE_FLOWC_OPEN_REPLY, /* Waiting for FLOWC open reply */
+	CXGB4_EO_STATE_ACTIVE, /* Ready to accept traffic */
+	CXGB4_EO_STATE_FLOWC_CLOSE_SEND, /* Send FLOWC close request */
+	CXGB4_EO_STATE_FLOWC_CLOSE_REPLY, /* Waiting for FLOWC close reply */
+};
+
+struct sge_eosw_desc {
+	struct sk_buff *skb; /* SKB to free after getting completion */
+	dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* DMA mapped addresses */
+};
+
+struct sge_eosw_txq {
+	spinlock_t lock; /* Per queue lock to synchronize completions */
+	enum sge_eosw_state state; /* Current ETHOFLD State */
+	struct sge_eosw_desc *desc; /* Descriptor ring to hold packets */
+	u32 ndesc; /* Number of descriptors */
+	u32 pidx; /* Current Producer Index */
+	u32 last_pidx; /* Last successfully transmitted Producer Index */
+	u32 cidx; /* Current Consumer Index */
+	u32 last_cidx; /* Last successfully reclaimed Consumer Index */
+	u32 flowc_idx; /* Descriptor containing a FLOWC request */
+	u32 inuse; /* Number of packets held in ring */
+
+	u32 cred; /* Current available credits */
+	u32 ncompl; /* # of completions posted */
+	u32 last_compl; /* # of credits consumed since last completion req */
+
+	u32 eotid; /* Index into EOTID table in software */
+	u32 hwtid; /* Hardware EOTID index */
+
+	u32 hwqid; /* Underlying hardware queue index */
+	struct net_device *netdev; /* Pointer to netdevice */
+	struct tasklet_struct qresume_tsk; /* Restarts the queue */
+	struct completion completion; /* completion for FLOWC rendezvous */
+};
+
+struct sge_eohw_txq {
+	spinlock_t lock; /* Per queue lock */
+	struct sge_txq q; /* HW Txq */
+	struct adapter *adap; /* Backpointer to adapter */
+	unsigned long tso; /* # of TSO requests */
+	unsigned long tx_cso; /* # of Tx checksum offloads */
+	unsigned long vlan_ins; /* # of Tx VLAN insertions */
+	unsigned long mapping_err; /* # of I/O MMU packet mapping errors */
+};
+
 struct sge {
 	struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
 	struct sge_eth_txq ptptxq;
@@ -814,11 +865,16 @@ struct sge {
 	struct sge_rspq intrq ____cacheline_aligned_in_smp;
 	spinlock_t intrq_lock;
 
+	struct sge_eohw_txq *eohw_txq;
+	struct sge_ofld_rxq *eohw_rxq;
+
 	u16 max_ethqsets;           /* # of available Ethernet queue sets */
 	u16 ethqsets;               /* # of active Ethernet queue sets */
 	u16 ethtxq_rover;           /* Tx queue to clean up next */
 	u16 ofldqsets;              /* # of active ofld queue sets */
 	u16 nqs_per_uld;	    /* # of Rx queues per ULD */
+	u16 eoqsets;                /* # of ETHOFLD queues */
+
 	u16 timer_val[SGE_NTIMERS];
 	u8 counter_val[SGE_NCOUNTERS];
 	u16 dbqtimer_tick;
@@ -841,6 +897,9 @@ struct sge {
 	unsigned long *blocked_fl;
 	struct timer_list rx_timer; /* refills starving FLs */
 	struct timer_list tx_timer; /* checks Tx queues */
+
+	int fwevtq_msix_idx; /* Index to firmware event queue MSI-X info */
+	int nd_msix_idx; /* Index to non-data interrupts MSI-X info */
 };
 
 #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
@@ -870,13 +929,13 @@ struct hash_mac_addr {
 	unsigned int iface_mac;
 };
 
-struct uld_msix_bmap {
+struct msix_bmap {
 	unsigned long *msix_bmap;
 	unsigned int mapsize;
 	spinlock_t lock; /* lock for acquiring bitmap */
 };
 
-struct uld_msix_info {
+struct msix_info {
 	unsigned short vec;
 	char desc[IFNAMSIZ + 10];
 	unsigned int idx;
@@ -945,14 +1004,9 @@ struct adapter {
 	struct cxgb4_virt_res vres;
 	unsigned int swintr;
 
-	struct msix_info {
-		unsigned short vec;
-		char desc[IFNAMSIZ + 10];
-		cpumask_var_t aff_mask;
-	} msix_info[MAX_INGQ + 1];
-	struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
-	struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
-	int msi_idx;
+	/* MSI-X Info for NIC and OFLD queues */
+	struct msix_info *msix_info;
+	struct msix_bmap msix_bmap;
 
 	struct doorbell_stats db_stats;
 	struct sge sge;
@@ -1044,6 +1098,9 @@ struct adapter {
 #if IS_ENABLED(CONFIG_THERMAL)
 	struct ch_thermal ch_thermal;
 #endif
+
+	/* TC MQPRIO offload */
+	struct cxgb4_tc_mqprio *tc_mqprio;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1077,6 +1134,7 @@ enum {
 
 enum {
 	SCHED_CLASS_MODE_CLASS = 0,     /* per-class scheduling */
+	SCHED_CLASS_MODE_FLOW,          /* per-flow scheduling */
 };
 
 enum {
@@ -1100,6 +1158,14 @@ struct ch_sched_queue {
 	s8   class;    /* class index */
 };
 
+/* Support for "sched_flowc" command to allow one or more FLOWC
+ * to be bound to a TX Scheduling Class.
+ */
+struct ch_sched_flowc {
+	s32 tid;   /* TID to bind */
+	s8  class; /* class index */
+};
+
 /* Defined bit width of user definable filter tuples
  */
 #define ETHTYPE_BITWIDTH 16
@@ -1293,6 +1359,11 @@ static inline int is_uld(const struct adapter *adap)
 	return (adap->params.offload || adap->params.crypto);
 }
 
+static inline int is_ethofld(const struct adapter *adap)
+{
+	return adap->params.ethofld;
+}
+
 static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
 {
 	return readl(adap->regs + reg_addr);
@@ -1426,6 +1497,9 @@ int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
 int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
 			 struct net_device *dev, unsigned int iqid,
 			 unsigned int uld_type);
+int t4_sge_alloc_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq,
+			     struct net_device *dev, u32 iqid);
+void t4_sge_free_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq);
 irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
 int t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
@@ -1890,6 +1964,12 @@ int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port,
 void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
 void free_tx_desc(struct adapter *adap, struct sge_txq *q,
 		  unsigned int n, bool unmap);
+void cxgb4_eosw_txq_free_desc(struct adapter *adap, struct sge_eosw_txq *txq,
+			      u32 ndesc);
+int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc);
+void cxgb4_ethofld_restart(unsigned long data);
+int cxgb4_ethofld_rx_handler(struct sge_rspq *q, const __be64 *rsp,
+			     const struct pkt_gl *si);
 void free_txq(struct adapter *adap, struct sge_txq *q);
 void cxgb4_reclaim_completed_tx(struct adapter *adap,
 				struct sge_txq *q, bool unmap);
@@ -1948,5 +2028,10 @@ int cxgb4_alloc_raw_mac_filt(struct adapter *adap,
 int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid,
 			  int *tcam_idx, const u8 *addr,
 			  bool persistent, u8 *smt_idx);
-
+int cxgb4_get_msix_idx_from_bmap(struct adapter *adap);
+void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, u32 msix_idx);
+int cxgb_open(struct net_device *dev);
+int cxgb_close(struct net_device *dev);
+void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q);
+void cxgb4_quiesce_rx(struct sge_rspq *q);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index ae6a47dd7dc9..a13b03f771cc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2658,6 +2658,7 @@ static int sge_qinfo_uld_ciq_entries(const struct adapter *adap, int uld)
 
 static int sge_qinfo_show(struct seq_file *seq, void *v)
 {
+	int eth_entries, ctrl_entries, eo_entries = 0;
 	int uld_rxq_entries[CXGB4_ULD_MAX] = { 0 };
 	int uld_ciq_entries[CXGB4_ULD_MAX] = { 0 };
 	int uld_txq_entries[CXGB4_TX_MAX] = { 0 };
@@ -2665,11 +2666,12 @@ static int sge_qinfo_show(struct seq_file *seq, void *v)
 	const struct sge_uld_rxq_info *urxq_info;
 	struct adapter *adap = seq->private;
 	int i, n, r = (uintptr_t)v - 1;
-	int eth_entries, ctrl_entries;
 	struct sge *s = &adap->sge;
 
 	eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
 	ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
+	if (adap->sge.eohw_txq)
+		eo_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
 
 	mutex_lock(&uld_mutex);
 	if (s->uld_txq_info)
@@ -2761,6 +2763,54 @@ do { \
 	}
 
 	r -= eth_entries;
+	if (r < eo_entries) {
+		int base_qset = r * 4;
+		const struct sge_ofld_rxq *rx = &s->eohw_rxq[base_qset];
+		const struct sge_eohw_txq *tx = &s->eohw_txq[base_qset];
+
+		n = min(4, s->eoqsets - 4 * r);
+
+		S("QType:", "ETHOFLD");
+		S("Interface:",
+		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
+		T("TxQ ID:", q.cntxt_id);
+		T("TxQ size:", q.size);
+		T("TxQ inuse:", q.in_use);
+		T("TxQ CIDX:", q.cidx);
+		T("TxQ PIDX:", q.pidx);
+		R("RspQ ID:", rspq.abs_id);
+		R("RspQ size:", rspq.size);
+		R("RspQE size:", rspq.iqe_len);
+		R("RspQ CIDX:", rspq.cidx);
+		R("RspQ Gen:", rspq.gen);
+		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+		S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
+		R("FL ID:", fl.cntxt_id);
+		S3("u", "FL size:", rx->fl.size ? rx->fl.size - 8 : 0);
+		R("FL pend:", fl.pend_cred);
+		R("FL avail:", fl.avail);
+		R("FL PIDX:", fl.pidx);
+		R("FL CIDX:", fl.cidx);
+		RL("RxPackets:", stats.pkts);
+		RL("RxImm:", stats.imm);
+		RL("RxAN", stats.an);
+		RL("RxNoMem", stats.nomem);
+		TL("TSO:", tso);
+		TL("TxCSO:", tx_cso);
+		TL("VLANins:", vlan_ins);
+		TL("TxQFull:", q.stops);
+		TL("TxQRestarts:", q.restarts);
+		TL("TxMapErr:", mapping_err);
+		RL("FLAllocErr:", fl.alloc_failed);
+		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLMapErr:", fl.mapping_err);
+		RL("FLLow:", fl.low);
+		RL("FLStarving:", fl.starving);
+
+		goto unlock;
+	}
+
+	r -= eo_entries;
 	if (r < uld_txq_entries[CXGB4_TX_OFLD]) {
 		const struct sge_uld_txq *tx;
 
@@ -3007,6 +3057,7 @@ static int sge_queue_entries(const struct adapter *adap)
 	mutex_unlock(&uld_mutex);
 
 	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
+	       (adap->sge.eohw_txq ? DIV_ROUND_UP(adap->sge.eoqsets, 4) : 0) +
 	       tot_uld_entries +
 	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 38024877751c..b5148c57e8bf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -65,6 +65,7 @@
 #include <linux/uaccess.h>
 #include <linux/crash_dump.h>
 #include <net/udp_tunnel.h>
+#include <net/xfrm.h>
 
 #include "cxgb4.h"
 #include "cxgb4_filter.h"
@@ -82,6 +83,7 @@
 #include "sched.h"
 #include "cxgb4_tc_u32.h"
 #include "cxgb4_tc_flower.h"
+#include "cxgb4_tc_mqprio.h"
 #include "cxgb4_ptp.h"
 #include "cxgb4_cudbg.h"
 
@@ -184,6 +186,8 @@ static struct dentry *cxgb4_debugfs_root;
 LIST_HEAD(adapter_list);
 DEFINE_MUTEX(uld_mutex);
 
+static int cfg_queues(struct adapter *adap);
+
 static void link_report(struct net_device *dev)
 {
 	if (!netif_carrier_ok(dev))
@@ -683,31 +687,6 @@ static irqreturn_t t4_nondata_intr(int irq, void *cookie)
 	return IRQ_HANDLED;
 }
 
-/*
- * Name the MSI-X interrupts.
- */
-static void name_msix_vecs(struct adapter *adap)
-{
-	int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc);
-
-	/* non-data interrupts */
-	snprintf(adap->msix_info[0].desc, n, "%s", adap->port[0]->name);
-
-	/* FW events */
-	snprintf(adap->msix_info[1].desc, n, "%s-FWeventq",
-		 adap->port[0]->name);
-
-	/* Ethernet queues */
-	for_each_port(adap, j) {
-		struct net_device *d = adap->port[j];
-		const struct port_info *pi = netdev_priv(d);
-
-		for (i = 0; i < pi->nqsets; i++, msi_idx++)
-			snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
-				 d->name, i);
-	}
-}
-
 int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec,
 		       cpumask_var_t *aff_mask, int idx)
 {
@@ -741,15 +720,19 @@ static int request_msix_queue_irqs(struct adapter *adap)
 	struct sge *s = &adap->sge;
 	struct msix_info *minfo;
 	int err, ethqidx;
-	int msi_index = 2;
 
-	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
-			  adap->msix_info[1].desc, &s->fw_evtq);
+	if (s->fwevtq_msix_idx < 0)
+		return -ENOMEM;
+
+	err = request_irq(adap->msix_info[s->fwevtq_msix_idx].vec,
+			  t4_sge_intr_msix, 0,
+			  adap->msix_info[s->fwevtq_msix_idx].desc,
+			  &s->fw_evtq);
 	if (err)
 		return err;
 
 	for_each_ethrxq(s, ethqidx) {
-		minfo = &adap->msix_info[msi_index];
+		minfo = s->ethrxq[ethqidx].msix;
 		err = request_irq(minfo->vec,
 				  t4_sge_intr_msix, 0,
 				  minfo->desc,
@@ -759,18 +742,16 @@ static int request_msix_queue_irqs(struct adapter *adap)
 
 		cxgb4_set_msix_aff(adap, minfo->vec,
 				   &minfo->aff_mask, ethqidx);
-		msi_index++;
 	}
 	return 0;
 
 unwind:
 	while (--ethqidx >= 0) {
-		msi_index--;
-		minfo = &adap->msix_info[msi_index];
+		minfo = s->ethrxq[ethqidx].msix;
 		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
 		free_irq(minfo->vec, &s->ethrxq[ethqidx].rspq);
 	}
-	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
+	free_irq(adap->msix_info[s->fwevtq_msix_idx].vec, &s->fw_evtq);
 	return err;
 }
 
@@ -778,11 +759,11 @@ static void free_msix_queue_irqs(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
 	struct msix_info *minfo;
-	int i, msi_index = 2;
+	int i;
 
-	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
+	free_irq(adap->msix_info[s->fwevtq_msix_idx].vec, &s->fw_evtq);
 	for_each_ethrxq(s, i) {
-		minfo = &adap->msix_info[msi_index++];
+		minfo = s->ethrxq[i].msix;
 		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
 		free_irq(minfo->vec, &s->ethrxq[i].rspq);
 	}
@@ -899,6 +880,12 @@ static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
 	return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
 }
 
+void cxgb4_quiesce_rx(struct sge_rspq *q)
+{
+	if (q->handler)
+		napi_disable(&q->napi);
+}
+
 /*
  * Wait until all NAPI handlers are descheduled.
  */
@@ -909,19 +896,24 @@ static void quiesce_rx(struct adapter *adap)
 	for (i = 0; i < adap->sge.ingr_sz; i++) {
 		struct sge_rspq *q = adap->sge.ingr_map[i];
 
-		if (q && q->handler)
-			napi_disable(&q->napi);
+		if (!q)
+			continue;
+
+		cxgb4_quiesce_rx(q);
 	}
 }
 
 /* Disable interrupt and napi handler */
 static void disable_interrupts(struct adapter *adap)
 {
+	struct sge *s = &adap->sge;
+
 	if (adap->flags & CXGB4_FULL_INIT_DONE) {
 		t4_intr_disable(adap);
 		if (adap->flags & CXGB4_USING_MSIX) {
 			free_msix_queue_irqs(adap);
-			free_irq(adap->msix_info[0].vec, adap);
+			free_irq(adap->msix_info[s->nd_msix_idx].vec,
+				 adap);
 		} else {
 			free_irq(adap->pdev->irq, adap);
 		}
@@ -929,6 +921,17 @@ static void disable_interrupts(struct adapter *adap)
 	}
 }
 
+void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q)
+{
+	if (q->handler)
+		napi_enable(&q->napi);
+
+	/* 0-increment GTS to start the timer and enable interrupts */
+	t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
+		     SEINTARM_V(q->intr_params) |
+		     INGRESSQID_V(q->cntxt_id));
+}
+
 /*
  * Enable NAPI scheduling and interrupt generation for all Rx queues.
  */
@@ -941,37 +944,63 @@ static void enable_rx(struct adapter *adap)
 
 		if (!q)
 			continue;
-		if (q->handler)
-			napi_enable(&q->napi);
 
-		/* 0-increment GTS to start the timer and enable interrupts */
-		t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
-			     SEINTARM_V(q->intr_params) |
-			     INGRESSQID_V(q->cntxt_id));
+		cxgb4_enable_rx(adap, q);
 	}
 }
 
+static int setup_non_data_intr(struct adapter *adap)
+{
+	int msix;
+
+	adap->sge.nd_msix_idx = -1;
+	if (!(adap->flags & CXGB4_USING_MSIX))
+		return 0;
+
+	/* Request MSI-X vector for non-data interrupt */
+	msix = cxgb4_get_msix_idx_from_bmap(adap);
+	if (msix < 0)
+		return -ENOMEM;
+
+	snprintf(adap->msix_info[msix].desc,
+		 sizeof(adap->msix_info[msix].desc),
+		 "%s", adap->port[0]->name);
+
+	adap->sge.nd_msix_idx = msix;
+	return 0;
+}
 
 static int setup_fw_sge_queues(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
-	int err = 0;
+	int msix, err = 0;
 
 	bitmap_zero(s->starving_fl, s->egr_sz);
 	bitmap_zero(s->txq_maperr, s->egr_sz);
 
-	if (adap->flags & CXGB4_USING_MSIX)
-		adap->msi_idx = 1;         /* vector 0 is for non-queue interrupts */
-	else {
+	if (adap->flags & CXGB4_USING_MSIX) {
+		s->fwevtq_msix_idx = -1;
+		msix = cxgb4_get_msix_idx_from_bmap(adap);
+		if (msix < 0)
+			return -ENOMEM;
+
+		snprintf(adap->msix_info[msix].desc,
+			 sizeof(adap->msix_info[msix].desc),
+			 "%s-FWeventq", adap->port[0]->name);
+	} else {
 		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
 				       NULL, NULL, NULL, -1);
 		if (err)
 			return err;
-		adap->msi_idx = -((int)s->intrq.abs_id + 1);
+		msix = -((int)s->intrq.abs_id + 1);
 	}
 
 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
-			       adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
+			       msix, NULL, fwevtq_handler, NULL, -1);
+	if (err && msix >= 0)
+		cxgb4_free_msix_idx_in_bmap(adap, msix);
+
+	s->fwevtq_msix_idx = msix;
 	return err;
 }
 
@@ -985,14 +1014,17 @@ static int setup_fw_sge_queues(struct adapter *adap)
  */
 static int setup_sge_queues(struct adapter *adap)
 {
-	int err, i, j;
-	struct sge *s = &adap->sge;
 	struct sge_uld_rxq_info *rxq_info = NULL;
+	struct sge *s = &adap->sge;
 	unsigned int cmplqid = 0;
+	int err, i, j, msix = 0;
 
 	if (is_uld(adap))
 		rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
 
+	if (!(adap->flags & CXGB4_USING_MSIX))
+		msix = -((int)s->intrq.abs_id + 1);
+
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
 		struct port_info *pi = netdev_priv(dev);
@@ -1000,10 +1032,21 @@ static int setup_sge_queues(struct adapter *adap)
 		struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
 
 		for (j = 0; j < pi->nqsets; j++, q++) {
-			if (adap->msi_idx > 0)
-				adap->msi_idx++;
+			if (msix >= 0) {
+				msix = cxgb4_get_msix_idx_from_bmap(adap);
+				if (msix < 0) {
+					err = msix;
+					goto freeout;
+				}
+
+				snprintf(adap->msix_info[msix].desc,
+					 sizeof(adap->msix_info[msix].desc),
+					 "%s-Rx%d", dev->name, j);
+				q->msix = &adap->msix_info[msix];
+			}
+
 			err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
-					       adap->msi_idx, &q->fl,
+					       msix, &q->fl,
 					       t4_ethrx_handler,
 					       NULL,
 					       t4_get_tp_ch_map(adap,
@@ -1090,6 +1133,18 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 #endif /* CONFIG_CHELSIO_T4_DCB */
 
+	if (dev->num_tc) {
+		struct port_info *pi = netdev2pinfo(dev);
+
+		/* Send unsupported traffic pattern to normal NIC queues. */
+		txq = netdev_pick_tx(dev, skb, sb_dev);
+		if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
+		    ip_hdr(skb)->protocol != IPPROTO_TCP)
+			txq = txq % pi->nqsets;
+
+		return txq;
+	}
+
 	if (select_queue) {
 		txq = (skb_rx_queue_recorded(skb)
 			? skb_get_rx_queue(skb)
@@ -1456,19 +1511,23 @@ static int tid_init(struct tid_info *t)
 	struct adapter *adap = container_of(t, struct adapter, tids);
 	unsigned int max_ftids = t->nftids + t->nsftids;
 	unsigned int natids = t->natids;
+	unsigned int eotid_bmap_size;
 	unsigned int stid_bmap_size;
 	unsigned int ftid_bmap_size;
 	size_t size;
 
 	stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
 	ftid_bmap_size = BITS_TO_LONGS(t->nftids);
+	eotid_bmap_size = BITS_TO_LONGS(t->neotids);
 	size = t->ntids * sizeof(*t->tid_tab) +
 	       natids * sizeof(*t->atid_tab) +
 	       t->nstids * sizeof(*t->stid_tab) +
 	       t->nsftids * sizeof(*t->stid_tab) +
 	       stid_bmap_size * sizeof(long) +
 	       max_ftids * sizeof(*t->ftid_tab) +
-	       ftid_bmap_size * sizeof(long);
+	       ftid_bmap_size * sizeof(long) +
+	       t->neotids * sizeof(*t->eotid_tab) +
+	       eotid_bmap_size * sizeof(long);
 
 	t->tid_tab = kvzalloc(size, GFP_KERNEL);
 	if (!t->tid_tab)
@@ -1479,6 +1538,8 @@ static int tid_init(struct tid_info *t)
 	t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
 	t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
 	t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
+	t->eotid_tab = (struct eotid_entry *)&t->ftid_bmap[ftid_bmap_size];
+	t->eotid_bmap = (unsigned long *)&t->eotid_tab[t->neotids];
 	spin_lock_init(&t->stid_lock);
 	spin_lock_init(&t->atid_lock);
 	spin_lock_init(&t->ftid_lock);
@@ -1505,6 +1566,9 @@ static int tid_init(struct tid_info *t)
 		if (!t->stid_base &&
 		    CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
 			__set_bit(0, t->stid_bmap);
+
+		if (t->neotids)
+			bitmap_zero(t->eotid_bmap, t->neotids);
 	}
 
 	bitmap_zero(t->ftid_bmap, t->nftids);
@@ -2361,6 +2425,7 @@ static void update_clip(const struct adapter *adap)
  */
 static int cxgb_up(struct adapter *adap)
 {
+	struct sge *s = &adap->sge;
 	int err;
 
 	mutex_lock(&uld_mutex);
@@ -2372,16 +2437,20 @@ static int cxgb_up(struct adapter *adap)
 		goto freeq;
 
 	if (adap->flags & CXGB4_USING_MSIX) {
-		name_msix_vecs(adap);
-		err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
-				  adap->msix_info[0].desc, adap);
+		if (s->nd_msix_idx < 0) {
+			err = -ENOMEM;
+			goto irq_err;
+		}
+
+		err = request_irq(adap->msix_info[s->nd_msix_idx].vec,
+				  t4_nondata_intr, 0,
+				  adap->msix_info[s->nd_msix_idx].desc, adap);
 		if (err)
 			goto irq_err;
+
 		err = request_msix_queue_irqs(adap);
-		if (err) {
-			free_irq(adap->msix_info[0].vec, adap);
-			goto irq_err;
-		}
+		if (err)
+			goto irq_err_free_nd_msix;
 	} else {
 		err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
 				  (adap->flags & CXGB4_USING_MSI) ? 0
@@ -2403,11 +2472,13 @@ static int cxgb_up(struct adapter *adap)
 #endif
 	return err;
 
- irq_err:
+irq_err_free_nd_msix:
+	free_irq(adap->msix_info[s->nd_msix_idx].vec, adap);
+irq_err:
 	dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
- freeq:
+freeq:
 	t4_free_sge_resources(adap);
- rel_lock:
+rel_lock:
 	mutex_unlock(&uld_mutex);
 	return err;
 }
@@ -2429,11 +2500,11 @@ static void cxgb_down(struct adapter *adapter)
 /*
  * net_device operations
  */
-static int cxgb_open(struct net_device *dev)
+int cxgb_open(struct net_device *dev)
 {
-	int err;
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
+	int err;
 
 	netif_carrier_off(dev);
 
@@ -2456,7 +2527,7 @@ static int cxgb_open(struct net_device *dev)
 	return err;
 }
 
-static int cxgb_close(struct net_device *dev)
+int cxgb_close(struct net_device *dev)
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
@@ -3190,6 +3261,17 @@ static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 	}
 }
 
+static int cxgb_setup_tc_mqprio(struct net_device *dev,
+				struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	if (!is_ethofld(adap) || !adap->tc_mqprio)
+		return -ENOMEM;
+
+	return cxgb4_setup_tc_mqprio(dev, mqprio);
+}
+
 static LIST_HEAD(cxgb_block_cb_list);
 
 static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
@@ -3198,6 +3280,8 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	struct port_info *pi = netdev2pinfo(dev);
 
 	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return cxgb_setup_tc_mqprio(dev, type_data);
 	case TC_SETUP_BLOCK:
 		return flow_block_cb_setup_simple(type_data,
 						  &cxgb_block_cb_list,
@@ -4286,14 +4370,14 @@ static struct fw_info *find_fw_info(int chip)
 /*
  * Phase 0 of initialization: contact FW, obtain config, perform basic init.
  */
-static int adap_init0(struct adapter *adap)
+static int adap_init0(struct adapter *adap, int vpd_skip)
 {
-	int ret;
-	u32 v, port_vec;
-	enum dev_state state;
-	u32 params[7], val[7];
 	struct fw_caps_config_cmd caps_cmd;
+	u32 params[7], val[7];
+	enum dev_state state;
+	u32 v, port_vec;
 	int reset = 1;
+	int ret;
 
 	/* Grab Firmware Device Log parameters as early as possible so we have
 	 * access to it for debugging, etc.
@@ -4448,9 +4532,11 @@ static int adap_init0(struct adapter *adap)
 	 * could have FLASHed a new VPD which won't be read by the firmware
 	 * until we do the RESET ...
 	 */
-	ret = t4_get_vpd_params(adap, &adap->params.vpd);
-	if (ret < 0)
-		goto bye;
+	if (!vpd_skip) {
+		ret = t4_get_vpd_params(adap, &adap->params.vpd);
+		if (ret < 0)
+			goto bye;
+	}
 
 	/* Find out what ports are available to us.  Note that we need to do
 	 * this before calling adap_init0_no_config() since it needs nports
@@ -4600,11 +4686,18 @@ static int adap_init0(struct adapter *adap)
 	adap->clipt_start = val[0];
 	adap->clipt_end = val[1];
 
-	/* We don't yet have a PARAMs calls to retrieve the number of Traffic
-	 * Classes supported by the hardware/firmware so we hard code it here
-	 * for now.
-	 */
-	adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
+	/* Get the supported number of traffic classes */
+	params[0] = FW_PARAM_DEV(NUM_TM_CLASS);
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params, val);
+	if (ret < 0) {
+		/* We couldn't retrieve the number of Traffic Classes
+		 * supported by the hardware/firmware. So we hard
+		 * code it here.
+		 */
+		adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
+	} else {
+		adap->params.nsched_cls = val[0];
+	}
 
 	/* query params related to active filter region */
 	params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
@@ -4689,7 +4782,8 @@ static int adap_init0(struct adapter *adap)
 		adap->params.offload = 1;
 
 	if (caps_cmd.ofldcaps ||
-	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) {
+	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) ||
+	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_ETHOFLD))) {
 		/* query offload-related parameters */
 		params[0] = FW_PARAM_DEV(NTID);
 		params[1] = FW_PARAM_PFVF(SERVER_START);
@@ -4731,6 +4825,19 @@ static int adap_init0(struct adapter *adap)
 		} else {
 			adap->num_ofld_uld += 1;
 		}
+
+		if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_ETHOFLD)) {
+			params[0] = FW_PARAM_PFVF(ETHOFLD_START);
+			params[1] = FW_PARAM_PFVF(ETHOFLD_END);
+			ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+					      params, val);
+			if (!ret) {
+				adap->tids.eotid_base = val[0];
+				adap->tids.neotids = min_t(u32, MAX_ATIDS,
+							   val[1] - val[0] + 1);
+				adap->params.ethofld = 1;
+			}
+		}
 	}
 	if (caps_cmd.rdmacaps) {
 		params[0] = FW_PARAM_PFVF(STAG_START);
@@ -5050,10 +5157,93 @@ static void eeh_resume(struct pci_dev *pdev)
 	rtnl_unlock();
 }
 
+static void eeh_reset_prepare(struct pci_dev *pdev)
+{
+	struct adapter *adapter = pci_get_drvdata(pdev);
+	int i;
+
+	if (adapter->pf != 4)
+		return;
+
+	adapter->flags &= ~CXGB4_FW_OK;
+
+	notify_ulds(adapter, CXGB4_STATE_DOWN);
+
+	for_each_port(adapter, i)
+		if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+			cxgb_close(adapter->port[i]);
+
+	disable_interrupts(adapter);
+	cxgb4_free_mps_ref_entries(adapter);
+
+	adap_free_hma_mem(adapter);
+
+	if (adapter->flags & CXGB4_FULL_INIT_DONE)
+		cxgb_down(adapter);
+}
+
+static void eeh_reset_done(struct pci_dev *pdev)
+{
+	struct adapter *adapter = pci_get_drvdata(pdev);
+	int err, i;
+
+	if (adapter->pf != 4)
+		return;
+
+	err = t4_wait_dev_ready(adapter->regs);
+	if (err < 0) {
+		dev_err(adapter->pdev_dev,
+			"Device not ready, err %d", err);
+		return;
+	}
+
+	setup_memwin(adapter);
+
+	err = adap_init0(adapter, 1);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"Adapter init failed, err %d", err);
+		return;
+	}
+
+	setup_memwin_rdma(adapter);
+
+	if (adapter->flags & CXGB4_FW_OK) {
+		err = t4_port_init(adapter, adapter->pf, adapter->pf, 0);
+		if (err) {
+			dev_err(adapter->pdev_dev,
+				"Port init failed, err %d", err);
+			return;
+		}
+	}
+
+	err = cfg_queues(adapter);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"Config queues failed, err %d", err);
+		return;
+	}
+
+	cxgb4_init_mps_ref_entries(adapter);
+
+	err = setup_fw_sge_queues(adapter);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"FW sge queue allocation failed, err %d", err);
+		return;
+	}
+
+	for_each_port(adapter, i)
+		if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+			cxgb_open(adapter->port[i]);
+}
+
 static const struct pci_error_handlers cxgb4_eeh = {
 	.error_detected = eeh_err_detected,
 	.slot_reset     = eeh_slot_reset,
 	.resume         = eeh_resume,
+	.reset_prepare  = eeh_reset_prepare,
+	.reset_done     = eeh_reset_done,
 };
 
 /* Return true if the Link Configuration supports "High Speeds" (those greater
@@ -5070,26 +5260,25 @@ static inline bool is_x_10g_port(const struct link_config *lc)
 	return high_speeds != 0;
 }
 
-/*
- * Perform default configuration of DMA queues depending on the number and type
+/* Perform default configuration of DMA queues depending on the number and type
  * of ports we found and the number of available CPUs.  Most settings can be
  * modified by the admin prior to actual use.
  */
 static int cfg_queues(struct adapter *adap)
 {
+	u32 avail_qsets, avail_eth_qsets, avail_uld_qsets;
+	u32 niqflint, neq, num_ulds;
 	struct sge *s = &adap->sge;
-	int i, n10g = 0, qidx = 0;
-	int niqflint, neq, avail_eth_qsets;
-	int max_eth_qsets = 32;
+	u32 i, n10g = 0, qidx = 0;
 #ifndef CONFIG_CHELSIO_T4_DCB
 	int q10g = 0;
 #endif
 
-	/* Reduce memory usage in kdump environment, disable all offload.
-	 */
+	/* Reduce memory usage in kdump environment, disable all offload. */
 	if (is_kdump_kernel() || (is_uld(adap) && t4_uld_mem_alloc(adap))) {
 		adap->params.offload = 0;
 		adap->params.crypto = 0;
+		adap->params.ethofld = 0;
 	}
 
 	/* Calculate the number of Ethernet Queue Sets available based on
@@ -5108,14 +5297,11 @@ static int cfg_queues(struct adapter *adap)
 	if (!(adap->flags & CXGB4_USING_MSIX))
 		niqflint--;
 	neq = adap->params.pfres.neq / 2;
-	avail_eth_qsets = min(niqflint, neq);
+	avail_qsets = min(niqflint, neq);
 
-	if (avail_eth_qsets > max_eth_qsets)
-		avail_eth_qsets = max_eth_qsets;
-
-	if (avail_eth_qsets < adap->params.nports) {
+	if (avail_qsets < adap->params.nports) {
 		dev_err(adap->pdev_dev, "avail_eth_qsets=%d < nports=%d\n",
-			avail_eth_qsets, adap->params.nports);
+			avail_qsets, adap->params.nports);
 		return -ENOMEM;
 	}
 
@@ -5123,6 +5309,7 @@ static int cfg_queues(struct adapter *adap)
 	for_each_port(adap, i)
 		n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
 
+	avail_eth_qsets = min_t(u32, avail_qsets, MAX_ETH_QSETS);
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging support we need to be able to support up
 	 * to 8 Traffic Priorities; each of which will be assigned to its
@@ -5142,8 +5329,7 @@ static int cfg_queues(struct adapter *adap)
 		qidx += pi->nqsets;
 	}
 #else /* !CONFIG_CHELSIO_T4_DCB */
-	/*
-	 * We default to 1 queue per non-10G port and up to # of cores queues
+	/* We default to 1 queue per non-10G port and up to # of cores queues
 	 * per 10G port.
 	 */
 	if (n10g)
@@ -5165,19 +5351,40 @@ static int cfg_queues(struct adapter *adap)
 
 	s->ethqsets = qidx;
 	s->max_ethqsets = qidx;   /* MSI-X may lower it later */
+	avail_qsets -= qidx;
 
 	if (is_uld(adap)) {
-		/*
-		 * For offload we use 1 queue/channel if all ports are up to 1G,
+		/* For offload we use 1 queue/channel if all ports are up to 1G,
 		 * otherwise we divide all available queues amongst the channels
 		 * capped by the number of available cores.
 		 */
-		if (n10g) {
-			i = min_t(int, MAX_OFLD_QSETS, num_online_cpus());
-			s->ofldqsets = roundup(i, adap->params.nports);
-		} else {
+		num_ulds = adap->num_uld + adap->num_ofld_uld;
+		i = min_t(u32, MAX_OFLD_QSETS, num_online_cpus());
+		avail_uld_qsets = roundup(i, adap->params.nports);
+		if (avail_qsets < num_ulds * adap->params.nports) {
+			adap->params.offload = 0;
+			adap->params.crypto = 0;
+			s->ofldqsets = 0;
+		} else if (avail_qsets < num_ulds * avail_uld_qsets || !n10g) {
 			s->ofldqsets = adap->params.nports;
+		} else {
+			s->ofldqsets = avail_uld_qsets;
+		}
+
+		avail_qsets -= num_ulds * s->ofldqsets;
+	}
+
+	/* ETHOFLD Queues used for QoS offload should follow same
+	 * allocation scheme as normal Ethernet Queues.
+	 */
+	if (is_ethofld(adap)) {
+		if (avail_qsets < s->max_ethqsets) {
+			adap->params.ethofld = 0;
+			s->eoqsets = 0;
+		} else {
+			s->eoqsets = s->max_ethqsets;
 		}
+		avail_qsets -= s->eoqsets;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
@@ -5230,42 +5437,62 @@ static void reduce_ethqs(struct adapter *adap, int n)
 	}
 }
 
-static int get_msix_info(struct adapter *adap)
+static int alloc_msix_info(struct adapter *adap, u32 num_vec)
 {
-	struct uld_msix_info *msix_info;
-	unsigned int max_ingq = 0;
-
-	if (is_offload(adap))
-		max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld;
-	if (is_pci_uld(adap))
-		max_ingq += MAX_OFLD_QSETS * adap->num_uld;
+	struct msix_info *msix_info;
 
-	if (!max_ingq)
-		goto out;
-
-	msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL);
+	msix_info = kcalloc(num_vec, sizeof(*msix_info), GFP_KERNEL);
 	if (!msix_info)
 		return -ENOMEM;
 
-	adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq),
-						 sizeof(long), GFP_KERNEL);
-	if (!adap->msix_bmap_ulds.msix_bmap) {
+	adap->msix_bmap.msix_bmap = kcalloc(BITS_TO_LONGS(num_vec),
+					    sizeof(long), GFP_KERNEL);
+	if (!adap->msix_bmap.msix_bmap) {
 		kfree(msix_info);
 		return -ENOMEM;
 	}
-	spin_lock_init(&adap->msix_bmap_ulds.lock);
-	adap->msix_info_ulds = msix_info;
-out:
+
+	spin_lock_init(&adap->msix_bmap.lock);
+	adap->msix_bmap.mapsize = num_vec;
+
+	adap->msix_info = msix_info;
 	return 0;
 }
 
 static void free_msix_info(struct adapter *adap)
 {
-	if (!(adap->num_uld && adap->num_ofld_uld))
-		return;
+	kfree(adap->msix_bmap.msix_bmap);
+	kfree(adap->msix_info);
+}
+
+int cxgb4_get_msix_idx_from_bmap(struct adapter *adap)
+{
+	struct msix_bmap *bmap = &adap->msix_bmap;
+	unsigned int msix_idx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bmap->lock, flags);
+	msix_idx = find_first_zero_bit(bmap->msix_bmap, bmap->mapsize);
+	if (msix_idx < bmap->mapsize) {
+		__set_bit(msix_idx, bmap->msix_bmap);
+	} else {
+		spin_unlock_irqrestore(&bmap->lock, flags);
+		return -ENOSPC;
+	}
+
+	spin_unlock_irqrestore(&bmap->lock, flags);
+	return msix_idx;
+}
+
+void cxgb4_free_msix_idx_in_bmap(struct adapter *adap,
+				 unsigned int msix_idx)
+{
+	struct msix_bmap *bmap = &adap->msix_bmap;
+	unsigned long flags;
 
-	kfree(adap->msix_info_ulds);
-	kfree(adap->msix_bmap_ulds.msix_bmap);
+	spin_lock_irqsave(&bmap->lock, flags);
+	__clear_bit(msix_idx, bmap->msix_bmap);
+	spin_unlock_irqrestore(&bmap->lock, flags);
 }
 
 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
@@ -5273,88 +5500,161 @@ static void free_msix_info(struct adapter *adap)
 
 static int enable_msix(struct adapter *adap)
 {
-	int ofld_need = 0, uld_need = 0;
-	int i, j, want, need, allocated;
+	u32 eth_need, uld_need = 0, ethofld_need = 0;
+	u32 ethqsets = 0, ofldqsets = 0, eoqsets = 0;
+	u8 num_uld = 0, nchan = adap->params.nports;
+	u32 i, want, need, num_vec;
 	struct sge *s = &adap->sge;
-	unsigned int nchan = adap->params.nports;
 	struct msix_entry *entries;
-	int max_ingq = MAX_INGQ;
-
-	if (is_pci_uld(adap))
-		max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
-	if (is_offload(adap))
-		max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld);
-	entries = kmalloc_array(max_ingq + 1, sizeof(*entries),
-				GFP_KERNEL);
-	if (!entries)
-		return -ENOMEM;
-
-	/* map for msix */
-	if (get_msix_info(adap)) {
-		adap->params.offload = 0;
-		adap->params.crypto = 0;
-	}
-
-	for (i = 0; i < max_ingq + 1; ++i)
-		entries[i].entry = i;
+	struct port_info *pi;
+	int allocated, ret;
 
-	want = s->max_ethqsets + EXTRA_VECS;
-	if (is_offload(adap)) {
-		want += adap->num_ofld_uld * s->ofldqsets;
-		ofld_need = adap->num_ofld_uld * nchan;
-	}
-	if (is_pci_uld(adap)) {
-		want += adap->num_uld * s->ofldqsets;
-		uld_need = adap->num_uld * nchan;
-	}
+	want = s->max_ethqsets;
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
 	 * each port.
 	 */
-	need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
+	need = 8 * nchan;
 #else
-	need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
+	need = nchan;
 #endif
+	eth_need = need;
+	if (is_uld(adap)) {
+		num_uld = adap->num_ofld_uld + adap->num_uld;
+		want += num_uld * s->ofldqsets;
+		uld_need = num_uld * nchan;
+		need += uld_need;
+	}
+
+	if (is_ethofld(adap)) {
+		want += s->eoqsets;
+		ethofld_need = eth_need;
+		need += ethofld_need;
+	}
+
+	want += EXTRA_VECS;
+	need += EXTRA_VECS;
+
+	entries = kmalloc_array(want, sizeof(*entries), GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < want; i++)
+		entries[i].entry = i;
+
 	allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
 	if (allocated < 0) {
-		dev_info(adap->pdev_dev, "not enough MSI-X vectors left,"
-			 " not using MSI-X\n");
-		kfree(entries);
-		return allocated;
+		/* Disable offload and attempt to get vectors for NIC
+		 * only mode.
+		 */
+		want = s->max_ethqsets + EXTRA_VECS;
+		need = eth_need + EXTRA_VECS;
+		allocated = pci_enable_msix_range(adap->pdev, entries,
+						  need, want);
+		if (allocated < 0) {
+			dev_info(adap->pdev_dev,
+				 "Disabling MSI-X due to insufficient MSI-X vectors\n");
+			ret = allocated;
+			goto out_free;
+		}
+
+		dev_info(adap->pdev_dev,
+			 "Disabling offload due to insufficient MSI-X vectors\n");
+		adap->params.offload = 0;
+		adap->params.crypto = 0;
+		adap->params.ethofld = 0;
+		s->ofldqsets = 0;
+		s->eoqsets = 0;
+		uld_need = 0;
+		ethofld_need = 0;
+	}
+
+	num_vec = allocated;
+	if (num_vec < want) {
+		/* Distribute available vectors to the various queue groups.
+		 * Every group gets its minimum requirement and NIC gets top
+		 * priority for leftovers.
+		 */
+		ethqsets = eth_need;
+		if (is_uld(adap))
+			ofldqsets = nchan;
+		if (is_ethofld(adap))
+			eoqsets = ethofld_need;
+
+		num_vec -= need;
+		while (num_vec) {
+			if (num_vec < eth_need + ethofld_need ||
+			    ethqsets > s->max_ethqsets)
+				break;
+
+			for_each_port(adap, i) {
+				pi = adap2pinfo(adap, i);
+				if (pi->nqsets < 2)
+					continue;
+
+				ethqsets++;
+				num_vec--;
+				if (ethofld_need) {
+					eoqsets++;
+					num_vec--;
+				}
+			}
+		}
+
+		if (is_uld(adap)) {
+			while (num_vec) {
+				if (num_vec < uld_need ||
+				    ofldqsets > s->ofldqsets)
+					break;
+
+				ofldqsets++;
+				num_vec -= uld_need;
+			}
+		}
+	} else {
+		ethqsets = s->max_ethqsets;
+		if (is_uld(adap))
+			ofldqsets = s->ofldqsets;
+		if (is_ethofld(adap))
+			eoqsets = s->eoqsets;
 	}
 
-	/* Distribute available vectors to the various queue groups.
-	 * Every group gets its minimum requirement and NIC gets top
-	 * priority for leftovers.
-	 */
-	i = allocated - EXTRA_VECS - ofld_need - uld_need;
-	if (i < s->max_ethqsets) {
-		s->max_ethqsets = i;
-		if (i < s->ethqsets)
-			reduce_ethqs(adap, i);
+	if (ethqsets < s->max_ethqsets) {
+		s->max_ethqsets = ethqsets;
+		reduce_ethqs(adap, ethqsets);
 	}
+
 	if (is_uld(adap)) {
-		if (allocated < want)
-			s->nqs_per_uld = nchan;
-		else
-			s->nqs_per_uld = s->ofldqsets;
+		s->ofldqsets = ofldqsets;
+		s->nqs_per_uld = s->ofldqsets;
 	}
 
-	for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i)
+	if (is_ethofld(adap))
+		s->eoqsets = eoqsets;
+
+	/* map for msix */
+	ret = alloc_msix_info(adap, allocated);
+	if (ret)
+		goto out_disable_msix;
+
+	for (i = 0; i < allocated; i++) {
 		adap->msix_info[i].vec = entries[i].vector;
-	if (is_uld(adap)) {
-		for (j = 0 ; i < allocated; ++i, j++) {
-			adap->msix_info_ulds[j].vec = entries[i].vector;
-			adap->msix_info_ulds[j].idx = i;
-		}
-		adap->msix_bmap_ulds.mapsize = j;
+		adap->msix_info[i].idx = i;
 	}
-	dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, "
-		 "nic %d per uld %d\n",
-		 allocated, s->max_ethqsets, s->nqs_per_uld);
+
+	dev_info(adap->pdev_dev,
+		 "%d MSI-X vectors allocated, nic %d eoqsets %d per uld %d\n",
+		 allocated, s->max_ethqsets, s->eoqsets, s->nqs_per_uld);
 
 	kfree(entries);
 	return 0;
+
+out_disable_msix:
+	pci_disable_msix(adap->pdev);
+
+out_free:
+	kfree(entries);
+	return ret;
 }
 
 #undef EXTRA_VECS
@@ -5441,6 +5741,7 @@ static void free_some_resources(struct adapter *adapter)
 	kvfree(adapter->srq);
 	t4_cleanup_sched(adapter);
 	kvfree(adapter->tids.tid_tab);
+	cxgb4_cleanup_tc_mqprio(adapter);
 	cxgb4_cleanup_tc_flower(adapter);
 	cxgb4_cleanup_tc_u32(adapter);
 	kfree(adapter->sge.egr_map);
@@ -5837,7 +6138,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	setup_memwin(adapter);
-	err = adap_init0(adapter);
+	err = adap_init0(adapter, 0);
 #ifdef CONFIG_DEBUG_FS
 	bitmap_zero(adapter->sge.blocked_fl, adapter->sge.egr_sz);
 #endif
@@ -5855,8 +6156,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	INIT_LIST_HEAD(&adapter->mac_hlist);
 
 	for_each_port(adapter, i) {
+		/* For supporting MQPRIO Offload, need some extra
+		 * queues for each ETHOFLD TIDs. Keep it equal to
+		 * MAX_ATIDs for now. Once we connect to firmware
+		 * later and query the EOTID params, we'll come to
+		 * know the actual # of EOTIDs supported.
+		 */
 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
-					   MAX_ETH_QSETS);
+					   MAX_ETH_QSETS + MAX_ATIDS);
 		if (!netdev) {
 			err = -ENOMEM;
 			goto out_free_dev;
@@ -6004,6 +6311,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		if (cxgb4_init_tc_flower(adapter))
 			dev_warn(&pdev->dev,
 				 "could not offload tc flower, continuing\n");
+
+		if (cxgb4_init_tc_mqprio(adapter))
+			dev_warn(&pdev->dev,
+				 "could not offload tc mqprio, continuing\n");
 	}
 
 	if (is_offload(adapter) || is_hashfilter(adapter)) {
@@ -6040,6 +6351,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto out_free_dev;
 
+	err = setup_non_data_intr(adapter);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"Non Data interrupt allocation failed, err: %d\n", err);
+		goto out_free_dev;
+	}
+
 	err = setup_fw_sge_queues(adapter);
 	if (err) {
 		dev_err(adapter->pdev_dev,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
new file mode 100644
index 000000000000..143cb1f31bc0
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2019 Chelsio Communications.  All rights reserved. */
+
+#include "cxgb4.h"
+#include "cxgb4_tc_mqprio.h"
+#include "sched.h"
+
+static int cxgb4_mqprio_validate(struct net_device *dev,
+				 struct tc_mqprio_qopt_offload *mqprio)
+{
+	u64 min_rate = 0, max_rate = 0, max_link_rate;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	u32 qcount = 0, qoffset = 0;
+	u32 link_ok, speed, mtu;
+	int ret;
+	u8 i;
+
+	if (!mqprio->qopt.num_tc)
+		return 0;
+
+	if (mqprio->qopt.hw != TC_MQPRIO_HW_OFFLOAD_TCS) {
+		netdev_err(dev, "Only full TC hardware offload is supported\n");
+		return -EINVAL;
+	} else if (mqprio->mode != TC_MQPRIO_MODE_CHANNEL) {
+		netdev_err(dev, "Only channel mode offload is supported\n");
+		return -EINVAL;
+	} else if (mqprio->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
+		netdev_err(dev,	"Only bandwidth rate shaper supported\n");
+		return -EINVAL;
+	} else if (mqprio->qopt.num_tc > adap->params.nsched_cls) {
+		netdev_err(dev,
+			   "Only %u traffic classes supported by hardware\n",
+			   adap->params.nsched_cls);
+		return -ERANGE;
+	}
+
+	ret = t4_get_link_params(pi, &link_ok, &speed, &mtu);
+	if (ret) {
+		netdev_err(dev, "Failed to get link speed, ret: %d\n", ret);
+		return -EINVAL;
+	}
+
+	/* Convert from Mbps to bps */
+	max_link_rate = (u64)speed * 1000 * 1000;
+
+	for (i = 0; i < mqprio->qopt.num_tc; i++) {
+		qoffset = max_t(u16, mqprio->qopt.offset[i], qoffset);
+		qcount += mqprio->qopt.count[i];
+
+		/* Convert byte per second to bits per second */
+		min_rate += (mqprio->min_rate[i] * 8);
+		max_rate += (mqprio->max_rate[i] * 8);
+	}
+
+	if (qoffset >= adap->tids.neotids || qcount > adap->tids.neotids)
+		return -ENOMEM;
+
+	if (min_rate > max_link_rate || max_rate > max_link_rate) {
+		netdev_err(dev,
+			   "Total Min/Max (%llu/%llu) Rate > supported (%llu)\n",
+			   min_rate, max_rate, max_link_rate);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cxgb4_init_eosw_txq(struct net_device *dev,
+			       struct sge_eosw_txq *eosw_txq,
+			       u32 eotid, u32 hwqid)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_eosw_desc *ring;
+
+	memset(eosw_txq, 0, sizeof(*eosw_txq));
+
+	ring = kcalloc(CXGB4_EOSW_TXQ_DEFAULT_DESC_NUM,
+		       sizeof(*ring), GFP_KERNEL);
+	if (!ring)
+		return -ENOMEM;
+
+	eosw_txq->desc = ring;
+	eosw_txq->ndesc = CXGB4_EOSW_TXQ_DEFAULT_DESC_NUM;
+	spin_lock_init(&eosw_txq->lock);
+	eosw_txq->state = CXGB4_EO_STATE_CLOSED;
+	eosw_txq->eotid = eotid;
+	eosw_txq->hwtid = adap->tids.eotid_base + eosw_txq->eotid;
+	eosw_txq->cred = adap->params.ofldq_wr_cred;
+	eosw_txq->hwqid = hwqid;
+	eosw_txq->netdev = dev;
+	tasklet_init(&eosw_txq->qresume_tsk, cxgb4_ethofld_restart,
+		     (unsigned long)eosw_txq);
+	return 0;
+}
+
+static void cxgb4_clean_eosw_txq(struct net_device *dev,
+				 struct sge_eosw_txq *eosw_txq)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	cxgb4_eosw_txq_free_desc(adap, eosw_txq, eosw_txq->ndesc);
+	eosw_txq->pidx = 0;
+	eosw_txq->last_pidx = 0;
+	eosw_txq->cidx = 0;
+	eosw_txq->last_cidx = 0;
+	eosw_txq->flowc_idx = 0;
+	eosw_txq->inuse = 0;
+	eosw_txq->cred = adap->params.ofldq_wr_cred;
+	eosw_txq->ncompl = 0;
+	eosw_txq->last_compl = 0;
+	eosw_txq->state = CXGB4_EO_STATE_CLOSED;
+}
+
+static void cxgb4_free_eosw_txq(struct net_device *dev,
+				struct sge_eosw_txq *eosw_txq)
+{
+	spin_lock_bh(&eosw_txq->lock);
+	cxgb4_clean_eosw_txq(dev, eosw_txq);
+	kfree(eosw_txq->desc);
+	spin_unlock_bh(&eosw_txq->lock);
+	tasklet_kill(&eosw_txq->qresume_tsk);
+}
+
+static int cxgb4_mqprio_alloc_hw_resources(struct net_device *dev)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_ofld_rxq *eorxq;
+	struct sge_eohw_txq *eotxq;
+	int ret, msix = 0;
+	u32 i;
+
+	/* Allocate ETHOFLD hardware queue structures if not done already */
+	if (!refcount_read(&adap->tc_mqprio->refcnt)) {
+		adap->sge.eohw_rxq = kcalloc(adap->sge.eoqsets,
+					     sizeof(struct sge_ofld_rxq),
+					     GFP_KERNEL);
+		if (!adap->sge.eohw_rxq)
+			return -ENOMEM;
+
+		adap->sge.eohw_txq = kcalloc(adap->sge.eoqsets,
+					     sizeof(struct sge_eohw_txq),
+					     GFP_KERNEL);
+		if (!adap->sge.eohw_txq) {
+			kfree(adap->sge.eohw_rxq);
+			return -ENOMEM;
+		}
+	}
+
+	if (!(adap->flags & CXGB4_USING_MSIX))
+		msix = -((int)adap->sge.intrq.abs_id + 1);
+
+	for (i = 0; i < pi->nqsets; i++) {
+		eorxq = &adap->sge.eohw_rxq[pi->first_qset + i];
+		eotxq = &adap->sge.eohw_txq[pi->first_qset + i];
+
+		/* Allocate Rxqs for receiving ETHOFLD Tx completions */
+		if (msix >= 0) {
+			msix = cxgb4_get_msix_idx_from_bmap(adap);
+			if (msix < 0)
+				goto out_free_queues;
+
+			eorxq->msix = &adap->msix_info[msix];
+			snprintf(eorxq->msix->desc,
+				 sizeof(eorxq->msix->desc),
+				 "%s-eorxq%d", dev->name, i);
+		}
+
+		init_rspq(adap, &eorxq->rspq,
+			  CXGB4_EOHW_RXQ_DEFAULT_INTR_USEC,
+			  CXGB4_EOHW_RXQ_DEFAULT_PKT_CNT,
+			  CXGB4_EOHW_RXQ_DEFAULT_DESC_NUM,
+			  CXGB4_EOHW_RXQ_DEFAULT_DESC_SIZE);
+
+		eorxq->fl.size = CXGB4_EOHW_FLQ_DEFAULT_DESC_NUM;
+
+		ret = t4_sge_alloc_rxq(adap, &eorxq->rspq, false,
+				       dev, msix, &eorxq->fl,
+				       cxgb4_ethofld_rx_handler,
+				       NULL, 0);
+		if (ret)
+			goto out_free_queues;
+
+		/* Allocate ETHOFLD hardware Txqs */
+		eotxq->q.size = CXGB4_EOHW_TXQ_DEFAULT_DESC_NUM;
+		ret = t4_sge_alloc_ethofld_txq(adap, eotxq, dev,
+					       eorxq->rspq.cntxt_id);
+		if (ret)
+			goto out_free_queues;
+
+		/* Allocate IRQs, set IRQ affinity, and start Rx */
+		if (adap->flags & CXGB4_USING_MSIX) {
+			ret = request_irq(eorxq->msix->vec, t4_sge_intr_msix, 0,
+					  eorxq->msix->desc, &eorxq->rspq);
+			if (ret)
+				goto out_free_msix;
+
+			cxgb4_set_msix_aff(adap, eorxq->msix->vec,
+					   &eorxq->msix->aff_mask, i);
+		}
+
+		if (adap->flags & CXGB4_FULL_INIT_DONE)
+			cxgb4_enable_rx(adap, &eorxq->rspq);
+	}
+
+	refcount_inc(&adap->tc_mqprio->refcnt);
+	return 0;
+
+out_free_msix:
+	while (i-- > 0) {
+		eorxq = &adap->sge.eohw_rxq[pi->first_qset + i];
+
+		if (adap->flags & CXGB4_FULL_INIT_DONE)
+			cxgb4_quiesce_rx(&eorxq->rspq);
+
+		if (adap->flags & CXGB4_USING_MSIX) {
+			cxgb4_clear_msix_aff(eorxq->msix->vec,
+					     eorxq->msix->aff_mask);
+			free_irq(eorxq->msix->vec, &eorxq->rspq);
+		}
+	}
+
+out_free_queues:
+	for (i = 0; i < pi->nqsets; i++) {
+		eorxq = &adap->sge.eohw_rxq[pi->first_qset + i];
+		eotxq = &adap->sge.eohw_txq[pi->first_qset + i];
+
+		if (eorxq->rspq.desc)
+			free_rspq_fl(adap, &eorxq->rspq, &eorxq->fl);
+		if (eorxq->msix)
+			cxgb4_free_msix_idx_in_bmap(adap, eorxq->msix->idx);
+		t4_sge_free_ethofld_txq(adap, eotxq);
+	}
+
+	kfree(adap->sge.eohw_txq);
+	kfree(adap->sge.eohw_rxq);
+
+	return ret;
+}
+
+void cxgb4_mqprio_free_hw_resources(struct net_device *dev)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_ofld_rxq *eorxq;
+	struct sge_eohw_txq *eotxq;
+	u32 i;
+
+	/* Return if no ETHOFLD structures have been allocated yet */
+	if (!refcount_read(&adap->tc_mqprio->refcnt))
+		return;
+
+	/* Return if no hardware queues have been allocated */
+	if (!adap->sge.eohw_rxq[pi->first_qset].rspq.desc)
+		return;
+
+	for (i = 0; i < pi->nqsets; i++) {
+		eorxq = &adap->sge.eohw_rxq[pi->first_qset + i];
+		eotxq = &adap->sge.eohw_txq[pi->first_qset + i];
+
+		/* Device removal path will already disable NAPI
+		 * before unregistering netdevice. So, only disable
+		 * NAPI if we're not in device removal path
+		 */
+		if (!(adap->flags & CXGB4_SHUTTING_DOWN))
+			cxgb4_quiesce_rx(&eorxq->rspq);
+
+		if (adap->flags & CXGB4_USING_MSIX) {
+			cxgb4_clear_msix_aff(eorxq->msix->vec,
+					     eorxq->msix->aff_mask);
+			free_irq(eorxq->msix->vec, &eorxq->rspq);
+		}
+
+		free_rspq_fl(adap, &eorxq->rspq, &eorxq->fl);
+		t4_sge_free_ethofld_txq(adap, eotxq);
+	}
+
+	/* Free up ETHOFLD structures if there are no users */
+	if (refcount_dec_and_test(&adap->tc_mqprio->refcnt)) {
+		kfree(adap->sge.eohw_txq);
+		kfree(adap->sge.eohw_rxq);
+	}
+}
+
+static int cxgb4_mqprio_alloc_tc(struct net_device *dev,
+				 struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct ch_sched_params p = {
+		.type = SCHED_CLASS_TYPE_PACKET,
+		.u.params.level = SCHED_CLASS_LEVEL_CL_RL,
+		.u.params.mode = SCHED_CLASS_MODE_FLOW,
+		.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS,
+		.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS,
+		.u.params.class = SCHED_CLS_NONE,
+		.u.params.weight = 0,
+		.u.params.pktsize = dev->mtu,
+	};
+	struct cxgb4_tc_port_mqprio *tc_port_mqprio;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sched_class *e;
+	int ret;
+	u8 i;
+
+	tc_port_mqprio = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	p.u.params.channel = pi->tx_chan;
+	for (i = 0; i < mqprio->qopt.num_tc; i++) {
+		/* Convert from bytes per second to Kbps */
+		p.u.params.minrate = div_u64(mqprio->min_rate[i] * 8, 1000);
+		p.u.params.maxrate = div_u64(mqprio->max_rate[i] * 8, 1000);
+
+		e = cxgb4_sched_class_alloc(dev, &p);
+		if (!e) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+
+		tc_port_mqprio->tc_hwtc_map[i] = e->idx;
+	}
+
+	return 0;
+
+out_err:
+	while (i--)
+		cxgb4_sched_class_free(dev, tc_port_mqprio->tc_hwtc_map[i]);
+
+	return ret;
+}
+
+static void cxgb4_mqprio_free_tc(struct net_device *dev)
+{
+	struct cxgb4_tc_port_mqprio *tc_port_mqprio;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	u8 i;
+
+	tc_port_mqprio = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	for (i = 0; i < tc_port_mqprio->mqprio.qopt.num_tc; i++)
+		cxgb4_sched_class_free(dev, tc_port_mqprio->tc_hwtc_map[i]);
+}
+
+static int cxgb4_mqprio_class_bind(struct net_device *dev,
+				   struct sge_eosw_txq *eosw_txq,
+				   u8 tc)
+{
+	struct ch_sched_flowc fe;
+	int ret;
+
+	init_completion(&eosw_txq->completion);
+
+	fe.tid = eosw_txq->eotid;
+	fe.class = tc;
+
+	ret = cxgb4_sched_class_bind(dev, &fe, SCHED_FLOWC);
+	if (ret)
+		return ret;
+
+	ret = wait_for_completion_timeout(&eosw_txq->completion,
+					  CXGB4_FLOWC_WAIT_TIMEOUT);
+	if (!ret)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static void cxgb4_mqprio_class_unbind(struct net_device *dev,
+				      struct sge_eosw_txq *eosw_txq,
+				      u8 tc)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct ch_sched_flowc fe;
+
+	/* If we're shutting down, interrupts are disabled and no completions
+	 * come back. So, skip waiting for completions in this scenario.
+	 */
+	if (!(adap->flags & CXGB4_SHUTTING_DOWN))
+		init_completion(&eosw_txq->completion);
+
+	fe.tid = eosw_txq->eotid;
+	fe.class = tc;
+	cxgb4_sched_class_unbind(dev, &fe, SCHED_FLOWC);
+
+	if (!(adap->flags & CXGB4_SHUTTING_DOWN))
+		wait_for_completion_timeout(&eosw_txq->completion,
+					    CXGB4_FLOWC_WAIT_TIMEOUT);
+}
+
+static int cxgb4_mqprio_enable_offload(struct net_device *dev,
+				       struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct cxgb4_tc_port_mqprio *tc_port_mqprio;
+	u32 qoffset, qcount, tot_qcount, qid, hwqid;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_eosw_txq *eosw_txq;
+	int eotid, ret;
+	u16 i, j;
+	u8 hwtc;
+
+	ret = cxgb4_mqprio_alloc_hw_resources(dev);
+	if (ret)
+		return -ENOMEM;
+
+	tc_port_mqprio = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	for (i = 0; i < mqprio->qopt.num_tc; i++) {
+		qoffset = mqprio->qopt.offset[i];
+		qcount = mqprio->qopt.count[i];
+		for (j = 0; j < qcount; j++) {
+			eotid = cxgb4_get_free_eotid(&adap->tids);
+			if (eotid < 0) {
+				ret = -ENOMEM;
+				goto out_free_eotids;
+			}
+
+			qid = qoffset + j;
+			hwqid = pi->first_qset + (eotid % pi->nqsets);
+			eosw_txq = &tc_port_mqprio->eosw_txq[qid];
+			ret = cxgb4_init_eosw_txq(dev, eosw_txq,
+						  eotid, hwqid);
+			if (ret)
+				goto out_free_eotids;
+
+			cxgb4_alloc_eotid(&adap->tids, eotid, eosw_txq);
+
+			hwtc = tc_port_mqprio->tc_hwtc_map[i];
+			ret = cxgb4_mqprio_class_bind(dev, eosw_txq, hwtc);
+			if (ret)
+				goto out_free_eotids;
+		}
+	}
+
+	memcpy(&tc_port_mqprio->mqprio, mqprio,
+	       sizeof(struct tc_mqprio_qopt_offload));
+
+	/* Inform the stack about the configured tc params.
+	 *
+	 * Set the correct queue map. If no queue count has been
+	 * specified, then send the traffic through default NIC
+	 * queues; instead of ETHOFLD queues.
+	 */
+	ret = netdev_set_num_tc(dev, mqprio->qopt.num_tc);
+	if (ret)
+		goto out_free_eotids;
+
+	tot_qcount = pi->nqsets;
+	for (i = 0; i < mqprio->qopt.num_tc; i++) {
+		qcount = mqprio->qopt.count[i];
+		if (qcount) {
+			qoffset = mqprio->qopt.offset[i] + pi->nqsets;
+		} else {
+			qcount = pi->nqsets;
+			qoffset = 0;
+		}
+
+		ret = netdev_set_tc_queue(dev, i, qcount, qoffset);
+		if (ret)
+			goto out_reset_tc;
+
+		tot_qcount += mqprio->qopt.count[i];
+	}
+
+	ret = netif_set_real_num_tx_queues(dev, tot_qcount);
+	if (ret)
+		goto out_reset_tc;
+
+	tc_port_mqprio->state = CXGB4_MQPRIO_STATE_ACTIVE;
+	return 0;
+
+out_reset_tc:
+	netdev_reset_tc(dev);
+	i = mqprio->qopt.num_tc;
+
+out_free_eotids:
+	while (i-- > 0) {
+		qoffset = mqprio->qopt.offset[i];
+		qcount = mqprio->qopt.count[i];
+		for (j = 0; j < qcount; j++) {
+			eosw_txq = &tc_port_mqprio->eosw_txq[qoffset + j];
+
+			hwtc = tc_port_mqprio->tc_hwtc_map[i];
+			cxgb4_mqprio_class_unbind(dev, eosw_txq, hwtc);
+
+			cxgb4_free_eotid(&adap->tids, eosw_txq->eotid);
+			cxgb4_free_eosw_txq(dev, eosw_txq);
+		}
+	}
+
+	cxgb4_mqprio_free_hw_resources(dev);
+	return ret;
+}
+
+static void cxgb4_mqprio_disable_offload(struct net_device *dev)
+{
+	struct cxgb4_tc_port_mqprio *tc_port_mqprio;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_eosw_txq *eosw_txq;
+	u32 qoffset, qcount;
+	u16 i, j;
+	u8 hwtc;
+
+	tc_port_mqprio = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	if (tc_port_mqprio->state != CXGB4_MQPRIO_STATE_ACTIVE)
+		return;
+
+	netdev_reset_tc(dev);
+	netif_set_real_num_tx_queues(dev, pi->nqsets);
+
+	for (i = 0; i < tc_port_mqprio->mqprio.qopt.num_tc; i++) {
+		qoffset = tc_port_mqprio->mqprio.qopt.offset[i];
+		qcount = tc_port_mqprio->mqprio.qopt.count[i];
+		for (j = 0; j < qcount; j++) {
+			eosw_txq = &tc_port_mqprio->eosw_txq[qoffset + j];
+
+			hwtc = tc_port_mqprio->tc_hwtc_map[i];
+			cxgb4_mqprio_class_unbind(dev, eosw_txq, hwtc);
+
+			cxgb4_free_eotid(&adap->tids, eosw_txq->eotid);
+			cxgb4_free_eosw_txq(dev, eosw_txq);
+		}
+	}
+
+	cxgb4_mqprio_free_hw_resources(dev);
+
+	/* Free up the traffic classes */
+	cxgb4_mqprio_free_tc(dev);
+
+	memset(&tc_port_mqprio->mqprio, 0,
+	       sizeof(struct tc_mqprio_qopt_offload));
+
+	tc_port_mqprio->state = CXGB4_MQPRIO_STATE_DISABLED;
+}
+
+int cxgb4_setup_tc_mqprio(struct net_device *dev,
+			  struct tc_mqprio_qopt_offload *mqprio)
+{
+	bool needs_bring_up = false;
+	int ret;
+
+	ret = cxgb4_mqprio_validate(dev, mqprio);
+	if (ret)
+		return ret;
+
+	/* To configure tc params, the current allocated EOTIDs must
+	 * be freed up. However, they can't be freed up if there's
+	 * traffic running on the interface. So, ensure interface is
+	 * down before configuring tc params.
+	 */
+	if (netif_running(dev)) {
+		cxgb_close(dev);
+		needs_bring_up = true;
+	}
+
+	cxgb4_mqprio_disable_offload(dev);
+
+	/* If requested for clear, then just return since resources are
+	 * already freed up by now.
+	 */
+	if (!mqprio->qopt.num_tc)
+		goto out;
+
+	/* Allocate free available traffic classes and configure
+	 * their rate parameters.
+	 */
+	ret = cxgb4_mqprio_alloc_tc(dev, mqprio);
+	if (ret)
+		goto out;
+
+	ret = cxgb4_mqprio_enable_offload(dev, mqprio);
+	if (ret) {
+		cxgb4_mqprio_free_tc(dev);
+		goto out;
+	}
+
+out:
+	if (needs_bring_up)
+		cxgb_open(dev);
+
+	return ret;
+}
+
+int cxgb4_init_tc_mqprio(struct adapter *adap)
+{
+	struct cxgb4_tc_port_mqprio *tc_port_mqprio, *port_mqprio;
+	struct cxgb4_tc_mqprio *tc_mqprio;
+	struct sge_eosw_txq *eosw_txq;
+	int ret = 0;
+	u8 i;
+
+	tc_mqprio = kzalloc(sizeof(*tc_mqprio), GFP_KERNEL);
+	if (!tc_mqprio)
+		return -ENOMEM;
+
+	tc_port_mqprio = kcalloc(adap->params.nports, sizeof(*tc_port_mqprio),
+				 GFP_KERNEL);
+	if (!tc_port_mqprio) {
+		ret = -ENOMEM;
+		goto out_free_mqprio;
+	}
+
+	tc_mqprio->port_mqprio = tc_port_mqprio;
+	for (i = 0; i < adap->params.nports; i++) {
+		port_mqprio = &tc_mqprio->port_mqprio[i];
+		eosw_txq = kcalloc(adap->tids.neotids, sizeof(*eosw_txq),
+				   GFP_KERNEL);
+		if (!eosw_txq) {
+			ret = -ENOMEM;
+			goto out_free_ports;
+		}
+		port_mqprio->eosw_txq = eosw_txq;
+	}
+
+	adap->tc_mqprio = tc_mqprio;
+	refcount_set(&adap->tc_mqprio->refcnt, 0);
+	return 0;
+
+out_free_ports:
+	for (i = 0; i < adap->params.nports; i++) {
+		port_mqprio = &tc_mqprio->port_mqprio[i];
+		kfree(port_mqprio->eosw_txq);
+	}
+	kfree(tc_port_mqprio);
+
+out_free_mqprio:
+	kfree(tc_mqprio);
+	return ret;
+}
+
+void cxgb4_cleanup_tc_mqprio(struct adapter *adap)
+{
+	struct cxgb4_tc_port_mqprio *port_mqprio;
+	u8 i;
+
+	if (adap->tc_mqprio) {
+		if (adap->tc_mqprio->port_mqprio) {
+			for (i = 0; i < adap->params.nports; i++) {
+				struct net_device *dev = adap->port[i];
+
+				if (dev)
+					cxgb4_mqprio_disable_offload(dev);
+				port_mqprio = &adap->tc_mqprio->port_mqprio[i];
+				kfree(port_mqprio->eosw_txq);
+			}
+			kfree(adap->tc_mqprio->port_mqprio);
+		}
+		kfree(adap->tc_mqprio);
+	}
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
new file mode 100644
index 000000000000..c532f1ef8451
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2019 Chelsio Communications.  All rights reserved. */
+
+#ifndef __CXGB4_TC_MQPRIO_H__
+#define __CXGB4_TC_MQPRIO_H__
+
+#include <net/pkt_cls.h>
+
+#define CXGB4_EOSW_TXQ_DEFAULT_DESC_NUM 128
+
+#define CXGB4_EOHW_TXQ_DEFAULT_DESC_NUM 1024
+
+#define CXGB4_EOHW_RXQ_DEFAULT_DESC_NUM 1024
+#define CXGB4_EOHW_RXQ_DEFAULT_DESC_SIZE 64
+#define CXGB4_EOHW_RXQ_DEFAULT_INTR_USEC 5
+#define CXGB4_EOHW_RXQ_DEFAULT_PKT_CNT 8
+
+#define CXGB4_EOHW_FLQ_DEFAULT_DESC_NUM 72
+
+#define CXGB4_FLOWC_WAIT_TIMEOUT (5 * HZ)
+
+enum cxgb4_mqprio_state {
+	CXGB4_MQPRIO_STATE_DISABLED = 0,
+	CXGB4_MQPRIO_STATE_ACTIVE,
+};
+
+struct cxgb4_tc_port_mqprio {
+	enum cxgb4_mqprio_state state; /* Current MQPRIO offload state */
+	struct tc_mqprio_qopt_offload mqprio; /* MQPRIO offload params */
+	struct sge_eosw_txq *eosw_txq; /* Netdev SW Tx queue array */
+	u8 tc_hwtc_map[TC_QOPT_MAX_QUEUE]; /* MQPRIO tc to hardware tc map */
+};
+
+struct cxgb4_tc_mqprio {
+	refcount_t refcnt; /* Refcount for adapter-wide resources */
+	struct cxgb4_tc_port_mqprio *port_mqprio; /* Per port MQPRIO info */
+};
+
+int cxgb4_setup_tc_mqprio(struct net_device *dev,
+			  struct tc_mqprio_qopt_offload *mqprio);
+int cxgb4_init_tc_mqprio(struct adapter *adap);
+void cxgb4_cleanup_tc_mqprio(struct adapter *adap);
+#endif /* __CXGB4_TC_MQPRIO_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 86b528d8364c..cce33d279094 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -53,35 +53,6 @@
 
 #define for_each_uldrxq(m, i) for (i = 0; i < ((m)->nrxq + (m)->nciq); i++)
 
-static int get_msix_idx_from_bmap(struct adapter *adap)
-{
-	struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds;
-	unsigned long flags;
-	unsigned int msix_idx;
-
-	spin_lock_irqsave(&bmap->lock, flags);
-	msix_idx = find_first_zero_bit(bmap->msix_bmap, bmap->mapsize);
-	if (msix_idx < bmap->mapsize) {
-		__set_bit(msix_idx, bmap->msix_bmap);
-	} else {
-		spin_unlock_irqrestore(&bmap->lock, flags);
-		return -ENOSPC;
-	}
-
-	spin_unlock_irqrestore(&bmap->lock, flags);
-	return msix_idx;
-}
-
-static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx)
-{
-	struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds;
-	unsigned long flags;
-
-	spin_lock_irqsave(&bmap->lock, flags);
-	__clear_bit(msix_idx, bmap->msix_bmap);
-	spin_unlock_irqrestore(&bmap->lock, flags);
-}
-
 /* Flush the aggregated lro sessions */
 static void uldrx_flush_handler(struct sge_rspq *q)
 {
@@ -138,9 +109,9 @@ static int alloc_uld_rxqs(struct adapter *adap,
 			  struct sge_uld_rxq_info *rxq_info, bool lro)
 {
 	unsigned int nq = rxq_info->nrxq + rxq_info->nciq;
-	int i, err, msi_idx, que_idx = 0, bmap_idx = 0;
 	struct sge_ofld_rxq *q = rxq_info->uldrxq;
 	unsigned short *ids = rxq_info->rspq_id;
+	int i, err, msi_idx, que_idx = 0;
 	struct sge *s = &adap->sge;
 	unsigned int per_chan;
 
@@ -159,12 +130,18 @@ static int alloc_uld_rxqs(struct adapter *adap,
 		}
 
 		if (msi_idx >= 0) {
-			bmap_idx = get_msix_idx_from_bmap(adap);
-			if (bmap_idx < 0) {
+			msi_idx = cxgb4_get_msix_idx_from_bmap(adap);
+			if (msi_idx < 0) {
 				err = -ENOSPC;
 				goto freeout;
 			}
-			msi_idx = adap->msix_info_ulds[bmap_idx].idx;
+
+			snprintf(adap->msix_info[msi_idx].desc,
+				 sizeof(adap->msix_info[msi_idx].desc),
+				 "%s-%s%d",
+				 adap->port[0]->name, rxq_info->name, i);
+
+			q->msix = &adap->msix_info[msi_idx];
 		}
 		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
 				       adap->port[que_idx++ / per_chan],
@@ -175,8 +152,7 @@ static int alloc_uld_rxqs(struct adapter *adap,
 				       0);
 		if (err)
 			goto freeout;
-		if (msi_idx >= 0)
-			rxq_info->msix_tbl[i] = bmap_idx;
+
 		memset(&q->stats, 0, sizeof(q->stats));
 		if (ids)
 			ids[i] = q->rspq.abs_id;
@@ -188,6 +164,8 @@ freeout:
 		if (q->rspq.desc)
 			free_rspq_fl(adap, &q->rspq,
 				     q->fl.size ? &q->fl : NULL);
+		if (q->msix)
+			cxgb4_free_msix_idx_in_bmap(adap, q->msix->idx);
 	}
 	return err;
 }
@@ -198,14 +176,6 @@ setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 	int i, ret = 0;
 
-	if (adap->flags & CXGB4_USING_MSIX) {
-		rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq),
-					     sizeof(unsigned short),
-					     GFP_KERNEL);
-		if (!rxq_info->msix_tbl)
-			return -ENOMEM;
-	}
-
 	ret = !(!alloc_uld_rxqs(adap, rxq_info, lro));
 
 	/* Tell uP to route control queue completions to rdma rspq */
@@ -261,8 +231,6 @@ static void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type)
 		t4_free_uld_rxqs(adap, rxq_info->nciq,
 				 rxq_info->uldrxq + rxq_info->nrxq);
 	t4_free_uld_rxqs(adap, rxq_info->nrxq, rxq_info->uldrxq);
-	if (adap->flags & CXGB4_USING_MSIX)
-		kfree(rxq_info->msix_tbl);
 }
 
 static int cfg_queues_uld(struct adapter *adap, unsigned int uld_type,
@@ -355,13 +323,12 @@ static int
 request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	struct uld_msix_info *minfo;
+	struct msix_info *minfo;
+	unsigned int idx;
 	int err = 0;
-	unsigned int idx, bmap_idx;
 
 	for_each_uldrxq(rxq_info, idx) {
-		bmap_idx = rxq_info->msix_tbl[idx];
-		minfo = &adap->msix_info_ulds[bmap_idx];
+		minfo = rxq_info->uldrxq[idx].msix;
 		err = request_irq(minfo->vec,
 				  t4_sge_intr_msix, 0,
 				  minfo->desc,
@@ -376,10 +343,9 @@ request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 
 unwind:
 	while (idx-- > 0) {
-		bmap_idx = rxq_info->msix_tbl[idx];
-		minfo = &adap->msix_info_ulds[bmap_idx];
+		minfo = rxq_info->uldrxq[idx].msix;
 		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
-		free_msix_idx_in_bmap(adap, bmap_idx);
+		cxgb4_free_msix_idx_in_bmap(adap, minfo->idx);
 		free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq);
 	}
 	return err;
@@ -389,69 +355,45 @@ static void
 free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	struct uld_msix_info *minfo;
-	unsigned int idx, bmap_idx;
+	struct msix_info *minfo;
+	unsigned int idx;
 
 	for_each_uldrxq(rxq_info, idx) {
-		bmap_idx = rxq_info->msix_tbl[idx];
-		minfo = &adap->msix_info_ulds[bmap_idx];
-
+		minfo = rxq_info->uldrxq[idx].msix;
 		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
-		free_msix_idx_in_bmap(adap, bmap_idx);
+		cxgb4_free_msix_idx_in_bmap(adap, minfo->idx);
 		free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq);
 	}
 }
 
-static void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type)
+static void enable_rx_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	int n = sizeof(adap->msix_info_ulds[0].desc);
-	unsigned int idx, bmap_idx;
+	int idx;
 
 	for_each_uldrxq(rxq_info, idx) {
-		bmap_idx = rxq_info->msix_tbl[idx];
-
-		snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d",
-			 adap->port[0]->name, rxq_info->name, idx);
-	}
-}
-
-static void enable_rx(struct adapter *adap, struct sge_rspq *q)
-{
-	if (!q)
-		return;
+		struct sge_rspq *q = &rxq_info->uldrxq[idx].rspq;
 
-	if (q->handler)
-		napi_enable(&q->napi);
-
-	/* 0-increment GTS to start the timer and enable interrupts */
-	t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
-		     SEINTARM_V(q->intr_params) |
-		     INGRESSQID_V(q->cntxt_id));
-}
+		if (!q)
+			continue;
 
-static void quiesce_rx(struct adapter *adap, struct sge_rspq *q)
-{
-	if (q && q->handler)
-		napi_disable(&q->napi);
+		cxgb4_enable_rx(adap, q);
+	}
 }
 
-static void enable_rx_uld(struct adapter *adap, unsigned int uld_type)
+static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 	int idx;
 
-	for_each_uldrxq(rxq_info, idx)
-		enable_rx(adap, &rxq_info->uldrxq[idx].rspq);
-}
+	for_each_uldrxq(rxq_info, idx) {
+		struct sge_rspq *q = &rxq_info->uldrxq[idx].rspq;
 
-static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
-{
-	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	int idx;
+		if (!q)
+			continue;
 
-	for_each_uldrxq(rxq_info, idx)
-		quiesce_rx(adap, &rxq_info->uldrxq[idx].rspq);
+		cxgb4_quiesce_rx(q);
+	}
 }
 
 static void
@@ -750,7 +692,6 @@ void cxgb4_register_uld(enum cxgb4_uld type,
 		if (ret)
 			goto free_queues;
 		if (adap->flags & CXGB4_USING_MSIX) {
-			name_msix_vecs_uld(adap, type);
 			ret = request_msix_queue_irqs_uld(adap, type);
 			if (ret)
 				goto free_rxq;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index cee582e36134..861b25d28ed6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -89,6 +89,10 @@ union aopen_entry {
 	union aopen_entry *next;
 };
 
+struct eotid_entry {
+	void *data;
+};
+
 /*
  * Holds the size, base address, free list start, etc of the TID, server TID,
  * and active-open TID tables.  The tables themselves are allocated dynamically.
@@ -126,6 +130,12 @@ struct tid_info {
 	unsigned int v6_stids_in_use;
 	unsigned int sftids_in_use;
 
+	/* ETHOFLD range */
+	struct eotid_entry *eotid_tab;
+	unsigned long *eotid_bmap;
+	unsigned int eotid_base;
+	unsigned int neotids;
+
 	/* TIDs in the TCAM */
 	atomic_t tids_in_use;
 	/* TIDs in the HASH */
@@ -176,6 +186,35 @@ static inline void cxgb4_insert_tid(struct tid_info *t, void *data,
 	atomic_inc(&t->conns_in_use);
 }
 
+static inline struct eotid_entry *cxgb4_lookup_eotid(struct tid_info *t,
+						     u32 eotid)
+{
+	return eotid < t->neotids ? &t->eotid_tab[eotid] : NULL;
+}
+
+static inline int cxgb4_get_free_eotid(struct tid_info *t)
+{
+	int eotid;
+
+	eotid = find_first_zero_bit(t->eotid_bmap, t->neotids);
+	if (eotid >= t->neotids)
+		eotid = -1;
+
+	return eotid;
+}
+
+static inline void cxgb4_alloc_eotid(struct tid_info *t, u32 eotid, void *data)
+{
+	set_bit(eotid, t->eotid_bmap);
+	t->eotid_tab[eotid].data = data;
+}
+
+static inline void cxgb4_free_eotid(struct tid_info *t, u32 eotid)
+{
+	clear_bit(eotid, t->eotid_bmap);
+	t->eotid_tab[eotid].data = NULL;
+}
+
 int cxgb4_alloc_atid(struct tid_info *t, void *data);
 int cxgb4_alloc_stid(struct tid_info *t, int family, void *data);
 int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 1a407d3c1d67..e9e45006632d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -351,15 +351,13 @@ exists:
 static void _t4_l2e_free(struct l2t_entry *e)
 {
 	struct l2t_data *d;
-	struct sk_buff *skb;
 
 	if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
 		if (e->neigh) {
 			neigh_release(e->neigh);
 			e->neigh = NULL;
 		}
-		while ((skb = __skb_dequeue(&e->arpq)) != NULL)
-			kfree_skb(skb);
+		__skb_queue_purge(&e->arpq);
 	}
 
 	d = container_of(e, struct l2t_data, l2tab[e->idx]);
@@ -370,7 +368,6 @@ static void _t4_l2e_free(struct l2t_entry *e)
 static void t4_l2e_free(struct l2t_entry *e)
 {
 	struct l2t_data *d;
-	struct sk_buff *skb;
 
 	spin_lock_bh(&e->lock);
 	if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
@@ -378,8 +375,7 @@ static void t4_l2e_free(struct l2t_entry *e)
 			neigh_release(e->neigh);
 			e->neigh = NULL;
 		}
-		while ((skb = __skb_dequeue(&e->arpq)) != NULL)
-			kfree_skb(skb);
+		__skb_queue_purge(&e->arpq);
 	}
 	spin_unlock_bh(&e->lock);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
index 60218dc676a8..0a98c4dbb36b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -92,45 +92,69 @@ static int t4_sched_bind_unbind_op(struct port_info *pi, void *arg,
 
 		pf = adap->pf;
 		vf = 0;
+
+		err = t4_set_params(adap, adap->mbox, pf, vf, 1,
+				    &fw_param, &fw_class);
+		break;
+	}
+	case SCHED_FLOWC: {
+		struct sched_flowc_entry *fe;
+
+		fe = (struct sched_flowc_entry *)arg;
+
+		fw_class = bind ? fe->param.class : FW_SCHED_CLS_NONE;
+		err = cxgb4_ethofld_send_flowc(adap->port[pi->port_id],
+					       fe->param.tid, fw_class);
 		break;
 	}
 	default:
 		err = -ENOTSUPP;
-		goto out;
+		break;
 	}
 
-	err = t4_set_params(adap, adap->mbox, pf, vf, 1, &fw_param, &fw_class);
-
-out:
 	return err;
 }
 
-static struct sched_class *t4_sched_queue_lookup(struct port_info *pi,
-						 const unsigned int qid,
-						 int *index)
+static void *t4_sched_entry_lookup(struct port_info *pi,
+				   enum sched_bind_type type,
+				   const u32 val)
 {
 	struct sched_table *s = pi->sched_tbl;
 	struct sched_class *e, *end;
-	struct sched_class *found = NULL;
-	int i;
+	void *found = NULL;
 
-	/* Look for a class with matching bound queue parameters */
+	/* Look for an entry with matching @val */
 	end = &s->tab[s->sched_size];
 	for (e = &s->tab[0]; e != end; ++e) {
-		struct sched_queue_entry *qe;
-
-		i = 0;
-		if (e->state == SCHED_STATE_UNUSED)
+		if (e->state == SCHED_STATE_UNUSED ||
+		    e->bind_type != type)
 			continue;
 
-		list_for_each_entry(qe, &e->queue_list, list) {
-			if (qe->cntxt_id == qid) {
-				found = e;
-				if (index)
-					*index = i;
-				break;
+		switch (type) {
+		case SCHED_QUEUE: {
+			struct sched_queue_entry *qe;
+
+			list_for_each_entry(qe, &e->entry_list, list) {
+				if (qe->cntxt_id == val) {
+					found = qe;
+					break;
+				}
 			}
-			i++;
+			break;
+		}
+		case SCHED_FLOWC: {
+			struct sched_flowc_entry *fe;
+
+			list_for_each_entry(fe, &e->entry_list, list) {
+				if (fe->param.tid == val) {
+					found = fe;
+					break;
+				}
+			}
+			break;
+		}
+		default:
+			return NULL;
 		}
 
 		if (found)
@@ -142,35 +166,26 @@ static struct sched_class *t4_sched_queue_lookup(struct port_info *pi,
 
 static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p)
 {
-	struct adapter *adap = pi->adapter;
-	struct sched_class *e;
 	struct sched_queue_entry *qe = NULL;
+	struct adapter *adap = pi->adapter;
 	struct sge_eth_txq *txq;
-	unsigned int qid;
-	int index = -1;
+	struct sched_class *e;
 	int err = 0;
 
 	if (p->queue < 0 || p->queue >= pi->nqsets)
 		return -ERANGE;
 
 	txq = &adap->sge.ethtxq[pi->first_qset + p->queue];
-	qid = txq->q.cntxt_id;
-
-	/* Find the existing class that the queue is bound to */
-	e = t4_sched_queue_lookup(pi, qid, &index);
-	if (e && index >= 0) {
-		int i = 0;
 
-		list_for_each_entry(qe, &e->queue_list, list) {
-			if (i == index)
-				break;
-			i++;
-		}
+	/* Find the existing entry that the queue is bound to */
+	qe = t4_sched_entry_lookup(pi, SCHED_QUEUE, txq->q.cntxt_id);
+	if (qe) {
 		err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE,
 					      false);
 		if (err)
 			return err;
 
+		e = &pi->sched_tbl->tab[qe->param.class];
 		list_del(&qe->list);
 		kvfree(qe);
 		if (atomic_dec_and_test(&e->refcnt)) {
@@ -183,11 +198,11 @@ static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p)
 
 static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
 {
-	struct adapter *adap = pi->adapter;
 	struct sched_table *s = pi->sched_tbl;
-	struct sched_class *e;
 	struct sched_queue_entry *qe = NULL;
+	struct adapter *adap = pi->adapter;
 	struct sge_eth_txq *txq;
+	struct sched_class *e;
 	unsigned int qid;
 	int err = 0;
 
@@ -215,7 +230,8 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
 	if (err)
 		goto out_err;
 
-	list_add_tail(&qe->list, &e->queue_list);
+	list_add_tail(&qe->list, &e->entry_list);
+	e->bind_type = SCHED_QUEUE;
 	atomic_inc(&e->refcnt);
 	return err;
 
@@ -224,6 +240,73 @@ out_err:
 	return err;
 }
 
+static int t4_sched_flowc_unbind(struct port_info *pi, struct ch_sched_flowc *p)
+{
+	struct sched_flowc_entry *fe = NULL;
+	struct adapter *adap = pi->adapter;
+	struct sched_class *e;
+	int err = 0;
+
+	if (p->tid < 0 || p->tid >= adap->tids.neotids)
+		return -ERANGE;
+
+	/* Find the existing entry that the flowc is bound to */
+	fe = t4_sched_entry_lookup(pi, SCHED_FLOWC, p->tid);
+	if (fe) {
+		err = t4_sched_bind_unbind_op(pi, (void *)fe, SCHED_FLOWC,
+					      false);
+		if (err)
+			return err;
+
+		e = &pi->sched_tbl->tab[fe->param.class];
+		list_del(&fe->list);
+		kvfree(fe);
+		if (atomic_dec_and_test(&e->refcnt)) {
+			e->state = SCHED_STATE_UNUSED;
+			memset(&e->info, 0, sizeof(e->info));
+		}
+	}
+	return err;
+}
+
+static int t4_sched_flowc_bind(struct port_info *pi, struct ch_sched_flowc *p)
+{
+	struct sched_table *s = pi->sched_tbl;
+	struct sched_flowc_entry *fe = NULL;
+	struct adapter *adap = pi->adapter;
+	struct sched_class *e;
+	int err = 0;
+
+	if (p->tid < 0 || p->tid >= adap->tids.neotids)
+		return -ERANGE;
+
+	fe = kvzalloc(sizeof(*fe), GFP_KERNEL);
+	if (!fe)
+		return -ENOMEM;
+
+	/* Unbind flowc from any existing class */
+	err = t4_sched_flowc_unbind(pi, p);
+	if (err)
+		goto out_err;
+
+	/* Bind flowc to specified class */
+	memcpy(&fe->param, p, sizeof(fe->param));
+
+	e = &s->tab[fe->param.class];
+	err = t4_sched_bind_unbind_op(pi, (void *)fe, SCHED_FLOWC, true);
+	if (err)
+		goto out_err;
+
+	list_add_tail(&fe->list, &e->entry_list);
+	e->bind_type = SCHED_FLOWC;
+	atomic_inc(&e->refcnt);
+	return err;
+
+out_err:
+	kvfree(fe);
+	return err;
+}
+
 static void t4_sched_class_unbind_all(struct port_info *pi,
 				      struct sched_class *e,
 				      enum sched_bind_type type)
@@ -235,10 +318,17 @@ static void t4_sched_class_unbind_all(struct port_info *pi,
 	case SCHED_QUEUE: {
 		struct sched_queue_entry *qe;
 
-		list_for_each_entry(qe, &e->queue_list, list)
+		list_for_each_entry(qe, &e->entry_list, list)
 			t4_sched_queue_unbind(pi, &qe->param);
 		break;
 	}
+	case SCHED_FLOWC: {
+		struct sched_flowc_entry *fe;
+
+		list_for_each_entry(fe, &e->entry_list, list)
+			t4_sched_flowc_unbind(pi, &fe->param);
+		break;
+	}
 	default:
 		break;
 	}
@@ -262,6 +352,15 @@ static int t4_sched_class_bind_unbind_op(struct port_info *pi, void *arg,
 			err = t4_sched_queue_unbind(pi, qe);
 		break;
 	}
+	case SCHED_FLOWC: {
+		struct ch_sched_flowc *fe = (struct ch_sched_flowc *)arg;
+
+		if (bind)
+			err = t4_sched_flowc_bind(pi, fe);
+		else
+			err = t4_sched_flowc_unbind(pi, fe);
+		break;
+	}
 	default:
 		err = -ENOTSUPP;
 		break;
@@ -299,6 +398,12 @@ int cxgb4_sched_class_bind(struct net_device *dev, void *arg,
 		class_id = qe->class;
 		break;
 	}
+	case SCHED_FLOWC: {
+		struct ch_sched_flowc *fe = (struct ch_sched_flowc *)arg;
+
+		class_id = fe->class;
+		break;
+	}
 	default:
 		return -ENOTSUPP;
 	}
@@ -340,6 +445,12 @@ int cxgb4_sched_class_unbind(struct net_device *dev, void *arg,
 		class_id = qe->class;
 		break;
 	}
+	case SCHED_FLOWC: {
+		struct ch_sched_flowc *fe = (struct ch_sched_flowc *)arg;
+
+		class_id = fe->class;
+		break;
+	}
 	default:
 		return -ENOTSUPP;
 	}
@@ -355,10 +466,13 @@ static struct sched_class *t4_sched_class_lookup(struct port_info *pi,
 						const struct ch_sched_params *p)
 {
 	struct sched_table *s = pi->sched_tbl;
-	struct sched_class *e, *end;
 	struct sched_class *found = NULL;
+	struct sched_class *e, *end;
 
-	if (!p) {
+	/* Only allow tc to be shared among SCHED_FLOWC types. For
+	 * other types, always allocate a new tc.
+	 */
+	if (!p || p->u.params.mode != SCHED_CLASS_MODE_FLOW) {
 		/* Get any available unused class */
 		end = &s->tab[s->sched_size];
 		for (e = &s->tab[0]; e != end; ++e) {
@@ -467,9 +581,32 @@ struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev,
 	return t4_sched_class_alloc(pi, p);
 }
 
-static void t4_sched_class_free(struct port_info *pi, struct sched_class *e)
+/**
+ * cxgb4_sched_class_free - free a scheduling class
+ * @dev: net_device pointer
+ * @e: scheduling class
+ *
+ * Frees a scheduling class if there are no users.
+ */
+void cxgb4_sched_class_free(struct net_device *dev, u8 classid)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct sched_table *s = pi->sched_tbl;
+	struct sched_class *e;
+
+	e = &s->tab[classid];
+	if (!atomic_read(&e->refcnt)) {
+		e->state = SCHED_STATE_UNUSED;
+		memset(&e->info, 0, sizeof(e->info));
+	}
+}
+
+static void t4_sched_class_free(struct net_device *dev, struct sched_class *e)
 {
-	t4_sched_class_unbind_all(pi, e, SCHED_QUEUE);
+	struct port_info *pi = netdev2pinfo(dev);
+
+	t4_sched_class_unbind_all(pi, e, e->bind_type);
+	cxgb4_sched_class_free(dev, e->idx);
 }
 
 struct sched_table *t4_init_sched(unsigned int sched_size)
@@ -487,7 +624,7 @@ struct sched_table *t4_init_sched(unsigned int sched_size)
 		memset(&s->tab[i], 0, sizeof(struct sched_class));
 		s->tab[i].idx = i;
 		s->tab[i].state = SCHED_STATE_UNUSED;
-		INIT_LIST_HEAD(&s->tab[i].queue_list);
+		INIT_LIST_HEAD(&s->tab[i].entry_list);
 		atomic_set(&s->tab[i].refcnt, 0);
 	}
 	return s;
@@ -510,7 +647,7 @@ void t4_cleanup_sched(struct adapter *adap)
 
 			e = &s->tab[i];
 			if (e->state == SCHED_STATE_ACTIVE)
-				t4_sched_class_free(pi, e);
+				t4_sched_class_free(adap->port[j], e);
 		}
 		kvfree(s);
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h
index 168fb4ce3759..80bed8e59362 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h
@@ -56,6 +56,7 @@ enum sched_fw_ops {
 
 enum sched_bind_type {
 	SCHED_QUEUE,
+	SCHED_FLOWC,
 };
 
 struct sched_queue_entry {
@@ -64,11 +65,17 @@ struct sched_queue_entry {
 	struct ch_sched_queue param;
 };
 
+struct sched_flowc_entry {
+	struct list_head list;
+	struct ch_sched_flowc param;
+};
+
 struct sched_class {
 	u8 state;
 	u8 idx;
 	struct ch_sched_params info;
-	struct list_head queue_list;
+	enum sched_bind_type bind_type;
+	struct list_head entry_list;
 	atomic_t refcnt;
 };
 
@@ -102,6 +109,7 @@ int cxgb4_sched_class_unbind(struct net_device *dev, void *arg,
 
 struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev,
 					    struct ch_sched_params *p);
+void cxgb4_sched_class_free(struct net_device *dev, u8 classid);
 
 struct sched_table *t4_init_sched(unsigned int size);
 void t4_cleanup_sched(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 928bfea5457b..e346830ebca9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -55,6 +55,8 @@
 #include "t4fw_api.h"
 #include "cxgb4_ptp.h"
 #include "cxgb4_uld.h"
+#include "cxgb4_tc_mqprio.h"
+#include "sched.h"
 
 /*
  * Rx buffer size.  We use largish buffers if possible but settle for single
@@ -269,7 +271,6 @@ out_err:
 }
 EXPORT_SYMBOL(cxgb4_map_skb);
 
-#ifdef CONFIG_NEED_DMA_MAP_STATE
 static void unmap_skb(struct device *dev, const struct sk_buff *skb,
 		      const dma_addr_t *addr)
 {
@@ -284,6 +285,7 @@ static void unmap_skb(struct device *dev, const struct sk_buff *skb,
 		dma_unmap_page(dev, *addr++, skb_frag_size(fp), DMA_TO_DEVICE);
 }
 
+#ifdef CONFIG_NEED_DMA_MAP_STATE
 /**
  *	deferred_unmap_destructor - unmap a packet when it is freed
  *	@skb: the packet
@@ -1309,6 +1311,35 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
 	tnl_lso->EthLenOffset_Size = htonl(CPL_TX_TNL_LSO_SIZE_V(skb->len));
 }
 
+static inline void *write_tso_wr(struct adapter *adap, struct sk_buff *skb,
+				 struct cpl_tx_pkt_lso_core *lso)
+{
+	int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
+	int l3hdr_len = skb_network_header_len(skb);
+	const struct skb_shared_info *ssi;
+	bool ipv6 = false;
+
+	ssi = skb_shinfo(skb);
+	if (ssi->gso_type & SKB_GSO_TCPV6)
+		ipv6 = true;
+
+	lso->lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
+			      LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F |
+			      LSO_IPV6_V(ipv6) |
+			      LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
+			      LSO_IPHDR_LEN_V(l3hdr_len / 4) |
+			      LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
+	lso->ipid_ofst = htons(0);
+	lso->mss = htons(ssi->gso_size);
+	lso->seqno_offset = htonl(0);
+	if (is_t4(adap->params.chip))
+		lso->len = htonl(skb->len);
+	else
+		lso->len = htonl(LSO_T5_XFER_SIZE_V(skb->len));
+
+	return (void *)(lso + 1);
+}
+
 /**
  *	t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update
  *	@adap: the adapter
@@ -1347,6 +1378,31 @@ int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
 	return reclaimed;
 }
 
+static inline int cxgb4_validate_skb(struct sk_buff *skb,
+				     struct net_device *dev,
+				     u32 min_pkt_len)
+{
+	u32 max_pkt_len;
+
+	/* The chip min packet length is 10 octets but some firmware
+	 * commands have a minimum packet length requirement. So, play
+	 * safe and reject anything shorter than @min_pkt_len.
+	 */
+	if (unlikely(skb->len < min_pkt_len))
+		return -EINVAL;
+
+	/* Discard the packet if the length is greater than mtu */
+	max_pkt_len = ETH_HLEN + dev->mtu;
+
+	if (skb_vlan_tagged(skb))
+		max_pkt_len += VLAN_HLEN;
+
+	if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  *	cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
  *	@skb: the packet
@@ -1356,41 +1412,24 @@ int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
  */
 static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	u32 wr_mid, ctrl0, op;
-	u64 cntrl, *end, *sgl;
-	int qidx, credits;
-	unsigned int flits, ndesc;
-	struct adapter *adap;
-	struct sge_eth_txq *q;
-	const struct port_info *pi;
+	enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE;
+	bool ptp_enabled = is_ptp_enabled(skb, dev);
+	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	const struct skb_shared_info *ssi;
 	struct fw_eth_tx_pkt_wr *wr;
 	struct cpl_tx_pkt_core *cpl;
-	const struct skb_shared_info *ssi;
-	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	int len, qidx, credits, ret;
+	const struct port_info *pi;
+	unsigned int flits, ndesc;
 	bool immediate = false;
-	int len, max_pkt_len;
-	bool ptp_enabled = is_ptp_enabled(skb, dev);
+	u32 wr_mid, ctrl0, op;
+	u64 cntrl, *end, *sgl;
+	struct sge_eth_txq *q;
 	unsigned int chip_ver;
-	enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE;
-
-#ifdef CONFIG_CHELSIO_T4_FCOE
-	int err;
-#endif /* CONFIG_CHELSIO_T4_FCOE */
-
-	/*
-	 * The chip min packet length is 10 octets but play safe and reject
-	 * anything shorter than an Ethernet header.
-	 */
-	if (unlikely(skb->len < ETH_HLEN)) {
-out_free:	dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
+	struct adapter *adap;
 
-	/* Discard the packet if the length is greater than mtu */
-	max_pkt_len = ETH_HLEN + dev->mtu;
-	if (skb_vlan_tagged(skb))
-		max_pkt_len += VLAN_HLEN;
-	if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+	ret = cxgb4_validate_skb(skb, dev, ETH_HLEN);
+	if (ret)
 		goto out_free;
 
 	pi = netdev_priv(dev);
@@ -1421,8 +1460,8 @@ out_free:	dev_kfree_skb_any(skb);
 	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 
 #ifdef CONFIG_CHELSIO_T4_FCOE
-	err = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
-	if (unlikely(err == -ENOTSUPP)) {
+	ret = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
+	if (unlikely(ret == -ENOTSUPP)) {
 		if (ptp_enabled)
 			spin_unlock(&adap->ptp_lock);
 		goto out_free;
@@ -1490,9 +1529,6 @@ out_free:	dev_kfree_skb_any(skb);
 	len += sizeof(*cpl);
 	if (ssi->gso_size) {
 		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
-		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
-		int l3hdr_len = skb_network_header_len(skb);
-		int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
 		struct cpl_tx_tnl_lso *tnl_lso = (void *)(wr + 1);
 
 		if (tnl_type)
@@ -1519,30 +1555,8 @@ out_free:	dev_kfree_skb_any(skb);
 			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				cntrl = hwcsum(adap->params.chip, skb);
 		} else {
-			lso->lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
-					LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F |
-					LSO_IPV6_V(v6) |
-					LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
-					LSO_IPHDR_LEN_V(l3hdr_len / 4) |
-					LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
-			lso->ipid_ofst = htons(0);
-			lso->mss = htons(ssi->gso_size);
-			lso->seqno_offset = htonl(0);
-			if (is_t4(adap->params.chip))
-				lso->len = htonl(skb->len);
-			else
-				lso->len = htonl(LSO_T5_XFER_SIZE_V(skb->len));
-			cpl = (void *)(lso + 1);
-
-			if (CHELSIO_CHIP_VERSION(adap->params.chip)
-			    <= CHELSIO_T5)
-				cntrl =	TXPKT_ETHHDR_LEN_V(eth_xtra_len);
-			else
-				cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
-
-			cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
-				 TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
-				 TXPKT_IPHDR_LEN_V(l3hdr_len);
+			cpl = write_tso_wr(adap, skb, lso);
+			cntrl = hwcsum(adap->params.chip, skb);
 		}
 		sgl = (u64 *)(cpl + 1); /* sgl start here */
 		if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
@@ -1622,6 +1636,10 @@ out_free:	dev_kfree_skb_any(skb);
 	if (ptp_enabled)
 		spin_unlock(&adap->ptp_lock);
 	return NETDEV_TX_OK;
+
+out_free:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
 }
 
 /* Constants ... */
@@ -1710,32 +1728,25 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 	dma_addr_t addr[MAX_SKB_FRAGS + 1];
 	const struct skb_shared_info *ssi;
 	struct fw_eth_tx_pkt_vm_wr *wr;
-	int qidx, credits, max_pkt_len;
 	struct cpl_tx_pkt_core *cpl;
 	const struct port_info *pi;
 	unsigned int flits, ndesc;
 	struct sge_eth_txq *txq;
 	struct adapter *adapter;
+	int qidx, credits, ret;
+	size_t fw_hdr_copy_len;
 	u64 cntrl, *end;
 	u32 wr_mid;
-	const size_t fw_hdr_copy_len = sizeof(wr->ethmacdst) +
-				       sizeof(wr->ethmacsrc) +
-				       sizeof(wr->ethtype) +
-				       sizeof(wr->vlantci);
 
 	/* The chip minimum packet length is 10 octets but the firmware
 	 * command that we are using requires that we copy the Ethernet header
 	 * (including the VLAN tag) into the header so we reject anything
 	 * smaller than that ...
 	 */
-	if (unlikely(skb->len < fw_hdr_copy_len))
-		goto out_free;
-
-	/* Discard the packet if the length is greater than mtu */
-	max_pkt_len = ETH_HLEN + dev->mtu;
-	if (skb_vlan_tag_present(skb))
-		max_pkt_len += VLAN_HLEN;
-	if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+	fw_hdr_copy_len = sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
+			  sizeof(wr->ethtype) + sizeof(wr->vlantci);
+	ret = cxgb4_validate_skb(skb, dev, fw_hdr_copy_len);
+	if (ret)
 		goto out_free;
 
 	/* Figure out which TX Queue we're going to use. */
@@ -1991,34 +2002,451 @@ out_free:
 	return NETDEV_TX_OK;
 }
 
+/**
+ * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
+ * @q: the SGE control Tx queue
+ *
+ * This is a variant of cxgb4_reclaim_completed_tx() that is used
+ * for Tx queues that send only immediate data (presently just
+ * the control queues) and	thus do not have any sk_buffs to release.
+ */
+static inline void reclaim_completed_tx_imm(struct sge_txq *q)
+{
+	int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
+	int reclaim = hw_cidx - q->cidx;
+
+	if (reclaim < 0)
+		reclaim += q->size;
+
+	q->in_use -= reclaim;
+	q->cidx = hw_cidx;
+}
+
+static inline void eosw_txq_advance_index(u32 *idx, u32 n, u32 max)
+{
+	u32 val = *idx + n;
+
+	if (val >= max)
+		val -= max;
+
+	*idx = val;
+}
+
+void cxgb4_eosw_txq_free_desc(struct adapter *adap,
+			      struct sge_eosw_txq *eosw_txq, u32 ndesc)
+{
+	struct sge_eosw_desc *d;
+
+	d = &eosw_txq->desc[eosw_txq->last_cidx];
+	while (ndesc--) {
+		if (d->skb) {
+			if (d->addr[0]) {
+				unmap_skb(adap->pdev_dev, d->skb, d->addr);
+				memset(d->addr, 0, sizeof(d->addr));
+			}
+			dev_consume_skb_any(d->skb);
+			d->skb = NULL;
+		}
+		eosw_txq_advance_index(&eosw_txq->last_cidx, 1,
+				       eosw_txq->ndesc);
+		d = &eosw_txq->desc[eosw_txq->last_cidx];
+	}
+}
+
+static inline void eosw_txq_advance(struct sge_eosw_txq *eosw_txq, u32 n)
+{
+	eosw_txq_advance_index(&eosw_txq->pidx, n, eosw_txq->ndesc);
+	eosw_txq->inuse += n;
+}
+
+static inline int eosw_txq_enqueue(struct sge_eosw_txq *eosw_txq,
+				   struct sk_buff *skb)
+{
+	if (eosw_txq->inuse == eosw_txq->ndesc)
+		return -ENOMEM;
+
+	eosw_txq->desc[eosw_txq->pidx].skb = skb;
+	return 0;
+}
+
+static inline struct sk_buff *eosw_txq_peek(struct sge_eosw_txq *eosw_txq)
+{
+	return eosw_txq->desc[eosw_txq->last_pidx].skb;
+}
+
+static inline u8 ethofld_calc_tx_flits(struct adapter *adap,
+				       struct sk_buff *skb, u32 hdr_len)
+{
+	u8 flits, nsgl = 0;
+	u32 wrlen;
+
+	wrlen = sizeof(struct fw_eth_tx_eo_wr) + sizeof(struct cpl_tx_pkt_core);
+	if (skb_shinfo(skb)->gso_size)
+		wrlen += sizeof(struct cpl_tx_pkt_lso_core);
+
+	wrlen += roundup(hdr_len, 16);
+
+	/* Packet headers + WR + CPLs */
+	flits = DIV_ROUND_UP(wrlen, 8);
+
+	if (skb_shinfo(skb)->nr_frags > 0)
+		nsgl = sgl_len(skb_shinfo(skb)->nr_frags);
+	else if (skb->len - hdr_len)
+		nsgl = sgl_len(1);
+
+	return flits + nsgl;
+}
+
+static inline void *write_eo_wr(struct adapter *adap,
+				struct sge_eosw_txq *eosw_txq,
+				struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
+				u32 hdr_len, u32 wrlen)
+{
+	const struct skb_shared_info *ssi = skb_shinfo(skb);
+	struct cpl_tx_pkt_core *cpl;
+	u32 immd_len, wrlen16;
+	bool compl = false;
+
+	wrlen16 = DIV_ROUND_UP(wrlen, 16);
+	immd_len = sizeof(struct cpl_tx_pkt_core);
+	if (skb_shinfo(skb)->gso_size) {
+		if (skb->encapsulation &&
+		    CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5)
+			immd_len += sizeof(struct cpl_tx_tnl_lso);
+		else
+			immd_len += sizeof(struct cpl_tx_pkt_lso_core);
+	}
+	immd_len += hdr_len;
+
+	if (!eosw_txq->ncompl ||
+	    eosw_txq->last_compl >= adap->params.ofldq_wr_cred / 2) {
+		compl = true;
+		eosw_txq->ncompl++;
+		eosw_txq->last_compl = 0;
+	}
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_EO_WR) |
+				     FW_ETH_TX_EO_WR_IMMDLEN_V(immd_len) |
+				     FW_WR_COMPL_V(compl));
+	wr->equiq_to_len16 = cpu_to_be32(FW_WR_LEN16_V(wrlen16) |
+					 FW_WR_FLOWID_V(eosw_txq->hwtid));
+	wr->r3 = 0;
+	wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
+	wr->u.tcpseg.ethlen = skb_network_offset(skb);
+	wr->u.tcpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
+	wr->u.tcpseg.tcplen = tcp_hdrlen(skb);
+	wr->u.tcpseg.tsclk_tsoff = 0;
+	wr->u.tcpseg.r4 = 0;
+	wr->u.tcpseg.r5 = 0;
+	wr->u.tcpseg.plen = cpu_to_be32(skb->len - hdr_len);
+
+	if (ssi->gso_size) {
+		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
+
+		wr->u.tcpseg.mss = cpu_to_be16(ssi->gso_size);
+		cpl = write_tso_wr(adap, skb, lso);
+	} else {
+		wr->u.tcpseg.mss = cpu_to_be16(0xffff);
+		cpl = (void *)(wr + 1);
+	}
+
+	eosw_txq->cred -= wrlen16;
+	eosw_txq->last_compl += wrlen16;
+	return cpl;
+}
+
+static void ethofld_hard_xmit(struct net_device *dev,
+			      struct sge_eosw_txq *eosw_txq)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	u32 wrlen, wrlen16, hdr_len, data_len;
+	enum sge_eosw_state next_state;
+	u64 cntrl, *start, *end, *sgl;
+	struct sge_eohw_txq *eohw_txq;
+	struct cpl_tx_pkt_core *cpl;
+	struct fw_eth_tx_eo_wr *wr;
+	bool skip_eotx_wr = false;
+	struct sge_eosw_desc *d;
+	struct sk_buff *skb;
+	u8 flits, ndesc;
+	int left;
+
+	eohw_txq = &adap->sge.eohw_txq[eosw_txq->hwqid];
+	spin_lock(&eohw_txq->lock);
+	reclaim_completed_tx_imm(&eohw_txq->q);
+
+	d = &eosw_txq->desc[eosw_txq->last_pidx];
+	skb = d->skb;
+	skb_tx_timestamp(skb);
+
+	wr = (struct fw_eth_tx_eo_wr *)&eohw_txq->q.desc[eohw_txq->q.pidx];
+	if (unlikely(eosw_txq->state != CXGB4_EO_STATE_ACTIVE &&
+		     eosw_txq->last_pidx == eosw_txq->flowc_idx)) {
+		hdr_len = skb->len;
+		data_len = 0;
+		flits = DIV_ROUND_UP(hdr_len, 8);
+		if (eosw_txq->state == CXGB4_EO_STATE_FLOWC_OPEN_SEND)
+			next_state = CXGB4_EO_STATE_FLOWC_OPEN_REPLY;
+		else
+			next_state = CXGB4_EO_STATE_FLOWC_CLOSE_REPLY;
+		skip_eotx_wr = true;
+	} else {
+		hdr_len = eth_get_headlen(dev, skb->data, skb_headlen(skb));
+		data_len = skb->len - hdr_len;
+		flits = ethofld_calc_tx_flits(adap, skb, hdr_len);
+	}
+	ndesc = flits_to_desc(flits);
+	wrlen = flits * 8;
+	wrlen16 = DIV_ROUND_UP(wrlen, 16);
+
+	/* If there are no CPL credits, then wait for credits
+	 * to come back and retry again
+	 */
+	if (unlikely(wrlen16 > eosw_txq->cred))
+		goto out_unlock;
+
+	if (unlikely(skip_eotx_wr)) {
+		start = (u64 *)wr;
+		eosw_txq->state = next_state;
+		goto write_wr_headers;
+	}
+
+	cpl = write_eo_wr(adap, eosw_txq, skb, wr, hdr_len, wrlen);
+	cntrl = hwcsum(adap->params.chip, skb);
+	if (skb_vlan_tag_present(skb))
+		cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
+
+	cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) |
+				 TXPKT_INTF_V(pi->tx_chan) |
+				 TXPKT_PF_V(adap->pf));
+	cpl->pack = 0;
+	cpl->len = cpu_to_be16(skb->len);
+	cpl->ctrl1 = cpu_to_be64(cntrl);
+
+	start = (u64 *)(cpl + 1);
+
+write_wr_headers:
+	sgl = (u64 *)inline_tx_skb_header(skb, &eohw_txq->q, (void *)start,
+					  hdr_len);
+	if (data_len) {
+		if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, d->addr))) {
+			memset(d->addr, 0, sizeof(d->addr));
+			eohw_txq->mapping_err++;
+			goto out_unlock;
+		}
+
+		end = (u64 *)wr + flits;
+		if (unlikely(start > sgl)) {
+			left = (u8 *)end - (u8 *)eohw_txq->q.stat;
+			end = (void *)eohw_txq->q.desc + left;
+		}
+
+		if (unlikely((u8 *)sgl >= (u8 *)eohw_txq->q.stat)) {
+			/* If current position is already at the end of the
+			 * txq, reset the current to point to start of the queue
+			 * and update the end ptr as well.
+			 */
+			left = (u8 *)end - (u8 *)eohw_txq->q.stat;
+
+			end = (void *)eohw_txq->q.desc + left;
+			sgl = (void *)eohw_txq->q.desc;
+		}
+
+		cxgb4_write_sgl(skb, &eohw_txq->q, (void *)sgl, end, hdr_len,
+				d->addr);
+	}
+
+	txq_advance(&eohw_txq->q, ndesc);
+	cxgb4_ring_tx_db(adap, &eohw_txq->q, ndesc);
+	eosw_txq_advance_index(&eosw_txq->last_pidx, 1, eosw_txq->ndesc);
+
+out_unlock:
+	spin_unlock(&eohw_txq->lock);
+}
+
+static void ethofld_xmit(struct net_device *dev, struct sge_eosw_txq *eosw_txq)
+{
+	struct sk_buff *skb;
+	int pktcount;
+
+	switch (eosw_txq->state) {
+	case CXGB4_EO_STATE_ACTIVE:
+	case CXGB4_EO_STATE_FLOWC_OPEN_SEND:
+	case CXGB4_EO_STATE_FLOWC_CLOSE_SEND:
+		pktcount = eosw_txq->pidx - eosw_txq->last_pidx;
+		if (pktcount < 0)
+			pktcount += eosw_txq->ndesc;
+		break;
+	case CXGB4_EO_STATE_FLOWC_OPEN_REPLY:
+	case CXGB4_EO_STATE_FLOWC_CLOSE_REPLY:
+	case CXGB4_EO_STATE_CLOSED:
+	default:
+		return;
+	};
+
+	while (pktcount--) {
+		skb = eosw_txq_peek(eosw_txq);
+		if (!skb) {
+			eosw_txq_advance_index(&eosw_txq->last_pidx, 1,
+					       eosw_txq->ndesc);
+			continue;
+		}
+
+		ethofld_hard_xmit(dev, eosw_txq);
+	}
+}
+
+static netdev_tx_t cxgb4_ethofld_xmit(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	struct cxgb4_tc_port_mqprio *tc_port_mqprio;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_eosw_txq *eosw_txq;
+	u32 qid;
+	int ret;
+
+	ret = cxgb4_validate_skb(skb, dev, ETH_HLEN);
+	if (ret)
+		goto out_free;
+
+	tc_port_mqprio = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	qid = skb_get_queue_mapping(skb) - pi->nqsets;
+	eosw_txq = &tc_port_mqprio->eosw_txq[qid];
+	spin_lock_bh(&eosw_txq->lock);
+	if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE)
+		goto out_unlock;
+
+	ret = eosw_txq_enqueue(eosw_txq, skb);
+	if (ret)
+		goto out_unlock;
+
+	/* SKB is queued for processing until credits are available.
+	 * So, call the destructor now and we'll free the skb later
+	 * after it has been successfully transmitted.
+	 */
+	skb_orphan(skb);
+
+	eosw_txq_advance(eosw_txq, 1);
+	ethofld_xmit(dev, eosw_txq);
+	spin_unlock_bh(&eosw_txq->lock);
+	return NETDEV_TX_OK;
+
+out_unlock:
+	spin_unlock_bh(&eosw_txq->lock);
+out_free:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
 netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct port_info *pi = netdev_priv(dev);
+	u16 qid = skb_get_queue_mapping(skb);
 
 	if (unlikely(pi->eth_flags & PRIV_FLAG_PORT_TX_VM))
 		return cxgb4_vf_eth_xmit(skb, dev);
 
+	if (unlikely(qid >= pi->nqsets))
+		return cxgb4_ethofld_xmit(skb, dev);
+
 	return cxgb4_eth_xmit(skb, dev);
 }
 
 /**
- *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
- *	@q: the SGE control Tx queue
+ * cxgb4_ethofld_send_flowc - Send ETHOFLD flowc request to bind eotid to tc.
+ * @dev - netdevice
+ * @eotid - ETHOFLD tid to bind/unbind
+ * @tc - traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid
  *
- *	This is a variant of cxgb4_reclaim_completed_tx() that is used
- *	for Tx queues that send only immediate data (presently just
- *	the control queues) and	thus do not have any sk_buffs to release.
+ * Send a FLOWC work request to bind an ETHOFLD TID to a traffic class.
+ * If @tc is set to FW_SCHED_CLS_NONE, then the @eotid is unbound from
+ * a traffic class.
  */
-static inline void reclaim_completed_tx_imm(struct sge_txq *q)
+int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
 {
-	int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
-	int reclaim = hw_cidx - q->cidx;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	enum sge_eosw_state next_state;
+	struct sge_eosw_txq *eosw_txq;
+	u32 len, len16, nparams = 6;
+	struct fw_flowc_wr *flowc;
+	struct eotid_entry *entry;
+	struct sge_ofld_rxq *rxq;
+	struct sk_buff *skb;
+	int ret = 0;
 
-	if (reclaim < 0)
-		reclaim += q->size;
+	len = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval) * nparams;
+	len16 = DIV_ROUND_UP(len, 16);
 
-	q->in_use -= reclaim;
-	q->cidx = hw_cidx;
+	entry = cxgb4_lookup_eotid(&adap->tids, eotid);
+	if (!entry)
+		return -ENOMEM;
+
+	eosw_txq = (struct sge_eosw_txq *)entry->data;
+	if (!eosw_txq)
+		return -ENOMEM;
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	spin_lock_bh(&eosw_txq->lock);
+	if (tc != FW_SCHED_CLS_NONE) {
+		if (eosw_txq->state != CXGB4_EO_STATE_CLOSED)
+			goto out_unlock;
+
+		next_state = CXGB4_EO_STATE_FLOWC_OPEN_SEND;
+	} else {
+		if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE)
+			goto out_unlock;
+
+		next_state = CXGB4_EO_STATE_FLOWC_CLOSE_SEND;
+	}
+
+	flowc = __skb_put(skb, len);
+	memset(flowc, 0, len);
+
+	rxq = &adap->sge.eohw_rxq[eosw_txq->hwqid];
+	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(len16) |
+					  FW_WR_FLOWID_V(eosw_txq->hwtid));
+	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
+					   FW_FLOWC_WR_NPARAMS_V(nparams) |
+					   FW_WR_COMPL_V(1));
+	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
+	flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V(adap->pf));
+	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
+	flowc->mnemval[1].val = cpu_to_be32(pi->tx_chan);
+	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
+	flowc->mnemval[2].val = cpu_to_be32(pi->tx_chan);
+	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
+	flowc->mnemval[3].val = cpu_to_be32(rxq->rspq.abs_id);
+	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
+	flowc->mnemval[4].val = cpu_to_be32(tc);
+	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_EOSTATE;
+	flowc->mnemval[5].val = cpu_to_be32(tc == FW_SCHED_CLS_NONE ?
+					    FW_FLOWC_MNEM_EOSTATE_CLOSING :
+					    FW_FLOWC_MNEM_EOSTATE_ESTABLISHED);
+
+	eosw_txq->cred -= len16;
+	eosw_txq->ncompl++;
+	eosw_txq->last_compl = 0;
+
+	ret = eosw_txq_enqueue(eosw_txq, skb);
+	if (ret) {
+		dev_consume_skb_any(skb);
+		goto out_unlock;
+	}
+
+	eosw_txq->state = next_state;
+	eosw_txq->flowc_idx = eosw_txq->pidx;
+	eosw_txq_advance(eosw_txq, 1);
+	ethofld_xmit(dev, eosw_txq);
+
+out_unlock:
+	spin_unlock_bh(&eosw_txq->lock);
+	return ret;
 }
 
 /**
@@ -3311,6 +3739,113 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
+void cxgb4_ethofld_restart(unsigned long data)
+{
+	struct sge_eosw_txq *eosw_txq = (struct sge_eosw_txq *)data;
+	int pktcount;
+
+	spin_lock(&eosw_txq->lock);
+	pktcount = eosw_txq->cidx - eosw_txq->last_cidx;
+	if (pktcount < 0)
+		pktcount += eosw_txq->ndesc;
+
+	if (pktcount) {
+		cxgb4_eosw_txq_free_desc(netdev2adap(eosw_txq->netdev),
+					 eosw_txq, pktcount);
+		eosw_txq->inuse -= pktcount;
+	}
+
+	/* There may be some packets waiting for completions. So,
+	 * attempt to send these packets now.
+	 */
+	ethofld_xmit(eosw_txq->netdev, eosw_txq);
+	spin_unlock(&eosw_txq->lock);
+}
+
+/* cxgb4_ethofld_rx_handler - Process ETHOFLD Tx completions
+ * @q: the response queue that received the packet
+ * @rsp: the response queue descriptor holding the CPL message
+ * @si: the gather list of packet fragments
+ *
+ * Process a ETHOFLD Tx completion. Increment the cidx here, but
+ * free up the descriptors in a tasklet later.
+ */
+int cxgb4_ethofld_rx_handler(struct sge_rspq *q, const __be64 *rsp,
+			     const struct pkt_gl *si)
+{
+	u8 opcode = ((const struct rss_header *)rsp)->opcode;
+
+	/* skip RSS header */
+	rsp++;
+
+	if (opcode == CPL_FW4_ACK) {
+		const struct cpl_fw4_ack *cpl;
+		struct sge_eosw_txq *eosw_txq;
+		struct eotid_entry *entry;
+		struct sk_buff *skb;
+		u32 hdr_len, eotid;
+		u8 flits, wrlen16;
+		int credits;
+
+		cpl = (const struct cpl_fw4_ack *)rsp;
+		eotid = CPL_FW4_ACK_FLOWID_G(ntohl(OPCODE_TID(cpl))) -
+			q->adap->tids.eotid_base;
+		entry = cxgb4_lookup_eotid(&q->adap->tids, eotid);
+		if (!entry)
+			goto out_done;
+
+		eosw_txq = (struct sge_eosw_txq *)entry->data;
+		if (!eosw_txq)
+			goto out_done;
+
+		spin_lock(&eosw_txq->lock);
+		credits = cpl->credits;
+		while (credits > 0) {
+			skb = eosw_txq->desc[eosw_txq->cidx].skb;
+			if (!skb)
+				break;
+
+			if (unlikely((eosw_txq->state ==
+				      CXGB4_EO_STATE_FLOWC_OPEN_REPLY ||
+				      eosw_txq->state ==
+				      CXGB4_EO_STATE_FLOWC_CLOSE_REPLY) &&
+				     eosw_txq->cidx == eosw_txq->flowc_idx)) {
+				hdr_len = skb->len;
+				flits = DIV_ROUND_UP(skb->len, 8);
+				if (eosw_txq->state ==
+				    CXGB4_EO_STATE_FLOWC_OPEN_REPLY)
+					eosw_txq->state = CXGB4_EO_STATE_ACTIVE;
+				else
+					eosw_txq->state = CXGB4_EO_STATE_CLOSED;
+				complete(&eosw_txq->completion);
+			} else {
+				hdr_len = eth_get_headlen(eosw_txq->netdev,
+							  skb->data,
+							  skb_headlen(skb));
+				flits = ethofld_calc_tx_flits(q->adap, skb,
+							      hdr_len);
+			}
+			eosw_txq_advance_index(&eosw_txq->cidx, 1,
+					       eosw_txq->ndesc);
+			wrlen16 = DIV_ROUND_UP(flits * 8, 16);
+			credits -= wrlen16;
+		}
+
+		eosw_txq->cred += cpl->credits;
+		eosw_txq->ncompl--;
+
+		spin_unlock(&eosw_txq->lock);
+
+		/* Schedule a tasklet to reclaim SKBs and restart ETHOFLD Tx,
+		 * if there were packets waiting for completion.
+		 */
+		tasklet_schedule(&eosw_txq->qresume_tsk);
+	}
+
+out_done:
+	return 0;
+}
+
 /*
  * The MSI-X interrupt handler for an SGE response queue.
  */
@@ -3912,30 +4447,30 @@ int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
 	return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
 }
 
-int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
-			 struct net_device *dev, unsigned int iqid,
-			 unsigned int uld_type)
+static int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_txq *q,
+				 struct net_device *dev, u32 cmd, u32 iqid)
 {
 	unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
-	int ret, nentries;
-	struct fw_eq_ofld_cmd c;
-	struct sge *s = &adap->sge;
 	struct port_info *pi = netdev_priv(dev);
-	int cmd = FW_EQ_OFLD_CMD;
+	struct sge *s = &adap->sge;
+	struct fw_eq_ofld_cmd c;
+	u32 fb_min, nentries;
+	int ret;
 
 	/* Add status entries */
-	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
-
-	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
-			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
-			&txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
-			NUMA_NO_NODE);
-	if (!txq->q.desc)
+	nentries = q->size + s->stat_len / sizeof(struct tx_desc);
+	q->desc = alloc_ring(adap->pdev_dev, q->size, sizeof(struct tx_desc),
+			     sizeof(struct tx_sw_desc), &q->phys_addr,
+			     &q->sdesc, s->stat_len, NUMA_NO_NODE);
+	if (!q->desc)
 		return -ENOMEM;
 
+	if (chip_ver <= CHELSIO_T5)
+		fb_min = FETCHBURSTMIN_64B_X;
+	else
+		fb_min = FETCHBURSTMIN_64B_T6_X;
+
 	memset(&c, 0, sizeof(c));
-	if (unlikely(uld_type == CXGB4_TX_CRYPTO))
-		cmd = FW_EQ_CTRL_CMD;
 	c.op_to_vfn = htonl(FW_CMD_OP_V(cmd) | FW_CMD_REQUEST_F |
 			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
 			    FW_EQ_OFLD_CMD_PFN_V(adap->pf) |
@@ -3947,27 +4482,42 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
 		      FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
 		      FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid));
 	c.dcaen_to_eqsize =
-		htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
-					     ? FETCHBURSTMIN_64B_X
-					     : FETCHBURSTMIN_64B_T6_X) |
+		htonl(FW_EQ_OFLD_CMD_FBMIN_V(fb_min) |
 		      FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
 		      FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
 		      FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
-	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
+	c.eqaddr = cpu_to_be64(q->phys_addr);
 
 	ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
 	if (ret) {
-		kfree(txq->q.sdesc);
-		txq->q.sdesc = NULL;
+		kfree(q->sdesc);
+		q->sdesc = NULL;
 		dma_free_coherent(adap->pdev_dev,
 				  nentries * sizeof(struct tx_desc),
-				  txq->q.desc, txq->q.phys_addr);
-		txq->q.desc = NULL;
+				  q->desc, q->phys_addr);
+		q->desc = NULL;
 		return ret;
 	}
 
+	init_txq(adap, q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
+	return 0;
+}
+
+int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
+			 struct net_device *dev, unsigned int iqid,
+			 unsigned int uld_type)
+{
+	u32 cmd = FW_EQ_OFLD_CMD;
+	int ret;
+
+	if (unlikely(uld_type == CXGB4_TX_CRYPTO))
+		cmd = FW_EQ_CTRL_CMD;
+
+	ret = t4_sge_alloc_ofld_txq(adap, &txq->q, dev, cmd, iqid);
+	if (ret)
+		return ret;
+
 	txq->q.q_type = CXGB4_TXQ_ULD;
-	init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
 	txq->adap = adap;
 	skb_queue_head_init(&txq->sendq);
 	tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
@@ -3976,6 +4526,25 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
 	return 0;
 }
 
+int t4_sge_alloc_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq,
+			     struct net_device *dev, u32 iqid)
+{
+	int ret;
+
+	ret = t4_sge_alloc_ofld_txq(adap, &txq->q, dev, FW_EQ_OFLD_CMD, iqid);
+	if (ret)
+		return ret;
+
+	txq->q.q_type = CXGB4_TXQ_ULD;
+	spin_lock_init(&txq->lock);
+	txq->adap = adap;
+	txq->tso = 0;
+	txq->tx_cso = 0;
+	txq->vlan_ins = 0;
+	txq->mapping_err = 0;
+	return 0;
+}
+
 void free_txq(struct adapter *adap, struct sge_txq *q)
 {
 	struct sge *s = &adap->sge;
@@ -4031,6 +4600,17 @@ void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q)
 				     q->fl.size ? &q->fl : NULL);
 }
 
+void t4_sge_free_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq)
+{
+	if (txq->q.desc) {
+		t4_ofld_eq_free(adap, adap->mbox, adap->pf, 0,
+				txq->q.cntxt_id);
+		free_tx_desc(adap, &txq->q, txq->q.in_use, false);
+		kfree(txq->q.sdesc);
+		free_txq(adap, &txq->q);
+	}
+}
+
 /**
  *	t4_free_sge_resources - free SGE resources
  *	@adap: the adapter
@@ -4060,6 +4640,10 @@ void t4_free_sge_resources(struct adapter *adap)
 		if (eq->rspq.desc)
 			free_rspq_fl(adap, &eq->rspq,
 				     eq->fl.size ? &eq->fl : NULL);
+		if (eq->msix) {
+			cxgb4_free_msix_idx_in_bmap(adap, eq->msix->idx);
+			eq->msix = NULL;
+		}
 
 		etq = &adap->sge.ethtxq[i];
 		if (etq->q.desc) {
@@ -4086,8 +4670,15 @@ void t4_free_sge_resources(struct adapter *adap)
 		}
 	}
 
-	if (adap->sge.fw_evtq.desc)
+	if (adap->sge.fw_evtq.desc) {
 		free_rspq_fl(adap, &adap->sge.fw_evtq, NULL);
+		if (adap->sge.fwevtq_msix_idx >= 0)
+			cxgb4_free_msix_idx_in_bmap(adap,
+						    adap->sge.fwevtq_msix_idx);
+	}
+
+	if (adap->sge.nd_msix_idx >= 0)
+		cxgb4_free_msix_idx_in_bmap(adap, adap->sge.nd_msix_idx);
 
 	if (adap->sge.intrq.desc)
 		free_rspq_fl(adap, &adap->sge.intrq, NULL);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 38dd41eb959e..575c6abcdae7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -1421,6 +1421,11 @@ enum {
 	CPL_FW4_ACK_FLAGS_FLOWC		= 0x4,	/* fw_flowc_wr complete */
 };
 
+#define CPL_FW4_ACK_FLOWID_S    0
+#define CPL_FW4_ACK_FLOWID_M    0xffffff
+#define CPL_FW4_ACK_FLOWID_G(x) \
+	(((x) >> CPL_FW4_ACK_FLOWID_S) & CPL_FW4_ACK_FLOWID_M)
+
 struct cpl_fw6_msg {
 	u8 opcode;
 	u8 type;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 65313f6b5704..414e5cca293e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -87,6 +87,7 @@ enum fw_wr_opcodes {
 	FW_ULPTX_WR                    = 0x04,
 	FW_TP_WR                       = 0x05,
 	FW_ETH_TX_PKT_WR               = 0x08,
+	FW_ETH_TX_EO_WR                = 0x1c,
 	FW_OFLD_CONNECTION_WR          = 0x2f,
 	FW_FLOWC_WR                    = 0x0a,
 	FW_OFLD_TX_DATA_WR             = 0x0b,
@@ -534,6 +535,35 @@ struct fw_eth_tx_pkt_wr {
 	__be64 r3;
 };
 
+enum fw_eth_tx_eo_type {
+	FW_ETH_TX_EO_TYPE_TCPSEG = 1,
+};
+
+struct fw_eth_tx_eo_wr {
+	__be32 op_immdlen;
+	__be32 equiq_to_len16;
+	__be64 r3;
+	union fw_eth_tx_eo {
+		struct fw_eth_tx_eo_tcpseg {
+			__u8   type;
+			__u8   ethlen;
+			__be16 iplen;
+			__u8   tcplen;
+			__u8   tsclk_tsoff;
+			__be16 r4;
+			__be16 mss;
+			__be16 r5;
+			__be32 plen;
+		} tcpseg;
+	} u;
+};
+
+#define FW_ETH_TX_EO_WR_IMMDLEN_S	0
+#define FW_ETH_TX_EO_WR_IMMDLEN_M	0x1ff
+#define FW_ETH_TX_EO_WR_IMMDLEN_V(x)	((x) << FW_ETH_TX_EO_WR_IMMDLEN_S)
+#define FW_ETH_TX_EO_WR_IMMDLEN_G(x)	\
+	(((x) >> FW_ETH_TX_EO_WR_IMMDLEN_S) & FW_ETH_TX_EO_WR_IMMDLEN_M)
+
 struct fw_ofld_connection_wr {
 	__be32 op_compl;
 	__be32 len16_pkd;
@@ -660,6 +690,12 @@ enum fw_flowc_mnem_tcpstate {
 	FW_FLOWC_MNEM_TCPSTATE_TIMEWAIT = 10, /* not expected */
 };
 
+enum fw_flowc_mnem_eostate {
+	FW_FLOWC_MNEM_EOSTATE_ESTABLISHED = 1, /* default */
+	/* graceful close, after sending outstanding payload */
+	FW_FLOWC_MNEM_EOSTATE_CLOSING = 2,
+};
+
 enum fw_flowc_mnem {
 	FW_FLOWC_MNEM_PFNVFN,		/* PFN [15:8] VFN [7:0] */
 	FW_FLOWC_MNEM_CH,
@@ -1134,6 +1170,7 @@ enum fw_caps_config_nic {
 	FW_CAPS_CONFIG_NIC		= 0x00000001,
 	FW_CAPS_CONFIG_NIC_VM		= 0x00000002,
 	FW_CAPS_CONFIG_NIC_HASHFILTER	= 0x00000020,
+	FW_CAPS_CONFIG_NIC_ETHOFLD	= 0x00000040,
 };
 
 enum fw_caps_config_ofld {
@@ -1276,6 +1313,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28,
 	FW_PARAMS_PARAM_DEV_DBQ_TIMER	= 0x29,
 	FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
+	FW_PARAMS_PARAM_DEV_NUM_TM_CLASS = 0x2B,
 	FW_PARAMS_PARAM_DEV_FILTER = 0x2E,
 };
 
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 96e9565f1e08..a6f2063f1475 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -90,6 +90,9 @@ struct ftgmac100 {
 	struct mii_bus *mii_bus;
 	struct clk *clk;
 
+	/* AST2500/AST2600 RMII ref clock gate */
+	struct clk *rclk;
+
 	/* Link management */
 	int cur_speed;
 	int cur_duplex;
@@ -1609,7 +1612,7 @@ static int ftgmac100_setup_mdio(struct net_device *netdev)
 {
 	struct ftgmac100 *priv = netdev_priv(netdev);
 	struct platform_device *pdev = to_platform_device(priv->dev);
-	int phy_intf = PHY_INTERFACE_MODE_RGMII;
+	phy_interface_t phy_intf = PHY_INTERFACE_MODE_RGMII;
 	struct device_node *np = pdev->dev.of_node;
 	int i, err = 0;
 	u32 reg;
@@ -1634,8 +1637,8 @@ static int ftgmac100_setup_mdio(struct net_device *netdev)
 	/* Get PHY mode from device-tree */
 	if (np) {
 		/* Default to RGMII. It's a gigabit part after all */
-		phy_intf = of_get_phy_mode(np);
-		if (phy_intf < 0)
+		err = of_get_phy_mode(np, &phy_intf);
+		if (err)
 			phy_intf = PHY_INTERFACE_MODE_RGMII;
 
 		/* Aspeed only supports these. I don't know about other IP
@@ -1717,20 +1720,41 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd)
 		   nd->link_up ? "up" : "down");
 }
 
-static void ftgmac100_setup_clk(struct ftgmac100 *priv)
+static int ftgmac100_setup_clk(struct ftgmac100 *priv)
 {
-	priv->clk = devm_clk_get(priv->dev, NULL);
-	if (IS_ERR(priv->clk))
-		return;
+	struct clk *clk;
+	int rc;
 
-	clk_prepare_enable(priv->clk);
+	clk = devm_clk_get(priv->dev, NULL /* MACCLK */);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+	priv->clk = clk;
+	rc = clk_prepare_enable(priv->clk);
+	if (rc)
+		return rc;
 
 	/* Aspeed specifies a 100MHz clock is required for up to
 	 * 1000Mbit link speeds. As NCSI is limited to 100Mbit, 25MHz
 	 * is sufficient
 	 */
-	clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ :
-			FTGMAC_100MHZ);
+	rc = clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ :
+			  FTGMAC_100MHZ);
+	if (rc)
+		goto cleanup_clk;
+
+	/* RCLK is for RMII, typically used for NCSI. Optional because its not
+	 * necessary if it's the AST2400 MAC, or the MAC is configured for
+	 * RGMII, or the controller is not an ASPEED-based controller.
+	 */
+	priv->rclk = devm_clk_get_optional(priv->dev, "RCLK");
+	rc = clk_prepare_enable(priv->rclk);
+	if (!rc)
+		return 0;
+
+cleanup_clk:
+	clk_disable_unprepare(priv->clk);
+
+	return rc;
 }
 
 static int ftgmac100_probe(struct platform_device *pdev)
@@ -1852,8 +1876,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
 			goto err_setup_mdio;
 	}
 
-	if (priv->is_aspeed)
-		ftgmac100_setup_clk(priv);
+	if (priv->is_aspeed) {
+		err = ftgmac100_setup_clk(priv);
+		if (err)
+			goto err_ncsi_dev;
+	}
 
 	/* Default ring sizes */
 	priv->rx_q_entries = priv->new_rx_q_entries = DEF_RX_QUEUE_ENTRIES;
@@ -1885,8 +1912,10 @@ static int ftgmac100_probe(struct platform_device *pdev)
 
 	return 0;
 
-err_ncsi_dev:
 err_register_netdev:
+	clk_disable_unprepare(priv->rclk);
+	clk_disable_unprepare(priv->clk);
+err_ncsi_dev:
 	ftgmac100_destroy_mdio(netdev);
 err_setup_mdio:
 	iounmap(priv->base);
@@ -1908,6 +1937,7 @@ static int ftgmac100_remove(struct platform_device *pdev)
 
 	unregister_netdev(netdev);
 
+	clk_disable_unprepare(priv->rclk);
 	clk_disable_unprepare(priv->clk);
 
 	/* There's a small chance the reset task will have been re-queued,
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index b4b82b9c5cd6..6a9d12dad5d9 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -178,31 +178,9 @@ struct fm_port_fqs {
 /* All the dpa bps in use at any moment */
 static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS];
 
-/* The raw buffer size must be cacheline aligned */
 #define DPAA_BP_RAW_SIZE 4096
-/* When using more than one buffer pool, the raw sizes are as follows:
- * 1 bp: 4KB
- * 2 bp: 2KB, 4KB
- * 3 bp: 1KB, 2KB, 4KB
- * 4 bp: 1KB, 2KB, 4KB, 8KB
- */
-static inline size_t bpool_buffer_raw_size(u8 index, u8 cnt)
-{
-	size_t res = DPAA_BP_RAW_SIZE / 4;
-	u8 i;
-
-	for (i = (cnt < 3) ? cnt : 3; i < 3 + index; i++)
-		res *= 2;
-	return res;
-}
 
-/* FMan-DMA requires 16-byte alignment for Rx buffers, but SKB_DATA_ALIGN is
- * even stronger (SMP_CACHE_BYTES-aligned), so we just get away with that,
- * via SKB_WITH_OVERHEAD(). We can't rely on netdev_alloc_frag() giving us
- * half-page-aligned buffers, so we reserve some more space for start-of-buffer
- * alignment.
- */
-#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD((raw_size) - SMP_CACHE_BYTES)
+#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD(raw_size)
 
 static int dpaa_max_frm;
 
@@ -288,7 +266,7 @@ static int dpaa_stop(struct net_device *net_dev)
 	/* Allow the Fman (Tx) port to process in-flight frames before we
 	 * try switching it off.
 	 */
-	usleep_range(5000, 10000);
+	msleep(200);
 
 	err = mac_dev->stop(mac_dev);
 	if (err < 0)
@@ -305,6 +283,8 @@ static int dpaa_stop(struct net_device *net_dev)
 		phy_disconnect(net_dev->phydev);
 	net_dev->phydev = NULL;
 
+	msleep(200);
+
 	return err;
 }
 
@@ -596,10 +576,7 @@ static void dpaa_bp_free(struct dpaa_bp *dpaa_bp)
 
 static void dpaa_bps_free(struct dpaa_priv *priv)
 {
-	int i;
-
-	for (i = 0; i < DPAA_BPS_NUM; i++)
-		dpaa_bp_free(priv->dpaa_bps[i]);
+	dpaa_bp_free(priv->dpaa_bp);
 }
 
 /* Use multiple WQs for FQ assignment:
@@ -773,7 +750,7 @@ static void dpaa_release_channel(void)
 	qman_release_pool(rx_pool_channel);
 }
 
-static void dpaa_eth_add_channel(u16 channel)
+static void dpaa_eth_add_channel(u16 channel, struct device *dev)
 {
 	u32 pool = QM_SDQCR_CHANNELS_POOL_CONV(channel);
 	const cpumask_t *cpus = qman_affine_cpus();
@@ -783,6 +760,7 @@ static void dpaa_eth_add_channel(u16 channel)
 	for_each_cpu_and(cpu, cpus, cpu_online_mask) {
 		portal = qman_get_affine_portal(cpu);
 		qman_p_static_dequeue_add(portal, pool);
+		qman_start_using_portal(portal, dev);
 	}
 }
 
@@ -901,7 +879,7 @@ static void dpaa_fq_setup(struct dpaa_priv *priv,
 
 	if (num_portals == 0)
 		dev_err(priv->net_dev->dev.parent,
-			"No Qman software (affine) channels found");
+			"No Qman software (affine) channels found\n");
 
 	/* Initialize each FQ in the list */
 	list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
@@ -1197,15 +1175,15 @@ static int dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
 	return err;
 }
 
-static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
-				 size_t count, struct dpaa_fq *errq,
+static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp *bp,
+				 struct dpaa_fq *errq,
 				 struct dpaa_fq *defq, struct dpaa_fq *pcdq,
 				 struct dpaa_buffer_layout *buf_layout)
 {
 	struct fman_buffer_prefix_content buf_prefix_content;
 	struct fman_port_rx_params *rx_p;
 	struct fman_port_params params;
-	int i, err;
+	int err;
 
 	memset(&params, 0, sizeof(params));
 	memset(&buf_prefix_content, 0, sizeof(buf_prefix_content));
@@ -1224,12 +1202,9 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
 		rx_p->pcd_fqs_count = DPAA_ETH_PCD_RXQ_NUM;
 	}
 
-	count = min(ARRAY_SIZE(rx_p->ext_buf_pools.ext_buf_pool), count);
-	rx_p->ext_buf_pools.num_of_pools_used = (u8)count;
-	for (i = 0; i < count; i++) {
-		rx_p->ext_buf_pools.ext_buf_pool[i].id =  bps[i]->bpid;
-		rx_p->ext_buf_pools.ext_buf_pool[i].size = (u16)bps[i]->size;
-	}
+	rx_p->ext_buf_pools.num_of_pools_used = 1;
+	rx_p->ext_buf_pools.ext_buf_pool[0].id =  bp->bpid;
+	rx_p->ext_buf_pools.ext_buf_pool[0].size = (u16)bp->size;
 
 	err = fman_port_config(port, &params);
 	if (err) {
@@ -1252,7 +1227,7 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
 }
 
 static int dpaa_eth_init_ports(struct mac_device *mac_dev,
-			       struct dpaa_bp **bps, size_t count,
+			       struct dpaa_bp *bp,
 			       struct fm_port_fqs *port_fqs,
 			       struct dpaa_buffer_layout *buf_layout,
 			       struct device *dev)
@@ -1266,7 +1241,7 @@ static int dpaa_eth_init_ports(struct mac_device *mac_dev,
 	if (err)
 		return err;
 
-	err = dpaa_eth_init_rx_port(rxport, bps, count, port_fqs->rx_errq,
+	err = dpaa_eth_init_rx_port(rxport, bp, port_fqs->rx_errq,
 				    port_fqs->rx_defq, port_fqs->rx_pcdq,
 				    &buf_layout[RX]);
 
@@ -1335,15 +1310,16 @@ static void dpaa_fd_release(const struct net_device *net_dev,
 		vaddr = phys_to_virt(qm_fd_addr(fd));
 		sgt = vaddr + qm_fd_get_offset(fd);
 
-		dma_unmap_single(dpaa_bp->dev, qm_fd_addr(fd), dpaa_bp->size,
-				 DMA_FROM_DEVICE);
+		dma_unmap_page(dpaa_bp->priv->rx_dma_dev, qm_fd_addr(fd),
+			       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
 
 		dpaa_release_sgt_members(sgt);
 
-		addr = dma_map_single(dpaa_bp->dev, vaddr, dpaa_bp->size,
-				      DMA_FROM_DEVICE);
-		if (dma_mapping_error(dpaa_bp->dev, addr)) {
-			dev_err(dpaa_bp->dev, "DMA mapping failed");
+		addr = dma_map_page(dpaa_bp->priv->rx_dma_dev,
+				    virt_to_page(vaddr), 0, DPAA_BP_RAW_SIZE,
+				    DMA_FROM_DEVICE);
+		if (dma_mapping_error(dpaa_bp->priv->rx_dma_dev, addr)) {
+			netdev_err(net_dev, "DMA mapping failed\n");
 			return;
 		}
 		bm_buffer_set64(&bmb, addr);
@@ -1396,7 +1372,7 @@ static void count_ern(struct dpaa_percpu_priv *percpu_priv,
 static int dpaa_enable_tx_csum(struct dpaa_priv *priv,
 			       struct sk_buff *skb,
 			       struct qm_fd *fd,
-			       char *parse_results)
+			       void *parse_results)
 {
 	struct fman_prs_result *parse_result;
 	u16 ethertype = ntohs(skb->protocol);
@@ -1488,25 +1464,24 @@ return_error:
 
 static int dpaa_bp_add_8_bufs(const struct dpaa_bp *dpaa_bp)
 {
-	struct device *dev = dpaa_bp->dev;
+	struct net_device *net_dev = dpaa_bp->priv->net_dev;
 	struct bm_buffer bmb[8];
 	dma_addr_t addr;
-	void *new_buf;
+	struct page *p;
 	u8 i;
 
 	for (i = 0; i < 8; i++) {
-		new_buf = netdev_alloc_frag(dpaa_bp->raw_size);
-		if (unlikely(!new_buf)) {
-			dev_err(dev, "netdev_alloc_frag() failed, size %zu\n",
-				dpaa_bp->raw_size);
+		p = dev_alloc_pages(0);
+		if (unlikely(!p)) {
+			netdev_err(net_dev, "dev_alloc_pages() failed\n");
 			goto release_previous_buffs;
 		}
-		new_buf = PTR_ALIGN(new_buf, SMP_CACHE_BYTES);
 
-		addr = dma_map_single(dev, new_buf,
-				      dpaa_bp->size, DMA_FROM_DEVICE);
-		if (unlikely(dma_mapping_error(dev, addr))) {
-			dev_err(dpaa_bp->dev, "DMA map failed");
+		addr = dma_map_page(dpaa_bp->priv->rx_dma_dev, p, 0,
+				    DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
+		if (unlikely(dma_mapping_error(dpaa_bp->priv->rx_dma_dev,
+					       addr))) {
+			netdev_err(net_dev, "DMA map failed\n");
 			goto release_previous_buffs;
 		}
 
@@ -1581,17 +1556,16 @@ static int dpaa_eth_refill_bpools(struct dpaa_priv *priv)
 {
 	struct dpaa_bp *dpaa_bp;
 	int *countptr;
-	int res, i;
+	int res;
+
+	dpaa_bp = priv->dpaa_bp;
+	if (!dpaa_bp)
+		return -EINVAL;
+	countptr = this_cpu_ptr(dpaa_bp->percpu_count);
+	res  = dpaa_eth_refill_bpool(dpaa_bp, countptr);
+	if (res)
+		return res;
 
-	for (i = 0; i < DPAA_BPS_NUM; i++) {
-		dpaa_bp = priv->dpaa_bps[i];
-		if (!dpaa_bp)
-			return -EINVAL;
-		countptr = this_cpu_ptr(dpaa_bp->percpu_count);
-		res  = dpaa_eth_refill_bpool(dpaa_bp, countptr);
-		if (res)
-			return res;
-	}
 	return 0;
 }
 
@@ -1600,68 +1574,74 @@ static int dpaa_eth_refill_bpools(struct dpaa_priv *priv)
  * Skb freeing is not handled here.
  *
  * This function may be called on error paths in the Tx function, so guard
- * against cases when not all fd relevant fields were filled in.
+ * against cases when not all fd relevant fields were filled in. To avoid
+ * reading the invalid transmission timestamp for the error paths set ts to
+ * false.
  *
  * Return the skb backpointer, since for S/G frames the buffer containing it
  * gets freed here.
  */
 static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
-					  const struct qm_fd *fd)
+					  const struct qm_fd *fd, bool ts)
 {
 	const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
 	struct device *dev = priv->net_dev->dev.parent;
 	struct skb_shared_hwtstamps shhwtstamps;
 	dma_addr_t addr = qm_fd_addr(fd);
+	void *vaddr = phys_to_virt(addr);
 	const struct qm_sg_entry *sgt;
-	struct sk_buff **skbh, *skb;
-	int nr_frags, i;
+	struct sk_buff *skb;
 	u64 ns;
-
-	skbh = (struct sk_buff **)phys_to_virt(addr);
-	skb = *skbh;
-
-	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
-		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
-
-		if (!fman_port_get_tstamp(priv->mac_dev->port[TX], (void *)skbh,
-					  &ns)) {
-			shhwtstamps.hwtstamp = ns_to_ktime(ns);
-			skb_tstamp_tx(skb, &shhwtstamps);
-		} else {
-			dev_warn(dev, "fman_port_get_tstamp failed!\n");
-		}
-	}
+	int i;
 
 	if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
-		nr_frags = skb_shinfo(skb)->nr_frags;
-		dma_unmap_single(dev, addr,
-				 qm_fd_get_offset(fd) + DPAA_SGT_SIZE,
-				 dma_dir);
+		dma_unmap_page(priv->tx_dma_dev, addr,
+			       qm_fd_get_offset(fd) + DPAA_SGT_SIZE,
+			       dma_dir);
 
 		/* The sgt buffer has been allocated with netdev_alloc_frag(),
 		 * it's from lowmem.
 		 */
-		sgt = phys_to_virt(addr + qm_fd_get_offset(fd));
+		sgt = vaddr + qm_fd_get_offset(fd);
 
 		/* sgt[0] is from lowmem, was dma_map_single()-ed */
-		dma_unmap_single(dev, qm_sg_addr(&sgt[0]),
+		dma_unmap_single(priv->tx_dma_dev, qm_sg_addr(&sgt[0]),
 				 qm_sg_entry_get_len(&sgt[0]), dma_dir);
 
 		/* remaining pages were mapped with skb_frag_dma_map() */
-		for (i = 1; i <= nr_frags; i++) {
+		for (i = 1; (i < DPAA_SGT_MAX_ENTRIES) &&
+		     !qm_sg_entry_is_final(&sgt[i - 1]); i++) {
 			WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
 
-			dma_unmap_page(dev, qm_sg_addr(&sgt[i]),
+			dma_unmap_page(priv->tx_dma_dev, qm_sg_addr(&sgt[i]),
 				       qm_sg_entry_get_len(&sgt[i]), dma_dir);
 		}
-
-		/* Free the page frag that we allocated on Tx */
-		skb_free_frag(phys_to_virt(addr));
 	} else {
-		dma_unmap_single(dev, addr,
-				 skb_tail_pointer(skb) - (u8 *)skbh, dma_dir);
+		dma_unmap_single(priv->tx_dma_dev, addr,
+				 priv->tx_headroom + qm_fd_get_length(fd),
+				 dma_dir);
+	}
+
+	skb = *(struct sk_buff **)vaddr;
+
+	/* DMA unmapping is required before accessing the HW provided info */
+	if (ts && priv->tx_tstamp &&
+	    skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+
+		if (!fman_port_get_tstamp(priv->mac_dev->port[TX], vaddr,
+					  &ns)) {
+			shhwtstamps.hwtstamp = ns_to_ktime(ns);
+			skb_tstamp_tx(skb, &shhwtstamps);
+		} else {
+			dev_warn(dev, "fman_port_get_tstamp failed!\n");
+		}
 	}
 
+	if (qm_fd_get_format(fd) == qm_fd_sg)
+		/* Free the page that we allocated on Tx for the SGT */
+		free_pages((unsigned long)vaddr, 0);
+
 	return skb;
 }
 
@@ -1715,7 +1695,7 @@ static struct sk_buff *contig_fd_to_skb(const struct dpaa_priv *priv,
 	return skb;
 
 free_buffer:
-	skb_free_frag(vaddr);
+	free_pages((unsigned long)vaddr, 0);
 	return NULL;
 }
 
@@ -1762,8 +1742,8 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 			goto free_buffers;
 
 		count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
-		dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size,
-				 DMA_FROM_DEVICE);
+		dma_unmap_page(priv->rx_dma_dev, sg_addr,
+			       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
 		if (!skb) {
 			sz = dpaa_bp->size +
 				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -1815,7 +1795,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 	WARN_ONCE(i == DPAA_SGT_MAX_ENTRIES, "No final bit on SGT\n");
 
 	/* free the SG table buffer */
-	skb_free_frag(vaddr);
+	free_pages((unsigned long)vaddr, 0);
 
 	return skb;
 
@@ -1832,7 +1812,7 @@ free_buffers:
 	for (i = 0; i < DPAA_SGT_MAX_ENTRIES ; i++) {
 		sg_addr = qm_sg_addr(&sgt[i]);
 		sg_vaddr = phys_to_virt(sg_addr);
-		skb_free_frag(sg_vaddr);
+		free_pages((unsigned long)sg_vaddr, 0);
 		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
 		if (dpaa_bp) {
 			count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
@@ -1843,7 +1823,7 @@ free_buffers:
 			break;
 	}
 	/* free the SGT fragment */
-	skb_free_frag(vaddr);
+	free_pages((unsigned long)vaddr, 0);
 
 	return NULL;
 }
@@ -1853,9 +1833,8 @@ static int skb_to_contig_fd(struct dpaa_priv *priv,
 			    int *offset)
 {
 	struct net_device *net_dev = priv->net_dev;
-	struct device *dev = net_dev->dev.parent;
 	enum dma_data_direction dma_dir;
-	unsigned char *buffer_start;
+	unsigned char *buff_start;
 	struct sk_buff **skbh;
 	dma_addr_t addr;
 	int err;
@@ -1864,10 +1843,10 @@ static int skb_to_contig_fd(struct dpaa_priv *priv,
 	 * available, so just use that for offset.
 	 */
 	fd->bpid = FSL_DPAA_BPID_INV;
-	buffer_start = skb->data - priv->tx_headroom;
+	buff_start = skb->data - priv->tx_headroom;
 	dma_dir = DMA_TO_DEVICE;
 
-	skbh = (struct sk_buff **)buffer_start;
+	skbh = (struct sk_buff **)buff_start;
 	*skbh = skb;
 
 	/* Enable L3/L4 hardware checksum computation.
@@ -1876,7 +1855,7 @@ static int skb_to_contig_fd(struct dpaa_priv *priv,
 	 * need to write into the skb.
 	 */
 	err = dpaa_enable_tx_csum(priv, skb, fd,
-				  ((char *)skbh) + DPAA_TX_PRIV_DATA_SIZE);
+				  buff_start + DPAA_TX_PRIV_DATA_SIZE);
 	if (unlikely(err < 0)) {
 		if (net_ratelimit())
 			netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
@@ -1889,9 +1868,9 @@ static int skb_to_contig_fd(struct dpaa_priv *priv,
 	fd->cmd |= cpu_to_be32(FM_FD_CMD_FCO);
 
 	/* Map the entire buffer size that may be seen by FMan, but no more */
-	addr = dma_map_single(dev, skbh,
-			      skb_tail_pointer(skb) - buffer_start, dma_dir);
-	if (unlikely(dma_mapping_error(dev, addr))) {
+	addr = dma_map_single(priv->tx_dma_dev, buff_start,
+			      priv->tx_headroom + skb->len, dma_dir);
+	if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
 		if (net_ratelimit())
 			netif_err(priv, tx_err, net_dev, "dma_map_single() failed\n");
 		return -EINVAL;
@@ -1907,24 +1886,22 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 	const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
 	const int nr_frags = skb_shinfo(skb)->nr_frags;
 	struct net_device *net_dev = priv->net_dev;
-	struct device *dev = net_dev->dev.parent;
 	struct qm_sg_entry *sgt;
 	struct sk_buff **skbh;
-	int i, j, err, sz;
-	void *buffer_start;
+	void *buff_start;
 	skb_frag_t *frag;
 	dma_addr_t addr;
 	size_t frag_len;
-	void *sgt_buf;
-
-	/* get a page frag to store the SGTable */
-	sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE);
-	sgt_buf = netdev_alloc_frag(sz);
-	if (unlikely(!sgt_buf)) {
-		netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n",
-			   sz);
+	struct page *p;
+	int i, j, err;
+
+	/* get a page to store the SGTable */
+	p = dev_alloc_pages(0);
+	if (unlikely(!p)) {
+		netdev_err(net_dev, "dev_alloc_pages() failed\n");
 		return -ENOMEM;
 	}
+	buff_start = page_address(p);
 
 	/* Enable L3/L4 hardware checksum computation.
 	 *
@@ -1932,7 +1909,7 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 	 * need to write into the skb.
 	 */
 	err = dpaa_enable_tx_csum(priv, skb, fd,
-				  sgt_buf + DPAA_TX_PRIV_DATA_SIZE);
+				  buff_start + DPAA_TX_PRIV_DATA_SIZE);
 	if (unlikely(err < 0)) {
 		if (net_ratelimit())
 			netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
@@ -1941,15 +1918,15 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 	}
 
 	/* SGT[0] is used by the linear part */
-	sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
+	sgt = (struct qm_sg_entry *)(buff_start + priv->tx_headroom);
 	frag_len = skb_headlen(skb);
 	qm_sg_entry_set_len(&sgt[0], frag_len);
 	sgt[0].bpid = FSL_DPAA_BPID_INV;
 	sgt[0].offset = 0;
-	addr = dma_map_single(dev, skb->data,
+	addr = dma_map_single(priv->tx_dma_dev, skb->data,
 			      skb_headlen(skb), dma_dir);
-	if (unlikely(dma_mapping_error(dev, addr))) {
-		dev_err(dev, "DMA mapping failed");
+	if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
+		netdev_err(priv->net_dev, "DMA mapping failed\n");
 		err = -EINVAL;
 		goto sg0_map_failed;
 	}
@@ -1960,10 +1937,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 		frag = &skb_shinfo(skb)->frags[i];
 		frag_len = skb_frag_size(frag);
 		WARN_ON(!skb_frag_page(frag));
-		addr = skb_frag_dma_map(dev, frag, 0,
+		addr = skb_frag_dma_map(priv->tx_dma_dev, frag, 0,
 					frag_len, dma_dir);
-		if (unlikely(dma_mapping_error(dev, addr))) {
-			dev_err(dev, "DMA mapping failed");
+		if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
+			netdev_err(priv->net_dev, "DMA mapping failed\n");
 			err = -EINVAL;
 			goto sg_map_failed;
 		}
@@ -1979,17 +1956,17 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 	/* Set the final bit in the last used entry of the SGT */
 	qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
 
+	/* set fd offset to priv->tx_headroom */
 	qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
 
 	/* DMA map the SGT page */
-	buffer_start = (void *)sgt - priv->tx_headroom;
-	skbh = (struct sk_buff **)buffer_start;
+	skbh = (struct sk_buff **)buff_start;
 	*skbh = skb;
 
-	addr = dma_map_single(dev, buffer_start,
-			      priv->tx_headroom + DPAA_SGT_SIZE, dma_dir);
-	if (unlikely(dma_mapping_error(dev, addr))) {
-		dev_err(dev, "DMA mapping failed");
+	addr = dma_map_page(priv->tx_dma_dev, p, 0,
+			    priv->tx_headroom + DPAA_SGT_SIZE, dma_dir);
+	if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
+		netdev_err(priv->net_dev, "DMA mapping failed\n");
 		err = -EINVAL;
 		goto sgt_map_failed;
 	}
@@ -2003,11 +1980,11 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 sgt_map_failed:
 sg_map_failed:
 	for (j = 0; j < i; j++)
-		dma_unmap_page(dev, qm_sg_addr(&sgt[j]),
+		dma_unmap_page(priv->tx_dma_dev, qm_sg_addr(&sgt[j]),
 			       qm_sg_entry_get_len(&sgt[j]), dma_dir);
 sg0_map_failed:
 csum_failed:
-	skb_free_frag(sgt_buf);
+	free_pages((unsigned long)buff_start, 0);
 
 	return err;
 }
@@ -2114,7 +2091,7 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 	if (likely(dpaa_xmit(priv, percpu_stats, queue_mapping, &fd) == 0))
 		return NETDEV_TX_OK;
 
-	dpaa_cleanup_tx_fd(priv, &fd);
+	dpaa_cleanup_tx_fd(priv, &fd, false);
 skb_to_fd_failed:
 enomem:
 	percpu_stats->tx_errors++;
@@ -2160,7 +2137,7 @@ static void dpaa_tx_error(struct net_device *net_dev,
 
 	percpu_priv->stats.tx_errors++;
 
-	skb = dpaa_cleanup_tx_fd(priv, fd);
+	skb = dpaa_cleanup_tx_fd(priv, fd, false);
 	dev_kfree_skb(skb);
 }
 
@@ -2200,7 +2177,7 @@ static void dpaa_tx_conf(struct net_device *net_dev,
 
 	percpu_priv->tx_confirm++;
 
-	skb = dpaa_cleanup_tx_fd(priv, fd);
+	skb = dpaa_cleanup_tx_fd(priv, fd, true);
 
 	consume_skb(skb);
 }
@@ -2304,11 +2281,8 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
 		return qman_cb_dqrr_consume;
 	}
 
-	dpaa_bp = dpaa_bpid2pool(fd->bpid);
-	if (!dpaa_bp)
-		return qman_cb_dqrr_consume;
-
-	dma_unmap_single(dpaa_bp->dev, addr, dpaa_bp->size, DMA_FROM_DEVICE);
+	dma_unmap_page(dpaa_bp->priv->rx_dma_dev, addr, DPAA_BP_RAW_SIZE,
+		       DMA_FROM_DEVICE);
 
 	/* prefetch the first 64 bytes of the frame or the SGT start */
 	vaddr = phys_to_virt(addr);
@@ -2430,7 +2404,7 @@ static void egress_ern(struct qman_portal *portal,
 	percpu_priv->stats.tx_fifo_errors++;
 	count_ern(percpu_priv, msg);
 
-	skb = dpaa_cleanup_tx_fd(priv, fd);
+	skb = dpaa_cleanup_tx_fd(priv, fd, false);
 	dev_kfree_skb_any(skb);
 }
 
@@ -2663,7 +2637,8 @@ static inline void dpaa_bp_free_pf(const struct dpaa_bp *bp,
 {
 	dma_addr_t addr = bm_buf_addr(bmb);
 
-	dma_unmap_single(bp->dev, addr, bp->size, DMA_FROM_DEVICE);
+	dma_unmap_page(bp->priv->rx_dma_dev, addr, DPAA_BP_RAW_SIZE,
+		       DMA_FROM_DEVICE);
 
 	skb_free_frag(phys_to_virt(addr));
 }
@@ -2764,21 +2739,46 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
 
 static int dpaa_eth_probe(struct platform_device *pdev)
 {
-	struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM] = {NULL};
 	struct net_device *net_dev = NULL;
+	struct dpaa_bp *dpaa_bp = NULL;
 	struct dpaa_fq *dpaa_fq, *tmp;
 	struct dpaa_priv *priv = NULL;
 	struct fm_port_fqs port_fqs;
 	struct mac_device *mac_dev;
-	int err = 0, i, channel;
+	int err = 0, channel;
 	struct device *dev;
 
-	/* device used for DMA mapping */
-	dev = pdev->dev.parent;
-	err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
-	if (err) {
-		dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
-		return err;
+	dev = &pdev->dev;
+
+	err = bman_is_probed();
+	if (!err)
+		return -EPROBE_DEFER;
+	if (err < 0) {
+		dev_err(dev, "failing probe due to bman probe error\n");
+		return -ENODEV;
+	}
+	err = qman_is_probed();
+	if (!err)
+		return -EPROBE_DEFER;
+	if (err < 0) {
+		dev_err(dev, "failing probe due to qman probe error\n");
+		return -ENODEV;
+	}
+	err = bman_portals_probed();
+	if (!err)
+		return -EPROBE_DEFER;
+	if (err < 0) {
+		dev_err(dev,
+			"failing probe due to bman portals probe error\n");
+		return -ENODEV;
+	}
+	err = qman_portals_probed();
+	if (!err)
+		return -EPROBE_DEFER;
+	if (err < 0) {
+		dev_err(dev,
+			"failing probe due to qman portals probe error\n");
+		return -ENODEV;
 	}
 
 	/* Allocate this early, so we can store relevant information in
@@ -2801,11 +2801,23 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 
 	mac_dev = dpaa_mac_dev_get(pdev);
 	if (IS_ERR(mac_dev)) {
-		dev_err(dev, "dpaa_mac_dev_get() failed\n");
+		netdev_err(net_dev, "dpaa_mac_dev_get() failed\n");
 		err = PTR_ERR(mac_dev);
 		goto free_netdev;
 	}
 
+	/* Devices used for DMA mapping */
+	priv->rx_dma_dev = fman_port_get_device(mac_dev->port[RX]);
+	priv->tx_dma_dev = fman_port_get_device(mac_dev->port[TX]);
+	err = dma_coerce_mask_and_coherent(priv->rx_dma_dev, DMA_BIT_MASK(40));
+	if (!err)
+		err = dma_coerce_mask_and_coherent(priv->tx_dma_dev,
+						   DMA_BIT_MASK(40));
+	if (err) {
+		netdev_err(net_dev, "dma_coerce_mask_and_coherent() failed\n");
+		return err;
+	}
+
 	/* If fsl_fm_max_frm is set to a higher value than the all-common 1500,
 	 * we choose conservatively and let the user explicitly set a higher
 	 * MTU via ifconfig. Otherwise, the user may end up with different MTUs
@@ -2822,23 +2834,21 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
 
 	/* bp init */
-	for (i = 0; i < DPAA_BPS_NUM; i++) {
-		dpaa_bps[i] = dpaa_bp_alloc(dev);
-		if (IS_ERR(dpaa_bps[i])) {
-			err = PTR_ERR(dpaa_bps[i]);
-			goto free_dpaa_bps;
-		}
-		/* the raw size of the buffers used for reception */
-		dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM);
-		/* avoid runtime computations by keeping the usable size here */
-		dpaa_bps[i]->size = dpaa_bp_size(dpaa_bps[i]->raw_size);
-		dpaa_bps[i]->dev = dev;
-
-		err = dpaa_bp_alloc_pool(dpaa_bps[i]);
-		if (err < 0)
-			goto free_dpaa_bps;
-		priv->dpaa_bps[i] = dpaa_bps[i];
+	dpaa_bp = dpaa_bp_alloc(dev);
+	if (IS_ERR(dpaa_bp)) {
+		err = PTR_ERR(dpaa_bp);
+		goto free_dpaa_bps;
 	}
+	/* the raw size of the buffers used for reception */
+	dpaa_bp->raw_size = DPAA_BP_RAW_SIZE;
+	/* avoid runtime computations by keeping the usable size here */
+	dpaa_bp->size = dpaa_bp_size(dpaa_bp->raw_size);
+	dpaa_bp->priv = priv;
+
+	err = dpaa_bp_alloc_pool(dpaa_bp);
+	if (err < 0)
+		goto free_dpaa_bps;
+	priv->dpaa_bp = dpaa_bp;
 
 	INIT_LIST_HEAD(&priv->dpaa_fq_list);
 
@@ -2864,7 +2874,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	/* Walk the CPUs with affine portals
 	 * and add this pool channel to each's dequeue mask.
 	 */
-	dpaa_eth_add_channel(priv->channel);
+	dpaa_eth_add_channel(priv->channel, &pdev->dev);
 
 	dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]);
 
@@ -2896,7 +2906,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	priv->rx_headroom = dpaa_get_headroom(&priv->buf_layout[RX]);
 
 	/* All real interfaces need their ports initialized */
-	err = dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs,
+	err = dpaa_eth_init_ports(mac_dev, dpaa_bp, &port_fqs,
 				  &priv->buf_layout[0], dev);
 	if (err)
 		goto free_dpaa_fqs;
@@ -2955,7 +2965,7 @@ static int dpaa_remove(struct platform_device *pdev)
 	struct device *dev;
 	int err;
 
-	dev = pdev->dev.parent;
+	dev = &pdev->dev;
 	net_dev = dev_get_drvdata(dev);
 
 	priv = netdev_priv(net_dev);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
index f7e59e8db075..fc2cc4c48e06 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
@@ -47,8 +47,6 @@
 /* Total number of Tx queues */
 #define DPAA_ETH_TXQ_NUM	(DPAA_TC_NUM * DPAA_TC_TXQ_NUM)
 
-#define DPAA_BPS_NUM 3 /* number of bpools per interface */
-
 /* More detailed FQ types - used for fine-grained WQ assignments */
 enum dpaa_fq_type {
 	FQ_TYPE_RX_DEFAULT = 1, /* Rx Default FQs */
@@ -80,9 +78,11 @@ struct dpaa_fq_cbs {
 	struct qman_fq egress_ern;
 };
 
+struct dpaa_priv;
+
 struct dpaa_bp {
-	/* device used in the DMA mapping operations */
-	struct device *dev;
+	/* used in the DMA mapping operations */
+	struct dpaa_priv *priv;
 	/* current number of buffers in the buffer pool alloted to each CPU */
 	int __percpu *percpu_count;
 	/* all buffers allocated for this pool have this raw size */
@@ -146,13 +146,15 @@ struct dpaa_buffer_layout {
 
 struct dpaa_priv {
 	struct dpaa_percpu_priv __percpu *percpu_priv;
-	struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM];
+	struct dpaa_bp *dpaa_bp;
 	/* Store here the needed Tx headroom for convenience and speed
 	 * (even though it can be computed based on the fields of buf_layout)
 	 */
 	u16 tx_headroom;
 	struct net_device *net_dev;
 	struct mac_device *mac_dev;
+	struct device *rx_dma_dev;
+	struct device *tx_dma_dev;
 	struct qman_fq *egress_fqs[DPAA_ETH_TXQ_NUM];
 	struct qman_fq *conf_fqs[DPAA_ETH_TXQ_NUM];
 
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
index 0d9b185e317f..ee62d25cac81 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
@@ -131,11 +131,9 @@ static ssize_t dpaa_eth_show_bpids(struct device *dev,
 {
 	struct dpaa_priv *priv = netdev_priv(to_net_dev(dev));
 	ssize_t bytes = 0;
-	int i = 0;
 
-	for (i = 0; i < DPAA_BPS_NUM; i++)
-		bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, "%u\n",
-				  priv->dpaa_bps[i]->bpid);
+	bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, "%u\n",
+				  priv->dpaa_bp->bpid);
 
 	return bytes;
 }
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 7ce2e99b594d..66d150872d48 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -47,6 +47,8 @@ static const char dpaa_stats_percpu[][ETH_GSTRING_LEN] = {
 	"tx S/G",
 	"tx error",
 	"rx error",
+	"rx dropped",
+	"tx dropped",
 };
 
 static char dpaa_stats_global[][ETH_GSTRING_LEN] = {
@@ -78,10 +80,8 @@ static char dpaa_stats_global[][ETH_GSTRING_LEN] = {
 static int dpaa_get_link_ksettings(struct net_device *net_dev,
 				   struct ethtool_link_ksettings *cmd)
 {
-	if (!net_dev->phydev) {
-		netdev_dbg(net_dev, "phy device not initialized\n");
+	if (!net_dev->phydev)
 		return 0;
-	}
 
 	phy_ethtool_ksettings_get(net_dev->phydev, cmd);
 
@@ -93,10 +93,8 @@ static int dpaa_set_link_ksettings(struct net_device *net_dev,
 {
 	int err;
 
-	if (!net_dev->phydev) {
-		netdev_err(net_dev, "phy device not initialized\n");
+	if (!net_dev->phydev)
 		return -ENODEV;
-	}
 
 	err = phy_ethtool_ksettings_set(net_dev->phydev, cmd);
 	if (err < 0)
@@ -140,10 +138,8 @@ static int dpaa_nway_reset(struct net_device *net_dev)
 {
 	int err;
 
-	if (!net_dev->phydev) {
-		netdev_err(net_dev, "phy device not initialized\n");
+	if (!net_dev->phydev)
 		return -ENODEV;
-	}
 
 	err = 0;
 	if (net_dev->phydev->autoneg) {
@@ -165,10 +161,8 @@ static void dpaa_get_pauseparam(struct net_device *net_dev,
 	priv = netdev_priv(net_dev);
 	mac_dev = priv->mac_dev;
 
-	if (!net_dev->phydev) {
-		netdev_err(net_dev, "phy device not initialized\n");
+	if (!net_dev->phydev)
 		return;
-	}
 
 	epause->autoneg = mac_dev->autoneg_pause;
 	epause->rx_pause = mac_dev->rx_pause_active;
@@ -223,7 +217,7 @@ static int dpaa_get_sset_count(struct net_device *net_dev, int type)
 	unsigned int total_stats, num_stats;
 
 	num_stats   = num_online_cpus() + 1;
-	total_stats = num_stats * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM) +
+	total_stats = num_stats * (DPAA_STATS_PERCPU_LEN + 1) +
 			DPAA_STATS_GLOBAL_LEN;
 
 	switch (type) {
@@ -235,10 +229,10 @@ static int dpaa_get_sset_count(struct net_device *net_dev, int type)
 }
 
 static void copy_stats(struct dpaa_percpu_priv *percpu_priv, int num_cpus,
-		       int crr_cpu, u64 *bp_count, u64 *data)
+		       int crr_cpu, u64 bp_count, u64 *data)
 {
 	int num_values = num_cpus + 1;
-	int crr = 0, j;
+	int crr = 0;
 
 	/* update current CPU's stats and also add them to the total values */
 	data[crr * num_values + crr_cpu] = percpu_priv->in_interrupt;
@@ -262,23 +256,27 @@ static void copy_stats(struct dpaa_percpu_priv *percpu_priv, int num_cpus,
 	data[crr * num_values + crr_cpu] = percpu_priv->stats.rx_errors;
 	data[crr++ * num_values + num_cpus] += percpu_priv->stats.rx_errors;
 
-	for (j = 0; j < DPAA_BPS_NUM; j++) {
-		data[crr * num_values + crr_cpu] = bp_count[j];
-		data[crr++ * num_values + num_cpus] += bp_count[j];
-	}
+	data[crr * num_values + crr_cpu] = percpu_priv->stats.rx_dropped;
+	data[crr++ * num_values + num_cpus] += percpu_priv->stats.rx_dropped;
+
+	data[crr * num_values + crr_cpu] = percpu_priv->stats.tx_dropped;
+	data[crr++ * num_values + num_cpus] += percpu_priv->stats.tx_dropped;
+
+	data[crr * num_values + crr_cpu] = bp_count;
+	data[crr++ * num_values + num_cpus] += bp_count;
 }
 
 static void dpaa_get_ethtool_stats(struct net_device *net_dev,
 				   struct ethtool_stats *stats, u64 *data)
 {
-	u64 bp_count[DPAA_BPS_NUM], cg_time, cg_num;
 	struct dpaa_percpu_priv *percpu_priv;
 	struct dpaa_rx_errors rx_errors;
 	unsigned int num_cpus, offset;
+	u64 bp_count, cg_time, cg_num;
 	struct dpaa_ern_cnt ern_cnt;
 	struct dpaa_bp *dpaa_bp;
 	struct dpaa_priv *priv;
-	int total_stats, i, j;
+	int total_stats, i;
 	bool cg_status;
 
 	total_stats = dpaa_get_sset_count(net_dev, ETH_SS_STATS);
@@ -292,12 +290,10 @@ static void dpaa_get_ethtool_stats(struct net_device *net_dev,
 
 	for_each_online_cpu(i) {
 		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
-		for (j = 0; j < DPAA_BPS_NUM; j++) {
-			dpaa_bp = priv->dpaa_bps[j];
-			if (!dpaa_bp->percpu_count)
-				continue;
-			bp_count[j] = *(per_cpu_ptr(dpaa_bp->percpu_count, i));
-		}
+		dpaa_bp = priv->dpaa_bp;
+		if (!dpaa_bp->percpu_count)
+			continue;
+		bp_count = *(per_cpu_ptr(dpaa_bp->percpu_count, i));
 		rx_errors.dme += percpu_priv->rx_errors.dme;
 		rx_errors.fpe += percpu_priv->rx_errors.fpe;
 		rx_errors.fse += percpu_priv->rx_errors.fse;
@@ -315,7 +311,7 @@ static void dpaa_get_ethtool_stats(struct net_device *net_dev,
 		copy_stats(percpu_priv, num_cpus, i, bp_count, data);
 	}
 
-	offset = (num_cpus + 1) * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM);
+	offset = (num_cpus + 1) * (DPAA_STATS_PERCPU_LEN + 1);
 	memcpy(data + offset, &rx_errors, sizeof(struct dpaa_rx_errors));
 
 	offset += sizeof(struct dpaa_rx_errors) / sizeof(u64);
@@ -363,18 +359,16 @@ static void dpaa_get_strings(struct net_device *net_dev, u32 stringset,
 		memcpy(strings, string_cpu, ETH_GSTRING_LEN);
 		strings += ETH_GSTRING_LEN;
 	}
-	for (i = 0; i < DPAA_BPS_NUM; i++) {
-		for (j = 0; j < num_cpus; j++) {
-			snprintf(string_cpu, ETH_GSTRING_LEN,
-				 "bpool %c [CPU %d]", 'a' + i, j);
-			memcpy(strings, string_cpu, ETH_GSTRING_LEN);
-			strings += ETH_GSTRING_LEN;
-		}
-		snprintf(string_cpu, ETH_GSTRING_LEN, "bpool %c [TOTAL]",
-			 'a' + i);
+	for (j = 0; j < num_cpus; j++) {
+		snprintf(string_cpu, ETH_GSTRING_LEN,
+			 "bpool [CPU %d]", j);
 		memcpy(strings, string_cpu, ETH_GSTRING_LEN);
 		strings += ETH_GSTRING_LEN;
 	}
+	snprintf(string_cpu, ETH_GSTRING_LEN, "bpool [TOTAL]");
+	memcpy(strings, string_cpu, ETH_GSTRING_LEN);
+	strings += ETH_GSTRING_LEN;
+
 	memcpy(strings, dpaa_stats_global, size);
 }
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig
index fbef2829f3de..c6fb8e4021ac 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Kconfig
+++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig
@@ -2,6 +2,7 @@
 config FSL_DPAA2_ETH
 	tristate "Freescale DPAA2 Ethernet"
 	depends on FSL_MC_BUS && FSL_MC_DPIO
+	select PHYLINK
 	help
 	  This is the DPAA2 Ethernet driver supporting Freescale SoCs
 	  with DPAA2 (DataPath Acceleration Architecture v2).
diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile
index d1e78cdd512f..69184ca3b7b9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Makefile
+++ b/drivers/net/ethernet/freescale/dpaa2/Makefile
@@ -6,7 +6,7 @@
 obj-$(CONFIG_FSL_DPAA2_ETH)		+= fsl-dpaa2-eth.o
 obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK)	+= fsl-dpaa2-ptp.o
 
-fsl-dpaa2-eth-objs	:= dpaa2-eth.o dpaa2-ethtool.o dpni.o
+fsl-dpaa2-eth-objs	:= dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs	:= dpaa2-ptp.o dprtc.o
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 19379bae0144..c26c0a7cbb6b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
 /* Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016-2017 NXP
+ * Copyright 2016-2019 NXP
  */
 #include <linux/init.h>
 #include <linux/module.h>
@@ -221,6 +221,7 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv,
 			    struct dpaa2_eth_channel *ch,
 			    dma_addr_t addr)
 {
+	int retries = 0;
 	int err;
 
 	ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr;
@@ -229,8 +230,11 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv,
 
 	while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid,
 					       ch->xdp.drop_bufs,
-					       ch->xdp.drop_cnt)) == -EBUSY)
+					       ch->xdp.drop_cnt)) == -EBUSY) {
+		if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES)
+			break;
 		cpu_relax();
+	}
 
 	if (err) {
 		free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
@@ -458,7 +462,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
 	struct dpaa2_eth_fq *fq = NULL;
 	struct dpaa2_dq *dq;
 	const struct dpaa2_fd *fd;
-	int cleaned = 0;
+	int cleaned = 0, retries = 0;
 	int is_last;
 
 	do {
@@ -469,6 +473,11 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
 			 * the store until we get some sort of valid response
 			 * token (either a valid frame or an "empty dequeue")
 			 */
+			if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) {
+				netdev_err_once(priv->net_dev,
+						"Unable to read a valid dequeue response\n");
+				return -ETIMEDOUT;
+			}
 			continue;
 		}
 
@@ -477,6 +486,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
 
 		fq->consume(priv, ch, fd, fq);
 		cleaned++;
+		retries = 0;
 	} while (!is_last);
 
 	if (!cleaned)
@@ -949,6 +959,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
 	u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
 	struct page *page;
 	dma_addr_t addr;
+	int retries = 0;
 	int i, err;
 
 	for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) {
@@ -980,8 +991,11 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
 release_bufs:
 	/* In case the portal is busy, retry until successful */
 	while ((err = dpaa2_io_service_release(ch->dpio, bpid,
-					       buf_array, i)) == -EBUSY)
+					       buf_array, i)) == -EBUSY) {
+		if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES)
+			break;
 		cpu_relax();
+	}
 
 	/* If release command failed, clean up and bail out;
 	 * not much else we can do about it
@@ -1032,16 +1046,21 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
 static void drain_bufs(struct dpaa2_eth_priv *priv, int count)
 {
 	u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
+	int retries = 0;
 	int ret;
 
 	do {
 		ret = dpaa2_io_service_acquire(NULL, priv->bpid,
 					       buf_array, count);
 		if (ret < 0) {
+			if (ret == -EBUSY &&
+			    retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES)
+				continue;
 			netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n");
 			return;
 		}
 		free_bufs(priv, buf_array, ret);
+		retries = 0;
 	} while (ret);
 }
 
@@ -1094,7 +1113,7 @@ static int pull_channel(struct dpaa2_eth_channel *ch)
 						    ch->store);
 		dequeues++;
 		cpu_relax();
-	} while (err == -EBUSY);
+	} while (err == -EBUSY && dequeues < DPAA2_ETH_SWP_BUSY_RETRIES);
 
 	ch->stats.dequeue_portal_busy += dequeues;
 	if (unlikely(err))
@@ -1118,6 +1137,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 	struct netdev_queue *nq;
 	int store_cleaned, work_done;
 	struct list_head rx_list;
+	int retries = 0;
 	int err;
 
 	ch = container_of(napi, struct dpaa2_eth_channel, napi);
@@ -1136,7 +1156,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 		refill_pool(priv, ch, priv->bpid);
 
 		store_cleaned = consume_frames(ch, &fq);
-		if (!store_cleaned)
+		if (store_cleaned <= 0)
 			break;
 		if (fq->type == DPAA2_RX_FQ) {
 			rx_cleaned += store_cleaned;
@@ -1163,7 +1183,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 	do {
 		err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx);
 		cpu_relax();
-	} while (err == -EBUSY);
+	} while (err == -EBUSY && retries++ < DPAA2_ETH_SWP_BUSY_RETRIES);
 	WARN_ONCE(err, "CDAN notifications rearm failed on core %d",
 		  ch->nctx.desired_cpu);
 
@@ -1235,8 +1255,6 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
 	priv->rx_td_enabled = enable;
 }
 
-static void update_tx_fqids(struct dpaa2_eth_priv *priv);
-
 static int link_state_update(struct dpaa2_eth_priv *priv)
 {
 	struct dpni_link_state state = {0};
@@ -1258,12 +1276,17 @@ static int link_state_update(struct dpaa2_eth_priv *priv)
 		   !!(state.options & DPNI_LINK_OPT_ASYM_PAUSE);
 	dpaa2_eth_set_rx_taildrop(priv, !tx_pause);
 
+	/* When we manage the MAC/PHY using phylink there is no need
+	 * to manually update the netif_carrier.
+	 */
+	if (priv->mac)
+		goto out;
+
 	/* Chech link state; speed / duplex changes are not treated yet */
 	if (priv->link_state.up == state.up)
 		goto out;
 
 	if (state.up) {
-		update_tx_fqids(priv);
 		netif_carrier_on(priv->net_dev);
 		netif_tx_start_all_queues(priv->net_dev);
 	} else {
@@ -1295,17 +1318,21 @@ static int dpaa2_eth_open(struct net_device *net_dev)
 			   priv->dpbp_dev->obj_desc.id, priv->bpid);
 	}
 
-	/* We'll only start the txqs when the link is actually ready; make sure
-	 * we don't race against the link up notification, which may come
-	 * immediately after dpni_enable();
-	 */
-	netif_tx_stop_all_queues(net_dev);
+	if (!priv->mac) {
+		/* We'll only start the txqs when the link is actually ready;
+		 * make sure we don't race against the link up notification,
+		 * which may come immediately after dpni_enable();
+		 */
+		netif_tx_stop_all_queues(net_dev);
+
+		/* Also, explicitly set carrier off, otherwise
+		 * netif_carrier_ok() will return true and cause 'ip link show'
+		 * to report the LOWER_UP flag, even though the link
+		 * notification wasn't even received.
+		 */
+		netif_carrier_off(net_dev);
+	}
 	enable_ch_napi(priv);
-	/* Also, explicitly set carrier off, otherwise netif_carrier_ok() will
-	 * return true and cause 'ip link show' to report the LOWER_UP flag,
-	 * even though the link notification wasn't even received.
-	 */
-	netif_carrier_off(net_dev);
 
 	err = dpni_enable(priv->mc_io, 0, priv->mc_token);
 	if (err < 0) {
@@ -1313,13 +1340,17 @@ static int dpaa2_eth_open(struct net_device *net_dev)
 		goto enable_err;
 	}
 
-	/* If the DPMAC object has already processed the link up interrupt,
-	 * we have to learn the link state ourselves.
-	 */
-	err = link_state_update(priv);
-	if (err < 0) {
-		netdev_err(net_dev, "Can't update link state\n");
-		goto link_state_err;
+	if (!priv->mac) {
+		/* If the DPMAC object has already processed the link up
+		 * interrupt, we have to learn the link state ourselves.
+		 */
+		err = link_state_update(priv);
+		if (err < 0) {
+			netdev_err(net_dev, "Can't update link state\n");
+			goto link_state_err;
+		}
+	} else {
+		phylink_start(priv->mac->phylink);
 	}
 
 	return 0;
@@ -1394,8 +1425,12 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
 	int dpni_enabled = 0;
 	int retries = 10;
 
-	netif_tx_stop_all_queues(net_dev);
-	netif_carrier_off(net_dev);
+	if (!priv->mac) {
+		netif_tx_stop_all_queues(net_dev);
+		netif_carrier_off(net_dev);
+	} else {
+		phylink_stop(priv->mac->phylink);
+	}
 
 	/* On dpni_disable(), the MC firmware will:
 	 * - stop MAC Rx and wait for all Rx frames to be enqueued to software
@@ -2046,7 +2081,6 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv)
 {
 	struct fsl_mc_device *dpcon;
 	struct device *dev = priv->net_dev->dev.parent;
-	struct dpcon_attr attrs;
 	int err;
 
 	err = fsl_mc_object_allocate(to_fsl_mc_device(dev),
@@ -2071,12 +2105,6 @@ static struct fsl_mc_device *setup_dpcon(struct dpaa2_eth_priv *priv)
 		goto close;
 	}
 
-	err = dpcon_get_attributes(priv->mc_io, 0, dpcon->mc_handle, &attrs);
-	if (err) {
-		dev_err(dev, "dpcon_get_attributes() failed\n");
-		goto close;
-	}
-
 	err = dpcon_enable(priv->mc_io, 0, dpcon->mc_handle);
 	if (err) {
 		dev_err(dev, "dpcon_enable() failed\n");
@@ -3332,12 +3360,56 @@ static int poll_link_state(void *arg)
 	return 0;
 }
 
+static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv)
+{
+	struct fsl_mc_device *dpni_dev, *dpmac_dev;
+	struct dpaa2_mac *mac;
+	int err;
+
+	dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent);
+	dpmac_dev = fsl_mc_get_endpoint(dpni_dev);
+	if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+		return 0;
+
+	if (dpaa2_mac_is_type_fixed(dpmac_dev, priv->mc_io))
+		return 0;
+
+	mac = kzalloc(sizeof(struct dpaa2_mac), GFP_KERNEL);
+	if (!mac)
+		return -ENOMEM;
+
+	mac->mc_dev = dpmac_dev;
+	mac->mc_io = priv->mc_io;
+	mac->net_dev = priv->net_dev;
+
+	err = dpaa2_mac_connect(mac);
+	if (err) {
+		netdev_err(priv->net_dev, "Error connecting to the MAC endpoint\n");
+		kfree(mac);
+		return err;
+	}
+	priv->mac = mac;
+
+	return 0;
+}
+
+static void dpaa2_eth_disconnect_mac(struct dpaa2_eth_priv *priv)
+{
+	if (!priv->mac)
+		return;
+
+	dpaa2_mac_disconnect(priv->mac);
+	kfree(priv->mac);
+	priv->mac = NULL;
+}
+
 static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
 {
 	u32 status = ~0;
 	struct device *dev = (struct device *)arg;
 	struct fsl_mc_device *dpni_dev = to_fsl_mc_device(dev);
 	struct net_device *net_dev = dev_get_drvdata(dev);
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 	int err;
 
 	err = dpni_get_irq_status(dpni_dev->mc_io, 0, dpni_dev->mc_handle,
@@ -3350,8 +3422,17 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
 	if (status & DPNI_IRQ_EVENT_LINK_CHANGED)
 		link_state_update(netdev_priv(net_dev));
 
-	if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED)
+	if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED) {
 		set_mac_addr(netdev_priv(net_dev));
+		update_tx_fqids(priv);
+
+		rtnl_lock();
+		if (priv->mac)
+			dpaa2_eth_disconnect_mac(priv);
+		else
+			dpaa2_eth_connect_mac(priv);
+		rtnl_unlock();
+	}
 
 	return IRQ_HANDLED;
 }
@@ -3527,6 +3608,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
 		priv->do_link_poll = true;
 	}
 
+	err = dpaa2_eth_connect_mac(priv);
+	if (err)
+		goto err_connect_mac;
+
 	err = register_netdev(net_dev);
 	if (err < 0) {
 		dev_err(dev, "register_netdev() failed\n");
@@ -3541,6 +3626,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
 	return 0;
 
 err_netdev_reg:
+	dpaa2_eth_disconnect_mac(priv);
+err_connect_mac:
 	if (priv->do_link_poll)
 		kthread_stop(priv->poll_thread);
 	else
@@ -3583,6 +3670,10 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
 #ifdef CONFIG_DEBUG_FS
 	dpaa2_dbg_remove(priv);
 #endif
+	rtnl_lock();
+	dpaa2_eth_disconnect_mac(priv);
+	rtnl_unlock();
+
 	unregister_netdev(net_dev);
 
 	if (priv->do_link_poll)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 8a0e65b3267f..7635db3ef903 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -17,6 +17,7 @@
 
 #include "dpaa2-eth-trace.h"
 #include "dpaa2-eth-debugfs.h"
+#include "dpaa2-mac.h"
 
 #define DPAA2_WRIOP_VERSION(x, y, z) ((x) << 10 | (y) << 5 | (z) << 0)
 
@@ -245,6 +246,14 @@ static inline struct dpaa2_faead *dpaa2_get_faead(void *buf_addr, bool swa)
  */
 #define DPAA2_ETH_ENQUEUE_RETRIES	10
 
+/* Number of times to retry DPIO portal operations while waiting
+ * for portal to finish executing current command and become
+ * available. We want to avoid being stuck in a while loop in case
+ * hardware becomes unresponsive, but not give up too easily if
+ * the portal really is busy for valid reasons
+ */
+#define DPAA2_ETH_SWP_BUSY_RETRIES	1000
+
 /* Driver statistics, other than those in struct rtnl_link_stats64.
  * These are usually collected per-CPU and aggregated by ethtool.
  */
@@ -407,6 +416,8 @@ struct dpaa2_eth_priv {
 #ifdef CONFIG_DEBUG_FS
 	struct dpaa2_debugfs dbg;
 #endif
+
+	struct dpaa2_mac *mac;
 };
 
 #define DPAA2_RXH_SUPPORTED	(RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 0aa1c34019bb..96676abcebd5 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -85,6 +85,10 @@ dpaa2_eth_get_link_ksettings(struct net_device *net_dev,
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 
+	if (priv->mac)
+		return phylink_ethtool_ksettings_get(priv->mac->phylink,
+						     link_settings);
+
 	link_settings->base.autoneg = AUTONEG_DISABLE;
 	if (!(priv->link_state.options & DPNI_LINK_OPT_HALF_DUPLEX))
 		link_settings->base.duplex = DUPLEX_FULL;
@@ -93,12 +97,29 @@ dpaa2_eth_get_link_ksettings(struct net_device *net_dev,
 	return 0;
 }
 
+static int
+dpaa2_eth_set_link_ksettings(struct net_device *net_dev,
+			     const struct ethtool_link_ksettings *link_settings)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
+	if (!priv->mac)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_ksettings_set(priv->mac->phylink, link_settings);
+}
+
 static void dpaa2_eth_get_pauseparam(struct net_device *net_dev,
 				     struct ethtool_pauseparam *pause)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 	u64 link_options = priv->link_state.options;
 
+	if (priv->mac) {
+		phylink_ethtool_get_pauseparam(priv->mac->phylink, pause);
+		return;
+	}
+
 	pause->rx_pause = !!(link_options & DPNI_LINK_OPT_PAUSE);
 	pause->tx_pause = pause->rx_pause ^
 			  !!(link_options & DPNI_LINK_OPT_ASYM_PAUSE);
@@ -118,6 +139,9 @@ static int dpaa2_eth_set_pauseparam(struct net_device *net_dev,
 		return -EOPNOTSUPP;
 	}
 
+	if (priv->mac)
+		return phylink_ethtool_set_pauseparam(priv->mac->phylink,
+						      pause);
 	if (pause->autoneg)
 		return -EOPNOTSUPP;
 
@@ -149,6 +173,7 @@ static int dpaa2_eth_set_pauseparam(struct net_device *net_dev,
 static void dpaa2_eth_get_strings(struct net_device *netdev, u32 stringset,
 				  u8 *data)
 {
+	struct dpaa2_eth_priv *priv = netdev_priv(netdev);
 	u8 *p = data;
 	int i;
 
@@ -162,15 +187,22 @@ static void dpaa2_eth_get_strings(struct net_device *netdev, u32 stringset,
 			strlcpy(p, dpaa2_ethtool_extras[i], ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
+		if (priv->mac)
+			dpaa2_mac_get_strings(p);
 		break;
 	}
 }
 
 static int dpaa2_eth_get_sset_count(struct net_device *net_dev, int sset)
 {
+	int num_ss_stats = DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS;
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
 	switch (sset) {
 	case ETH_SS_STATS: /* ethtool_get_stats(), ethtool_get_drvinfo() */
-		return DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS;
+		if (priv->mac)
+			num_ss_stats += dpaa2_mac_get_sset_count();
+		return num_ss_stats;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -216,7 +248,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 		if (err == -EINVAL)
 			/* Older firmware versions don't support all pages */
 			memset(&dpni_stats, 0, sizeof(dpni_stats));
-		else
+		else if (err)
 			netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j);
 
 		num_cnt = dpni_stats_page_size[j] / sizeof(u64);
@@ -269,6 +301,9 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 		return;
 	}
 	*(data + i++) = buf_cnt;
+
+	if (priv->mac)
+		dpaa2_mac_get_ethtool_stats(priv->mac, data + i);
 }
 
 static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,
@@ -728,6 +763,7 @@ const struct ethtool_ops dpaa2_ethtool_ops = {
 	.get_drvinfo = dpaa2_eth_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_link_ksettings = dpaa2_eth_get_link_ksettings,
+	.set_link_ksettings = dpaa2_eth_set_link_ksettings,
 	.get_pauseparam = dpaa2_eth_get_pauseparam,
 	.set_pauseparam = dpaa2_eth_set_pauseparam,
 	.get_sset_count = dpaa2_eth_get_sset_count,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
new file mode 100644
index 000000000000..84233e467ed1
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2019 NXP */
+
+#include "dpaa2-eth.h"
+#include "dpaa2-mac.h"
+
+#define phylink_to_dpaa2_mac(config) \
+	container_of((config), struct dpaa2_mac, phylink_config)
+
+static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode)
+{
+	*if_mode = PHY_INTERFACE_MODE_NA;
+
+	switch (eth_if) {
+	case DPMAC_ETH_IF_RGMII:
+		*if_mode = PHY_INTERFACE_MODE_RGMII;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Caller must call of_node_put on the returned value */
+static struct device_node *dpaa2_mac_get_node(u16 dpmac_id)
+{
+	struct device_node *dpmacs, *dpmac = NULL;
+	u32 id;
+	int err;
+
+	dpmacs = of_find_node_by_name(NULL, "dpmacs");
+	if (!dpmacs)
+		return NULL;
+
+	while ((dpmac = of_get_next_child(dpmacs, dpmac)) != NULL) {
+		err = of_property_read_u32(dpmac, "reg", &id);
+		if (err)
+			continue;
+		if (id == dpmac_id)
+			break;
+	}
+
+	of_node_put(dpmacs);
+
+	return dpmac;
+}
+
+static int dpaa2_mac_get_if_mode(struct device_node *node,
+				 struct dpmac_attr attr)
+{
+	phy_interface_t if_mode;
+	int err;
+
+	err = of_get_phy_mode(node, &if_mode);
+	if (!err)
+		return if_mode;
+
+	err = phy_mode(attr.eth_if, &if_mode);
+	if (!err)
+		return if_mode;
+
+	return err;
+}
+
+static bool dpaa2_mac_phy_mode_mismatch(struct dpaa2_mac *mac,
+					phy_interface_t interface)
+{
+	switch (interface) {
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		return (interface != mac->if_mode);
+	default:
+		return true;
+	}
+}
+
+static void dpaa2_mac_validate(struct phylink_config *config,
+			       unsigned long *supported,
+			       struct phylink_link_state *state)
+{
+	struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+	if (state->interface != PHY_INTERFACE_MODE_NA &&
+	    dpaa2_mac_phy_mode_mismatch(mac, state->interface)) {
+		goto empty_set;
+	}
+
+	phylink_set_port_modes(mask);
+	phylink_set(mask, Autoneg);
+	phylink_set(mask, Pause);
+	phylink_set(mask, Asym_Pause);
+
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		phylink_set(mask, 10baseT_Full);
+		phylink_set(mask, 100baseT_Full);
+		phylink_set(mask, 1000baseT_Full);
+		break;
+	default:
+		goto empty_set;
+	}
+
+	linkmode_and(supported, supported, mask);
+	linkmode_and(state->advertising, state->advertising, mask);
+
+	return;
+
+empty_set:
+	linkmode_zero(supported);
+}
+
+static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode,
+			     const struct phylink_link_state *state)
+{
+	struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+	struct dpmac_link_state *dpmac_state = &mac->state;
+	int err;
+
+	if (state->speed != SPEED_UNKNOWN)
+		dpmac_state->rate = state->speed;
+
+	if (state->duplex != DUPLEX_UNKNOWN) {
+		if (!state->duplex)
+			dpmac_state->options |= DPMAC_LINK_OPT_HALF_DUPLEX;
+		else
+			dpmac_state->options &= ~DPMAC_LINK_OPT_HALF_DUPLEX;
+	}
+
+	if (state->an_enabled)
+		dpmac_state->options |= DPMAC_LINK_OPT_AUTONEG;
+	else
+		dpmac_state->options &= ~DPMAC_LINK_OPT_AUTONEG;
+
+	if (state->pause & MLO_PAUSE_RX)
+		dpmac_state->options |= DPMAC_LINK_OPT_PAUSE;
+	else
+		dpmac_state->options &= ~DPMAC_LINK_OPT_PAUSE;
+
+	if (!!(state->pause & MLO_PAUSE_RX) ^ !!(state->pause & MLO_PAUSE_TX))
+		dpmac_state->options |= DPMAC_LINK_OPT_ASYM_PAUSE;
+	else
+		dpmac_state->options &= ~DPMAC_LINK_OPT_ASYM_PAUSE;
+
+	err = dpmac_set_link_state(mac->mc_io, 0,
+				   mac->mc_dev->mc_handle, dpmac_state);
+	if (err)
+		netdev_err(mac->net_dev, "dpmac_set_link_state() = %d\n", err);
+}
+
+static void dpaa2_mac_link_up(struct phylink_config *config, unsigned int mode,
+			      phy_interface_t interface, struct phy_device *phy)
+{
+	struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+	struct dpmac_link_state *dpmac_state = &mac->state;
+	int err;
+
+	dpmac_state->up = 1;
+	err = dpmac_set_link_state(mac->mc_io, 0,
+				   mac->mc_dev->mc_handle, dpmac_state);
+	if (err)
+		netdev_err(mac->net_dev, "dpmac_set_link_state() = %d\n", err);
+}
+
+static void dpaa2_mac_link_down(struct phylink_config *config,
+				unsigned int mode,
+				phy_interface_t interface)
+{
+	struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config);
+	struct dpmac_link_state *dpmac_state = &mac->state;
+	int err;
+
+	dpmac_state->up = 0;
+	err = dpmac_set_link_state(mac->mc_io, 0,
+				   mac->mc_dev->mc_handle, dpmac_state);
+	if (err)
+		netdev_err(mac->net_dev, "dpmac_set_link_state() = %d\n", err);
+}
+
+static const struct phylink_mac_ops dpaa2_mac_phylink_ops = {
+	.validate = dpaa2_mac_validate,
+	.mac_config = dpaa2_mac_config,
+	.mac_link_up = dpaa2_mac_link_up,
+	.mac_link_down = dpaa2_mac_link_down,
+};
+
+bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,
+			     struct fsl_mc_io *mc_io)
+{
+	struct dpmac_attr attr;
+	bool fixed = false;
+	u16 mc_handle = 0;
+	int err;
+
+	err = dpmac_open(mc_io, 0, dpmac_dev->obj_desc.id,
+			 &mc_handle);
+	if (err || !mc_handle)
+		return false;
+
+	err = dpmac_get_attributes(mc_io, 0, mc_handle, &attr);
+	if (err)
+		goto out;
+
+	if (attr.link_type == DPMAC_LINK_TYPE_FIXED)
+		fixed = true;
+
+out:
+	dpmac_close(mc_io, 0, mc_handle);
+
+	return fixed;
+}
+
+int dpaa2_mac_connect(struct dpaa2_mac *mac)
+{
+	struct fsl_mc_device *dpmac_dev = mac->mc_dev;
+	struct net_device *net_dev = mac->net_dev;
+	struct device_node *dpmac_node;
+	struct phylink *phylink;
+	struct dpmac_attr attr;
+	int err;
+
+	err = dpmac_open(mac->mc_io, 0, dpmac_dev->obj_desc.id,
+			 &dpmac_dev->mc_handle);
+	if (err || !dpmac_dev->mc_handle) {
+		netdev_err(net_dev, "dpmac_open() = %d\n", err);
+		return -ENODEV;
+	}
+
+	err = dpmac_get_attributes(mac->mc_io, 0, dpmac_dev->mc_handle, &attr);
+	if (err) {
+		netdev_err(net_dev, "dpmac_get_attributes() = %d\n", err);
+		goto err_close_dpmac;
+	}
+
+	dpmac_node = dpaa2_mac_get_node(attr.id);
+	if (!dpmac_node) {
+		netdev_err(net_dev, "No dpmac@%d node found.\n", attr.id);
+		err = -ENODEV;
+		goto err_close_dpmac;
+	}
+
+	err = dpaa2_mac_get_if_mode(dpmac_node, attr);
+	if (err < 0) {
+		err = -EINVAL;
+		goto err_put_node;
+	}
+	mac->if_mode = err;
+
+	/* The MAC does not have the capability to add RGMII delays so
+	 * error out if the interface mode requests them and there is no PHY
+	 * to act upon them
+	 */
+	if (of_phy_is_fixed_link(dpmac_node) &&
+	    (mac->if_mode == PHY_INTERFACE_MODE_RGMII_ID ||
+	     mac->if_mode == PHY_INTERFACE_MODE_RGMII_RXID ||
+	     mac->if_mode == PHY_INTERFACE_MODE_RGMII_TXID)) {
+		netdev_err(net_dev, "RGMII delay not supported\n");
+		err = -EINVAL;
+		goto err_put_node;
+	}
+
+	mac->phylink_config.dev = &net_dev->dev;
+	mac->phylink_config.type = PHYLINK_NETDEV;
+
+	phylink = phylink_create(&mac->phylink_config,
+				 of_fwnode_handle(dpmac_node), mac->if_mode,
+				 &dpaa2_mac_phylink_ops);
+	if (IS_ERR(phylink)) {
+		err = PTR_ERR(phylink);
+		goto err_put_node;
+	}
+	mac->phylink = phylink;
+
+	err = phylink_of_phy_connect(mac->phylink, dpmac_node, 0);
+	if (err) {
+		netdev_err(net_dev, "phylink_of_phy_connect() = %d\n", err);
+		goto err_phylink_destroy;
+	}
+
+	of_node_put(dpmac_node);
+
+	return 0;
+
+err_phylink_destroy:
+	phylink_destroy(mac->phylink);
+err_put_node:
+	of_node_put(dpmac_node);
+err_close_dpmac:
+	dpmac_close(mac->mc_io, 0, dpmac_dev->mc_handle);
+	return err;
+}
+
+void dpaa2_mac_disconnect(struct dpaa2_mac *mac)
+{
+	if (!mac->phylink)
+		return;
+
+	phylink_disconnect_phy(mac->phylink);
+	phylink_destroy(mac->phylink);
+	dpmac_close(mac->mc_io, 0, mac->mc_dev->mc_handle);
+}
+
+static char dpaa2_mac_ethtool_stats[][ETH_GSTRING_LEN] = {
+	[DPMAC_CNT_ING_ALL_FRAME]		= "[mac] rx all frames",
+	[DPMAC_CNT_ING_GOOD_FRAME]		= "[mac] rx frames ok",
+	[DPMAC_CNT_ING_ERR_FRAME]		= "[mac] rx frame errors",
+	[DPMAC_CNT_ING_FRAME_DISCARD]		= "[mac] rx frame discards",
+	[DPMAC_CNT_ING_UCAST_FRAME]		= "[mac] rx u-cast",
+	[DPMAC_CNT_ING_BCAST_FRAME]		= "[mac] rx b-cast",
+	[DPMAC_CNT_ING_MCAST_FRAME]		= "[mac] rx m-cast",
+	[DPMAC_CNT_ING_FRAME_64]		= "[mac] rx 64 bytes",
+	[DPMAC_CNT_ING_FRAME_127]		= "[mac] rx 65-127 bytes",
+	[DPMAC_CNT_ING_FRAME_255]		= "[mac] rx 128-255 bytes",
+	[DPMAC_CNT_ING_FRAME_511]		= "[mac] rx 256-511 bytes",
+	[DPMAC_CNT_ING_FRAME_1023]		= "[mac] rx 512-1023 bytes",
+	[DPMAC_CNT_ING_FRAME_1518]		= "[mac] rx 1024-1518 bytes",
+	[DPMAC_CNT_ING_FRAME_1519_MAX]		= "[mac] rx 1519-max bytes",
+	[DPMAC_CNT_ING_FRAG]			= "[mac] rx frags",
+	[DPMAC_CNT_ING_JABBER]			= "[mac] rx jabber",
+	[DPMAC_CNT_ING_ALIGN_ERR]		= "[mac] rx align errors",
+	[DPMAC_CNT_ING_OVERSIZED]		= "[mac] rx oversized",
+	[DPMAC_CNT_ING_VALID_PAUSE_FRAME]	= "[mac] rx pause",
+	[DPMAC_CNT_ING_BYTE]			= "[mac] rx bytes",
+	[DPMAC_CNT_EGR_GOOD_FRAME]		= "[mac] tx frames ok",
+	[DPMAC_CNT_EGR_UCAST_FRAME]		= "[mac] tx u-cast",
+	[DPMAC_CNT_EGR_MCAST_FRAME]		= "[mac] tx m-cast",
+	[DPMAC_CNT_EGR_BCAST_FRAME]		= "[mac] tx b-cast",
+	[DPMAC_CNT_EGR_ERR_FRAME]		= "[mac] tx frame errors",
+	[DPMAC_CNT_EGR_UNDERSIZED]		= "[mac] tx undersized",
+	[DPMAC_CNT_EGR_VALID_PAUSE_FRAME]	= "[mac] tx b-pause",
+	[DPMAC_CNT_EGR_BYTE]			= "[mac] tx bytes",
+};
+
+#define DPAA2_MAC_NUM_STATS	ARRAY_SIZE(dpaa2_mac_ethtool_stats)
+
+int dpaa2_mac_get_sset_count(void)
+{
+	return DPAA2_MAC_NUM_STATS;
+}
+
+void dpaa2_mac_get_strings(u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	for (i = 0; i < DPAA2_MAC_NUM_STATS; i++) {
+		strlcpy(p, dpaa2_mac_ethtool_stats[i], ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+	}
+}
+
+void dpaa2_mac_get_ethtool_stats(struct dpaa2_mac *mac, u64 *data)
+{
+	struct fsl_mc_device *dpmac_dev = mac->mc_dev;
+	int i, err;
+	u64 value;
+
+	for (i = 0; i < DPAA2_MAC_NUM_STATS; i++) {
+		err = dpmac_get_counter(mac->mc_io, 0, dpmac_dev->mc_handle,
+					i, &value);
+		if (err) {
+			netdev_err_once(mac->net_dev,
+					"dpmac_get_counter error %d\n", err);
+			*(data + i) = U64_MAX;
+			continue;
+		}
+		*(data + i) = value;
+	}
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
new file mode 100644
index 000000000000..4da8079b9155
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2019 NXP */
+#ifndef DPAA2_MAC_H
+#define DPAA2_MAC_H
+
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/phylink.h>
+
+#include "dpmac.h"
+#include "dpmac-cmd.h"
+
+struct dpaa2_mac {
+	struct fsl_mc_device *mc_dev;
+	struct dpmac_link_state state;
+	struct net_device *net_dev;
+	struct fsl_mc_io *mc_io;
+
+	struct phylink_config phylink_config;
+	struct phylink *phylink;
+	phy_interface_t if_mode;
+};
+
+bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,
+			     struct fsl_mc_io *mc_io);
+
+int dpaa2_mac_connect(struct dpaa2_mac *mac);
+
+void dpaa2_mac_disconnect(struct dpaa2_mac *mac);
+
+int dpaa2_mac_get_sset_count(void);
+
+void dpaa2_mac_get_strings(u8 *data);
+
+void dpaa2_mac_get_ethtool_stats(struct dpaa2_mac *mac, u64 *data);
+
+#endif /* DPAA2_MAC_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h
new file mode 100644
index 000000000000..3ea51dd9374b
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2019 NXP
+ */
+#ifndef _FSL_DPMAC_CMD_H
+#define _FSL_DPMAC_CMD_H
+
+/* DPMAC Version */
+#define DPMAC_VER_MAJOR				4
+#define DPMAC_VER_MINOR				4
+#define DPMAC_CMD_BASE_VERSION			1
+#define DPMAC_CMD_2ND_VERSION			2
+#define DPMAC_CMD_ID_OFFSET			4
+
+#define DPMAC_CMD(id)	(((id) << DPMAC_CMD_ID_OFFSET) | DPMAC_CMD_BASE_VERSION)
+#define DPMAC_CMD_V2(id) (((id) << DPMAC_CMD_ID_OFFSET) | DPMAC_CMD_2ND_VERSION)
+
+/* Command IDs */
+#define DPMAC_CMDID_CLOSE		DPMAC_CMD(0x800)
+#define DPMAC_CMDID_OPEN		DPMAC_CMD(0x80c)
+
+#define DPMAC_CMDID_GET_ATTR		DPMAC_CMD(0x004)
+#define DPMAC_CMDID_SET_LINK_STATE	DPMAC_CMD_V2(0x0c3)
+
+#define DPMAC_CMDID_GET_COUNTER		DPMAC_CMD(0x0c4)
+
+/* Macros for accessing command fields smaller than 1byte */
+#define DPMAC_MASK(field)        \
+	GENMASK(DPMAC_##field##_SHIFT + DPMAC_##field##_SIZE - 1, \
+		DPMAC_##field##_SHIFT)
+
+#define dpmac_set_field(var, field, val) \
+	((var) |= (((val) << DPMAC_##field##_SHIFT) & DPMAC_MASK(field)))
+#define dpmac_get_field(var, field)      \
+	(((var) & DPMAC_MASK(field)) >> DPMAC_##field##_SHIFT)
+
+struct dpmac_cmd_open {
+	__le32 dpmac_id;
+};
+
+struct dpmac_rsp_get_attributes {
+	u8 eth_if;
+	u8 link_type;
+	__le16 id;
+	__le32 max_rate;
+};
+
+#define DPMAC_STATE_SIZE	1
+#define DPMAC_STATE_SHIFT	0
+#define DPMAC_STATE_VALID_SIZE	1
+#define DPMAC_STATE_VALID_SHIFT	1
+
+struct dpmac_cmd_set_link_state {
+	__le64 options;
+	__le32 rate;
+	__le32 pad0;
+	/* from lsb: up:1, state_valid:1 */
+	u8 state;
+	u8 pad1[7];
+	__le64 supported;
+	__le64 advertising;
+};
+
+struct dpmac_cmd_get_counter {
+	u8 id;
+};
+
+struct dpmac_rsp_get_counter {
+	u64 pad;
+	u64 counter;
+};
+
+#endif /* _FSL_DPMAC_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.c b/drivers/net/ethernet/freescale/dpaa2/dpmac.c
new file mode 100644
index 000000000000..d5997b654562
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2019 NXP
+ */
+#include <linux/fsl/mc.h>
+#include "dpmac.h"
+#include "dpmac-cmd.h"
+
+/**
+ * dpmac_open() - Open a control session for the specified object.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpmac_id:	DPMAC unique ID
+ * @token:	Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dpmac_create function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpmac_open(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       int dpmac_id,
+	       u16 *token)
+{
+	struct dpmac_cmd_open *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_OPEN,
+					  cmd_flags,
+					  0);
+	cmd_params = (struct dpmac_cmd_open *)cmd.params;
+	cmd_params->dpmac_id = cpu_to_le32(dpmac_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return err;
+}
+
+/**
+ * dpmac_close() - Close the control session of the object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPMAC object
+ *
+ * After this function is called, no further operations are
+ * allowed on the object without opening a new control session.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpmac_close(struct fsl_mc_io *mc_io,
+		u32 cmd_flags,
+		u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_CLOSE, cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpmac_get_attributes - Retrieve DPMAC attributes.
+ *
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPMAC object
+ * @attr:	Returned object's attributes
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpmac_get_attributes(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 struct dpmac_attr *attr)
+{
+	struct dpmac_rsp_get_attributes *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_ATTR,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpmac_rsp_get_attributes *)cmd.params;
+	attr->eth_if = rsp_params->eth_if;
+	attr->link_type = rsp_params->link_type;
+	attr->id = le16_to_cpu(rsp_params->id);
+	attr->max_rate = le32_to_cpu(rsp_params->max_rate);
+
+	return 0;
+}
+
+/**
+ * dpmac_set_link_state() - Set the Ethernet link status
+ * @mc_io:      Pointer to opaque I/O object
+ * @cmd_flags:  Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:      Token of DPMAC object
+ * @link_state: Link state configuration
+ *
+ * Return:      '0' on Success; Error code otherwise.
+ */
+int dpmac_set_link_state(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 struct dpmac_link_state *link_state)
+{
+	struct dpmac_cmd_set_link_state *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_SET_LINK_STATE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpmac_cmd_set_link_state *)cmd.params;
+	cmd_params->options = cpu_to_le64(link_state->options);
+	cmd_params->rate = cpu_to_le32(link_state->rate);
+	dpmac_set_field(cmd_params->state, STATE, link_state->up);
+	dpmac_set_field(cmd_params->state, STATE_VALID,
+			link_state->state_valid);
+	cmd_params->supported = cpu_to_le64(link_state->supported);
+	cmd_params->advertising = cpu_to_le64(link_state->advertising);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpmac_get_counter() - Read a specific DPMAC counter
+ * @mc_io:	Pointer to opaque I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPMAC object
+ * @id:		The requested counter ID
+ * @value:	Returned counter value
+ *
+ * Return:	The requested counter; '0' otherwise.
+ */
+int dpmac_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		      enum dpmac_counter_id id, u64 *value)
+{
+	struct dpmac_cmd_get_counter *dpmac_cmd;
+	struct dpmac_rsp_get_counter *dpmac_rsp;
+	struct fsl_mc_command cmd = { 0 };
+	int err = 0;
+
+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_COUNTER,
+					  cmd_flags,
+					  token);
+	dpmac_cmd = (struct dpmac_cmd_get_counter *)cmd.params;
+	dpmac_cmd->id = id;
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	dpmac_rsp = (struct dpmac_rsp_get_counter *)cmd.params;
+	*value = le64_to_cpu(dpmac_rsp->counter);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.h b/drivers/net/ethernet/freescale/dpaa2/dpmac.h
new file mode 100644
index 000000000000..135f143097a5
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2019 NXP
+ */
+#ifndef __FSL_DPMAC_H
+#define __FSL_DPMAC_H
+
+/* Data Path MAC API
+ * Contains initialization APIs and runtime control APIs for DPMAC
+ */
+
+struct fsl_mc_io;
+
+int dpmac_open(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       int dpmac_id,
+	       u16 *token);
+
+int dpmac_close(struct fsl_mc_io *mc_io,
+		u32 cmd_flags,
+		u16 token);
+
+/**
+ * enum dpmac_link_type -  DPMAC link type
+ * @DPMAC_LINK_TYPE_NONE: No link
+ * @DPMAC_LINK_TYPE_FIXED: Link is fixed type
+ * @DPMAC_LINK_TYPE_PHY: Link by PHY ID
+ * @DPMAC_LINK_TYPE_BACKPLANE: Backplane link type
+ */
+enum dpmac_link_type {
+	DPMAC_LINK_TYPE_NONE,
+	DPMAC_LINK_TYPE_FIXED,
+	DPMAC_LINK_TYPE_PHY,
+	DPMAC_LINK_TYPE_BACKPLANE
+};
+
+/**
+ * enum dpmac_eth_if - DPMAC Ethrnet interface
+ * @DPMAC_ETH_IF_MII: MII interface
+ * @DPMAC_ETH_IF_RMII: RMII interface
+ * @DPMAC_ETH_IF_SMII: SMII interface
+ * @DPMAC_ETH_IF_GMII: GMII interface
+ * @DPMAC_ETH_IF_RGMII: RGMII interface
+ * @DPMAC_ETH_IF_SGMII: SGMII interface
+ * @DPMAC_ETH_IF_QSGMII: QSGMII interface
+ * @DPMAC_ETH_IF_XAUI: XAUI interface
+ * @DPMAC_ETH_IF_XFI: XFI interface
+ * @DPMAC_ETH_IF_CAUI: CAUI interface
+ * @DPMAC_ETH_IF_1000BASEX: 1000BASEX interface
+ * @DPMAC_ETH_IF_USXGMII: USXGMII interface
+ */
+enum dpmac_eth_if {
+	DPMAC_ETH_IF_MII,
+	DPMAC_ETH_IF_RMII,
+	DPMAC_ETH_IF_SMII,
+	DPMAC_ETH_IF_GMII,
+	DPMAC_ETH_IF_RGMII,
+	DPMAC_ETH_IF_SGMII,
+	DPMAC_ETH_IF_QSGMII,
+	DPMAC_ETH_IF_XAUI,
+	DPMAC_ETH_IF_XFI,
+	DPMAC_ETH_IF_CAUI,
+	DPMAC_ETH_IF_1000BASEX,
+	DPMAC_ETH_IF_USXGMII,
+};
+
+/**
+ * struct dpmac_attr - Structure representing DPMAC attributes
+ * @id:		DPMAC object ID
+ * @max_rate:	Maximum supported rate - in Mbps
+ * @eth_if:	Ethernet interface
+ * @link_type:	link type
+ */
+struct dpmac_attr {
+	u16 id;
+	u32 max_rate;
+	enum dpmac_eth_if eth_if;
+	enum dpmac_link_type link_type;
+};
+
+int dpmac_get_attributes(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 struct dpmac_attr *attr);
+
+/**
+ * DPMAC link configuration/state options
+ */
+
+/**
+ * Enable auto-negotiation
+ */
+#define DPMAC_LINK_OPT_AUTONEG			BIT_ULL(0)
+/**
+ * Enable half-duplex mode
+ */
+#define DPMAC_LINK_OPT_HALF_DUPLEX		BIT_ULL(1)
+/**
+ * Enable pause frames
+ */
+#define DPMAC_LINK_OPT_PAUSE			BIT_ULL(2)
+/**
+ * Enable a-symmetric pause frames
+ */
+#define DPMAC_LINK_OPT_ASYM_PAUSE		BIT_ULL(3)
+
+/**
+ * Advertised link speeds
+ */
+#define DPMAC_ADVERTISED_10BASET_FULL		BIT_ULL(0)
+#define DPMAC_ADVERTISED_100BASET_FULL		BIT_ULL(1)
+#define DPMAC_ADVERTISED_1000BASET_FULL		BIT_ULL(2)
+#define DPMAC_ADVERTISED_10000BASET_FULL	BIT_ULL(4)
+#define DPMAC_ADVERTISED_2500BASEX_FULL		BIT_ULL(5)
+
+/**
+ * Advertise auto-negotiation enable
+ */
+#define DPMAC_ADVERTISED_AUTONEG		BIT_ULL(3)
+
+/**
+ * struct dpmac_link_state - DPMAC link configuration request
+ * @rate: Rate in Mbps
+ * @options: Enable/Disable DPMAC link cfg features (bitmap)
+ * @up: Link state
+ * @state_valid: Ignore/Update the state of the link
+ * @supported: Speeds capability of the phy (bitmap)
+ * @advertising: Speeds that are advertised for autoneg (bitmap)
+ */
+struct dpmac_link_state {
+	u32 rate;
+	u64 options;
+	int up;
+	int state_valid;
+	u64 supported;
+	u64 advertising;
+};
+
+int dpmac_set_link_state(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 struct dpmac_link_state *link_state);
+
+/**
+ * enum dpmac_counter_id - DPMAC counter types
+ *
+ * @DPMAC_CNT_ING_FRAME_64: counts 64-bytes frames, good or bad.
+ * @DPMAC_CNT_ING_FRAME_127: counts 65- to 127-bytes frames, good or bad.
+ * @DPMAC_CNT_ING_FRAME_255: counts 128- to 255-bytes frames, good or bad.
+ * @DPMAC_CNT_ING_FRAME_511: counts 256- to 511-bytes frames, good or bad.
+ * @DPMAC_CNT_ING_FRAME_1023: counts 512- to 1023-bytes frames, good or bad.
+ * @DPMAC_CNT_ING_FRAME_1518: counts 1024- to 1518-bytes frames, good or bad.
+ * @DPMAC_CNT_ING_FRAME_1519_MAX: counts 1519-bytes frames and larger
+ *				  (up to max frame length specified),
+ *				  good or bad.
+ * @DPMAC_CNT_ING_FRAG: counts frames which are shorter than 64 bytes received
+ *			with a wrong CRC
+ * @DPMAC_CNT_ING_JABBER: counts frames longer than the maximum frame length
+ *			  specified, with a bad frame check sequence.
+ * @DPMAC_CNT_ING_FRAME_DISCARD: counts dropped frames due to internal errors.
+ *				 Occurs when a receive FIFO overflows.
+ *				 Includes also frames truncated as a result of
+ *				 the receive FIFO overflow.
+ * @DPMAC_CNT_ING_ALIGN_ERR: counts frames with an alignment error
+ *			     (optional used for wrong SFD).
+ * @DPMAC_CNT_EGR_UNDERSIZED: counts frames transmitted that was less than 64
+ *			      bytes long with a good CRC.
+ * @DPMAC_CNT_ING_OVERSIZED: counts frames longer than the maximum frame length
+ *			     specified, with a good frame check sequence.
+ * @DPMAC_CNT_ING_VALID_PAUSE_FRAME: counts valid pause frames (regular and PFC)
+ * @DPMAC_CNT_EGR_VALID_PAUSE_FRAME: counts valid pause frames transmitted
+ *				     (regular and PFC).
+ * @DPMAC_CNT_ING_BYTE: counts bytes received except preamble for all valid
+ *			frames and valid pause frames.
+ * @DPMAC_CNT_ING_MCAST_FRAME: counts received multicast frames.
+ * @DPMAC_CNT_ING_BCAST_FRAME: counts received broadcast frames.
+ * @DPMAC_CNT_ING_ALL_FRAME: counts each good or bad frames received.
+ * @DPMAC_CNT_ING_UCAST_FRAME: counts received unicast frames.
+ * @DPMAC_CNT_ING_ERR_FRAME: counts frames received with an error
+ *			     (except for undersized/fragment frame).
+ * @DPMAC_CNT_EGR_BYTE: counts bytes transmitted except preamble for all valid
+ *			frames and valid pause frames transmitted.
+ * @DPMAC_CNT_EGR_MCAST_FRAME: counts transmitted multicast frames.
+ * @DPMAC_CNT_EGR_BCAST_FRAME: counts transmitted broadcast frames.
+ * @DPMAC_CNT_EGR_UCAST_FRAME: counts transmitted unicast frames.
+ * @DPMAC_CNT_EGR_ERR_FRAME: counts frames transmitted with an error.
+ * @DPMAC_CNT_ING_GOOD_FRAME: counts frames received without error, including
+ *			      pause frames.
+ * @DPMAC_CNT_EGR_GOOD_FRAME: counts frames transmitted without error, including
+ *			      pause frames.
+ */
+enum dpmac_counter_id {
+	DPMAC_CNT_ING_FRAME_64,
+	DPMAC_CNT_ING_FRAME_127,
+	DPMAC_CNT_ING_FRAME_255,
+	DPMAC_CNT_ING_FRAME_511,
+	DPMAC_CNT_ING_FRAME_1023,
+	DPMAC_CNT_ING_FRAME_1518,
+	DPMAC_CNT_ING_FRAME_1519_MAX,
+	DPMAC_CNT_ING_FRAG,
+	DPMAC_CNT_ING_JABBER,
+	DPMAC_CNT_ING_FRAME_DISCARD,
+	DPMAC_CNT_ING_ALIGN_ERR,
+	DPMAC_CNT_EGR_UNDERSIZED,
+	DPMAC_CNT_ING_OVERSIZED,
+	DPMAC_CNT_ING_VALID_PAUSE_FRAME,
+	DPMAC_CNT_EGR_VALID_PAUSE_FRAME,
+	DPMAC_CNT_ING_BYTE,
+	DPMAC_CNT_ING_MCAST_FRAME,
+	DPMAC_CNT_ING_BCAST_FRAME,
+	DPMAC_CNT_ING_ALL_FRAME,
+	DPMAC_CNT_ING_UCAST_FRAME,
+	DPMAC_CNT_ING_ERR_FRAME,
+	DPMAC_CNT_EGR_BYTE,
+	DPMAC_CNT_EGR_MCAST_FRAME,
+	DPMAC_CNT_EGR_BCAST_FRAME,
+	DPMAC_CNT_EGR_UCAST_FRAME,
+	DPMAC_CNT_EGR_ERR_FRAME,
+	DPMAC_CNT_ING_GOOD_FRAME,
+	DPMAC_CNT_EGR_GOOD_FRAME
+};
+
+int dpmac_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		      enum dpmac_counter_id id, u64 *value);
+
+#endif /* __FSL_DPMAC_H */
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index b6ff89307409..3e8f9819f08c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1599,7 +1599,10 @@ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
 	if (cmd == SIOCGHWTSTAMP)
 		return enetc_hwtstamp_get(ndev, rq);
 #endif
-	return -EINVAL;
+
+	if (!ndev->phydev)
+		return -EOPNOTSUPP;
+	return phy_mii_ioctl(ndev->phydev, rq, cmd);
 }
 
 int enetc_alloc_msix(struct enetc_ndev_priv *priv)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index fcb52efec075..880a8ed8bb47 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -584,6 +584,31 @@ static int enetc_get_ts_info(struct net_device *ndev,
 	return 0;
 }
 
+static void enetc_get_wol(struct net_device *dev,
+			  struct ethtool_wolinfo *wol)
+{
+	wol->supported = 0;
+	wol->wolopts = 0;
+
+	if (dev->phydev)
+		phy_ethtool_get_wol(dev->phydev, wol);
+}
+
+static int enetc_set_wol(struct net_device *dev,
+			 struct ethtool_wolinfo *wol)
+{
+	int ret;
+
+	if (!dev->phydev)
+		return -EOPNOTSUPP;
+
+	ret = phy_ethtool_set_wol(dev->phydev, wol);
+	if (!ret)
+		device_set_wakeup_enable(&dev->dev, wol->wolopts);
+
+	return ret;
+}
+
 static const struct ethtool_ops enetc_pf_ethtool_ops = {
 	.get_regs_len = enetc_get_reglen,
 	.get_regs = enetc_get_regs,
@@ -601,6 +626,8 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = {
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 	.get_link = ethtool_op_get_link,
 	.get_ts_info = enetc_get_ts_info,
+	.get_wol = enetc_get_wol,
+	.set_wol = enetc_set_wol,
 };
 
 static const struct ethtool_ops enetc_vf_ethtool_ops = {
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index b73421c3e25b..7da79b816416 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -784,8 +784,8 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
 		}
 	}
 
-	priv->if_mode = of_get_phy_mode(np);
-	if ((int)priv->if_mode < 0) {
+	err = of_get_phy_mode(np, &priv->if_mode);
+	if (err) {
 		dev_err(priv->dev, "missing phy type\n");
 		of_node_put(priv->phy_node);
 		if (of_phy_is_fixed_link(np))
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index a9c386b63581..b886b075650e 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2706,7 +2706,6 @@ static void fec_enet_free_buffers(struct net_device *ndev)
 
 	for (q = 0; q < fep->num_tx_queues; q++) {
 		txq = fep->tx_queue[q];
-		bdp = txq->bd.base;
 		for (i = 0; i < txq->bd.ring_size; i++) {
 			kfree(txq->tx_bounce[i]);
 			txq->tx_bounce[i] = NULL;
@@ -3394,6 +3393,7 @@ fec_probe(struct platform_device *pdev)
 {
 	struct fec_enet_private *fep;
 	struct fec_platform_data *pdata;
+	phy_interface_t interface;
 	struct net_device *ndev;
 	int i, irq, ret = 0;
 	const struct of_device_id *of_id;
@@ -3466,15 +3466,15 @@ fec_probe(struct platform_device *pdev)
 	}
 	fep->phy_node = phy_node;
 
-	ret = of_get_phy_mode(pdev->dev.of_node);
-	if (ret < 0) {
+	ret = of_get_phy_mode(pdev->dev.of_node, &interface);
+	if (ret) {
 		pdata = dev_get_platdata(&pdev->dev);
 		if (pdata)
 			fep->phy_interface = pdata->phy;
 		else
 			fep->phy_interface = PHY_INTERFACE_MODE_MII;
 	} else {
-		fep->phy_interface = ret;
+		fep->phy_interface = interface;
 	}
 
 	fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 210749bf1eac..934111def0be 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -634,6 +634,9 @@ static void set_port_liodn(struct fman *fman, u8 port_id,
 {
 	u32 tmp;
 
+	iowrite32be(liodn_ofst, &fman->bmi_regs->fmbm_spliodn[port_id - 1]);
+	if (!IS_ENABLED(CONFIG_FSL_PAMU))
+		return;
 	/* set LIODN base for this port */
 	tmp = ioread32be(&fman->dma_regs->fmdmplr[port_id / 2]);
 	if (port_id % 2) {
@@ -644,7 +647,6 @@ static void set_port_liodn(struct fman *fman, u8 port_id,
 		tmp |= liodn_base << DMA_LIODN_SHIFT;
 	}
 	iowrite32be(tmp, &fman->dma_regs->fmdmplr[port_id / 2]);
-	iowrite32be(liodn_ofst, &fman->bmi_regs->fmbm_spliodn[port_id - 1]);
 }
 
 static void enable_rams_ecc(struct fman_fpm_regs __iomem *fpm_rg)
@@ -1942,6 +1944,8 @@ static int fman_init(struct fman *fman)
 
 		fman->liodn_offset[i] =
 			ioread32be(&fman->bmi_regs->fmbm_spliodn[i - 1]);
+		if (!IS_ENABLED(CONFIG_FSL_PAMU))
+			continue;
 		liodn_base = ioread32be(&fman->dma_regs->fmdmplr[i / 2]);
 		if (i % 2) {
 			/* FMDM_PLR LSB holds LIODN base for odd ports */
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index ee82ee1384eb..87b26f063cc8 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -435,7 +435,6 @@ struct fman_port_cfg {
 
 struct fman_port_rx_pools_params {
 	u8 num_of_pools;
-	u16 second_largest_buf_size;
 	u16 largest_buf_size;
 };
 
@@ -946,8 +945,6 @@ static int set_ext_buffer_pools(struct fman_port *port)
 	port->rx_pools_params.num_of_pools = ext_buf_pools->num_of_pools_used;
 	port->rx_pools_params.largest_buf_size =
 	    sizes_array[ordered_array[ext_buf_pools->num_of_pools_used - 1]];
-	port->rx_pools_params.second_largest_buf_size =
-	    sizes_array[ordered_array[ext_buf_pools->num_of_pools_used - 2]];
 
 	/* FMBM_RMPD reg. - pool depletion */
 	if (buf_pool_depletion->pools_grp_mode_enable) {
@@ -1728,6 +1725,20 @@ u32 fman_port_get_qman_channel_id(struct fman_port *port)
 }
 EXPORT_SYMBOL(fman_port_get_qman_channel_id);
 
+/**
+ * fman_port_get_device
+ * port:	Pointer to the FMan port device
+ *
+ * Get the 'struct device' associated to the specified FMan port device
+ *
+ * Return: pointer to associated 'struct device'
+ */
+struct device *fman_port_get_device(struct fman_port *port)
+{
+	return port->dev;
+}
+EXPORT_SYMBOL(fman_port_get_device);
+
 int fman_port_get_hash_result_offset(struct fman_port *port, u32 *offset)
 {
 	if (port->buffer_offsets.hash_result_offset == ILLEGAL_BASE)
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.h b/drivers/net/ethernet/freescale/fman/fman_port.h
index 9dbb69f40121..82f12661a46d 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.h
+++ b/drivers/net/ethernet/freescale/fman/fman_port.h
@@ -157,4 +157,6 @@ int fman_port_get_tstamp(struct fman_port *port, const void *data, u64 *tstamp);
 
 struct fman_port *fman_port_bind(struct device *dev);
 
+struct device *fman_port_get_device(struct fman_port *port);
+
 #endif /* __FMAN_PORT_H */
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 7ab8095db192..f0806ace1ae2 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -608,7 +608,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	const u8		*mac_addr;
 	u32			 val;
 	u8			fman_id;
-	int			phy_if;
+	phy_interface_t          phy_if;
 
 	dev = &_of_dev->dev;
 	mac_node = dev->of_node;
@@ -776,8 +776,8 @@ static int mac_probe(struct platform_device *_of_dev)
 	}
 
 	/* Get the PHY connection type */
-	phy_if = of_get_phy_mode(mac_node);
-	if (phy_if < 0) {
+	err = of_get_phy_mode(mac_node, &phy_if);
+	if (err) {
 		dev_warn(dev,
 			 "of_get_phy_mode() for %pOF failed. Defaulting to SGMII\n",
 			 mac_node);
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 51ad86417cb1..72868a28b621 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -641,6 +641,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 	const char *model;
 	const void *mac_addr;
 	int err = 0, i;
+	phy_interface_t interface;
 	struct net_device *dev = NULL;
 	struct gfar_private *priv = NULL;
 	struct device_node *np = ofdev->dev.of_node;
@@ -805,9 +806,9 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 	 * rgmii-id really needs to be specified. Other types can be
 	 * detected by hardware
 	 */
-	err = of_get_phy_mode(np);
-	if (err >= 0)
-		priv->interface = err;
+	err = of_get_phy_mode(np, &interface);
+	if (!err)
+		priv->interface = interface;
 	else
 		priv->interface = gfar_get_interface(dev);
 
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index f472a6dbbe6f..432c6a818ae5 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -90,11 +90,11 @@ extern const char gfar_driver_version[];
 #define DEFAULT_RX_LFC_THR  16
 #define DEFAULT_LFC_PTVVAL  4
 
-/* prevent fragmenation by HW in DSA environments */
-#define GFAR_RXB_SIZE roundup(1536 + 8, 64)
-#define GFAR_SKBFRAG_SIZE (RXBUF_ALIGNMENT + GFAR_RXB_SIZE \
-			  + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 #define GFAR_RXB_TRUESIZE 2048
+#define GFAR_SKBFRAG_OVR (RXBUF_ALIGNMENT \
+			  + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+#define GFAR_RXB_SIZE rounddown(GFAR_RXB_TRUESIZE - GFAR_SKBFRAG_OVR, 64)
+#define GFAR_SKBFRAG_SIZE (GFAR_RXB_SIZE + GFAR_SKBFRAG_OVR)
 
 #define TX_RING_MOD_MASK(size) (size-1)
 #define RX_RING_MOD_MASK(size) (size-1)
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index 4606a7e4a6d1..3e9b6d543c77 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -211,7 +211,7 @@ struct hip04_priv {
 #if defined(CONFIG_HI13X1_GMAC)
 	void __iomem *sysctrl_base;
 #endif
-	int phy_mode;
+	phy_interface_t phy_mode;
 	int chan;
 	unsigned int port;
 	unsigned int group;
@@ -961,10 +961,9 @@ static int hip04_mac_probe(struct platform_device *pdev)
 		goto init_fail;
 	}
 
-	priv->phy_mode = of_get_phy_mode(node);
-	if (priv->phy_mode < 0) {
+	ret = of_get_phy_mode(node, &priv->phy_mode);
+	if (ret) {
 		dev_warn(d, "not find phy-mode\n");
-		ret = -EINVAL;
 		goto init_fail;
 	}
 
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index c41b19c760f8..247de9105d10 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -1193,10 +1193,9 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_free_mdio;
 
-	priv->phy_mode = of_get_phy_mode(node);
-	if ((int)priv->phy_mode < 0) {
+	ret = of_get_phy_mode(node, &priv->phy_mode);
+	if (ret) {
 		netdev_err(ndev, "not find phy-mode\n");
-		ret = -EINVAL;
 		goto err_mdiobus;
 	}
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 3a14bbc26ea2..1c5243cc1dc6 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -3049,7 +3049,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset)
 	u32 sl;
 	u32 credit;
 	int i;
-	const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
+	static const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
 		{DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0},
 		{DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0},
 		{DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0},
@@ -3059,7 +3059,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset)
 		{DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1},
 		{DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1},
 	};
-	const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
+	static const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
 		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0},
 		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1},
 		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2},
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index f8a87f8ca983..1b0313900f98 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -45,8 +45,9 @@ enum HCLGE_MBX_OPCODE {
 	HCLGE_MBX_GET_LINK_MODE,	/* (VF -> PF) get the link mode of pf */
 	HCLGE_MBX_PUSH_VLAN_INFO,	/* (PF -> VF) push port base vlan */
 	HCLGE_MBX_GET_MEDIA_TYPE,       /* (VF -> PF) get media type */
+	HCLGE_MBX_PUSH_PROMISC_INFO,	/* (PF -> VF) push vf promisc info */
 
-	HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
+	HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf flr status */
 	HCLGE_MBX_PUSH_LINK_STATUS,	/* (M7 -> PF) get port link status */
 	HCLGE_MBX_NCSI_ERROR,		/* (M7 -> PF) receive a NCSI error */
 };
@@ -71,7 +72,7 @@ enum hclge_mbx_vlan_cfg_subcode {
 };
 
 #define HCLGE_MBX_MAX_MSG_SIZE	16
-#define HCLGE_MBX_MAX_RESP_DATA_SIZE	8
+#define HCLGE_MBX_MAX_RESP_DATA_SIZE	8U
 #define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM	3
 #define HCLGE_MBX_RING_NODE_VARIABLE_NUM	3
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
index 03ca7d925e8e..eef1b2764d34 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -146,7 +146,7 @@ void hnae3_unregister_client(struct hnae3_client *client)
 		return;
 
 	mutex_lock(&hnae3_common_lock);
-
+	/* one system should only have one client for every type */
 	list_for_each_entry(client_tmp, &hnae3_client_list, node) {
 		if (client_tmp->type == client->type) {
 			existed = true;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a0998937727d..3b5e2d7251e7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -130,7 +130,6 @@ enum hnae3_module_type {
 	HNAE3_MODULE_TYPE_CR		= 0x04,
 	HNAE3_MODULE_TYPE_KR		= 0x05,
 	HNAE3_MODULE_TYPE_TP		= 0x06,
-
 };
 
 enum hnae3_fec_mode {
@@ -366,6 +365,19 @@ struct hnae3_ae_dev {
  *   Enable/disable HW GRO
  * add_arfs_entry
  *   Check the 5-tuples of flow, and create flow director rule
+ * get_vf_config
+ *   Get the VF configuration setting by the host
+ * set_vf_link_state
+ *   Set VF link status
+ * set_vf_spoofchk
+ *   Enable/disable spoof check for specified vf
+ * set_vf_trust
+ *   Enable/disable trust for specified vf, if the vf being trusted, then
+ *   it can enable promisc mode
+ * set_vf_rate
+ *   Set the max tx rate of specified vf.
+ * set_vf_mac
+ *   Configure the default MAC for specified VF
  */
 struct hnae3_ae_ops {
 	int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
@@ -531,6 +543,16 @@ struct hnae3_ae_ops {
 	int (*mac_connect_phy)(struct hnae3_handle *handle);
 	void (*mac_disconnect_phy)(struct hnae3_handle *handle);
 	void (*restore_vlan_table)(struct hnae3_handle *handle);
+	int (*get_vf_config)(struct hnae3_handle *handle, int vf,
+			     struct ifla_vf_info *ivf);
+	int (*set_vf_link_state)(struct hnae3_handle *handle, int vf,
+				 int link_state);
+	int (*set_vf_spoofchk)(struct hnae3_handle *handle, int vf,
+			       bool enable);
+	int (*set_vf_trust)(struct hnae3_handle *handle, int vf, bool enable);
+	int (*set_vf_rate)(struct hnae3_handle *handle, int vf,
+			   int min_tx_rate, int max_tx_rate, bool force);
+	int (*set_vf_mac)(struct hnae3_handle *handle, int vf, u8 *p);
 };
 
 struct hnae3_dcb_ops {
@@ -553,7 +575,8 @@ struct hnae3_ae_algo {
 	const struct pci_device_id *pdev_id_table;
 };
 
-#define HNAE3_INT_NAME_LEN        (IFNAMSIZ + 16)
+#define HNAE3_INT_NAME_EXT_LEN    32	 /* Max extra information length */
+#define HNAE3_INT_NAME_LEN        (IFNAMSIZ + HNAE3_INT_NAME_EXT_LEN)
 #define HNAE3_ITR_COUNTDOWN_START 100
 
 struct hnae3_tc_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 28961a68e333..6b328a259efc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -16,15 +16,14 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
 			       const char *cmd_buf)
 {
 	struct hns3_nic_priv *priv = h->priv;
-	struct hns3_nic_ring_data *ring_data;
 	struct hns3_enet_ring *ring;
 	u32 base_add_l, base_add_h;
 	u32 queue_num, queue_max;
 	u32 value, i = 0;
 	int cnt;
 
-	if (!priv->ring_data) {
-		dev_err(&h->pdev->dev, "ring_data is NULL\n");
+	if (!priv->ring) {
+		dev_err(&h->pdev->dev, "priv->ring is NULL\n");
 		return -EFAULT;
 	}
 
@@ -44,7 +43,6 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
 		return -EINVAL;
 	}
 
-	ring_data = priv->ring_data;
 	for (i = queue_num; i < queue_max; i++) {
 		/* Each cycle needs to determine whether the instance is reset,
 		 * to prevent reference to invalid memory. And need to ensure
@@ -54,73 +52,73 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
 		    test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
 			return -EPERM;
 
-		ring = ring_data[(u32)(i + h->kinfo.num_tqps)].ring;
+		ring = &priv->ring[(u32)(i + h->kinfo.num_tqps)];
 		base_add_h = readl_relaxed(ring->tqp->io_base +
 					   HNS3_RING_RX_RING_BASEADDR_H_REG);
 		base_add_l = readl_relaxed(ring->tqp->io_base +
 					   HNS3_RING_RX_RING_BASEADDR_L_REG);
-		dev_info(&h->pdev->dev, "RX(%d) BASE ADD: 0x%08x%08x\n", i,
+		dev_info(&h->pdev->dev, "RX(%u) BASE ADD: 0x%08x%08x\n", i,
 			 base_add_h, base_add_l);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_RX_RING_BD_NUM_REG);
-		dev_info(&h->pdev->dev, "RX(%d) RING BD NUM: %u\n", i, value);
+		dev_info(&h->pdev->dev, "RX(%u) RING BD NUM: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_RX_RING_BD_LEN_REG);
-		dev_info(&h->pdev->dev, "RX(%d) RING BD LEN: %u\n", i, value);
+		dev_info(&h->pdev->dev, "RX(%u) RING BD LEN: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_RX_RING_TAIL_REG);
-		dev_info(&h->pdev->dev, "RX(%d) RING TAIL: %u\n", i, value);
+		dev_info(&h->pdev->dev, "RX(%u) RING TAIL: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_RX_RING_HEAD_REG);
-		dev_info(&h->pdev->dev, "RX(%d) RING HEAD: %u\n", i, value);
+		dev_info(&h->pdev->dev, "RX(%u) RING HEAD: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_RX_RING_FBDNUM_REG);
-		dev_info(&h->pdev->dev, "RX(%d) RING FBDNUM: %u\n", i, value);
+		dev_info(&h->pdev->dev, "RX(%u) RING FBDNUM: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_RX_RING_PKTNUM_RECORD_REG);
-		dev_info(&h->pdev->dev, "RX(%d) RING PKTNUM: %u\n", i, value);
+		dev_info(&h->pdev->dev, "RX(%u) RING PKTNUM: %u\n", i, value);
 
-		ring = ring_data[i].ring;
+		ring = &priv->ring[i];
 		base_add_h = readl_relaxed(ring->tqp->io_base +
 					   HNS3_RING_TX_RING_BASEADDR_H_REG);
 		base_add_l = readl_relaxed(ring->tqp->io_base +
 					   HNS3_RING_TX_RING_BASEADDR_L_REG);
-		dev_info(&h->pdev->dev, "TX(%d) BASE ADD: 0x%08x%08x\n", i,
+		dev_info(&h->pdev->dev, "TX(%u) BASE ADD: 0x%08x%08x\n", i,
 			 base_add_h, base_add_l);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_TX_RING_BD_NUM_REG);
-		dev_info(&h->pdev->dev, "TX(%d) RING BD NUM: %u\n", i, value);
+		dev_info(&h->pdev->dev, "TX(%u) RING BD NUM: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_TX_RING_TC_REG);
-		dev_info(&h->pdev->dev, "TX(%d) RING TC: %u\n", i, value);
+		dev_info(&h->pdev->dev, "TX(%u) RING TC: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_TX_RING_TAIL_REG);
-		dev_info(&h->pdev->dev, "TX(%d) RING TAIL: %u\n", i, value);
+		dev_info(&h->pdev->dev, "TX(%u) RING TAIL: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_TX_RING_HEAD_REG);
-		dev_info(&h->pdev->dev, "TX(%d) RING HEAD: %u\n", i, value);
+		dev_info(&h->pdev->dev, "TX(%u) RING HEAD: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_TX_RING_FBDNUM_REG);
-		dev_info(&h->pdev->dev, "TX(%d) RING FBDNUM: %u\n", i, value);
+		dev_info(&h->pdev->dev, "TX(%u) RING FBDNUM: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_TX_RING_OFFSET_REG);
-		dev_info(&h->pdev->dev, "TX(%d) RING OFFSET: %u\n", i, value);
+		dev_info(&h->pdev->dev, "TX(%u) RING OFFSET: %u\n", i, value);
 
 		value = readl_relaxed(ring->tqp->io_base +
 				      HNS3_RING_TX_RING_PKTNUM_RECORD_REG);
-		dev_info(&h->pdev->dev, "TX(%d) RING PKTNUM: %u\n\n", i,
+		dev_info(&h->pdev->dev, "TX(%u) RING PKTNUM: %u\n\n", i,
 			 value);
 	}
 
@@ -130,7 +128,6 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
 static int hns3_dbg_queue_map(struct hnae3_handle *h)
 {
 	struct hns3_nic_priv *priv = h->priv;
-	struct hns3_nic_ring_data *ring_data;
 	int i;
 
 	if (!h->ae_algo->ops->get_global_queue_id)
@@ -143,15 +140,12 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h)
 		u16 global_qid;
 
 		global_qid = h->ae_algo->ops->get_global_queue_id(h, i);
-		ring_data = &priv->ring_data[i];
-		if (!ring_data || !ring_data->ring ||
-		    !ring_data->ring->tqp_vector)
+		if (!priv->ring || !priv->ring[i].tqp_vector)
 			continue;
 
 		dev_info(&h->pdev->dev,
 			 "      %4d            %4d            %4d\n",
-			 i, global_qid,
-			 ring_data->ring->tqp_vector->vector_irq);
+			 i, global_qid, priv->ring[i].tqp_vector->vector_irq);
 	}
 
 	return 0;
@@ -160,7 +154,6 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h)
 static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
 {
 	struct hns3_nic_priv *priv = h->priv;
-	struct hns3_nic_ring_data *ring_data;
 	struct hns3_desc *rx_desc, *tx_desc;
 	struct device *dev = &h->pdev->dev;
 	struct hns3_enet_ring *ring;
@@ -183,8 +176,7 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
 		return -EINVAL;
 	}
 
-	ring_data = priv->ring_data;
-	ring  = ring_data[q_num].ring;
+	ring  = &priv->ring[q_num];
 	value = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
 	tx_index = (cnt == 1) ? value : tx_index;
 
@@ -198,23 +190,26 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
 	addr = le64_to_cpu(tx_desc->addr);
 	dev_info(dev, "TX Queue Num: %u, BD Index: %u\n", q_num, tx_index);
 	dev_info(dev, "(TX)addr: %pad\n", &addr);
-	dev_info(dev, "(TX)vlan_tag: %u\n", tx_desc->tx.vlan_tag);
-	dev_info(dev, "(TX)send_size: %u\n", tx_desc->tx.send_size);
+	dev_info(dev, "(TX)vlan_tag: %u\n", le16_to_cpu(tx_desc->tx.vlan_tag));
+	dev_info(dev, "(TX)send_size: %u\n",
+		 le16_to_cpu(tx_desc->tx.send_size));
 	dev_info(dev, "(TX)vlan_tso: %u\n", tx_desc->tx.type_cs_vlan_tso);
 	dev_info(dev, "(TX)l2_len: %u\n", tx_desc->tx.l2_len);
 	dev_info(dev, "(TX)l3_len: %u\n", tx_desc->tx.l3_len);
 	dev_info(dev, "(TX)l4_len: %u\n", tx_desc->tx.l4_len);
-	dev_info(dev, "(TX)vlan_tag: %u\n", tx_desc->tx.outer_vlan_tag);
-	dev_info(dev, "(TX)tv: %u\n", tx_desc->tx.tv);
+	dev_info(dev, "(TX)vlan_tag: %u\n",
+		 le16_to_cpu(tx_desc->tx.outer_vlan_tag));
+	dev_info(dev, "(TX)tv: %u\n", le16_to_cpu(tx_desc->tx.tv));
 	dev_info(dev, "(TX)vlan_msec: %u\n", tx_desc->tx.ol_type_vlan_msec);
 	dev_info(dev, "(TX)ol2_len: %u\n", tx_desc->tx.ol2_len);
 	dev_info(dev, "(TX)ol3_len: %u\n", tx_desc->tx.ol3_len);
 	dev_info(dev, "(TX)ol4_len: %u\n", tx_desc->tx.ol4_len);
-	dev_info(dev, "(TX)paylen: %u\n", tx_desc->tx.paylen);
-	dev_info(dev, "(TX)vld_ra_ri: %u\n", tx_desc->tx.bdtp_fe_sc_vld_ra_ri);
-	dev_info(dev, "(TX)mss: %u\n", tx_desc->tx.mss);
+	dev_info(dev, "(TX)paylen: %u\n", le32_to_cpu(tx_desc->tx.paylen));
+	dev_info(dev, "(TX)vld_ra_ri: %u\n",
+		 le16_to_cpu(tx_desc->tx.bdtp_fe_sc_vld_ra_ri));
+	dev_info(dev, "(TX)mss: %u\n", le16_to_cpu(tx_desc->tx.mss));
 
-	ring  = ring_data[q_num + h->kinfo.num_tqps].ring;
+	ring  = &priv->ring[q_num + h->kinfo.num_tqps];
 	value = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_TAIL_REG);
 	rx_index = (cnt == 1) ? value : tx_index;
 	rx_desc	 = &ring->desc[rx_index];
@@ -222,15 +217,19 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
 	addr = le64_to_cpu(rx_desc->addr);
 	dev_info(dev, "RX Queue Num: %u, BD Index: %u\n", q_num, rx_index);
 	dev_info(dev, "(RX)addr: %pad\n", &addr);
-	dev_info(dev, "(RX)l234_info: %u\n", rx_desc->rx.l234_info);
-	dev_info(dev, "(RX)pkt_len: %u\n", rx_desc->rx.pkt_len);
-	dev_info(dev, "(RX)size: %u\n", rx_desc->rx.size);
-	dev_info(dev, "(RX)rss_hash: %u\n", rx_desc->rx.rss_hash);
-	dev_info(dev, "(RX)fd_id: %u\n", rx_desc->rx.fd_id);
-	dev_info(dev, "(RX)vlan_tag: %u\n", rx_desc->rx.vlan_tag);
-	dev_info(dev, "(RX)o_dm_vlan_id_fb: %u\n", rx_desc->rx.o_dm_vlan_id_fb);
-	dev_info(dev, "(RX)ot_vlan_tag: %u\n", rx_desc->rx.ot_vlan_tag);
-	dev_info(dev, "(RX)bd_base_info: %u\n", rx_desc->rx.bd_base_info);
+	dev_info(dev, "(RX)l234_info: %u\n",
+		 le32_to_cpu(rx_desc->rx.l234_info));
+	dev_info(dev, "(RX)pkt_len: %u\n", le16_to_cpu(rx_desc->rx.pkt_len));
+	dev_info(dev, "(RX)size: %u\n", le16_to_cpu(rx_desc->rx.size));
+	dev_info(dev, "(RX)rss_hash: %u\n", le32_to_cpu(rx_desc->rx.rss_hash));
+	dev_info(dev, "(RX)fd_id: %u\n", le16_to_cpu(rx_desc->rx.fd_id));
+	dev_info(dev, "(RX)vlan_tag: %u\n", le16_to_cpu(rx_desc->rx.vlan_tag));
+	dev_info(dev, "(RX)o_dm_vlan_id_fb: %u\n",
+		 le16_to_cpu(rx_desc->rx.o_dm_vlan_id_fb));
+	dev_info(dev, "(RX)ot_vlan_tag: %u\n",
+		 le16_to_cpu(rx_desc->rx.ot_vlan_tag));
+	dev_info(dev, "(RX)bd_base_info: %u\n",
+		 le32_to_cpu(rx_desc->rx.bd_base_info));
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 616cad0faa21..ba0536802b13 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -483,7 +483,7 @@ static void hns3_reset_tx_queue(struct hnae3_handle *h)
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
 		dev_queue = netdev_get_tx_queue(ndev,
-						priv->ring_data[i].queue_index);
+						priv->ring[i].queue_index);
 		netdev_tx_reset_queue(dev_queue);
 	}
 }
@@ -681,7 +681,7 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen,
 		return 0;
 
 	ret = skb_cow_head(skb, 0);
-	if (unlikely(ret))
+	if (unlikely(ret < 0))
 		return ret;
 
 	l3.hdr = skb_network_header(skb);
@@ -962,14 +962,6 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
 	return 0;
 }
 
-static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
-{
-	/* Config bd buffer end */
-	if (!!frag_end)
-		hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, 1U);
-	hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1U);
-}
-
 static int hns3_handle_vtags(struct hns3_enet_ring *tx_ring,
 			     struct sk_buff *skb)
 {
@@ -1062,7 +1054,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 		skb_reset_mac_len(skb);
 
 		ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
-		if (unlikely(ret)) {
+		if (unlikely(ret < 0)) {
 			u64_stats_update_begin(&ring->syncp);
 			ring->stats.tx_l4_proto_err++;
 			u64_stats_update_end(&ring->syncp);
@@ -1072,7 +1064,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 		ret = hns3_set_l2l3l4(skb, ol4_proto, il4_proto,
 				      &type_cs_vlan_tso,
 				      &ol_type_vlan_len_msec);
-		if (unlikely(ret)) {
+		if (unlikely(ret < 0)) {
 			u64_stats_update_begin(&ring->syncp);
 			ring->stats.tx_l2l3l4_err++;
 			u64_stats_update_end(&ring->syncp);
@@ -1081,7 +1073,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 
 		ret = hns3_set_tso(skb, &paylen, &mss,
 				   &type_cs_vlan_tso);
-		if (unlikely(ret)) {
+		if (unlikely(ret < 0)) {
 			u64_stats_update_begin(&ring->syncp);
 			ring->stats.tx_tso_err++;
 			u64_stats_update_end(&ring->syncp);
@@ -1102,9 +1094,10 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 }
 
 static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
-			  unsigned int size, int frag_end,
-			  enum hns_desc_type type)
+			  unsigned int size, enum hns_desc_type type)
 {
+#define HNS3_LIKELY_BD_NUM	1
+
 	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
 	struct hns3_desc *desc = &ring->desc[ring->next_to_use];
 	struct device *dev = ring_to_dev(ring);
@@ -1118,7 +1111,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 		int ret;
 
 		ret = hns3_fill_skb_desc(ring, skb, desc);
-		if (unlikely(ret))
+		if (unlikely(ret < 0))
 			return ret;
 
 		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
@@ -1137,19 +1130,16 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 	desc_cb->length = size;
 
 	if (likely(size <= HNS3_MAX_BD_SIZE)) {
-		u16 bdtp_fe_sc_vld_ra_ri = 0;
-
 		desc_cb->priv = priv;
 		desc_cb->dma = dma;
 		desc_cb->type = type;
 		desc->addr = cpu_to_le64(dma);
 		desc->tx.send_size = cpu_to_le16(size);
-		hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end);
 		desc->tx.bdtp_fe_sc_vld_ra_ri =
-			cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+			cpu_to_le16(BIT(HNS3_TXD_VLD_B));
 
 		ring_ptr_move_fw(ring, next_to_use);
-		return 0;
+		return HNS3_LIKELY_BD_NUM;
 	}
 
 	frag_buf_num = hns3_tx_bd_count(size);
@@ -1158,8 +1148,6 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 
 	/* When frag size is bigger than hardware limit, split this frag */
 	for (k = 0; k < frag_buf_num; k++) {
-		u16 bdtp_fe_sc_vld_ra_ri = 0;
-
 		/* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */
 		desc_cb->priv = priv;
 		desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k;
@@ -1170,11 +1158,8 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 		desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k);
 		desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ?
 				     (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE);
-		hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri,
-				       frag_end && (k == frag_buf_num - 1) ?
-						1 : 0);
 		desc->tx.bdtp_fe_sc_vld_ra_ri =
-				cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+				cpu_to_le16(BIT(HNS3_TXD_VLD_B));
 
 		/* move ring pointer to next */
 		ring_ptr_move_fw(ring, next_to_use);
@@ -1183,23 +1168,78 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 		desc = &ring->desc[ring->next_to_use];
 	}
 
-	return 0;
+	return frag_buf_num;
 }
 
-static unsigned int hns3_nic_bd_num(struct sk_buff *skb)
+static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size,
+				    unsigned int bd_num)
 {
-	unsigned int bd_num;
+	unsigned int size;
 	int i;
 
-	/* if the total len is within the max bd limit */
-	if (likely(skb->len <= HNS3_MAX_BD_SIZE))
-		return skb_shinfo(skb)->nr_frags + 1;
+	size = skb_headlen(skb);
+	while (size > HNS3_MAX_BD_SIZE) {
+		bd_size[bd_num++] = HNS3_MAX_BD_SIZE;
+		size -= HNS3_MAX_BD_SIZE;
 
-	bd_num = hns3_tx_bd_count(skb_headlen(skb));
+		if (bd_num > HNS3_MAX_TSO_BD_NUM)
+			return bd_num;
+	}
+
+	if (size) {
+		bd_size[bd_num++] = size;
+		if (bd_num > HNS3_MAX_TSO_BD_NUM)
+			return bd_num;
+	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		bd_num += hns3_tx_bd_count(skb_frag_size(frag));
+		size = skb_frag_size(frag);
+		if (!size)
+			continue;
+
+		while (size > HNS3_MAX_BD_SIZE) {
+			bd_size[bd_num++] = HNS3_MAX_BD_SIZE;
+			size -= HNS3_MAX_BD_SIZE;
+
+			if (bd_num > HNS3_MAX_TSO_BD_NUM)
+				return bd_num;
+		}
+
+		bd_size[bd_num++] = size;
+		if (bd_num > HNS3_MAX_TSO_BD_NUM)
+			return bd_num;
+	}
+
+	return bd_num;
+}
+
+static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size)
+{
+	struct sk_buff *frag_skb;
+	unsigned int bd_num = 0;
+
+	/* If the total len is within the max bd limit */
+	if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) &&
+		   skb_shinfo(skb)->nr_frags < HNS3_MAX_NON_TSO_BD_NUM))
+		return skb_shinfo(skb)->nr_frags + 1U;
+
+	/* The below case will always be linearized, return
+	 * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized.
+	 */
+	if (unlikely(skb->len > HNS3_MAX_TSO_SIZE ||
+		     (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE)))
+		return HNS3_MAX_TSO_BD_NUM + 1U;
+
+	bd_num = hns3_skb_bd_num(skb, bd_size, bd_num);
+
+	if (!skb_has_frag_list(skb) || bd_num > HNS3_MAX_TSO_BD_NUM)
+		return bd_num;
+
+	skb_walk_frags(skb, frag_skb) {
+		bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num);
+		if (bd_num > HNS3_MAX_TSO_BD_NUM)
+			return bd_num;
 	}
 
 	return bd_num;
@@ -1218,26 +1258,26 @@ static unsigned int hns3_gso_hdr_len(struct sk_buff *skb)
  * 7 frags to to be larger than gso header len + mss, and the remaining
  * continuous 7 frags to be larger than MSS except the last 7 frags.
  */
-static bool hns3_skb_need_linearized(struct sk_buff *skb)
+static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size,
+				     unsigned int bd_num)
 {
-	int bd_limit = HNS3_MAX_BD_NUM_NORMAL - 1;
 	unsigned int tot_len = 0;
 	int i;
 
-	for (i = 0; i < bd_limit; i++)
-		tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
+	for (i = 0; i < HNS3_MAX_NON_TSO_BD_NUM - 1U; i++)
+		tot_len += bd_size[i];
 
-	/* ensure headlen + the first 7 frags is greater than mss + header
-	 * and the first 7 frags is greater than mss.
-	 */
-	if (((tot_len + skb_headlen(skb)) < (skb_shinfo(skb)->gso_size +
-	    hns3_gso_hdr_len(skb))) || (tot_len < skb_shinfo(skb)->gso_size))
+	/* ensure the first 8 frags is greater than mss + header */
+	if (tot_len + bd_size[HNS3_MAX_NON_TSO_BD_NUM - 1U] <
+	    skb_shinfo(skb)->gso_size + hns3_gso_hdr_len(skb))
 		return true;
 
-	/* ensure the remaining continuous 7 buffer is greater than mss */
-	for (i = 0; i < (skb_shinfo(skb)->nr_frags - bd_limit - 1); i++) {
-		tot_len -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
-		tot_len += skb_frag_size(&skb_shinfo(skb)->frags[i + bd_limit]);
+	/* ensure every continuous 7 buffer is greater than mss
+	 * except the last one.
+	 */
+	for (i = 0; i < bd_num - HNS3_MAX_NON_TSO_BD_NUM; i++) {
+		tot_len -= bd_size[i];
+		tot_len += bd_size[i + HNS3_MAX_NON_TSO_BD_NUM - 1U];
 
 		if (tot_len < skb_shinfo(skb)->gso_size)
 			return true;
@@ -1249,15 +1289,16 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb)
 static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
 				  struct sk_buff **out_skb)
 {
+	unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U];
 	struct sk_buff *skb = *out_skb;
 	unsigned int bd_num;
 
-	bd_num = hns3_nic_bd_num(skb);
-	if (unlikely(bd_num > HNS3_MAX_BD_NUM_NORMAL)) {
+	bd_num = hns3_tx_bd_num(skb, bd_size);
+	if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) {
 		struct sk_buff *new_skb;
 
-		if (skb_is_gso(skb) && bd_num <= HNS3_MAX_BD_NUM_TSO &&
-		    !hns3_skb_need_linearized(skb))
+		if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) &&
+		    !hns3_skb_need_linearized(skb, bd_size, bd_num))
 			goto out;
 
 		/* manual split the send packet */
@@ -1267,9 +1308,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
 		dev_kfree_skb_any(skb);
 		*out_skb = new_skb;
 
-		bd_num = hns3_nic_bd_num(new_skb);
-		if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_TSO) ||
-		    (!skb_is_gso(new_skb) && bd_num > HNS3_MAX_BD_NUM_NORMAL))
+		bd_num = hns3_tx_bd_count(new_skb->len);
+		if ((skb_is_gso(new_skb) && bd_num > HNS3_MAX_TSO_BD_NUM) ||
+		    (!skb_is_gso(new_skb) &&
+		     bd_num > HNS3_MAX_NON_TSO_BD_NUM))
 			return -ENOMEM;
 
 		u64_stats_update_begin(&ring->syncp);
@@ -1314,73 +1356,98 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 	}
 }
 
+static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
+				 struct sk_buff *skb, enum hns_desc_type type)
+{
+	unsigned int size = skb_headlen(skb);
+	int i, ret, bd_num = 0;
+
+	if (size) {
+		ret = hns3_fill_desc(ring, skb, size, type);
+		if (unlikely(ret < 0))
+			return ret;
+
+		bd_num += ret;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		size = skb_frag_size(frag);
+		if (!size)
+			continue;
+
+		ret = hns3_fill_desc(ring, frag, size, DESC_TYPE_PAGE);
+		if (unlikely(ret < 0))
+			return ret;
+
+		bd_num += ret;
+	}
+
+	return bd_num;
+}
+
 netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hns3_nic_ring_data *ring_data =
-		&tx_ring_data(priv, skb->queue_mapping);
-	struct hns3_enet_ring *ring = ring_data->ring;
+	struct hns3_enet_ring *ring = &priv->ring[skb->queue_mapping];
 	struct netdev_queue *dev_queue;
-	skb_frag_t *frag;
-	int next_to_use_head;
-	int buf_num;
-	int seg_num;
-	int size;
+	int pre_ntu, next_to_use_head;
+	struct sk_buff *frag_skb;
+	int bd_num = 0;
 	int ret;
-	int i;
 
 	/* Prefetch the data used later */
 	prefetch(skb->data);
 
-	buf_num = hns3_nic_maybe_stop_tx(ring, &skb);
-	if (unlikely(buf_num <= 0)) {
-		if (buf_num == -EBUSY) {
+	ret = hns3_nic_maybe_stop_tx(ring, &skb);
+	if (unlikely(ret <= 0)) {
+		if (ret == -EBUSY) {
 			u64_stats_update_begin(&ring->syncp);
 			ring->stats.tx_busy++;
 			u64_stats_update_end(&ring->syncp);
 			goto out_net_tx_busy;
-		} else if (buf_num == -ENOMEM) {
+		} else if (ret == -ENOMEM) {
 			u64_stats_update_begin(&ring->syncp);
 			ring->stats.sw_err_cnt++;
 			u64_stats_update_end(&ring->syncp);
 		}
 
-		hns3_rl_err(netdev, "xmit error: %d!\n", buf_num);
+		hns3_rl_err(netdev, "xmit error: %d!\n", ret);
 		goto out_err_tx_ok;
 	}
 
-	/* No. of segments (plus a header) */
-	seg_num = skb_shinfo(skb)->nr_frags + 1;
-	/* Fill the first part */
-	size = skb_headlen(skb);
-
 	next_to_use_head = ring->next_to_use;
 
-	ret = hns3_fill_desc(ring, skb, size, seg_num == 1 ? 1 : 0,
-			     DESC_TYPE_SKB);
-	if (unlikely(ret))
+	ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
+	if (unlikely(ret < 0))
 		goto fill_err;
 
-	/* Fill the fragments */
-	for (i = 1; i < seg_num; i++) {
-		frag = &skb_shinfo(skb)->frags[i - 1];
-		size = skb_frag_size(frag);
+	bd_num += ret;
 
-		ret = hns3_fill_desc(ring, frag, size,
-				     seg_num - 1 == i ? 1 : 0,
-				     DESC_TYPE_PAGE);
+	if (!skb_has_frag_list(skb))
+		goto out;
 
-		if (unlikely(ret))
+	skb_walk_frags(skb, frag_skb) {
+		ret = hns3_fill_skb_to_desc(ring, frag_skb, DESC_TYPE_PAGE);
+		if (unlikely(ret < 0))
 			goto fill_err;
+
+		bd_num += ret;
 	}
+out:
+	pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) :
+					(ring->desc_num - 1);
+	ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |=
+				cpu_to_le16(BIT(HNS3_TXD_FE_B));
 
 	/* Complete translate all packets */
-	dev_queue = netdev_get_tx_queue(netdev, ring_data->queue_index);
+	dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
 	netdev_tx_sent_queue(dev_queue, skb->len);
 
 	wmb(); /* Commit all data before submit */
 
-	hnae3_queue_xmit(ring->tqp, buf_num);
+	hnae3_queue_xmit(ring->tqp, bd_num);
 
 	return NETDEV_TX_OK;
 
@@ -1392,7 +1459,7 @@ out_err_tx_ok:
 	return NETDEV_TX_OK;
 
 out_net_tx_busy:
-	netif_stop_subqueue(netdev, ring_data->queue_index);
+	netif_stop_subqueue(netdev, ring->queue_index);
 	smp_mb(); /* Commit all data before submit */
 
 	return NETDEV_TX_BUSY;
@@ -1413,6 +1480,16 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
 		return 0;
 	}
 
+	/* For VF device, if there is a perm_addr, then the user will not
+	 * be allowed to change the address.
+	 */
+	if (!hns3_is_phys_func(h->pdev) &&
+	    !is_zero_ether_addr(netdev->perm_addr)) {
+		netdev_err(netdev, "has permanent MAC %pM, user MAC %pM not allow\n",
+			   netdev->perm_addr, mac_addr->sa_data);
+		return -EPERM;
+	}
+
 	ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data, false);
 	if (ret) {
 		netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret);
@@ -1505,7 +1582,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 
 	for (idx = 0; idx < queue_num; idx++) {
 		/* fetch the tx stats */
-		ring = priv->ring_data[idx].ring;
+		ring = &priv->ring[idx];
 		do {
 			start = u64_stats_fetch_begin_irq(&ring->syncp);
 			tx_bytes += ring->stats.tx_bytes;
@@ -1523,7 +1600,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		/* fetch the rx stats */
-		ring = priv->ring_data[idx + queue_num].ring;
+		ring = &priv->ring[idx + queue_num];
 		do {
 			start = u64_stats_fetch_begin_irq(&ring->syncp);
 			rx_bytes += ring->stats.rx_bytes;
@@ -1633,8 +1710,8 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 	int ret = -EIO;
 
 	netif_dbg(h, drv, netdev,
-		  "set vf vlan: vf=%d, vlan=%u, qos=%u, vlan_proto=%u\n",
-		  vf, vlan, qos, vlan_proto);
+		  "set vf vlan: vf=%d, vlan=%u, qos=%u, vlan_proto=0x%x\n",
+		  vf, vlan, qos, ntohs(vlan_proto));
 
 	if (h->ae_algo->ops->set_vf_vlan_filter)
 		ret = h->ae_algo->ops->set_vf_vlan_filter(h, vf, vlan,
@@ -1643,6 +1720,29 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 	return ret;
 }
 
+static int hns3_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
+{
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+
+	if (hns3_nic_resetting(netdev))
+		return -EBUSY;
+
+	if (!handle->ae_algo->ops->set_vf_spoofchk)
+		return -EOPNOTSUPP;
+
+	return handle->ae_algo->ops->set_vf_spoofchk(handle, vf, enable);
+}
+
+static int hns3_set_vf_trust(struct net_device *netdev, int vf, bool enable)
+{
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+
+	if (!handle->ae_algo->ops->set_vf_trust)
+		return -EOPNOTSUPP;
+
+	return handle->ae_algo->ops->set_vf_trust(handle, vf, enable);
+}
+
 static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -1671,7 +1771,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(ndev);
 	struct hnae3_handle *h = hns3_get_handle(ndev);
-	struct hns3_enet_ring *tx_ring = NULL;
+	struct hns3_enet_ring *tx_ring;
 	struct napi_struct *napi;
 	int timeout_queue = 0;
 	int hw_head, hw_tail;
@@ -1692,6 +1792,9 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 		    time_after(jiffies,
 			       (trans_start + ndev->watchdog_timeo))) {
 			timeout_queue = i;
+			netdev_info(ndev, "queue state: 0x%lx, delta msecs: %u\n",
+				    q->state,
+				    jiffies_to_msecs(jiffies - trans_start));
 			break;
 		}
 	}
@@ -1705,7 +1808,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 
 	priv->tx_timeout_count++;
 
-	tx_ring = priv->ring_data[timeout_queue].ring;
+	tx_ring = &priv->ring[timeout_queue];
 	napi = &tx_ring->tqp_vector->napi;
 
 	netdev_info(ndev,
@@ -1805,6 +1908,57 @@ static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 }
 #endif
 
+static int hns3_nic_get_vf_config(struct net_device *ndev, int vf,
+				  struct ifla_vf_info *ivf)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (!h->ae_algo->ops->get_vf_config)
+		return -EOPNOTSUPP;
+
+	return h->ae_algo->ops->get_vf_config(h, vf, ivf);
+}
+
+static int hns3_nic_set_vf_link_state(struct net_device *ndev, int vf,
+				      int link_state)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (!h->ae_algo->ops->set_vf_link_state)
+		return -EOPNOTSUPP;
+
+	return h->ae_algo->ops->set_vf_link_state(h, vf, link_state);
+}
+
+static int hns3_nic_set_vf_rate(struct net_device *ndev, int vf,
+				int min_tx_rate, int max_tx_rate)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (!h->ae_algo->ops->set_vf_rate)
+		return -EOPNOTSUPP;
+
+	return h->ae_algo->ops->set_vf_rate(h, vf, min_tx_rate, max_tx_rate,
+					    false);
+}
+
+static int hns3_nic_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (!h->ae_algo->ops->set_vf_mac)
+		return -EOPNOTSUPP;
+
+	if (is_multicast_ether_addr(mac)) {
+		netdev_err(netdev,
+			   "Invalid MAC:%pM specified. Could not set MAC\n",
+			   mac);
+		return -EINVAL;
+	}
+
+	return h->ae_algo->ops->set_vf_mac(h, vf_id, mac);
+}
+
 static const struct net_device_ops hns3_nic_netdev_ops = {
 	.ndo_open		= hns3_nic_net_open,
 	.ndo_stop		= hns3_nic_net_stop,
@@ -1820,10 +1974,15 @@ static const struct net_device_ops hns3_nic_netdev_ops = {
 	.ndo_vlan_rx_add_vid	= hns3_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= hns3_vlan_rx_kill_vid,
 	.ndo_set_vf_vlan	= hns3_ndo_set_vf_vlan,
+	.ndo_set_vf_spoofchk	= hns3_set_vf_spoofchk,
+	.ndo_set_vf_trust	= hns3_set_vf_trust,
 #ifdef CONFIG_RFS_ACCEL
 	.ndo_rx_flow_steer	= hns3_rx_flow_steer,
 #endif
-
+	.ndo_get_vf_config	= hns3_nic_get_vf_config,
+	.ndo_set_vf_link_state	= hns3_nic_set_vf_link_state,
+	.ndo_set_vf_rate	= hns3_nic_set_vf_rate,
+	.ndo_set_vf_mac		= hns3_nic_set_vf_mac,
 };
 
 bool hns3_is_phys_func(struct pci_dev *pdev)
@@ -1843,7 +2002,7 @@ bool hns3_is_phys_func(struct pci_dev *pdev)
 	case HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF:
 		return false;
 	default:
-		dev_warn(&pdev->dev, "un-recognized pci device-id %d",
+		dev_warn(&pdev->dev, "un-recognized pci device-id %u",
 			 dev_id);
 	}
 
@@ -2069,9 +2228,8 @@ static void hns3_set_default_feature(struct net_device *netdev)
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
-
-	netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC |
+		NETIF_F_TSO_MANGLEID | NETIF_F_FRAGLIST;
 
 	netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
 
@@ -2081,21 +2239,24 @@ static void hns3_set_default_feature(struct net_device *netdev)
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
+		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC |
+		NETIF_F_FRAGLIST;
 
 	netdev->vlan_features |=
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
 		NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO |
 		NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
+		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC |
+		NETIF_F_FRAGLIST;
 
 	netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC;
+		NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC |
+		NETIF_F_FRAGLIST;
 
 	if (pdev->revision >= 0x21) {
 		netdev->hw_features |= NETIF_F_GRO_HW;
@@ -2320,18 +2481,19 @@ static int is_valid_clean_head(struct hns3_enet_ring *ring, int h)
 
 void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
 {
-	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	struct net_device *netdev = ring_to_netdev(ring);
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct netdev_queue *dev_queue;
 	int bytes, pkts;
 	int head;
 
 	head = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG);
-	rmb(); /* Make sure head is ready before touch any data */
 
 	if (is_ring_empty(ring) || head == ring->next_to_clean)
 		return; /* no data to poll */
 
+	rmb(); /* Make sure head is ready before touch any data */
+
 	if (unlikely(!is_valid_clean_head(ring, head))) {
 		netdev_err(netdev, "wrong head (%d, %d-%d)\n", head,
 			   ring->next_to_use, ring->next_to_clean);
@@ -2358,7 +2520,7 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring)
 	netdev_tx_completed_queue(dev_queue, pkts, bytes);
 
 	if (unlikely(pkts && netif_carrier_ok(netdev) &&
-		     (ring_space(ring) > HNS3_MAX_BD_PER_PKT))) {
+		     ring_space(ring) > HNS3_MAX_TSO_BD_NUM)) {
 		/* Make sure that anybody stopping the queue after this
 		 * sees the new next_to_clean.
 		 */
@@ -2401,7 +2563,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
 				ring->stats.sw_err_cnt++;
 				u64_stats_update_end(&ring->syncp);
 
-				hns3_rl_err(ring->tqp_vector->napi.dev,
+				hns3_rl_err(ring_to_netdev(ring),
 					    "alloc rx buffer failed: %d\n",
 					    ret);
 				break;
@@ -2510,7 +2672,7 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
 static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
 			     u32 l234info, u32 bd_base_info, u32 ol_info)
 {
-	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	struct net_device *netdev = ring_to_netdev(ring);
 	int l3_type, l4_type;
 	int ol4_type;
 
@@ -2626,7 +2788,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
 {
 #define HNS3_NEED_ADD_FRAG	1
 	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean];
-	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	struct net_device *netdev = ring_to_netdev(ring);
 	struct sk_buff *skb;
 
 	ring->skb = napi_alloc_skb(&ring->tqp_vector->napi, HNS3_RX_HEAD_SIZE);
@@ -2672,10 +2834,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
 }
 
 static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
-			 struct sk_buff **out_skb, bool pending)
+			 bool pending)
 {
-	struct sk_buff *skb = *out_skb;
-	struct sk_buff *head_skb = *out_skb;
+	struct sk_buff *skb = ring->skb;
+	struct sk_buff *head_skb = skb;
 	struct sk_buff *new_skb;
 	struct hns3_desc_cb *desc_cb;
 	struct hns3_desc *pre_desc;
@@ -2704,10 +2866,9 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
 			return -ENXIO;
 
 		if (unlikely(ring->frag_num >= MAX_SKB_FRAGS)) {
-			new_skb = napi_alloc_skb(&ring->tqp_vector->napi,
-						 HNS3_RX_HEAD_SIZE);
+			new_skb = napi_alloc_skb(&ring->tqp_vector->napi, 0);
 			if (unlikely(!new_skb)) {
-				hns3_rl_err(ring->tqp_vector->napi.dev,
+				hns3_rl_err(ring_to_netdev(ring),
 					    "alloc rx fraglist skb fail\n");
 				return -ENXIO;
 			}
@@ -2783,7 +2944,7 @@ static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring,
 
 static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
 {
-	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+	struct net_device *netdev = ring_to_netdev(ring);
 	enum hns3_pkt_l2t_type l2_frame_type;
 	u32 bd_base_info, l234info, ol_info;
 	struct hns3_desc *desc;
@@ -2858,8 +3019,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
 	return 0;
 }
 
-static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
-			     struct sk_buff **out_skb)
+static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
 {
 	struct sk_buff *skb = ring->skb;
 	struct hns3_desc_cb *desc_cb;
@@ -2897,12 +3057,12 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
 
 	if (!skb) {
 		ret = hns3_alloc_skb(ring, length, ring->va);
-		*out_skb = skb = ring->skb;
+		skb = ring->skb;
 
 		if (ret < 0) /* alloc buffer fail */
 			return ret;
 		if (ret > 0) { /* need add frag */
-			ret = hns3_add_frag(ring, desc, &skb, false);
+			ret = hns3_add_frag(ring, desc, false);
 			if (ret)
 				return ret;
 
@@ -2913,7 +3073,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
 			       ALIGN(ring->pull_len, sizeof(long)));
 		}
 	} else {
-		ret = hns3_add_frag(ring, desc, &skb, true);
+		ret = hns3_add_frag(ring, desc, true);
 		if (ret)
 			return ret;
 
@@ -2931,8 +3091,6 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
 	}
 
 	skb_record_rx_queue(skb, ring->tqp->tqp_index);
-	*out_skb = skb;
-
 	return 0;
 }
 
@@ -2941,17 +3099,19 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget,
 {
 #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
 	int unused_count = hns3_desc_unused(ring);
-	struct sk_buff *skb = ring->skb;
 	int recv_pkts = 0;
 	int recv_bds = 0;
 	int err, num;
 
 	num = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG);
-	rmb(); /* Make sure num taken effect before the other data is touched */
-
 	num -= unused_count;
 	unused_count -= ring->pending_buf;
 
+	if (num <= 0)
+		goto out;
+
+	rmb(); /* Make sure num taken effect before the other data is touched */
+
 	while (recv_pkts < budget && recv_bds < num) {
 		/* Reuse or realloc buffers */
 		if (unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
@@ -2961,27 +3121,19 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget,
 		}
 
 		/* Poll one pkt */
-		err = hns3_handle_rx_bd(ring, &skb);
-		if (unlikely(!skb)) /* This fault cannot be repaired */
-			goto out;
-
-		if (err == -ENXIO) { /* Do not get FE for the packet */
+		err = hns3_handle_rx_bd(ring);
+		/* Do not get FE for the packet or failed to alloc skb */
+		if (unlikely(!ring->skb || err == -ENXIO)) {
 			goto out;
-		} else if (unlikely(err)) {  /* Do jump the err */
-			recv_bds += ring->pending_buf;
-			unused_count += ring->pending_buf;
-			ring->skb = NULL;
-			ring->pending_buf = 0;
-			continue;
+		} else if (likely(!err)) {
+			rx_fn(ring, ring->skb);
+			recv_pkts++;
 		}
 
-		rx_fn(ring, skb);
 		recv_bds += ring->pending_buf;
 		unused_count += ring->pending_buf;
 		ring->skb = NULL;
 		ring->pending_buf = 0;
-
-		recv_pkts++;
 	}
 
 out:
@@ -3324,13 +3476,13 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 		tqp_vector = &priv->tqp_vector[vector_i];
 
 		hns3_add_ring_to_group(&tqp_vector->tx_group,
-				       priv->ring_data[i].ring);
+				       &priv->ring[i]);
 
 		hns3_add_ring_to_group(&tqp_vector->rx_group,
-				       priv->ring_data[i + tqp_num].ring);
+				       &priv->ring[i + tqp_num]);
 
-		priv->ring_data[i].ring->tqp_vector = tqp_vector;
-		priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+		priv->ring[i].tqp_vector = tqp_vector;
+		priv->ring[i + tqp_num].tqp_vector = tqp_vector;
 		tqp_vector->num_tqps++;
 	}
 
@@ -3474,28 +3626,22 @@ static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
 	return 0;
 }
 
-static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
-			     unsigned int ring_type)
+static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
+			      unsigned int ring_type)
 {
-	struct hns3_nic_ring_data *ring_data = priv->ring_data;
 	int queue_num = priv->ae_handle->kinfo.num_tqps;
-	struct pci_dev *pdev = priv->ae_handle->pdev;
 	struct hns3_enet_ring *ring;
 	int desc_num;
 
-	ring = devm_kzalloc(&pdev->dev, sizeof(*ring), GFP_KERNEL);
-	if (!ring)
-		return -ENOMEM;
-
 	if (ring_type == HNAE3_RING_TYPE_TX) {
+		ring = &priv->ring[q->tqp_index];
 		desc_num = priv->ae_handle->kinfo.num_tx_desc;
-		ring_data[q->tqp_index].ring = ring;
-		ring_data[q->tqp_index].queue_index = q->tqp_index;
+		ring->queue_index = q->tqp_index;
 		ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET;
 	} else {
+		ring = &priv->ring[q->tqp_index + queue_num];
 		desc_num = priv->ae_handle->kinfo.num_rx_desc;
-		ring_data[q->tqp_index + queue_num].ring = ring;
-		ring_data[q->tqp_index + queue_num].queue_index = q->tqp_index;
+		ring->queue_index = q->tqp_index;
 		ring->io_base = q->io_base;
 	}
 
@@ -3510,76 +3656,41 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 	ring->desc_num = desc_num;
 	ring->next_to_use = 0;
 	ring->next_to_clean = 0;
-
-	return 0;
 }
 
-static int hns3_queue_to_ring(struct hnae3_queue *tqp,
-			      struct hns3_nic_priv *priv)
+static void hns3_queue_to_ring(struct hnae3_queue *tqp,
+			       struct hns3_nic_priv *priv)
 {
-	int ret;
-
-	ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_TX);
-	if (ret)
-		return ret;
-
-	ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
-	if (ret) {
-		devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring);
-		return ret;
-	}
-
-	return 0;
+	hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_TX);
+	hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
 }
 
 static int hns3_get_ring_config(struct hns3_nic_priv *priv)
 {
 	struct hnae3_handle *h = priv->ae_handle;
 	struct pci_dev *pdev = h->pdev;
-	int i, ret;
+	int i;
 
-	priv->ring_data =  devm_kzalloc(&pdev->dev,
-					array3_size(h->kinfo.num_tqps,
-						    sizeof(*priv->ring_data),
-						    2),
-					GFP_KERNEL);
-	if (!priv->ring_data)
+	priv->ring = devm_kzalloc(&pdev->dev,
+				  array3_size(h->kinfo.num_tqps,
+					      sizeof(*priv->ring), 2),
+				  GFP_KERNEL);
+	if (!priv->ring)
 		return -ENOMEM;
 
-	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		ret = hns3_queue_to_ring(h->kinfo.tqp[i], priv);
-		if (ret)
-			goto err;
-	}
+	for (i = 0; i < h->kinfo.num_tqps; i++)
+		hns3_queue_to_ring(h->kinfo.tqp[i], priv);
 
 	return 0;
-err:
-	while (i--) {
-		devm_kfree(priv->dev, priv->ring_data[i].ring);
-		devm_kfree(priv->dev,
-			   priv->ring_data[i + h->kinfo.num_tqps].ring);
-	}
-
-	devm_kfree(&pdev->dev, priv->ring_data);
-	priv->ring_data = NULL;
-	return ret;
 }
 
 static void hns3_put_ring_config(struct hns3_nic_priv *priv)
 {
-	struct hnae3_handle *h = priv->ae_handle;
-	int i;
-
-	if (!priv->ring_data)
+	if (!priv->ring)
 		return;
 
-	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		devm_kfree(priv->dev, priv->ring_data[i].ring);
-		devm_kfree(priv->dev,
-			   priv->ring_data[i + h->kinfo.num_tqps].ring);
-	}
-	devm_kfree(priv->dev, priv->ring_data);
-	priv->ring_data = NULL;
+	devm_kfree(priv->dev, priv->ring);
+	priv->ring = NULL;
 }
 
 static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
@@ -3696,7 +3807,7 @@ static void hns3_init_tx_ring_tc(struct hns3_nic_priv *priv)
 		for (j = 0; j < tc_info->tqp_count; j++) {
 			struct hnae3_queue *q;
 
-			q = priv->ring_data[tc_info->tqp_offset + j].ring->tqp;
+			q = priv->ring[tc_info->tqp_offset + j].tqp;
 			hns3_write_dev(q, HNS3_RING_TX_RING_TC_REG,
 				       tc_info->tc);
 		}
@@ -3711,21 +3822,21 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv)
 	int ret;
 
 	for (i = 0; i < ring_num; i++) {
-		ret = hns3_alloc_ring_memory(priv->ring_data[i].ring);
+		ret = hns3_alloc_ring_memory(&priv->ring[i]);
 		if (ret) {
 			dev_err(priv->dev,
 				"Alloc ring memory fail! ret=%d\n", ret);
 			goto out_when_alloc_ring_memory;
 		}
 
-		u64_stats_init(&priv->ring_data[i].ring->syncp);
+		u64_stats_init(&priv->ring[i].syncp);
 	}
 
 	return 0;
 
 out_when_alloc_ring_memory:
 	for (j = i - 1; j >= 0; j--)
-		hns3_fini_ring(priv->ring_data[j].ring);
+		hns3_fini_ring(&priv->ring[j]);
 
 	return -ENOMEM;
 }
@@ -3736,30 +3847,31 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 	int i;
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		hns3_fini_ring(priv->ring_data[i].ring);
-		hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
+		hns3_fini_ring(&priv->ring[i]);
+		hns3_fini_ring(&priv->ring[i + h->kinfo.num_tqps]);
 	}
 	return 0;
 }
 
 /* Set mac addr if it is configured. or leave it to the AE driver */
-static int hns3_init_mac_addr(struct net_device *netdev, bool init)
+static int hns3_init_mac_addr(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = priv->ae_handle;
 	u8 mac_addr_temp[ETH_ALEN];
 	int ret = 0;
 
-	if (h->ae_algo->ops->get_mac_addr && init) {
+	if (h->ae_algo->ops->get_mac_addr)
 		h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
-		ether_addr_copy(netdev->dev_addr, mac_addr_temp);
-	}
 
 	/* Check if the MAC address is valid, if not get a random one */
-	if (!is_valid_ether_addr(netdev->dev_addr)) {
+	if (!is_valid_ether_addr(mac_addr_temp)) {
 		eth_hw_addr_random(netdev);
 		dev_warn(priv->dev, "using random MAC address %pM\n",
 			 netdev->dev_addr);
+	} else {
+		ether_addr_copy(netdev->dev_addr, mac_addr_temp);
+		ether_addr_copy(netdev->perm_addr, mac_addr_temp);
 	}
 
 	if (h->ae_algo->ops->set_mac_addr)
@@ -3827,14 +3939,14 @@ static void hns3_info_show(struct hns3_nic_priv *priv)
 	struct hnae3_knic_private_info *kinfo = &priv->ae_handle->kinfo;
 
 	dev_info(priv->dev, "MAC address: %pM\n", priv->netdev->dev_addr);
-	dev_info(priv->dev, "Task queue pairs numbers: %d\n", kinfo->num_tqps);
-	dev_info(priv->dev, "RSS size: %d\n", kinfo->rss_size);
-	dev_info(priv->dev, "Allocated RSS size: %d\n", kinfo->req_rss_size);
-	dev_info(priv->dev, "RX buffer length: %d\n", kinfo->rx_buf_len);
-	dev_info(priv->dev, "Desc num per TX queue: %d\n", kinfo->num_tx_desc);
-	dev_info(priv->dev, "Desc num per RX queue: %d\n", kinfo->num_rx_desc);
-	dev_info(priv->dev, "Total number of enabled TCs: %d\n", kinfo->num_tc);
-	dev_info(priv->dev, "Max mtu size: %d\n", priv->netdev->max_mtu);
+	dev_info(priv->dev, "Task queue pairs numbers: %u\n", kinfo->num_tqps);
+	dev_info(priv->dev, "RSS size: %u\n", kinfo->rss_size);
+	dev_info(priv->dev, "Allocated RSS size: %u\n", kinfo->req_rss_size);
+	dev_info(priv->dev, "RX buffer length: %u\n", kinfo->rx_buf_len);
+	dev_info(priv->dev, "Desc num per TX queue: %u\n", kinfo->num_tx_desc);
+	dev_info(priv->dev, "Desc num per RX queue: %u\n", kinfo->num_rx_desc);
+	dev_info(priv->dev, "Total number of enabled TCs: %u\n", kinfo->num_tc);
+	dev_info(priv->dev, "Max mtu size: %u\n", priv->netdev->max_mtu);
 }
 
 static int hns3_client_init(struct hnae3_handle *handle)
@@ -3863,7 +3975,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	handle->kinfo.netdev = netdev;
 	handle->priv = (void *)priv;
 
-	hns3_init_mac_addr(netdev, true);
+	hns3_init_mac_addr(netdev);
 
 	hns3_set_default_feature(netdev);
 
@@ -3897,7 +4009,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	ret = hns3_init_all_ring(priv);
 	if (ret) {
 		ret = -ENOMEM;
-		goto out_init_ring_data;
+		goto out_init_ring;
 	}
 
 	ret = hns3_init_phy(netdev);
@@ -3936,12 +4048,12 @@ out_reg_netdev_fail:
 	hns3_uninit_phy(netdev);
 out_init_phy:
 	hns3_uninit_all_ring(priv);
-out_init_ring_data:
+out_init_ring:
 	hns3_nic_uninit_vector_data(priv);
 out_init_vector_data:
 	hns3_nic_dealloc_vector_data(priv);
 out_alloc_vector_data:
-	priv->ring_data = NULL;
+	priv->ring = NULL;
 out_get_ring_cfg:
 	priv->ae_handle = NULL;
 	free_netdev(netdev);
@@ -4102,7 +4214,7 @@ static int hns3_clear_rx_ring(struct hns3_enet_ring *ring)
 				/* if alloc new buffer fail, exit directly
 				 * and reclear in up flow.
 				 */
-				netdev_warn(ring->tqp->handle->kinfo.netdev,
+				netdev_warn(ring_to_netdev(ring),
 					    "reserve buffer map failed, ret = %d\n",
 					    ret);
 				return ret;
@@ -4148,10 +4260,10 @@ static void hns3_clear_all_ring(struct hnae3_handle *h, bool force)
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
 		struct hns3_enet_ring *ring;
 
-		ring = priv->ring_data[i].ring;
+		ring = &priv->ring[i];
 		hns3_clear_tx_ring(ring);
 
-		ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
+		ring = &priv->ring[i + h->kinfo.num_tqps];
 		/* Continue to clear other rings even if clearing some
 		 * rings failed.
 		 */
@@ -4175,16 +4287,16 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
 		if (ret)
 			return ret;
 
-		hns3_init_ring_hw(priv->ring_data[i].ring);
+		hns3_init_ring_hw(&priv->ring[i]);
 
 		/* We need to clear tx ring here because self test will
 		 * use the ring and will not run down before up
 		 */
-		hns3_clear_tx_ring(priv->ring_data[i].ring);
-		priv->ring_data[i].ring->next_to_clean = 0;
-		priv->ring_data[i].ring->next_to_use = 0;
+		hns3_clear_tx_ring(&priv->ring[i]);
+		priv->ring[i].next_to_clean = 0;
+		priv->ring[i].next_to_use = 0;
 
-		rx_ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
+		rx_ring = &priv->ring[i + h->kinfo.num_tqps];
 		hns3_init_ring_hw(rx_ring);
 		ret = hns3_clear_rx_ring(rx_ring);
 		if (ret)
@@ -4331,7 +4443,7 @@ static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle)
 	bool vlan_filter_enable;
 	int ret;
 
-	ret = hns3_init_mac_addr(netdev, false);
+	ret = hns3_init_mac_addr(netdev);
 	if (ret)
 		return ret;
 
@@ -4454,7 +4566,7 @@ int hns3_set_channels(struct net_device *netdev,
 	if (new_tqp_num > hns3_get_max_available_channels(h) ||
 	    new_tqp_num < 1) {
 		dev_err(&netdev->dev,
-			"Change tqps fail, the tqp range is from 1 to %d",
+			"Change tqps fail, the tqp range is from 1 to %u",
 			hns3_get_max_available_channels(h));
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 5d468ed404a6..9d47abd5c37c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -76,7 +76,7 @@ enum hns3_nic_state {
 #define HNS3_RING_NAME_LEN			16
 #define HNS3_BUFFER_SIZE_2048			2048
 #define HNS3_RING_MAX_PENDING			32760
-#define HNS3_RING_MIN_PENDING			24
+#define HNS3_RING_MIN_PENDING			72
 #define HNS3_RING_BD_MULTIPLE			8
 /* max frame size of mac */
 #define HNS3_MAC_MAX_FRAME			9728
@@ -186,7 +186,7 @@ enum hns3_nic_state {
 #define HNS3_TXD_MSS_S				0
 #define HNS3_TXD_MSS_M				(0x3fff << HNS3_TXD_MSS_S)
 
-#define HNS3_TX_LAST_SIZE_M                    0xffff
+#define HNS3_TX_LAST_SIZE_M			0xffff
 
 #define HNS3_VECTOR_TX_IRQ			BIT_ULL(0)
 #define HNS3_VECTOR_RX_IRQ			BIT_ULL(1)
@@ -195,9 +195,13 @@ enum hns3_nic_state {
 #define HNS3_VECTOR_INITED			1
 
 #define HNS3_MAX_BD_SIZE			65535
-#define HNS3_MAX_BD_NUM_NORMAL			8
-#define HNS3_MAX_BD_NUM_TSO			63
-#define HNS3_MAX_BD_PER_PKT			MAX_SKB_FRAGS
+#define HNS3_MAX_NON_TSO_BD_NUM			8U
+#define HNS3_MAX_TSO_BD_NUM			63U
+#define HNS3_MAX_TSO_SIZE \
+	(HNS3_MAX_BD_SIZE * HNS3_MAX_TSO_BD_NUM)
+
+#define HNS3_MAX_NON_TSO_SIZE \
+	(HNS3_MAX_BD_SIZE * HNS3_MAX_NON_TSO_BD_NUM)
 
 #define HNS3_VECTOR_GL0_OFFSET			0x100
 #define HNS3_VECTOR_GL1_OFFSET			0x200
@@ -309,7 +313,7 @@ struct hns3_desc_cb {
 
 	u16 reuse_flag;
 
-       /* desc type, used by the ring user to mark the type of the priv data */
+	/* desc type, used by the ring user to mark the type of the priv data */
 	u16 type;
 };
 
@@ -405,6 +409,7 @@ struct hns3_enet_ring {
 	struct hns3_enet_ring *next;
 	struct hns3_enet_tqp_vector *tqp_vector;
 	struct hnae3_queue *tqp;
+	int queue_index;
 	struct device *dev; /* will be used for DMA mapping of descriptors */
 
 	/* statistic */
@@ -430,18 +435,7 @@ struct hns3_enet_ring {
 	int pending_buf;
 	struct sk_buff *skb;
 	struct sk_buff *tail_skb;
-};
-
-struct hns_queue;
-
-struct hns3_nic_ring_data {
-	struct hns3_enet_ring *ring;
-	struct napi_struct napi;
-	int queue_index;
-	int (*poll_one)(struct hns3_nic_ring_data *, int, void *);
-	void (*ex_process)(struct hns3_nic_ring_data *, struct sk_buff *);
-	void (*fini_process)(struct hns3_nic_ring_data *);
-};
+} ____cacheline_internodealigned_in_smp;
 
 enum hns3_flow_level_range {
 	HNS3_FLOW_LOW = 0,
@@ -518,7 +512,7 @@ struct hns3_nic_priv {
 	 * the cb for nic to manage the ring buffer, the first half of the
 	 * array is for tx_ring and vice versa for the second half
 	 */
-	struct hns3_nic_ring_data *ring_data;
+	struct hns3_enet_ring *ring;
 	struct hns3_enet_tqp_vector *tqp_vector;
 	u16 vector_num;
 
@@ -613,11 +607,11 @@ static inline bool hns3_nic_resetting(struct net_device *netdev)
 
 #define ring_to_dev(ring) ((ring)->dev)
 
+#define ring_to_netdev(ring)	((ring)->tqp_vector->napi.dev)
+
 #define ring_to_dma_dir(ring) (HNAE3_IS_TX_RING(ring) ? \
 	DMA_TO_DEVICE : DMA_FROM_DEVICE)
 
-#define tx_ring_data(priv, idx) ((priv)->ring_data[idx])
-
 #define hns3_buf_size(_ring) ((_ring)->buf_size)
 
 static inline unsigned int hns3_page_order(struct hns3_enet_ring *ring)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 680c3508876d..b104d3c3b757 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -203,7 +203,7 @@ static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget)
 
 	kinfo = &h->kinfo;
 	for (i = kinfo->num_tqps; i < kinfo->num_tqps * 2; i++) {
-		struct hns3_enet_ring *ring = priv->ring_data[i].ring;
+		struct hns3_enet_ring *ring = &priv->ring[i];
 		struct hns3_enet_ring_group *rx_group;
 		u64 pre_rx_pkt;
 
@@ -226,7 +226,7 @@ static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid,
 	u32 i;
 
 	for (i = start_ringid; i <= end_ringid; i++) {
-		struct hns3_enet_ring *ring = priv->ring_data[i].ring;
+		struct hns3_enet_ring *ring = &priv->ring[i];
 
 		hns3_clean_tx_ring(ring);
 	}
@@ -491,7 +491,7 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
 
 	/* get stats for Tx */
 	for (i = 0; i < kinfo->num_tqps; i++) {
-		ring = nic_priv->ring_data[i].ring;
+		ring = &nic_priv->ring[i];
 		for (j = 0; j < HNS3_TXQ_STATS_COUNT; j++) {
 			stat = (u8 *)ring + hns3_txq_stats[j].stats_offset;
 			*data++ = *(u64 *)stat;
@@ -500,7 +500,7 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
 
 	/* get stats for Rx */
 	for (i = 0; i < kinfo->num_tqps; i++) {
-		ring = nic_priv->ring_data[i + kinfo->num_tqps].ring;
+		ring = &nic_priv->ring[i + kinfo->num_tqps];
 		for (j = 0; j < HNS3_RXQ_STATS_COUNT; j++) {
 			stat = (u8 *)ring + hns3_rxq_stats[j].stats_offset;
 			*data++ = *(u64 *)stat;
@@ -603,8 +603,8 @@ static void hns3_get_ringparam(struct net_device *netdev,
 	param->tx_max_pending = HNS3_RING_MAX_PENDING;
 	param->rx_max_pending = HNS3_RING_MAX_PENDING;
 
-	param->tx_pending = priv->ring_data[0].ring->desc_num;
-	param->rx_pending = priv->ring_data[queue_num].ring->desc_num;
+	param->tx_pending = priv->ring[0].desc_num;
+	param->rx_pending = priv->ring[queue_num].desc_num;
 }
 
 static void hns3_get_pauseparam(struct net_device *netdev,
@@ -906,9 +906,8 @@ static void hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv,
 	h->kinfo.num_rx_desc = rx_desc_num;
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		priv->ring_data[i].ring->desc_num = tx_desc_num;
-		priv->ring_data[i + h->kinfo.num_tqps].ring->desc_num =
-			rx_desc_num;
+		priv->ring[i].desc_num = tx_desc_num;
+		priv->ring[i + h->kinfo.num_tqps].desc_num = rx_desc_num;
 	}
 }
 
@@ -924,7 +923,7 @@ static struct hns3_enet_ring *hns3_backup_ringparam(struct hns3_nic_priv *priv)
 		return NULL;
 
 	for (i = 0; i < handle->kinfo.num_tqps * 2; i++) {
-		memcpy(&tmp_rings[i], priv->ring_data[i].ring,
+		memcpy(&tmp_rings[i], &priv->ring[i],
 		       sizeof(struct hns3_enet_ring));
 		tmp_rings[i].skb = NULL;
 	}
@@ -972,8 +971,8 @@ static int hns3_set_ringparam(struct net_device *ndev,
 	/* Hardware requires that its descriptors must be multiple of eight */
 	new_tx_desc_num = ALIGN(param->tx_pending, HNS3_RING_BD_MULTIPLE);
 	new_rx_desc_num = ALIGN(param->rx_pending, HNS3_RING_BD_MULTIPLE);
-	old_tx_desc_num = priv->ring_data[0].ring->desc_num;
-	old_rx_desc_num = priv->ring_data[queue_num].ring->desc_num;
+	old_tx_desc_num = priv->ring[0].desc_num;
+	old_rx_desc_num = priv->ring[queue_num].desc_num;
 	if (old_tx_desc_num == new_tx_desc_num &&
 	    old_rx_desc_num == new_rx_desc_num)
 		return 0;
@@ -986,7 +985,7 @@ static int hns3_set_ringparam(struct net_device *ndev,
 	}
 
 	netdev_info(ndev,
-		    "Changing Tx/Rx ring depth from %d/%d to %d/%d\n",
+		    "Changing Tx/Rx ring depth from %u/%u to %u/%u\n",
 		    old_tx_desc_num, old_rx_desc_num,
 		    new_tx_desc_num, new_rx_desc_num);
 
@@ -1002,7 +1001,7 @@ static int hns3_set_ringparam(struct net_device *ndev,
 		hns3_change_all_ring_bd_num(priv, old_tx_desc_num,
 					    old_rx_desc_num);
 		for (i = 0; i < h->kinfo.num_tqps * 2; i++)
-			memcpy(priv->ring_data[i].ring, &tmp_rings[i],
+			memcpy(&priv->ring[i], &tmp_rings[i],
 			       sizeof(struct hns3_enet_ring));
 	} else {
 		for (i = 0; i < h->kinfo.num_tqps * 2; i++)
@@ -1098,13 +1097,13 @@ static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue,
 
 	if (queue >= queue_num) {
 		netdev_err(netdev,
-			   "Invalid queue value %d! Queue max id=%d\n",
+			   "Invalid queue value %u! Queue max id=%u\n",
 			   queue, queue_num - 1);
 		return -EINVAL;
 	}
 
-	tx_vector = priv->ring_data[queue].ring->tqp_vector;
-	rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
+	tx_vector = priv->ring[queue].tqp_vector;
+	rx_vector = priv->ring[queue_num + queue].tqp_vector;
 
 	cmd->use_adaptive_tx_coalesce =
 			tx_vector->tx_group.coal.gl_adapt_enable;
@@ -1148,14 +1147,14 @@ static int hns3_check_gl_coalesce_para(struct net_device *netdev,
 	rx_gl = hns3_gl_round_down(cmd->rx_coalesce_usecs);
 	if (rx_gl != cmd->rx_coalesce_usecs) {
 		netdev_info(netdev,
-			    "rx_usecs(%d) rounded down to %d, because it must be multiple of 2.\n",
+			    "rx_usecs(%u) rounded down to %u, because it must be multiple of 2.\n",
 			    cmd->rx_coalesce_usecs, rx_gl);
 	}
 
 	tx_gl = hns3_gl_round_down(cmd->tx_coalesce_usecs);
 	if (tx_gl != cmd->tx_coalesce_usecs) {
 		netdev_info(netdev,
-			    "tx_usecs(%d) rounded down to %d, because it must be multiple of 2.\n",
+			    "tx_usecs(%u) rounded down to %u, because it must be multiple of 2.\n",
 			    cmd->tx_coalesce_usecs, tx_gl);
 	}
 
@@ -1183,7 +1182,7 @@ static int hns3_check_rl_coalesce_para(struct net_device *netdev,
 	rl = hns3_rl_round_down(cmd->rx_coalesce_usecs_high);
 	if (rl != cmd->rx_coalesce_usecs_high) {
 		netdev_info(netdev,
-			    "usecs_high(%d) rounded down to %d, because it must be multiple of 4.\n",
+			    "usecs_high(%u) rounded down to %u, because it must be multiple of 4.\n",
 			    cmd->rx_coalesce_usecs_high, rl);
 	}
 
@@ -1212,7 +1211,7 @@ static int hns3_check_coalesce_para(struct net_device *netdev,
 	if (cmd->use_adaptive_tx_coalesce == 1 ||
 	    cmd->use_adaptive_rx_coalesce == 1) {
 		netdev_info(netdev,
-			    "adaptive-tx=%d and adaptive-rx=%d, tx_usecs or rx_usecs will changed dynamically.\n",
+			    "adaptive-tx=%u and adaptive-rx=%u, tx_usecs or rx_usecs will changed dynamically.\n",
 			    cmd->use_adaptive_tx_coalesce,
 			    cmd->use_adaptive_rx_coalesce);
 	}
@@ -1229,8 +1228,8 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev,
 	struct hnae3_handle *h = priv->ae_handle;
 	int queue_num = h->kinfo.num_tqps;
 
-	tx_vector = priv->ring_data[queue].ring->tqp_vector;
-	rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
+	tx_vector = priv->ring[queue].tqp_vector;
+	rx_vector = priv->ring[queue_num + queue].tqp_vector;
 
 	tx_vector->tx_group.coal.gl_adapt_enable =
 				cmd->use_adaptive_tx_coalesce;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index ecf58cfd253d..940ead3970d1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -145,7 +145,7 @@ static int hclge_cmd_csq_clean(struct hclge_hw *hw)
 	rmb(); /* Make sure head is ready before touch any data */
 
 	if (!is_valid_csq_clean_head(csq, head)) {
-		dev_warn(&hdev->pdev->dev, "wrong cmd head (%d, %d-%d)\n", head,
+		dev_warn(&hdev->pdev->dev, "wrong cmd head (%u, %d-%d)\n", head,
 			 csq->next_to_use, csq->next_to_clean);
 		dev_warn(&hdev->pdev->dev,
 			 "Disabling any further commands to IMP firmware\n");
@@ -314,11 +314,10 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 		} while (timeout < hw->cmq.tx_timeout);
 	}
 
-	if (!complete) {
+	if (!complete)
 		retval = -EBADE;
-	} else {
+	else
 		retval = hclge_cmd_check_retval(hw, desc, num, ntc);
-	}
 
 	/* Clean the command send queue */
 	handle = hclge_cmd_csq_clean(hw);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 1426eb5ddf3d..d97da67f07a1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -5,8 +5,10 @@
 #define __HCLGE_CMD_H
 #include <linux/types.h>
 #include <linux/io.h>
+#include <linux/etherdevice.h>
 
 #define HCLGE_CMDQ_TX_TIMEOUT		30000
+#define HCLGE_DESC_DATA_LEN		6
 
 struct hclge_dev;
 struct hclge_desc {
@@ -18,7 +20,7 @@ struct hclge_desc {
 	__le16 flag;
 	__le16 retval;
 	__le16 rsv;
-	__le32 data[6];
+	__le32 data[HCLGE_DESC_DATA_LEN];
 };
 
 struct hclge_cmq_ring {
@@ -244,7 +246,7 @@ enum hclge_opcode_type {
 	/* QCN commands */
 	HCLGE_OPC_QCN_MOD_CFG		= 0x1A01,
 	HCLGE_OPC_QCN_GRP_TMPLT_CFG	= 0x1A02,
-	HCLGE_OPC_QCN_SHAPPING_IR_CFG	= 0x1A03,
+	HCLGE_OPC_QCN_SHAPPING_CFG	= 0x1A03,
 	HCLGE_OPC_QCN_SHAPPING_BS_CFG	= 0x1A04,
 	HCLGE_OPC_QCN_QSET_LINK_CFG	= 0x1A05,
 	HCLGE_OPC_QCN_RP_STATUS_GET	= 0x1A06,
@@ -259,6 +261,7 @@ enum hclge_opcode_type {
 
 	/* NCL config command */
 	HCLGE_OPC_QUERY_NCL_CONFIG	= 0x7011,
+
 	/* M7 stats command */
 	HCLGE_OPC_M7_STATS_BD		= 0x7012,
 	HCLGE_OPC_M7_STATS_INFO		= 0x7013,
@@ -428,8 +431,10 @@ struct hclge_rx_pkt_buf_cmd {
 #define HCLGE_PF_MAC_NUM_MASK	0x3
 #define HCLGE_PF_STATE_MAIN	BIT(HCLGE_PF_STATE_MAIN_B)
 #define HCLGE_PF_STATE_DONE	BIT(HCLGE_PF_STATE_DONE_B)
+#define HCLGE_VF_RST_STATUS_CMD	4
+
 struct hclge_func_status_cmd {
-	__le32  vf_rst_state[4];
+	__le32  vf_rst_state[HCLGE_VF_RST_STATUS_CMD];
 	u8 pf_state;
 	u8 mac_id;
 	u8 rsv1;
@@ -485,10 +490,12 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_UMV_TBL_SPACE_S	16
 #define HCLGE_CFG_UMV_TBL_SPACE_M	GENMASK(31, 16)
 
+#define HCLGE_CFG_CMD_CNT		4
+
 struct hclge_cfg_param_cmd {
 	__le32 offset;
 	__le32 rsv;
-	__le32 param[4];
+	__le32 param[HCLGE_CFG_CMD_CNT];
 };
 
 #define HCLGE_MAC_MODE		0x0
@@ -712,8 +719,7 @@ struct hclge_mac_mgr_tbl_entry_cmd {
 	u8      flags;
 	u8      resp_code;
 	__le16  vlan_tag;
-	__le32  mac_addr_hi32;
-	__le16  mac_addr_lo16;
+	u8      mac_addr[ETH_ALEN];
 	__le16  rsv1;
 	__le16  ethter_type;
 	__le16  egress_port;
@@ -758,20 +764,27 @@ struct hclge_vlan_filter_ctrl_cmd {
 	u8 rsv2[19];
 };
 
+#define HCLGE_VLAN_ID_OFFSET_STEP	160
+#define HCLGE_VLAN_BYTE_SIZE		8
+#define	HCLGE_VLAN_OFFSET_BITMAP \
+	(HCLGE_VLAN_ID_OFFSET_STEP / HCLGE_VLAN_BYTE_SIZE)
+
 struct hclge_vlan_filter_pf_cfg_cmd {
 	u8 vlan_offset;
 	u8 vlan_cfg;
 	u8 rsv[2];
-	u8 vlan_offset_bitmap[20];
+	u8 vlan_offset_bitmap[HCLGE_VLAN_OFFSET_BITMAP];
 };
 
+#define HCLGE_MAX_VF_BYTES  16
+
 struct hclge_vlan_filter_vf_cfg_cmd {
 	__le16 vlan_id;
 	u8  resp_code;
 	u8  rsv;
 	u8  vlan_cfg;
 	u8  rsv1[3];
-	u8  vf_bitmap[16];
+	u8  vf_bitmap[HCLGE_MAX_VF_BYTES];
 };
 
 #define HCLGE_SWITCH_ANTI_SPOOF_B	0U
@@ -806,6 +819,7 @@ enum hclge_mac_vlan_cfg_sel {
 #define HCLGE_CFG_NIC_ROCE_SEL_B	4
 #define HCLGE_ACCEPT_TAG2_B		5
 #define HCLGE_ACCEPT_UNTAG2_B		6
+#define HCLGE_VF_NUM_PER_BYTE		8
 
 struct hclge_vport_vtag_tx_cfg_cmd {
 	u8 vport_vlan_cfg;
@@ -813,7 +827,7 @@ struct hclge_vport_vtag_tx_cfg_cmd {
 	u8 rsv1[2];
 	__le16 def_vlan_tag1;
 	__le16 def_vlan_tag2;
-	u8 vf_bitmap[8];
+	u8 vf_bitmap[HCLGE_VF_NUM_PER_BYTE];
 	u8 rsv2[8];
 };
 
@@ -825,7 +839,7 @@ struct hclge_vport_vtag_rx_cfg_cmd {
 	u8 vport_vlan_cfg;
 	u8 vf_offset;
 	u8 rsv1[6];
-	u8 vf_bitmap[8];
+	u8 vf_bitmap[HCLGE_VF_NUM_PER_BYTE];
 	u8 rsv2[8];
 };
 
@@ -864,7 +878,7 @@ struct hclge_mac_ethertype_idx_rd_cmd {
 	u8	flags;
 	u8	resp_code;
 	__le16  vlan_tag;
-	u8      mac_addr[6];
+	u8      mac_addr[ETH_ALEN];
 	__le16  index;
 	__le16	ethter_type;
 	__le16  egress_port;
@@ -1090,9 +1104,6 @@ void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
 				enum hclge_opcode_type opcode, bool is_read);
 void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read);
 
-int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
-			       struct hclge_promisc_param *param);
-
 enum hclge_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw,
 					   struct hclge_desc *desc);
 enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index c063301d6060..49ad8483723d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -87,7 +87,7 @@ static int hclge_dcb_common_validate(struct hclge_dev *hdev, u8 num_tc,
 	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
 		if (prio_tc[i] >= num_tc) {
 			dev_err(&hdev->pdev->dev,
-				"prio_tc[%u] checking failed, %u >= num_tc(%u)\n",
+				"prio_tc[%d] checking failed, %u >= num_tc(%u)\n",
 				i, prio_tc[i], num_tc);
 			return -EINVAL;
 		}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index d0128d792717..112df34b3869 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -145,7 +145,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
 		return;
 	}
 
-	buf_len	 = sizeof(struct hclge_desc) * bd_num;
+	buf_len	= sizeof(struct hclge_desc) * bd_num;
 	desc_src = kzalloc(buf_len, GFP_KERNEL);
 	if (!desc_src) {
 		dev_err(&hdev->pdev->dev, "call kzalloc failed\n");
@@ -153,7 +153,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
 	}
 
 	desc = desc_src;
-	ret  = hclge_dbg_cmd_send(hdev, desc, index, bd_num, reg_msg->cmd);
+	ret = hclge_dbg_cmd_send(hdev, desc, index, bd_num, reg_msg->cmd);
 	if (ret) {
 		kfree(desc_src);
 		return;
@@ -169,7 +169,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
 		if (dfx_message->flag)
 			dev_info(&hdev->pdev->dev, "%s: 0x%x\n",
 				 dfx_message->message,
-				 desc->data[i % entries_per_desc]);
+				 le32_to_cpu(desc->data[i % entries_per_desc]));
 
 		dfx_message++;
 	}
@@ -237,44 +237,48 @@ static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, const char *cmd_buf)
 	if (ret)
 		return;
 
-	dev_info(dev, "sch_nq_cnt: 0x%x\n", desc[0].data[1]);
+	dev_info(dev, "sch_nq_cnt: 0x%x\n", le32_to_cpu(desc[0].data[1]));
 
 	ret = hclge_dbg_cmd_send(hdev, desc, nq_id, 1, HCLGE_OPC_SCH_RQ_CNT);
 	if (ret)
 		return;
 
-	dev_info(dev, "sch_rq_cnt: 0x%x\n", desc[0].data[1]);
+	dev_info(dev, "sch_rq_cnt: 0x%x\n", le32_to_cpu(desc[0].data[1]));
 
 	ret = hclge_dbg_cmd_send(hdev, desc, 0, 2, HCLGE_OPC_TM_INTERNAL_STS);
 	if (ret)
 		return;
 
-	dev_info(dev, "pri_bp: 0x%x\n", desc[0].data[1]);
-	dev_info(dev, "fifo_dfx_info: 0x%x\n", desc[0].data[2]);
-	dev_info(dev, "sch_roce_fifo_afull_gap: 0x%x\n", desc[0].data[3]);
-	dev_info(dev, "tx_private_waterline: 0x%x\n", desc[0].data[4]);
-	dev_info(dev, "tm_bypass_en: 0x%x\n", desc[0].data[5]);
-	dev_info(dev, "SSU_TM_BYPASS_EN: 0x%x\n", desc[1].data[0]);
-	dev_info(dev, "SSU_RESERVE_CFG: 0x%x\n", desc[1].data[1]);
+	dev_info(dev, "pri_bp: 0x%x\n", le32_to_cpu(desc[0].data[1]));
+	dev_info(dev, "fifo_dfx_info: 0x%x\n", le32_to_cpu(desc[0].data[2]));
+	dev_info(dev, "sch_roce_fifo_afull_gap: 0x%x\n",
+		 le32_to_cpu(desc[0].data[3]));
+	dev_info(dev, "tx_private_waterline: 0x%x\n",
+		 le32_to_cpu(desc[0].data[4]));
+	dev_info(dev, "tm_bypass_en: 0x%x\n", le32_to_cpu(desc[0].data[5]));
+	dev_info(dev, "SSU_TM_BYPASS_EN: 0x%x\n", le32_to_cpu(desc[1].data[0]));
+	dev_info(dev, "SSU_RESERVE_CFG: 0x%x\n", le32_to_cpu(desc[1].data[1]));
 
 	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
 				 HCLGE_OPC_TM_INTERNAL_CNT);
 	if (ret)
 		return;
 
-	dev_info(dev, "SCH_NIC_NUM: 0x%x\n", desc[0].data[1]);
-	dev_info(dev, "SCH_ROCE_NUM: 0x%x\n", desc[0].data[2]);
+	dev_info(dev, "SCH_NIC_NUM: 0x%x\n", le32_to_cpu(desc[0].data[1]));
+	dev_info(dev, "SCH_ROCE_NUM: 0x%x\n", le32_to_cpu(desc[0].data[2]));
 
 	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
 				 HCLGE_OPC_TM_INTERNAL_STS_1);
 	if (ret)
 		return;
 
-	dev_info(dev, "TC_MAP_SEL: 0x%x\n", desc[0].data[1]);
-	dev_info(dev, "IGU_PFC_PRI_EN: 0x%x\n", desc[0].data[2]);
-	dev_info(dev, "MAC_PFC_PRI_EN: 0x%x\n", desc[0].data[3]);
-	dev_info(dev, "IGU_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[4]);
-	dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[5]);
+	dev_info(dev, "TC_MAP_SEL: 0x%x\n", le32_to_cpu(desc[0].data[1]));
+	dev_info(dev, "IGU_PFC_PRI_EN: 0x%x\n", le32_to_cpu(desc[0].data[2]));
+	dev_info(dev, "MAC_PFC_PRI_EN: 0x%x\n", le32_to_cpu(desc[0].data[3]));
+	dev_info(dev, "IGU_PRI_MAP_TC_CFG: 0x%x\n",
+		 le32_to_cpu(desc[0].data[4]));
+	dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n",
+		 le32_to_cpu(desc[0].data[5]));
 }
 
 static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf)
@@ -364,7 +368,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
 	pg_shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
 	dev_info(&hdev->pdev->dev, "PG_C pg_id: %u\n", pg_shap_cfg_cmd->pg_id);
 	dev_info(&hdev->pdev->dev, "PG_C pg_shapping: 0x%x\n",
-		 pg_shap_cfg_cmd->pg_shapping_para);
+		 le32_to_cpu(pg_shap_cfg_cmd->pg_shapping_para));
 
 	cmd = HCLGE_OPC_TM_PG_P_SHAPPING;
 	hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -375,7 +379,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
 	pg_shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
 	dev_info(&hdev->pdev->dev, "PG_P pg_id: %u\n", pg_shap_cfg_cmd->pg_id);
 	dev_info(&hdev->pdev->dev, "PG_P pg_shapping: 0x%x\n",
-		 pg_shap_cfg_cmd->pg_shapping_para);
+		 le32_to_cpu(pg_shap_cfg_cmd->pg_shapping_para));
 
 	cmd = HCLGE_OPC_TM_PORT_SHAPPING;
 	hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -385,7 +389,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
 
 	port_shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
 	dev_info(&hdev->pdev->dev, "PORT port_shapping: 0x%x\n",
-		 port_shap_cfg_cmd->port_shapping_para);
+		 le32_to_cpu(port_shap_cfg_cmd->port_shapping_para));
 
 	cmd = HCLGE_OPC_TM_PG_SCH_MODE_CFG;
 	hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -393,7 +397,8 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
 	if (ret)
 		goto err_tm_pg_cmd_send;
 
-	dev_info(&hdev->pdev->dev, "PG_SCH pg_id: %u\n", desc.data[0]);
+	dev_info(&hdev->pdev->dev, "PG_SCH pg_id: %u\n",
+		 le32_to_cpu(desc.data[0]));
 
 	cmd = HCLGE_OPC_TM_PRI_SCH_MODE_CFG;
 	hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -401,7 +406,8 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
 	if (ret)
 		goto err_tm_pg_cmd_send;
 
-	dev_info(&hdev->pdev->dev, "PRI_SCH pri_id: %u\n", desc.data[0]);
+	dev_info(&hdev->pdev->dev, "PRI_SCH pri_id: %u\n",
+		 le32_to_cpu(desc.data[0]));
 
 	cmd = HCLGE_OPC_TM_QS_SCH_MODE_CFG;
 	hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -409,7 +415,8 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
 	if (ret)
 		goto err_tm_pg_cmd_send;
 
-	dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n", desc.data[0]);
+	dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n",
+		 le32_to_cpu(desc.data[0]));
 
 	if (!hnae3_dev_dcb_supported(hdev)) {
 		dev_info(&hdev->pdev->dev,
@@ -429,7 +436,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
 	dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_group_id: 0x%x\n",
 		 bp_to_qs_map_cmd->qs_group_id);
 	dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_bit_map: 0x%x\n",
-		 bp_to_qs_map_cmd->qs_bit_map);
+		 le32_to_cpu(bp_to_qs_map_cmd->qs_bit_map));
 	return;
 
 err_tm_pg_cmd_send:
@@ -471,7 +478,7 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
 
 	qs_to_pri_map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
 	dev_info(&hdev->pdev->dev, "QS_TO_PRI qs_id: %u\n",
-		 qs_to_pri_map->qs_id);
+		 le16_to_cpu(qs_to_pri_map->qs_id));
 	dev_info(&hdev->pdev->dev, "QS_TO_PRI priority: %u\n",
 		 qs_to_pri_map->priority);
 	dev_info(&hdev->pdev->dev, "QS_TO_PRI link_vld: %u\n",
@@ -484,9 +491,10 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
 		goto err_tm_cmd_send;
 
 	nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "NQ_TO_QS nq_id: %u\n", nq_to_qs_map->nq_id);
+	dev_info(&hdev->pdev->dev, "NQ_TO_QS nq_id: %u\n",
+		 le16_to_cpu(nq_to_qs_map->nq_id));
 	dev_info(&hdev->pdev->dev, "NQ_TO_QS qset_id: 0x%x\n",
-		 nq_to_qs_map->qset_id);
+		 le16_to_cpu(nq_to_qs_map->qset_id));
 
 	cmd = HCLGE_OPC_TM_PG_WEIGHT;
 	hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -505,7 +513,8 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
 		goto err_tm_cmd_send;
 
 	qs_weight = (struct hclge_qs_weight_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "QS qs_id: %u\n", qs_weight->qs_id);
+	dev_info(&hdev->pdev->dev, "QS qs_id: %u\n",
+		 le16_to_cpu(qs_weight->qs_id));
 	dev_info(&hdev->pdev->dev, "QS dwrr: %u\n", qs_weight->dwrr);
 
 	cmd = HCLGE_OPC_TM_PRI_WEIGHT;
@@ -527,7 +536,7 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
 	shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
 	dev_info(&hdev->pdev->dev, "PRI_C pri_id: %u\n", shap_cfg_cmd->pri_id);
 	dev_info(&hdev->pdev->dev, "PRI_C pri_shapping: 0x%x\n",
-		 shap_cfg_cmd->pri_shapping_para);
+		 le32_to_cpu(shap_cfg_cmd->pri_shapping_para));
 
 	cmd = HCLGE_OPC_TM_PRI_P_SHAPPING;
 	hclge_cmd_setup_basic_desc(&desc, cmd, true);
@@ -538,7 +547,7 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
 	shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
 	dev_info(&hdev->pdev->dev, "PRI_P pri_id: %u\n", shap_cfg_cmd->pri_id);
 	dev_info(&hdev->pdev->dev, "PRI_P pri_shapping: 0x%x\n",
-		 shap_cfg_cmd->pri_shapping_para);
+		 le32_to_cpu(shap_cfg_cmd->pri_shapping_para));
 
 	hclge_dbg_dump_tm_pg(hdev);
 
@@ -658,7 +667,7 @@ static void hclge_dbg_dump_qos_pause_cfg(struct hclge_dev *hdev)
 	dev_info(&hdev->pdev->dev, "pause_trans_gap: 0x%x\n",
 		 pause_param->pause_trans_gap);
 	dev_info(&hdev->pdev->dev, "pause_trans_time: 0x%x\n",
-		 pause_param->pause_trans_time);
+		 le16_to_cpu(pause_param->pause_trans_time));
 }
 
 static void hclge_dbg_dump_qos_pri_map(struct hclge_dev *hdev)
@@ -712,7 +721,7 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
 	tx_buf_cmd = (struct hclge_tx_buff_alloc_cmd *)desc[0].data;
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
 		dev_info(&hdev->pdev->dev, "tx_packet_buf_tc_%d: 0x%x\n", i,
-			 tx_buf_cmd->tx_pkt_buff[i]);
+			 le16_to_cpu(tx_buf_cmd->tx_pkt_buff[i]));
 
 	cmd = HCLGE_OPC_RX_PRIV_BUFF_ALLOC;
 	hclge_cmd_setup_basic_desc(desc, cmd, true);
@@ -724,10 +733,10 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
 	rx_buf_cmd = (struct hclge_rx_priv_buff_cmd *)desc[0].data;
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
 		dev_info(&hdev->pdev->dev, "rx_packet_buf_tc_%d: 0x%x\n", i,
-			 rx_buf_cmd->buf_num[i]);
+			 le16_to_cpu(rx_buf_cmd->buf_num[i]));
 
 	dev_info(&hdev->pdev->dev, "rx_share_buf: 0x%x\n",
-		 rx_buf_cmd->shared_buf);
+		 le16_to_cpu(rx_buf_cmd->shared_buf));
 
 	cmd = HCLGE_OPC_RX_COM_WL_ALLOC;
 	hclge_cmd_setup_basic_desc(desc, cmd, true);
@@ -738,7 +747,8 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
 	rx_com_wl = (struct hclge_rx_com_wl *)desc[0].data;
 	dev_info(&hdev->pdev->dev, "\n");
 	dev_info(&hdev->pdev->dev, "rx_com_wl: high: 0x%x, low: 0x%x\n",
-		 rx_com_wl->com_wl.high, rx_com_wl->com_wl.low);
+		 le16_to_cpu(rx_com_wl->com_wl.high),
+		 le16_to_cpu(rx_com_wl->com_wl.low));
 
 	cmd = HCLGE_OPC_RX_GBL_PKT_CNT;
 	hclge_cmd_setup_basic_desc(desc, cmd, true);
@@ -749,7 +759,8 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
 	rx_packet_cnt = (struct hclge_rx_com_wl *)desc[0].data;
 	dev_info(&hdev->pdev->dev,
 		 "rx_global_packet_cnt: high: 0x%x, low: 0x%x\n",
-		 rx_packet_cnt->com_wl.high, rx_packet_cnt->com_wl.low);
+		 le16_to_cpu(rx_packet_cnt->com_wl.high),
+		 le16_to_cpu(rx_packet_cnt->com_wl.low));
 	dev_info(&hdev->pdev->dev, "\n");
 
 	if (!hnae3_dev_dcb_supported(hdev)) {
@@ -769,14 +780,16 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
 	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
 		dev_info(&hdev->pdev->dev,
 			 "rx_priv_wl_tc_%d: high: 0x%x, low: 0x%x\n", i,
-			 rx_priv_wl->tc_wl[i].high, rx_priv_wl->tc_wl[i].low);
+			 le16_to_cpu(rx_priv_wl->tc_wl[i].high),
+			 le16_to_cpu(rx_priv_wl->tc_wl[i].low));
 
 	rx_priv_wl = (struct hclge_rx_priv_wl_buf *)desc[1].data;
 	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
 		dev_info(&hdev->pdev->dev,
 			 "rx_priv_wl_tc_%d: high: 0x%x, low: 0x%x\n",
 			 i + HCLGE_TC_NUM_ONE_DESC,
-			 rx_priv_wl->tc_wl[i].high, rx_priv_wl->tc_wl[i].low);
+			 le16_to_cpu(rx_priv_wl->tc_wl[i].high),
+			 le16_to_cpu(rx_priv_wl->tc_wl[i].low));
 
 	cmd = HCLGE_OPC_RX_COM_THRD_ALLOC;
 	hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
@@ -791,16 +804,16 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
 	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
 		dev_info(&hdev->pdev->dev,
 			 "rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n", i,
-			 rx_com_thrd->com_thrd[i].high,
-			 rx_com_thrd->com_thrd[i].low);
+			 le16_to_cpu(rx_com_thrd->com_thrd[i].high),
+			 le16_to_cpu(rx_com_thrd->com_thrd[i].low));
 
 	rx_com_thrd = (struct hclge_rx_com_thrd *)desc[1].data;
 	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
 		dev_info(&hdev->pdev->dev,
 			 "rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n",
 			 i + HCLGE_TC_NUM_ONE_DESC,
-			 rx_com_thrd->com_thrd[i].high,
-			 rx_com_thrd->com_thrd[i].low);
+			 le16_to_cpu(rx_com_thrd->com_thrd[i].high),
+			 le16_to_cpu(rx_com_thrd->com_thrd[i].low));
 	return;
 
 err_qos_cmd_send:
@@ -845,7 +858,8 @@ static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev)
 		memset(printf_buf, 0, HCLGE_DBG_BUF_LEN);
 		snprintf(printf_buf, HCLGE_DBG_BUF_LEN,
 			 "%02u   |%02x:%02x:%02x:%02x:%02x:%02x|",
-			 req0->index, req0->mac_addr[0], req0->mac_addr[1],
+			 le16_to_cpu(req0->index),
+			 req0->mac_addr[0], req0->mac_addr[1],
 			 req0->mac_addr[2], req0->mac_addr[3],
 			 req0->mac_addr[4], req0->mac_addr[5]);
 
@@ -929,7 +943,7 @@ static void hclge_dbg_fd_tcam(struct hclge_dev *hdev)
 	}
 }
 
-static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
+void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
 {
 	dev_info(&hdev->pdev->dev, "PF reset count: %u\n",
 		 hdev->rst_stats.pf_rst_cnt);
@@ -945,8 +959,6 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
 		 hdev->rst_stats.hw_reset_done_cnt);
 	dev_info(&hdev->pdev->dev, "reset count: %u\n",
 		 hdev->rst_stats.reset_cnt);
-	dev_info(&hdev->pdev->dev, "reset count: %u\n",
-		 hdev->rst_stats.reset_cnt);
 	dev_info(&hdev->pdev->dev, "reset fail count: %u\n",
 		 hdev->rst_stats.reset_fail_cnt);
 	dev_info(&hdev->pdev->dev, "vector0 interrupt enable status: 0x%x\n",
@@ -961,6 +973,7 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
 		 hclge_read_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG));
 	dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
 		 hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING));
+	dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state);
 }
 
 static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
@@ -1110,6 +1123,82 @@ static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev)
 	}
 }
 
+static void hclge_dbg_dump_qs_shaper_single(struct hclge_dev *hdev, u16 qsid)
+{
+	struct hclge_qs_shapping_cmd *shap_cfg_cmd;
+	u8 ir_u, ir_b, ir_s, bs_b, bs_s;
+	struct hclge_desc desc;
+	u32 shapping_para;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, true);
+
+	shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data;
+	shap_cfg_cmd->qs_id = cpu_to_le16(qsid);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"qs%u failed to get tx_rate, ret=%d\n",
+			qsid, ret);
+		return;
+	}
+
+	shapping_para = le32_to_cpu(shap_cfg_cmd->qs_shapping_para);
+	ir_b = hclge_tm_get_field(shapping_para, IR_B);
+	ir_u = hclge_tm_get_field(shapping_para, IR_U);
+	ir_s = hclge_tm_get_field(shapping_para, IR_S);
+	bs_b = hclge_tm_get_field(shapping_para, BS_B);
+	bs_s = hclge_tm_get_field(shapping_para, BS_S);
+
+	dev_info(&hdev->pdev->dev,
+		 "qs%u ir_b:%u, ir_u:%u, ir_s:%u, bs_b:%u, bs_s:%u\n",
+		 qsid, ir_b, ir_u, ir_s, bs_b, bs_s);
+}
+
+static void hclge_dbg_dump_qs_shaper_all(struct hclge_dev *hdev)
+{
+	struct hnae3_knic_private_info *kinfo;
+	struct hclge_vport *vport;
+	int vport_id, i;
+
+	for (vport_id = 0; vport_id <= pci_num_vf(hdev->pdev); vport_id++) {
+		vport = &hdev->vport[vport_id];
+		kinfo = &vport->nic.kinfo;
+
+		dev_info(&hdev->pdev->dev, "qs cfg of vport%d:\n", vport_id);
+
+		for (i = 0; i < kinfo->num_tc; i++) {
+			u16 qsid = vport->qs_offset + i;
+
+			hclge_dbg_dump_qs_shaper_single(hdev, qsid);
+		}
+	}
+}
+
+static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev,
+				     const char *cmd_buf)
+{
+#define HCLGE_MAX_QSET_NUM 1024
+
+	u16 qsid;
+	int ret;
+
+	ret = kstrtou16(cmd_buf, 0, &qsid);
+	if (ret) {
+		hclge_dbg_dump_qs_shaper_all(hdev);
+		return;
+	}
+
+	if (qsid >= HCLGE_MAX_QSET_NUM) {
+		dev_err(&hdev->pdev->dev, "qsid(%u) out of range[0-1023]\n",
+			qsid);
+		return;
+	}
+
+	hclge_dbg_dump_qs_shaper_single(hdev, qsid);
+}
+
 int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 {
 #define DUMP_REG	"dump reg"
@@ -1145,6 +1234,9 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 					  &cmd_buf[sizeof("dump ncl_config")]);
 	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
 		hclge_dbg_dump_mac_tnl_status(hdev);
+	} else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
+		hclge_dbg_dump_qs_shaper(hdev,
+					 &cmd_buf[sizeof("dump qs shaper")]);
 	} else {
 		dev_info(&hdev->pdev->dev, "unknown command\n");
 		return -EINVAL;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 87dece0e745d..dc66b4e13377 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1747,7 +1747,7 @@ static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
 
 	if (vf_id) {
 		if (vf_id >= hdev->num_alloc_vport) {
-			dev_err(dev, "invalid vf id(%d)\n", vf_id);
+			dev_err(dev, "invalid vf id(%u)\n", vf_id);
 			return;
 		}
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 16f7d0e15b4f..eb14c43e4a90 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -55,6 +55,8 @@
 
 #define HCLGE_LINK_STATUS_MS	10
 
+#define HCLGE_VF_VPORT_START_NUM	1
+
 static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
 static int hclge_init_vlan_config(struct hclge_dev *hdev);
 static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
@@ -323,8 +325,7 @@ static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = {
 	{
 		.flags = HCLGE_MAC_MGR_MASK_VLAN_B,
 		.ethter_type = cpu_to_le16(ETH_P_LLDP),
-		.mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)),
-		.mac_addr_lo16 = cpu_to_le16(htons(0x000E)),
+		.mac_addr = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e},
 		.i_port_bitmap = 0x1,
 	},
 };
@@ -1194,6 +1195,35 @@ static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability)
 		hclge_parse_backplane_link_mode(hdev, speed_ability);
 }
 
+static u32 hclge_get_max_speed(u8 speed_ability)
+{
+	if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+		return HCLGE_MAC_SPEED_100G;
+
+	if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+		return HCLGE_MAC_SPEED_50G;
+
+	if (speed_ability & HCLGE_SUPPORT_40G_BIT)
+		return HCLGE_MAC_SPEED_40G;
+
+	if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+		return HCLGE_MAC_SPEED_25G;
+
+	if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+		return HCLGE_MAC_SPEED_10G;
+
+	if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+		return HCLGE_MAC_SPEED_1G;
+
+	if (speed_ability & HCLGE_SUPPORT_100M_BIT)
+		return HCLGE_MAC_SPEED_100M;
+
+	if (speed_ability & HCLGE_SUPPORT_10M_BIT)
+		return HCLGE_MAC_SPEED_10M;
+
+	return HCLGE_MAC_SPEED_1G;
+}
+
 static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 {
 	struct hclge_cfg_param_cmd *req;
@@ -1364,9 +1394,11 @@ static int hclge_configure(struct hclge_dev *hdev)
 
 	hclge_parse_link_mode(hdev, cfg.speed_ability);
 
+	hdev->hw.mac.max_speed = hclge_get_max_speed(cfg.speed_ability);
+
 	if ((hdev->tc_max > HNAE3_MAX_TC) ||
 	    (hdev->tc_max < 1)) {
-		dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
+		dev_warn(&hdev->pdev->dev, "TC num = %u.\n",
 			 hdev->tc_max);
 		hdev->tc_max = 1;
 	}
@@ -1626,7 +1658,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
 	num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
 
 	if (hdev->num_tqps < num_vport) {
-		dev_err(&hdev->pdev->dev, "tqps(%d) is less than vports(%d)",
+		dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)",
 			hdev->num_tqps, num_vport);
 		return -EINVAL;
 	}
@@ -1649,6 +1681,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
 	for (i = 0; i < num_vport; i++) {
 		vport->back = hdev;
 		vport->vport_id = i;
+		vport->vf_info.link_state = IFLA_VF_LINK_STATE_AUTO;
 		vport->mps = HCLGE_MAC_DEFAULT_FRAME;
 		vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE;
 		vport->rxvlan_cfg.rx_vlan_offload_en = true;
@@ -2312,7 +2345,7 @@ static int hclge_init_msi(struct hclge_dev *hdev)
 	}
 	if (vectors < hdev->num_msi)
 		dev_warn(&hdev->pdev->dev,
-			 "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n",
+			 "requested %u MSI/MSI-X, but allocated %d MSI/MSI-X\n",
 			 hdev->num_msi, vectors);
 
 	hdev->num_msi = vectors;
@@ -2744,7 +2777,7 @@ static void hclge_update_port_capability(struct hclge_mac *mac)
 	else if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER)
 		mac->module_type = HNAE3_MODULE_TYPE_TP;
 
-	if (mac->support_autoneg == true) {
+	if (mac->support_autoneg) {
 		linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mac->supported);
 		linkmode_copy(mac->advertising, mac->supported);
 	} else {
@@ -2871,6 +2904,62 @@ static int hclge_get_status(struct hnae3_handle *handle)
 	return hdev->hw.mac.link;
 }
 
+static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf)
+{
+	if (pci_num_vf(hdev->pdev) == 0) {
+		dev_err(&hdev->pdev->dev,
+			"SRIOV is disabled, can not get vport(%d) info.\n", vf);
+		return NULL;
+	}
+
+	if (vf < 0 || vf >= pci_num_vf(hdev->pdev)) {
+		dev_err(&hdev->pdev->dev,
+			"vf id(%d) is out of range(0 <= vfid < %d)\n",
+			vf, pci_num_vf(hdev->pdev));
+		return NULL;
+	}
+
+	/* VF start from 1 in vport */
+	vf += HCLGE_VF_VPORT_START_NUM;
+	return &hdev->vport[vf];
+}
+
+static int hclge_get_vf_config(struct hnae3_handle *handle, int vf,
+			       struct ifla_vf_info *ivf)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	vport = hclge_get_vf_vport(hdev, vf);
+	if (!vport)
+		return -EINVAL;
+
+	ivf->vf = vf;
+	ivf->linkstate = vport->vf_info.link_state;
+	ivf->spoofchk = vport->vf_info.spoofchk;
+	ivf->trusted = vport->vf_info.trusted;
+	ivf->min_tx_rate = 0;
+	ivf->max_tx_rate = vport->vf_info.max_tx_rate;
+	ether_addr_copy(ivf->mac, vport->vf_info.mac);
+
+	return 0;
+}
+
+static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
+				   int link_state)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	vport = hclge_get_vf_vport(hdev, vf);
+	if (!vport)
+		return -EINVAL;
+
+	vport->vf_info.link_state = link_state;
+
+	return 0;
+}
+
 static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 {
 	u32 rst_src_reg, cmdq_src_reg, msix_src_reg;
@@ -3191,7 +3280,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
 
 		if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) {
 			dev_err(&hdev->pdev->dev,
-				"flr wait timeout: %d\n", cnt);
+				"flr wait timeout: %u\n", cnt);
 			return -EBUSY;
 		}
 
@@ -3241,7 +3330,7 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
 		ret = hclge_set_vf_rst(hdev, vport->vport_id, reset);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
-				"set vf(%d) rst failed %d!\n",
+				"set vf(%u) rst failed %d!\n",
 				vport->vport_id, ret);
 			return ret;
 		}
@@ -3256,7 +3345,7 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
 		ret = hclge_inform_reset_assert_to_vf(vport);
 		if (ret)
 			dev_warn(&hdev->pdev->dev,
-				 "inform reset to vf(%d) failed %d!\n",
+				 "inform reset to vf(%u) failed %d!\n",
 				 vport->vport_id, ret);
 	}
 
@@ -3569,7 +3658,7 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
 		hdev->rst_stats.reset_fail_cnt++;
 		set_bit(hdev->reset_type, &hdev->reset_pending);
 		dev_info(&hdev->pdev->dev,
-			 "re-schedule reset task(%d)\n",
+			 "re-schedule reset task(%u)\n",
 			 hdev->rst_stats.reset_fail_cnt);
 		return true;
 	}
@@ -3580,6 +3669,9 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
 	hclge_reset_handshake(hdev, true);
 
 	dev_err(&hdev->pdev->dev, "Reset fail!\n");
+
+	hclge_dbg_dump_rst_info(hdev);
+
 	return false;
 }
 
@@ -3779,12 +3871,13 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
 				  HCLGE_RESET_INTERVAL))) {
 		mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL);
 		return;
-	} else if (hdev->default_reset_request)
+	} else if (hdev->default_reset_request) {
 		hdev->reset_level =
 			hclge_get_reset_level(ae_dev,
 					      &hdev->default_reset_request);
-	else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ)))
+	} else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ))) {
 		hdev->reset_level = HNAE3_FUNC_RESET;
+	}
 
 	dev_info(&hdev->pdev->dev, "received reset event, reset type is %d\n",
 		 hdev->reset_level);
@@ -3909,6 +4002,7 @@ static void hclge_service_task(struct work_struct *work)
 	hclge_update_link_status(hdev);
 	hclge_update_vport_alive(hdev);
 	hclge_sync_vlan_filter(hdev);
+
 	if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) {
 		hclge_rfs_filter_expire(hdev);
 		hdev->fd_arfs_expire_timer = 0;
@@ -4415,7 +4509,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
 	 */
 	if (rss_size > HCLGE_RSS_TC_SIZE_7 || rss_size == 0) {
 		dev_err(&hdev->pdev->dev,
-			"Configure rss tc size failed, invalid TC_SIZE = %d\n",
+			"Configure rss tc size failed, invalid TC_SIZE = %u\n",
 			rss_size);
 		return -EINVAL;
 	}
@@ -4593,8 +4687,8 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, int vector,
 	return ret;
 }
 
-int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
-			       struct hclge_promisc_param *param)
+static int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
+				      struct hclge_promisc_param *param)
 {
 	struct hclge_promisc_cfg_cmd *req;
 	struct hclge_desc desc;
@@ -4621,8 +4715,9 @@ int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
 	return ret;
 }
 
-void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
-			      bool en_mc, bool en_bc, int vport_id)
+static void hclge_promisc_param_init(struct hclge_promisc_param *param,
+				     bool en_uc, bool en_mc, bool en_bc,
+				     int vport_id)
 {
 	if (!param)
 		return;
@@ -4637,12 +4732,21 @@ void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
 	param->vf_id = vport_id;
 }
 
+int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc,
+				 bool en_mc_pmc, bool en_bc_pmc)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_promisc_param param;
+
+	hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, en_bc_pmc,
+				 vport->vport_id);
+	return hclge_cmd_set_promisc_mode(hdev, &param);
+}
+
 static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
 				  bool en_mc_pmc)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
-	struct hclge_promisc_param param;
 	bool en_bc_pmc = true;
 
 	/* For revision 0x20, if broadcast promisc enabled, vlan filter is
@@ -4652,9 +4756,8 @@ static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
 	if (handle->pdev->revision == 0x20)
 		en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false;
 
-	hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, en_bc_pmc,
-				 vport->vport_id);
-	return hclge_cmd_set_promisc_mode(hdev, &param);
+	return hclge_set_vport_promisc_mode(vport, en_uc_pmc, en_mc_pmc,
+					    en_bc_pmc);
 }
 
 static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
@@ -4756,7 +4859,7 @@ static int hclge_init_fd_config(struct hclge_dev *hdev)
 		break;
 	default:
 		dev_err(&hdev->pdev->dev,
-			"Unsupported flow director mode %d\n",
+			"Unsupported flow director mode %u\n",
 			hdev->fd_cfg.fd_mode);
 		return -EOPNOTSUPP;
 	}
@@ -5086,7 +5189,7 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
 				   true);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"fd key_y config fail, loc=%d, ret=%d\n",
+			"fd key_y config fail, loc=%u, ret=%d\n",
 			rule->queue_id, ret);
 		return ret;
 	}
@@ -5095,7 +5198,7 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
 				   true);
 	if (ret)
 		dev_err(&hdev->pdev->dev,
-			"fd key_x config fail, loc=%d, ret=%d\n",
+			"fd key_x config fail, loc=%u, ret=%d\n",
 			rule->queue_id, ret);
 	return ret;
 }
@@ -5344,7 +5447,7 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 		}
 	} else if (!is_add) {
 		dev_err(&hdev->pdev->dev,
-			"delete fail, rule %d is inexistent\n",
+			"delete fail, rule %u is inexistent\n",
 			location);
 		return -EINVAL;
 	}
@@ -5584,7 +5687,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 
 		if (vf > hdev->num_req_vfs) {
 			dev_err(&hdev->pdev->dev,
-				"Error: vf id (%d) > max vf num (%d)\n",
+				"Error: vf id (%u) > max vf num (%u)\n",
 				vf, hdev->num_req_vfs);
 			return -EINVAL;
 		}
@@ -5594,7 +5697,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 
 		if (ring >= tqps) {
 			dev_err(&hdev->pdev->dev,
-				"Error: queue id (%d) > max tqp num (%d)\n",
+				"Error: queue id (%u) > max tqp num (%u)\n",
 				ring, tqps - 1);
 			return -EINVAL;
 		}
@@ -5653,7 +5756,7 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle,
 
 	if (!hclge_fd_rule_exist(hdev, fs->location)) {
 		dev_err(&hdev->pdev->dev,
-			"Delete fail, rule %d is inexistent\n", fs->location);
+			"Delete fail, rule %u is inexistent\n", fs->location);
 		return -ENOENT;
 	}
 
@@ -5730,7 +5833,7 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
 
 		if (ret) {
 			dev_warn(&hdev->pdev->dev,
-				 "Restore rule %d failed, remove it\n",
+				 "Restore rule %u failed, remove it\n",
 				 rule->location);
 			clear_bit(rule->location, hdev->fd_bmap);
 			hlist_del(&rule->rule_node);
@@ -6723,7 +6826,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
 
 	if (cmdq_resp) {
 		dev_err(&hdev->pdev->dev,
-			"cmdq execute failed for get_mac_vlan_cmd_status,status=%d.\n",
+			"cmdq execute failed for get_mac_vlan_cmd_status,status=%u.\n",
 			cmdq_resp);
 		return -EIO;
 	}
@@ -6975,7 +7078,7 @@ static int hclge_init_umv_space(struct hclge_dev *hdev)
 
 	if (allocated_size < hdev->wanted_umv_size)
 		dev_warn(&hdev->pdev->dev,
-			 "Alloc umv space failed, want %d, get %d\n",
+			 "Alloc umv space failed, want %u, get %u\n",
 			 hdev->wanted_umv_size, allocated_size);
 
 	mutex_init(&hdev->umv_mutex);
@@ -7143,7 +7246,7 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 
 	/* check if we just hit the duplicate */
 	if (!ret) {
-		dev_warn(&hdev->pdev->dev, "VF %d mac(%pM) exists\n",
+		dev_warn(&hdev->pdev->dev, "VF %u mac(%pM) exists\n",
 			 vport->vport_id, addr);
 		return 0;
 	}
@@ -7324,7 +7427,7 @@ void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
 	mc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_MC;
 
 	list_for_each_entry_safe(mac_cfg, tmp, list, node) {
-		if (strncmp(mac_cfg->mac_addr, mac_addr, ETH_ALEN) == 0) {
+		if (ether_addr_equal(mac_cfg->mac_addr, mac_addr)) {
 			if (uc_flag && mac_cfg->hd_tbl_status)
 				hclge_rm_uc_addr_common(vport, mac_addr);
 
@@ -7396,7 +7499,7 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
 
 	if (cmdq_resp) {
 		dev_err(&hdev->pdev->dev,
-			"cmdq execute failed for get_mac_ethertype_cmd_status, status=%d.\n",
+			"cmdq execute failed for get_mac_ethertype_cmd_status, status=%u.\n",
 			cmdq_resp);
 		return -EIO;
 	}
@@ -7418,7 +7521,7 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
 		break;
 	default:
 		dev_err(&hdev->pdev->dev,
-			"add mac ethertype failed for undefined, code=%d.\n",
+			"add mac ethertype failed for undefined, code=%u.\n",
 			resp_code);
 		return_status = -EIO;
 	}
@@ -7426,6 +7529,67 @@ static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
 	return return_status;
 }
 
+static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx,
+				     u8 *mac_addr)
+{
+	struct hclge_mac_vlan_tbl_entry_cmd req;
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_desc desc;
+	u16 egress_port = 0;
+	int i;
+
+	if (is_zero_ether_addr(mac_addr))
+		return false;
+
+	memset(&req, 0, sizeof(req));
+	hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M,
+			HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
+	req.egress_port = cpu_to_le16(egress_port);
+	hclge_prepare_mac_addr(&req, mac_addr, false);
+
+	if (hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false) != -ENOENT)
+		return true;
+
+	vf_idx += HCLGE_VF_VPORT_START_NUM;
+	for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++)
+		if (i != vf_idx &&
+		    ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac))
+			return true;
+
+	return false;
+}
+
+static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf,
+			    u8 *mac_addr)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	vport = hclge_get_vf_vport(hdev, vf);
+	if (!vport)
+		return -EINVAL;
+
+	if (ether_addr_equal(mac_addr, vport->vf_info.mac)) {
+		dev_info(&hdev->pdev->dev,
+			 "Specified MAC(=%pM) is same as before, no change committed!\n",
+			 mac_addr);
+		return 0;
+	}
+
+	if (hclge_check_vf_mac_exist(vport, vf, mac_addr)) {
+		dev_err(&hdev->pdev->dev, "Specified MAC(=%pM) exists!\n",
+			mac_addr);
+		return -EEXIST;
+	}
+
+	ether_addr_copy(vport->vf_info.mac, mac_addr);
+	dev_info(&hdev->pdev->dev,
+		 "MAC of VF %d has been set to %pM, and it will be reinitialized!\n",
+		 vf, mac_addr);
+
+	return hclge_inform_reset_assert_to_vf(vport);
+}
+
 static int hclge_add_mgr_tbl(struct hclge_dev *hdev,
 			     const struct hclge_mac_mgr_tbl_entry_cmd *req)
 {
@@ -7598,7 +7762,7 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
 				    bool is_kill, u16 vlan,
 				    __be16 proto)
 {
-#define HCLGE_MAX_VF_BYTES  16
+	struct hclge_vport *vport = &hdev->vport[vfid];
 	struct hclge_vlan_filter_vf_cfg_cmd *req0;
 	struct hclge_vlan_filter_vf_cfg_cmd *req1;
 	struct hclge_desc desc[2];
@@ -7607,10 +7771,18 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
 	int ret;
 
 	/* if vf vlan table is full, firmware will close vf vlan filter, it
-	 * is unable and unnecessary to add new vlan id to vf vlan filter
+	 * is unable and unnecessary to add new vlan id to vf vlan filter.
+	 * If spoof check is enable, and vf vlan is full, it shouldn't add
+	 * new vlan, because tx packets with these vlan id will be dropped.
 	 */
-	if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill)
+	if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) {
+		if (vport->vf_info.spoofchk && vlan) {
+			dev_err(&hdev->pdev->dev,
+				"Can't add vlan due to spoof check is on and vf vlan table is full\n");
+			return -EPERM;
+		}
 		return 0;
+	}
 
 	hclge_cmd_setup_basic_desc(&desc[0],
 				   HCLGE_OPC_VLAN_FILTER_VF_CFG, false);
@@ -7654,7 +7826,7 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
 		}
 
 		dev_err(&hdev->pdev->dev,
-			"Add vf vlan filter fail, ret =%d.\n",
+			"Add vf vlan filter fail, ret =%u.\n",
 			req0->resp_code);
 	} else {
 #define HCLGE_VF_VLAN_DEL_NO_FOUND	1
@@ -7670,7 +7842,7 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
 			return 0;
 
 		dev_err(&hdev->pdev->dev,
-			"Kill vf vlan filter fail, ret =%d.\n",
+			"Kill vf vlan filter fail, ret =%u.\n",
 			req0->resp_code);
 	}
 
@@ -7689,9 +7861,10 @@ static int hclge_set_port_vlan_filter(struct hclge_dev *hdev, __be16 proto,
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_PF_CFG, false);
 
-	vlan_offset_160 = vlan_id / 160;
-	vlan_offset_byte = (vlan_id % 160) / 8;
-	vlan_offset_byte_val = 1 << (vlan_id % 8);
+	vlan_offset_160 = vlan_id / HCLGE_VLAN_ID_OFFSET_STEP;
+	vlan_offset_byte = (vlan_id % HCLGE_VLAN_ID_OFFSET_STEP) /
+			   HCLGE_VLAN_BYTE_SIZE;
+	vlan_offset_byte_val = 1 << (vlan_id % HCLGE_VLAN_BYTE_SIZE);
 
 	req = (struct hclge_vlan_filter_pf_cfg_cmd *)desc.data;
 	req->vlan_offset = vlan_offset_160;
@@ -7719,7 +7892,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
 				       proto);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"Set %d vport vlan filter config fail, ret =%d.\n",
+			"Set %u vport vlan filter config fail, ret =%d.\n",
 			vport_id, ret);
 		return ret;
 	}
@@ -7731,7 +7904,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
 
 	if (!is_kill && test_and_set_bit(vport_id, hdev->vlan_table[vlan_id])) {
 		dev_err(&hdev->pdev->dev,
-			"Add port vlan failed, vport %d is already in vlan %d\n",
+			"Add port vlan failed, vport %u is already in vlan %u\n",
 			vport_id, vlan_id);
 		return -EINVAL;
 	}
@@ -7739,7 +7912,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
 	if (is_kill &&
 	    !test_and_clear_bit(vport_id, hdev->vlan_table[vlan_id])) {
 		dev_err(&hdev->pdev->dev,
-			"Delete port vlan failed, vport %d is not in vlan %d\n",
+			"Delete port vlan failed, vport %u is not in vlan %u\n",
 			vport_id, vlan_id);
 		return -EINVAL;
 	}
@@ -8107,12 +8280,15 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle)
 		}
 
 		list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
-			if (vlan->hd_tbl_status)
-				hclge_set_vlan_filter_hw(hdev,
-							 htons(ETH_P_8021Q),
-							 vport->vport_id,
-							 vlan->vlan_id,
-							 false);
+			int ret;
+
+			if (!vlan->hd_tbl_status)
+				continue;
+			ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+						       vport->vport_id,
+						       vlan->vlan_id, false);
+			if (ret)
+				break;
 		}
 	}
 
@@ -8392,6 +8568,7 @@ int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu)
 	struct hclge_dev *hdev = vport->back;
 	int i, max_frm_size, ret;
 
+	/* HW supprt 2 layer vlan */
 	max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
 	if (max_frm_size < HCLGE_MAC_MIN_FRAME ||
 	    max_frm_size > HCLGE_MAC_MAX_FRAME)
@@ -8807,16 +8984,16 @@ static void hclge_info_show(struct hclge_dev *hdev)
 
 	dev_info(dev, "PF info begin:\n");
 
-	dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
-	dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
-	dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
-	dev_info(dev, "Numbers of vports: %d\n", hdev->num_alloc_vport);
-	dev_info(dev, "Numbers of vmdp vports: %d\n", hdev->num_vmdq_vport);
-	dev_info(dev, "Numbers of VF for this PF: %d\n", hdev->num_req_vfs);
-	dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map);
-	dev_info(dev, "Total buffer size for TX/RX: %d\n", hdev->pkt_buf_size);
-	dev_info(dev, "TX buffer size for each TC: %d\n", hdev->tx_buf_size);
-	dev_info(dev, "DV buffer size for each TC: %d\n", hdev->dv_buf_size);
+	dev_info(dev, "Task queue pairs numbers: %u\n", hdev->num_tqps);
+	dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc);
+	dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc);
+	dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport);
+	dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport);
+	dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs);
+	dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map);
+	dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size);
+	dev_info(dev, "TX buffer size for each TC: %u\n", hdev->tx_buf_size);
+	dev_info(dev, "DV buffer size for each TC: %u\n", hdev->dv_buf_size);
 	dev_info(dev, "This is %s PF\n",
 		 hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
 	dev_info(dev, "DCB %s\n",
@@ -8832,10 +9009,9 @@ static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
 {
 	struct hnae3_client *client = vport->nic.client;
 	struct hclge_dev *hdev = ae_dev->priv;
-	int rst_cnt;
+	int rst_cnt = hdev->rst_stats.reset_cnt;
 	int ret;
 
-	rst_cnt = hdev->rst_stats.reset_cnt;
 	ret = client->ops->init_instance(&vport->nic);
 	if (ret)
 		return ret;
@@ -8935,7 +9111,6 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 
 		switch (client->type) {
 		case HNAE3_CLIENT_KNIC:
-
 			hdev->nic_client = client;
 			vport->nic.client = client;
 			ret = hclge_init_nic_client_instance(ae_dev, vport);
@@ -9134,7 +9309,7 @@ static void hclge_clear_resetting_state(struct hclge_dev *hdev)
 		ret = hclge_set_vf_rst(hdev, vport->vport_id, false);
 		if (ret)
 			dev_warn(&hdev->pdev->dev,
-				 "clear vf(%d) rst failed %d!\n",
+				 "clear vf(%u) rst failed %d!\n",
 				 vport->vport_id, ret);
 	}
 }
@@ -9156,6 +9331,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hdev->reset_type = HNAE3_NONE_RESET;
 	hdev->reset_level = HNAE3_FUNC_RESET;
 	ae_dev->priv = hdev;
+
+	/* HW supprt 2 layer vlan */
 	hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
 
 	mutex_init(&hdev->vport_lock);
@@ -9354,6 +9531,219 @@ static void hclge_stats_clear(struct hclge_dev *hdev)
 	memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats));
 }
 
+static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable)
+{
+	return hclge_config_switch_param(hdev, vf, enable,
+					 HCLGE_SWITCH_ANTI_SPOOF_MASK);
+}
+
+static int hclge_set_vlan_spoofchk(struct hclge_dev *hdev, int vf, bool enable)
+{
+	return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
+					  HCLGE_FILTER_FE_NIC_INGRESS_B,
+					  enable, vf);
+}
+
+static int hclge_set_vf_spoofchk_hw(struct hclge_dev *hdev, int vf, bool enable)
+{
+	int ret;
+
+	ret = hclge_set_mac_spoofchk(hdev, vf, enable);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Set vf %d mac spoof check %s failed, ret=%d\n",
+			vf, enable ? "on" : "off", ret);
+		return ret;
+	}
+
+	ret = hclge_set_vlan_spoofchk(hdev, vf, enable);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Set vf %d vlan spoof check %s failed, ret=%d\n",
+			vf, enable ? "on" : "off", ret);
+
+	return ret;
+}
+
+static int hclge_set_vf_spoofchk(struct hnae3_handle *handle, int vf,
+				 bool enable)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u32 new_spoofchk = enable ? 1 : 0;
+	int ret;
+
+	if (hdev->pdev->revision == 0x20)
+		return -EOPNOTSUPP;
+
+	vport = hclge_get_vf_vport(hdev, vf);
+	if (!vport)
+		return -EINVAL;
+
+	if (vport->vf_info.spoofchk == new_spoofchk)
+		return 0;
+
+	if (enable && test_bit(vport->vport_id, hdev->vf_vlan_full))
+		dev_warn(&hdev->pdev->dev,
+			 "vf %d vlan table is full, enable spoof check may cause its packet send fail\n",
+			 vf);
+	else if (enable && hclge_is_umv_space_full(vport))
+		dev_warn(&hdev->pdev->dev,
+			 "vf %d mac table is full, enable spoof check may cause its packet send fail\n",
+			 vf);
+
+	ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, enable);
+	if (ret)
+		return ret;
+
+	vport->vf_info.spoofchk = new_spoofchk;
+	return 0;
+}
+
+static int hclge_reset_vport_spoofchk(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int ret;
+	int i;
+
+	if (hdev->pdev->revision == 0x20)
+		return 0;
+
+	/* resume the vf spoof check state after reset */
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id,
+					       vport->vf_info.spoofchk);
+		if (ret)
+			return ret;
+
+		vport++;
+	}
+
+	return 0;
+}
+
+static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u32 new_trusted = enable ? 1 : 0;
+	bool en_bc_pmc;
+	int ret;
+
+	vport = hclge_get_vf_vport(hdev, vf);
+	if (!vport)
+		return -EINVAL;
+
+	if (vport->vf_info.trusted == new_trusted)
+		return 0;
+
+	/* Disable promisc mode for VF if it is not trusted any more. */
+	if (!enable && vport->vf_info.promisc_enable) {
+		en_bc_pmc = hdev->pdev->revision != 0x20;
+		ret = hclge_set_vport_promisc_mode(vport, false, false,
+						   en_bc_pmc);
+		if (ret)
+			return ret;
+		vport->vf_info.promisc_enable = 0;
+		hclge_inform_vf_promisc_info(vport);
+	}
+
+	vport->vf_info.trusted = new_trusted;
+
+	return 0;
+}
+
+static void hclge_reset_vf_rate(struct hclge_dev *hdev)
+{
+	int ret;
+	int vf;
+
+	/* reset vf rate to default value */
+	for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) {
+		struct hclge_vport *vport = &hdev->vport[vf];
+
+		vport->vf_info.max_tx_rate = 0;
+		ret = hclge_tm_qs_shaper_cfg(vport, vport->vf_info.max_tx_rate);
+		if (ret)
+			dev_err(&hdev->pdev->dev,
+				"vf%d failed to reset to default, ret=%d\n",
+				vf - HCLGE_VF_VPORT_START_NUM, ret);
+	}
+}
+
+static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf,
+				     int min_tx_rate, int max_tx_rate)
+{
+	if (min_tx_rate != 0 ||
+	    max_tx_rate < 0 || max_tx_rate > hdev->hw.mac.max_speed) {
+		dev_err(&hdev->pdev->dev,
+			"min_tx_rate:%d [0], max_tx_rate:%d [0, %u]\n",
+			min_tx_rate, max_tx_rate, hdev->hw.mac.max_speed);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hclge_set_vf_rate(struct hnae3_handle *handle, int vf,
+			     int min_tx_rate, int max_tx_rate, bool force)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate);
+	if (ret)
+		return ret;
+
+	vport = hclge_get_vf_vport(hdev, vf);
+	if (!vport)
+		return -EINVAL;
+
+	if (!force && max_tx_rate == vport->vf_info.max_tx_rate)
+		return 0;
+
+	ret = hclge_tm_qs_shaper_cfg(vport, max_tx_rate);
+	if (ret)
+		return ret;
+
+	vport->vf_info.max_tx_rate = max_tx_rate;
+
+	return 0;
+}
+
+static int hclge_resume_vf_rate(struct hclge_dev *hdev)
+{
+	struct hnae3_handle *handle = &hdev->vport->nic;
+	struct hclge_vport *vport;
+	int ret;
+	int vf;
+
+	/* resume the vf max_tx_rate after reset */
+	for (vf = 0; vf < pci_num_vf(hdev->pdev); vf++) {
+		vport = hclge_get_vf_vport(hdev, vf);
+		if (!vport)
+			return -EINVAL;
+
+		/* zero means max rate, after reset, firmware already set it to
+		 * max rate, so just continue.
+		 */
+		if (!vport->vf_info.max_tx_rate)
+			continue;
+
+		ret = hclge_set_vf_rate(handle, vf, 0,
+					vport->vf_info.max_tx_rate, true);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"vf%d failed to resume tx_rate:%u, ret=%d\n",
+				vf, vport->vf_info.max_tx_rate, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static void hclge_reset_vport_state(struct hclge_dev *hdev)
 {
 	struct hclge_vport *vport = hdev->vport;
@@ -9431,6 +9821,9 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
+	/* Log and clear the hw errors those already occurred */
+	hclge_handle_all_hns_hw_errors(ae_dev);
+
 	/* Re-enable the hw error interrupts because
 	 * the interrupts get disabled on global reset.
 	 */
@@ -9453,6 +9846,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 	}
 
 	hclge_reset_vport_state(hdev);
+	ret = hclge_reset_vport_spoofchk(hdev);
+	if (ret)
+		return ret;
+
+	ret = hclge_resume_vf_rate(hdev);
+	if (ret)
+		return ret;
 
 	dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n",
 		 HCLGE_DRIVER_NAME);
@@ -9465,6 +9865,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	struct hclge_dev *hdev = ae_dev->priv;
 	struct hclge_mac *mac = &hdev->hw.mac;
 
+	hclge_reset_vf_rate(hdev);
 	hclge_misc_affinity_teardown(hdev);
 	hclge_state_uninit(hdev);
 
@@ -9529,8 +9930,8 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
 	u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
 	struct hclge_dev *hdev = vport->back;
 	u16 tc_size[HCLGE_MAX_TC_NUM] = {0};
-	int cur_rss_size = kinfo->rss_size;
-	int cur_tqps = kinfo->num_tqps;
+	u16 cur_rss_size = kinfo->rss_size;
+	u16 cur_tqps = kinfo->num_tqps;
 	u16 tc_valid[HCLGE_MAX_TC_NUM];
 	u16 roundup_size;
 	u32 *rss_indir;
@@ -9584,7 +9985,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
 out:
 	if (!ret)
 		dev_info(&hdev->pdev->dev,
-			 "Channels changed, rss_size from %d to %d, tqps from %d to %d",
+			 "Channels changed, rss_size from %u to %u, tqps from %u to %u",
 			 cur_rss_size, kinfo->rss_size,
 			 cur_tqps, kinfo->rss_size * kinfo->num_tc);
 
@@ -10187,6 +10588,12 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.mac_connect_phy = hclge_mac_connect_phy,
 	.mac_disconnect_phy = hclge_mac_disconnect_phy,
 	.restore_vlan_table = hclge_restore_vlan_table,
+	.get_vf_config = hclge_get_vf_config,
+	.set_vf_link_state = hclge_set_vf_link_state,
+	.set_vf_spoofchk = hclge_set_vf_spoofchk,
+	.set_vf_trust = hclge_set_vf_trust,
+	.set_vf_rate = hclge_set_vf_rate,
+	.set_vf_mac = hclge_set_vf_mac,
 };
 
 static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 59b824347ba4..88f6c4cbac7c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -141,7 +141,6 @@
 
 /* Factor used to calculate offset and bitmap of VF num */
 #define HCLGE_VF_NUM_PER_CMD           64
-#define HCLGE_VF_NUM_PER_BYTE          8
 
 enum HLCGE_PORT_TYPE {
 	HOST_PORT,
@@ -226,8 +225,6 @@ enum hclge_evt_cause {
 	HCLGE_VECTOR0_EVENT_OTHER,
 };
 
-#define HCLGE_MPF_ENBALE 1
-
 enum HCLGE_MAC_SPEED {
 	HCLGE_MAC_SPEED_UNKNOWN = 0,		/* unknown */
 	HCLGE_MAC_SPEED_10M	= 10,		/* 10 Mbps */
@@ -258,6 +255,7 @@ struct hclge_mac {
 	u8 support_autoneg;
 	u8 speed_type;	/* 0: sfp speed, 1: active speed */
 	u32 speed;
+	u32 max_speed;
 	u32 speed_ability; /* speed ability supported by current media */
 	u32 module_type; /* sub media type, e.g. kr/cr/sr/lr */
 	u32 fec_mode; /* active fec mode */
@@ -655,7 +653,6 @@ struct hclge_rst_stats {
 	u32 hw_reset_done_cnt;	/* the number of HW reset has completed */
 	u32 pf_rst_cnt;		/* the number of PF reset */
 	u32 flr_rst_cnt;	/* the number of FLR */
-	u32 core_rst_cnt;	/* the number of CORE reset */
 	u32 global_rst_cnt;	/* the number of GLOBAL */
 	u32 imp_rst_cnt;	/* the number of IMP reset */
 	u32 reset_cnt;		/* the number of reset */
@@ -886,6 +883,15 @@ struct hclge_port_base_vlan_config {
 	struct hclge_vlan_info vlan_info;
 };
 
+struct hclge_vf_info {
+	int link_state;
+	u8 mac[ETH_ALEN];
+	u32 spoofchk;
+	u32 max_tx_rate;
+	u32 trusted;
+	u16 promisc_enable;
+};
+
 struct hclge_vport {
 	u16 alloc_tqps;	/* Allocated Tx/Rx queues */
 
@@ -917,15 +923,15 @@ struct hclge_vport {
 	unsigned long state;
 	unsigned long last_active_jiffies;
 	u32 mps; /* Max packet size */
+	struct hclge_vf_info vf_info;
 
 	struct list_head uc_mac_list;   /* Store VF unicast table */
 	struct list_head mc_mac_list;   /* Store VF multicast table */
 	struct list_head vlan_list;     /* Store VF vlan table */
 };
 
-void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
-			      bool en_mc, bool en_bc, int vport_id);
-
+int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc,
+				 bool en_mc_pmc, bool en_bc_pmc);
 int hclge_add_uc_addr_common(struct hclge_vport *vport,
 			     const unsigned char *addr);
 int hclge_rm_uc_addr_common(struct hclge_vport *vport,
@@ -994,4 +1000,6 @@ int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev,
 				struct hclge_desc *desc);
 void hclge_report_hw_error(struct hclge_dev *hdev,
 			   enum hnae3_hw_error_type type);
+void hclge_inform_vf_promisc_info(struct hclge_vport *vport);
+void hclge_dbg_dump_rst_info(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index f5da28a60d00..0b433ebe6a2d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -26,7 +26,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
 
 	if (resp_data_len > HCLGE_MBX_MAX_RESP_DATA_SIZE) {
 		dev_err(&hdev->pdev->dev,
-			"PF fail to gen resp to VF len %d exceeds max len %d\n",
+			"PF fail to gen resp to VF len %u exceeds max len %u\n",
 			resp_data_len,
 			HCLGE_MBX_MAX_RESP_DATA_SIZE);
 		/* If resp_data_len is too long, set the value to max length
@@ -205,12 +205,38 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en,
 static int hclge_set_vf_promisc_mode(struct hclge_vport *vport,
 				     struct hclge_mbx_vf_to_pf_cmd *req)
 {
-	bool en_bc = req->msg[1] ? true : false;
-	struct hclge_promisc_param param;
+#define HCLGE_MBX_BC_INDEX	1
+#define HCLGE_MBX_UC_INDEX	2
+#define HCLGE_MBX_MC_INDEX	3
 
-	/* vf is not allowed to enable unicast/multicast broadcast */
-	hclge_promisc_param_init(&param, false, false, en_bc, vport->vport_id);
-	return hclge_cmd_set_promisc_mode(vport->back, &param);
+	bool en_bc = req->msg[HCLGE_MBX_BC_INDEX] ? true : false;
+	bool en_uc = req->msg[HCLGE_MBX_UC_INDEX] ? true : false;
+	bool en_mc = req->msg[HCLGE_MBX_MC_INDEX] ? true : false;
+	int ret;
+
+	if (!vport->vf_info.trusted) {
+		en_uc = false;
+		en_mc = false;
+	}
+
+	ret = hclge_set_vport_promisc_mode(vport, en_uc, en_mc, en_bc);
+	if (req->mbx_need_resp)
+		hclge_gen_resp_to_vf(vport, req, ret, NULL, 0);
+
+	vport->vf_info.promisc_enable = (en_uc || en_mc) ? 1 : 0;
+
+	return ret;
+}
+
+void hclge_inform_vf_promisc_info(struct hclge_vport *vport)
+{
+	u8 dest_vfid = (u8)vport->vport_id;
+	u8 msg_data[2];
+
+	memcpy(&msg_data[0], &vport->vf_info.promisc_enable, sizeof(u16));
+
+	hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
+			   HCLGE_MBX_PUSH_PROMISC_INFO, dest_vfid);
 }
 
 static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
@@ -223,6 +249,20 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
 	if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_MODIFY) {
 		const u8 *old_addr = (const u8 *)(&mbx_req->msg[8]);
 
+		/* If VF MAC has been configured by the host then it
+		 * cannot be overridden by the MAC specified by the VM.
+		 */
+		if (!is_zero_ether_addr(vport->vf_info.mac) &&
+		    !ether_addr_equal(mac_addr, vport->vf_info.mac)) {
+			status = -EPERM;
+			goto out;
+		}
+
+		if (!is_valid_ether_addr(mac_addr)) {
+			status = -EINVAL;
+			goto out;
+		}
+
 		hclge_rm_uc_addr_common(vport, old_addr);
 		status = hclge_add_uc_addr_common(vport, mac_addr);
 		if (status) {
@@ -245,11 +285,12 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
 						 false, HCLGE_MAC_ADDR_UC);
 	} else {
 		dev_err(&hdev->pdev->dev,
-			"failed to set unicast mac addr, unknown subcode %d\n",
+			"failed to set unicast mac addr, unknown subcode %u\n",
 			mbx_req->msg[1]);
 		return -EIO;
 	}
 
+out:
 	if (mbx_req->mbx_need_resp & HCLGE_MBX_NEED_RESP_BIT)
 		hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
 
@@ -278,7 +319,7 @@ static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport,
 						 false, HCLGE_MAC_ADDR_MC);
 	} else {
 		dev_err(&hdev->pdev->dev,
-			"failed to set mcast mac addr, unknown subcode %d\n",
+			"failed to set mcast mac addr, unknown subcode %u\n",
 			mbx_req->msg[1]);
 		return -EIO;
 	}
@@ -324,6 +365,9 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
 		proto =  msg_cmd->proto;
 		status = hclge_set_vlan_filter(handle, cpu_to_be16(proto),
 					       vlan, is_kill);
+		if (mbx_req->mbx_need_resp)
+			return hclge_gen_resp_to_vf(vport, mbx_req, status,
+						    NULL, 0);
 	} else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) {
 		struct hnae3_handle *handle = &vport->nic;
 		bool en = msg_cmd->is_kill ? true : false;
@@ -398,6 +442,13 @@ static int hclge_get_vf_queue_info(struct hclge_vport *vport,
 				    HCLGE_TQPS_RSS_INFO_LEN);
 }
 
+static int hclge_get_vf_mac_addr(struct hclge_vport *vport,
+				 struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+	return hclge_gen_resp_to_vf(vport, mbx_req, 0, vport->vf_info.mac,
+				    ETH_ALEN);
+}
+
 static int hclge_get_vf_queue_depth(struct hclge_vport *vport,
 				    struct hclge_mbx_vf_to_pf_cmd *mbx_req,
 				    bool gen_resp)
@@ -428,6 +479,9 @@ static int hclge_get_vf_media_type(struct hclge_vport *vport,
 static int hclge_get_link_info(struct hclge_vport *vport,
 			       struct hclge_mbx_vf_to_pf_cmd *mbx_req)
 {
+#define HCLGE_VF_LINK_STATE_UP		1U
+#define HCLGE_VF_LINK_STATE_DOWN	0U
+
 	struct hclge_dev *hdev = vport->back;
 	u16 link_status;
 	u8 msg_data[8];
@@ -435,7 +489,19 @@ static int hclge_get_link_info(struct hclge_vport *vport,
 	u16 duplex;
 
 	/* mac.link can only be 0 or 1 */
-	link_status = (u16)hdev->hw.mac.link;
+	switch (vport->vf_info.link_state) {
+	case IFLA_VF_LINK_STATE_ENABLE:
+		link_status = HCLGE_VF_LINK_STATE_UP;
+		break;
+	case IFLA_VF_LINK_STATE_DISABLE:
+		link_status = HCLGE_VF_LINK_STATE_DOWN;
+		break;
+	case IFLA_VF_LINK_STATE_AUTO:
+	default:
+		link_status = (u16)hdev->hw.mac.link;
+		break;
+	}
+
 	duplex = hdev->hw.mac.duplex;
 	memcpy(&msg_data[0], &link_status, sizeof(u16));
 	memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32));
@@ -489,7 +555,7 @@ static void hclge_reset_vf(struct hclge_vport *vport,
 	struct hclge_dev *hdev = vport->back;
 	int ret;
 
-	dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
+	dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %u!",
 		 vport->vport_id);
 
 	ret = hclge_func_reset_cmd(hdev, vport->vport_id);
@@ -524,7 +590,8 @@ static int hclge_get_queue_id_in_pf(struct hclge_vport *vport,
 	qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id);
 	memcpy(resp_data, &qid_in_pf, sizeof(qid_in_pf));
 
-	return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data, 2);
+	return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data,
+				    sizeof(resp_data));
 }
 
 static int hclge_get_rss_key(struct hclge_vport *vport,
@@ -614,7 +681,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 		flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
 		if (unlikely(!hnae3_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B))) {
 			dev_warn(&hdev->pdev->dev,
-				 "dropped invalid mailbox message, code = %d\n",
+				 "dropped invalid mailbox message, code = %u\n",
 				 req->msg[0]);
 
 			/* dropping/not processing this invalid message */
@@ -749,12 +816,19 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 		case HCLGE_MBX_PUSH_LINK_STATUS:
 			hclge_handle_link_change_event(hdev, req);
 			break;
+		case HCLGE_MBX_GET_MAC_ADDR:
+			ret = hclge_get_vf_mac_addr(vport, req);
+			if (ret)
+				dev_err(&hdev->pdev->dev,
+					"PF failed(%d) to get MAC for VF\n",
+					ret);
+			break;
 		case HCLGE_MBX_NCSI_ERROR:
 			hclge_handle_ncsi_error(hdev);
 			break;
 		default:
 			dev_err(&hdev->pdev->dev,
-				"un-supported mailbox message, code = %d\n",
+				"un-supported mailbox message, code = %u\n",
 				req->msg[0]);
 			break;
 		}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index dc4dfd4602ab..696c5ae922e3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -134,7 +134,7 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev)
 			 "no phy device is connected to mdio bus\n");
 		return 0;
 	} else if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
-		dev_err(&hdev->pdev->dev, "phy_addr(%d) is too large.\n",
+		dev_err(&hdev->pdev->dev, "phy_addr(%u) is too large.\n",
 			hdev->hw.mac.phy_addr);
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 62399cc1c5a6..fbc39a2480d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -46,7 +46,7 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
 #define DIVISOR_CLK		(1000 * 8)
 #define DIVISOR_IR_B_126	(126 * DIVISOR_CLK)
 
-	const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
+	static const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
 		6 * 256,        /* Prioriy level */
 		6 * 32,         /* Prioriy group level */
 		6 * 8,          /* Port level */
@@ -511,6 +511,49 @@ static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc, u8 grp_id,
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
+int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate)
+{
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hclge_qs_shapping_cmd *shap_cfg_cmd;
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_desc desc;
+	u8 ir_b, ir_u, ir_s;
+	u32 shaper_para;
+	int ret, i;
+
+	if (!max_tx_rate)
+		max_tx_rate = HCLGE_ETHER_MAX_RATE;
+
+	ret = hclge_shaper_para_calc(max_tx_rate, HCLGE_SHAPER_LVL_QSET,
+				     &ir_b, &ir_u, &ir_s);
+	if (ret)
+		return ret;
+
+	shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s,
+						 HCLGE_SHAPER_BS_U_DEF,
+						 HCLGE_SHAPER_BS_S_DEF);
+
+	for (i = 0; i < kinfo->num_tc; i++) {
+		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG,
+					   false);
+
+		shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data;
+		shap_cfg_cmd->qs_id = cpu_to_le16(vport->qs_offset + i);
+		shap_cfg_cmd->qs_shapping_para = cpu_to_le32(shaper_para);
+
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"vf%u, qs%u failed to set tx_rate:%d, ret=%d\n",
+				vport->vport_id, shap_cfg_cmd->qs_id,
+				max_tx_rate, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 {
 	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
@@ -532,7 +575,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 	/* Set to user value, no larger than max_rss_size. */
 	if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size &&
 	    kinfo->req_rss_size <= max_rss_size) {
-		dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
+		dev_info(&hdev->pdev->dev, "rss changes from %u to %u\n",
 			 kinfo->rss_size, kinfo->req_rss_size);
 		kinfo->rss_size = kinfo->req_rss_size;
 	} else if (kinfo->rss_size > max_rss_size ||
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 260f22d19d81..45bcb67f90fd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -96,6 +96,12 @@ struct hclge_pg_shapping_cmd {
 	__le32 pg_shapping_para;
 };
 
+struct hclge_qs_shapping_cmd {
+	__le16 qs_id;
+	u8 rsvd[2];
+	__le32 qs_shapping_para;
+};
+
 #define HCLGE_BP_GRP_NUM		32
 #define HCLGE_BP_SUB_GRP_ID_S		0
 #define HCLGE_BP_SUB_GRP_ID_M		GENMASK(4, 0)
@@ -154,4 +160,6 @@ int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
 int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
 int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
 int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
+int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
+
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index d5d1cc5d1b6e..af2245e3bb95 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -50,7 +50,7 @@ static int hclgevf_cmd_csq_clean(struct hclgevf_hw *hw)
 	rmb(); /* Make sure head is ready before touch any data */
 
 	if (!hclgevf_is_valid_csq_clean_head(csq, head)) {
-		dev_warn(&hdev->pdev->dev, "wrong cmd head (%d, %d-%d)\n", head,
+		dev_warn(&hdev->pdev->dev, "wrong cmd head (%u, %d-%d)\n", head,
 			 csq->next_to_use, csq->next_to_clean);
 		dev_warn(&hdev->pdev->dev,
 			 "Disabling any further commands to IMP firmware\n");
@@ -92,9 +92,9 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
 	u32 reg_val;
 
 	if (ring->flag == HCLGEVF_TYPE_CSQ) {
-		reg_val = (u32)ring->desc_dma_addr;
+		reg_val = lower_32_bits(ring->desc_dma_addr);
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_L_REG, reg_val);
-		reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
+		reg_val = upper_32_bits(ring->desc_dma_addr);
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val);
 
 		reg_val = hclgevf_read_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG);
@@ -105,9 +105,9 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0);
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG, 0);
 	} else {
-		reg_val = (u32)ring->desc_dma_addr;
+		reg_val = lower_32_bits(ring->desc_dma_addr);
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_L_REG, reg_val);
-		reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
+		reg_val = upper_32_bits(ring->desc_dma_addr);
 		hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val);
 
 		reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 7d7e712691b9..25d78a5aaa34 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1113,6 +1113,7 @@ static int hclgevf_put_vector(struct hnae3_handle *handle, int vector)
 }
 
 static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
+					bool en_uc_pmc, bool en_mc_pmc,
 					bool en_bc_pmc)
 {
 	struct hclge_mbx_vf_to_pf_cmd *req;
@@ -1120,10 +1121,11 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
 	int ret;
 
 	req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
-
 	hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
 	req->msg[0] = HCLGE_MBX_SET_PROMISC_MODE;
 	req->msg[1] = en_bc_pmc ? 1 : 0;
+	req->msg[2] = en_uc_pmc ? 1 : 0;
+	req->msg[3] = en_mc_pmc ? 1 : 0;
 
 	ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
@@ -1133,9 +1135,17 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
 	return ret;
 }
 
-static int hclgevf_set_promisc_mode(struct hclgevf_dev *hdev, bool en_bc_pmc)
+static int hclgevf_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
+				    bool en_mc_pmc)
 {
-	return hclgevf_cmd_set_promisc_mode(hdev, en_bc_pmc);
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct pci_dev *pdev = hdev->pdev;
+	bool en_bc_pmc;
+
+	en_bc_pmc = pdev->revision != 0x20;
+
+	return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc,
+					    en_bc_pmc);
 }
 
 static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
@@ -1174,11 +1184,37 @@ static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle)
 	}
 }
 
+static int hclgevf_get_host_mac_addr(struct hclgevf_dev *hdev, u8 *p)
+{
+	u8 host_mac[ETH_ALEN];
+	int status;
+
+	status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_MAC_ADDR, 0, NULL, 0,
+				      true, host_mac, ETH_ALEN);
+	if (status) {
+		dev_err(&hdev->pdev->dev,
+			"fail to get VF MAC from host %d", status);
+		return status;
+	}
+
+	ether_addr_copy(p, host_mac);
+
+	return 0;
+}
+
 static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	u8 host_mac_addr[ETH_ALEN];
 
-	ether_addr_copy(p, hdev->hw.mac.mac_addr);
+	if (hclgevf_get_host_mac_addr(hdev, host_mac_addr))
+		return;
+
+	hdev->has_pf_mac = !is_zero_ether_addr(host_mac_addr);
+	if (hdev->has_pf_mac)
+		ether_addr_copy(p, host_mac_addr);
+	else
+		ether_addr_copy(p, hdev->hw.mac.mac_addr);
 }
 
 static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
@@ -1275,7 +1311,7 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
 	memcpy(&msg_data[3], &proto, sizeof(proto));
 	ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
 				   HCLGE_MBX_VLAN_FILTER, msg_data,
-				   HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+				   HCLGEVF_VLAN_MBX_MSG_LEN, true, NULL, 0);
 
 	/* when remove hw vlan filter failed, record the vlan id,
 	 * and try to remove it from hw later, to be consistence
@@ -1513,12 +1549,39 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
 	return ret;
 }
 
+static void hclgevf_dump_rst_info(struct hclgevf_dev *hdev)
+{
+	dev_info(&hdev->pdev->dev, "VF function reset count: %u\n",
+		 hdev->rst_stats.vf_func_rst_cnt);
+	dev_info(&hdev->pdev->dev, "FLR reset count: %u\n",
+		 hdev->rst_stats.flr_rst_cnt);
+	dev_info(&hdev->pdev->dev, "VF reset count: %u\n",
+		 hdev->rst_stats.vf_rst_cnt);
+	dev_info(&hdev->pdev->dev, "reset done count: %u\n",
+		 hdev->rst_stats.rst_done_cnt);
+	dev_info(&hdev->pdev->dev, "HW reset done count: %u\n",
+		 hdev->rst_stats.hw_rst_done_cnt);
+	dev_info(&hdev->pdev->dev, "reset count: %u\n",
+		 hdev->rst_stats.rst_cnt);
+	dev_info(&hdev->pdev->dev, "reset fail count: %u\n",
+		 hdev->rst_stats.rst_fail_cnt);
+	dev_info(&hdev->pdev->dev, "vector0 interrupt enable status: 0x%x\n",
+		 hclgevf_read_dev(&hdev->hw, HCLGEVF_MISC_VECTOR_REG_BASE));
+	dev_info(&hdev->pdev->dev, "vector0 interrupt status: 0x%x\n",
+		 hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STAT_REG));
+	dev_info(&hdev->pdev->dev, "handshake status: 0x%x\n",
+		 hclgevf_read_dev(&hdev->hw, HCLGEVF_CMDQ_TX_DEPTH_REG));
+	dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
+		 hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING));
+	dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state);
+}
+
 static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
 {
 	/* recover handshake status with IMP when reset fail */
 	hclgevf_reset_handshake(hdev, true);
 	hdev->rst_stats.rst_fail_cnt++;
-	dev_err(&hdev->pdev->dev, "failed to reset VF(%d)\n",
+	dev_err(&hdev->pdev->dev, "failed to reset VF(%u)\n",
 		hdev->rst_stats.rst_fail_cnt);
 
 	if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT)
@@ -1527,6 +1590,8 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
 	if (hclgevf_is_reset_pending(hdev)) {
 		set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
 		hclgevf_reset_task_schedule(hdev);
+	} else {
+		hclgevf_dump_rst_info(hdev);
 	}
 }
 
@@ -1748,6 +1813,8 @@ static void hclgevf_service_timer(struct timer_list *t)
 
 static void hclgevf_reset_service_task(struct work_struct *work)
 {
+#define	HCLGEVF_MAX_RESET_ATTEMPTS_CNT	3
+
 	struct hclgevf_dev *hdev =
 		container_of(work, struct hclgevf_dev, rst_service_task);
 	int ret;
@@ -1800,7 +1867,7 @@ static void hclgevf_reset_service_task(struct work_struct *work)
 		 * We cannot do much for 2. but to check first we can try reset
 		 * our PCIe + stack and see if it alleviates the problem.
 		 */
-		if (hdev->reset_attempts > 3) {
+		if (hdev->reset_attempts > HCLGEVF_MAX_RESET_ATTEMPTS_CNT) {
 			/* prepare for full reset of stack + pcie interface */
 			set_bit(HNAE3_VF_FULL_RESET, &hdev->reset_pending);
 
@@ -2103,7 +2170,6 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev)
 		ret = hclgevf_set_rss_input_tuple(hdev, rss_cfg);
 		if (ret)
 			return ret;
-
 	}
 
 	/* Initialize RSS indirect table */
@@ -2272,7 +2338,7 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
 	}
 	if (vectors < hdev->num_msi)
 		dev_warn(&hdev->pdev->dev,
-			 "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n",
+			 "requested %u MSI/MSI-X, but allocated %d MSI/MSI-X\n",
 			 hdev->num_msi, vectors);
 
 	hdev->num_msi = vectors;
@@ -2348,12 +2414,12 @@ static void hclgevf_info_show(struct hclgevf_dev *hdev)
 
 	dev_info(dev, "VF info begin:\n");
 
-	dev_info(dev, "Task queue pairs numbers: %d\n", hdev->num_tqps);
-	dev_info(dev, "Desc num per TX queue: %d\n", hdev->num_tx_desc);
-	dev_info(dev, "Desc num per RX queue: %d\n", hdev->num_rx_desc);
-	dev_info(dev, "Numbers of vports: %d\n", hdev->num_alloc_vport);
-	dev_info(dev, "HW tc map: %d\n", hdev->hw_tc_map);
-	dev_info(dev, "PF media type of this VF: %d\n",
+	dev_info(dev, "Task queue pairs numbers: %u\n", hdev->num_tqps);
+	dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc);
+	dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc);
+	dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport);
+	dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map);
+	dev_info(dev, "PF media type of this VF: %u\n",
 		 hdev->hw.mac.media_type);
 
 	dev_info(dev, "VF info end.\n");
@@ -2648,12 +2714,6 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
 		return ret;
 	}
 
-	if (pdev->revision >= 0x21) {
-		ret = hclgevf_set_promisc_mode(hdev, true);
-		if (ret)
-			return ret;
-	}
-
 	dev_info(&hdev->pdev->dev, "Reset done\n");
 
 	return 0;
@@ -2728,17 +2788,6 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 	if (ret)
 		goto err_config;
 
-	/* vf is not allowed to enable unicast/multicast promisc mode.
-	 * For revision 0x20, default to disable broadcast promisc mode,
-	 * firmware makes sure broadcast packets can be accepted.
-	 * For revision 0x21, default to enable broadcast promisc mode.
-	 */
-	if (pdev->revision >= 0x21) {
-		ret = hclgevf_set_promisc_mode(hdev, true);
-		if (ret)
-			goto err_config;
-	}
-
 	/* Initialize RSS for this VF */
 	ret = hclgevf_rss_init_hw(hdev);
 	if (ret) {
@@ -3152,6 +3201,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.get_global_queue_id = hclgevf_get_qid_global,
 	.set_timer_task = hclgevf_set_timer_task,
 	.get_link_mode = hclgevf_get_link_mode,
+	.set_promisc_mode = hclgevf_set_promisc_mode,
 };
 
 static struct hnae3_ae_algo ae_algovf = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 2b8d6bc6d224..2f4c81bf4169 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -150,8 +150,6 @@ enum hclgevf_states {
 	HCLGEVF_STATE_CMD_DISABLE,
 };
 
-#define HCLGEVF_MPF_ENBALE 1
-
 struct hclgevf_mac {
 	u8 media_type;
 	u8 module_type;
@@ -266,6 +264,7 @@ struct hclgevf_dev {
 	u16 num_tx_desc;	/* desc num of per tx queue */
 	u16 num_rx_desc;	/* desc num of per rx queue */
 	u8 hw_tc_map;
+	u8 has_pf_mac;
 
 	u16 num_msi;
 	u16 num_msi_left;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index a108191c9e50..7cbd715d5e7a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -33,7 +33,7 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
 
 	if (resp_len > HCLGE_MBX_MAX_RESP_DATA_SIZE) {
 		dev_err(&hdev->pdev->dev,
-			"VF mbx response len(=%d) exceeds maximum(=%d)\n",
+			"VF mbx response len(=%u) exceeds maximum(=%u)\n",
 			resp_len,
 			HCLGE_MBX_MAX_RESP_DATA_SIZE);
 		return -EINVAL;
@@ -49,7 +49,7 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
 
 	if (i >= HCLGEVF_MAX_TRY_TIMES) {
 		dev_err(&hdev->pdev->dev,
-			"VF could not get mbx(%d,%d) resp(=%d) from PF in %d tries\n",
+			"VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n",
 			code0, code1, hdev->mbx_resp.received_resp, i);
 		return -EIO;
 	}
@@ -68,10 +68,10 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
 
 	if (!(r_code0 == code0 && r_code1 == code1 && !mbx_resp->resp_status)) {
 		dev_err(&hdev->pdev->dev,
-			"VF could not match resp code(code0=%d,code1=%d), %d\n",
+			"VF could not match resp code(code0=%u,code1=%u), %d\n",
 			code0, code1, mbx_resp->resp_status);
 		dev_err(&hdev->pdev->dev,
-			"VF could not match resp r_code(r_code0=%d,r_code1=%d)\n",
+			"VF could not match resp r_code(r_code0=%u,r_code1=%u)\n",
 			r_code0, r_code1);
 		return -EIO;
 	}
@@ -168,7 +168,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 		flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
 		if (unlikely(!hnae3_get_bit(flag, HCLGEVF_CMDQ_RX_OUTVLD_B))) {
 			dev_warn(&hdev->pdev->dev,
-				 "dropped invalid mailbox message, code = %d\n",
+				 "dropped invalid mailbox message, code = %u\n",
 				 req->msg[0]);
 
 			/* dropping/not processing this invalid message */
@@ -187,7 +187,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 		case HCLGE_MBX_PF_VF_RESP:
 			if (resp->received_resp)
 				dev_warn(&hdev->pdev->dev,
-					 "VF mbx resp flag not clear(%d)\n",
+					 "VF mbx resp flag not clear(%u)\n",
 					 req->msg[1]);
 			resp->received_resp = true;
 
@@ -205,6 +205,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 		case HCLGE_MBX_ASSERTING_RESET:
 		case HCLGE_MBX_LINK_STAT_MODE:
 		case HCLGE_MBX_PUSH_VLAN_INFO:
+		case HCLGE_MBX_PUSH_PROMISC_INFO:
 			/* set this mbx event as pending. This is required as we
 			 * might loose interrupt event when mbx task is busy
 			 * handling. This shall be cleared when mbx task just
@@ -218,7 +219,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 			if (atomic_read(&hdev->arq.count) >=
 			    HCLGE_MBX_MAX_ARQ_MSG_NUM) {
 				dev_warn(&hdev->pdev->dev,
-					 "Async Q full, dropping msg(%d)\n",
+					 "Async Q full, dropping msg(%u)\n",
 					 req->msg[1]);
 				break;
 			}
@@ -235,7 +236,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 			break;
 		default:
 			dev_err(&hdev->pdev->dev,
-				"VF received unsupported(%d) mbx msg from PF\n",
+				"VF received unsupported(%u) mbx msg from PF\n",
 				req->msg[0]);
 			break;
 		}
@@ -248,6 +249,14 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 			  crq->next_to_use);
 }
 
+static void hclgevf_parse_promisc_info(struct hclgevf_dev *hdev,
+				       u16 promisc_info)
+{
+	if (!promisc_info)
+		dev_info(&hdev->pdev->dev,
+			 "Promisc mode is closed by host for being untrusted.\n");
+}
+
 void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 {
 	enum hnae3_reset_type reset_type;
@@ -313,9 +322,12 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 			hclgevf_update_port_base_vlan_info(hdev, state,
 							   (u8 *)vlan_info, 8);
 			break;
+		case HCLGE_MBX_PUSH_PROMISC_INFO:
+			hclgevf_parse_promisc_info(hdev, msg_q[1]);
+			break;
 		default:
 			dev_err(&hdev->pdev->dev,
-				"fetched unsupported(%d) message from arq\n",
+				"fetched unsupported(%u) message from arq\n",
 				msg_q[0]);
 			break;
 		}
diff --git a/drivers/net/ethernet/hp/Kconfig b/drivers/net/ethernet/hp/Kconfig
deleted file mode 100644
index fb395cfe6b92..000000000000
--- a/drivers/net/ethernet/hp/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# HP network device configuration
-#
-
-config NET_VENDOR_HP
-	bool "HP devices"
-	default y
-	depends on ISA || EISA || PCI
-	---help---
-	  If you have a network (Ethernet) card belonging to this class, say Y.
-
-	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about HP cards. If you say Y, you will be asked for
-	  your specific card in the following questions.
-
-if NET_VENDOR_HP
-
-config HP100
-	tristate "HP 10/100VG PCLAN (ISA, EISA, PCI) support"
-	depends on (ISA || EISA || PCI)
-	---help---
-	  If you have a network (Ethernet) card of this type, say Y here.
-
-	  To compile this driver as a module, choose M here. The module
-	  will be called hp100.
-
-endif # NET_VENDOR_HP
diff --git a/drivers/net/ethernet/hp/Makefile b/drivers/net/ethernet/hp/Makefile
deleted file mode 100644
index 5ed723bb11e2..000000000000
--- a/drivers/net/ethernet/hp/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the HP network device drivers.
-#
-
-obj-$(CONFIG_HP100) += hp100.o
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
deleted file mode 100644
index 6ec78f5c602f..000000000000
--- a/drivers/net/ethernet/hp/hp100.c
+++ /dev/null
@@ -1,3037 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-** hp100.c
-** HP CASCADE Architecture Driver for 100VG-AnyLan Network Adapters
-**
-** $Id: hp100.c,v 1.58 2001/09/24 18:03:01 perex Exp perex $
-**
-** Based on the HP100 driver written by Jaroslav Kysela <perex@jcu.cz>
-** Extended for new busmaster capable chipsets by
-** Siegfried "Frieder" Loeffler (dg1sek) <floeff@mathematik.uni-stuttgart.de>
-**
-** Maintained by: Jaroslav Kysela <perex@perex.cz>
-**
-** This driver has only been tested with
-** -- HP J2585B 10/100 Mbit/s PCI Busmaster
-** -- HP J2585A 10/100 Mbit/s PCI
-** -- HP J2970A 10 Mbit/s PCI Combo 10base-T/BNC
-** -- HP J2973A 10 Mbit/s PCI 10base-T
-** -- HP J2573  10/100 ISA
-** -- Compex ReadyLink ENET100-VG4  10/100 Mbit/s PCI / EISA
-** -- Compex FreedomLine 100/VG  10/100 Mbit/s ISA / EISA / PCI
-**
-** but it should also work with the other CASCADE based adapters.
-**
-** TODO:
-**       -  J2573 seems to hang sometimes when in shared memory mode.
-**       -  Mode for Priority TX
-**       -  Check PCI registers, performance might be improved?
-**       -  To reduce interrupt load in busmaster, one could switch off
-**          the interrupts that are used to refill the queues whenever the
-**          queues are filled up to more than a certain threshold.
-**       -  some updates for EISA version of card
-**
-**
-**
-** 1.57c -> 1.58
-**   - used indent to change coding-style
-**   - added KTI DP-200 EISA ID
-**   - ioremap is also used for low (<1MB) memory (multi-architecture support)
-**
-** 1.57b -> 1.57c - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-**   - release resources on failure in init_module
-**
-** 1.57 -> 1.57b - Jean II
-**   - fix spinlocks, SMP is now working !
-**
-** 1.56 -> 1.57
-**   - updates for new PCI interface for 2.1 kernels
-**
-** 1.55 -> 1.56
-**   - removed printk in misc. interrupt and update statistics to allow
-**     monitoring of card status
-**   - timing changes in xmit routines, relogin to 100VG hub added when
-**     driver does reset
-**   - included fix for Compex FreedomLine PCI adapter
-**
-** 1.54 -> 1.55
-**   - fixed bad initialization in init_module
-**   - added Compex FreedomLine adapter
-**   - some fixes in card initialization
-**
-** 1.53 -> 1.54
-**   - added hardware multicast filter support (doesn't work)
-**   - little changes in hp100_sense_lan routine
-**     - added support for Coax and AUI (J2970)
-**   - fix for multiple cards and hp100_mode parameter (insmod)
-**   - fix for shared IRQ
-**
-** 1.52 -> 1.53
-**   - fixed bug in multicast support
-**
-*/
-
-#define HP100_DEFAULT_PRIORITY_TX 0
-
-#undef HP100_DEBUG
-#undef HP100_DEBUG_B		/* Trace  */
-#undef HP100_DEBUG_BM		/* Debug busmaster code (PDL stuff) */
-
-#undef HP100_DEBUG_TRAINING	/* Debug login-to-hub procedure */
-#undef HP100_DEBUG_TX
-#undef HP100_DEBUG_IRQ
-#undef HP100_DEBUG_RX
-
-#undef HP100_MULTICAST_FILTER	/* Need to be debugged... */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/eisa.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/spinlock.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/jiffies.h>
-
-#include <asm/io.h>
-
-#include "hp100.h"
-
-/*
- *  defines
- */
-
-#define HP100_BUS_ISA     0
-#define HP100_BUS_EISA    1
-#define HP100_BUS_PCI     2
-
-#define HP100_REGION_SIZE	0x20	/* for ioports */
-#define HP100_SIG_LEN		8	/* same as EISA_SIG_LEN */
-
-#define HP100_MAX_PACKET_SIZE	(1536+4)
-#define HP100_MIN_PACKET_SIZE	60
-
-#ifndef HP100_DEFAULT_RX_RATIO
-/* default - 75% onboard memory on the card are used for RX packets */
-#define HP100_DEFAULT_RX_RATIO	75
-#endif
-
-#ifndef HP100_DEFAULT_PRIORITY_TX
-/* default - don't enable transmit outgoing packets as priority */
-#define HP100_DEFAULT_PRIORITY_TX 0
-#endif
-
-/*
- *  structures
- */
-
-struct hp100_private {
-	spinlock_t lock;
-	char id[HP100_SIG_LEN];
-	u_short chip;
-	u_short soft_model;
-	u_int memory_size;
-	u_int virt_memory_size;
-	u_short rx_ratio;	/* 1 - 99 */
-	u_short priority_tx;	/* != 0 - priority tx */
-	u_short mode;		/* PIO, Shared Mem or Busmaster */
-	u_char bus;
-	struct pci_dev *pci_dev;
-	short mem_mapped;	/* memory mapped access */
-	void __iomem *mem_ptr_virt;	/* virtual memory mapped area, maybe NULL */
-	unsigned long mem_ptr_phys;	/* physical memory mapped area */
-	short lan_type;		/* 10Mb/s, 100Mb/s or -1 (error) */
-	int hub_status;		/* was login to hub successful? */
-	u_char mac1_mode;
-	u_char mac2_mode;
-	u_char hash_bytes[8];
-
-	/* Rings for busmaster mode: */
-	hp100_ring_t *rxrhead;	/* Head (oldest) index into rxring */
-	hp100_ring_t *rxrtail;	/* Tail (newest) index into rxring */
-	hp100_ring_t *txrhead;	/* Head (oldest) index into txring */
-	hp100_ring_t *txrtail;	/* Tail (newest) index into txring */
-
-	hp100_ring_t rxring[MAX_RX_PDL];
-	hp100_ring_t txring[MAX_TX_PDL];
-
-	u_int *page_vaddr_algn;	/* Aligned virtual address of allocated page */
-	u_long whatever_offset;	/* Offset to bus/phys/dma address */
-	int rxrcommit;		/* # Rx PDLs committed to adapter */
-	int txrcommit;		/* # Tx PDLs committed to adapter */
-};
-
-/*
- *  variables
- */
-#ifdef CONFIG_ISA
-static const char *hp100_isa_tbl[] = {
-	"HWPF150", /* HP J2573 rev A */
-	"HWP1950", /* HP J2573 */
-};
-#endif
-
-static const struct eisa_device_id hp100_eisa_tbl[] = {
-	{ "HWPF180" }, /* HP J2577 rev A */
-	{ "HWP1920" }, /* HP 27248B */
-	{ "HWP1940" }, /* HP J2577 */
-	{ "HWP1990" }, /* HP J2577 */
-	{ "CPX0301" }, /* ReadyLink ENET100-VG4 */
-	{ "CPX0401" }, /* FreedomLine 100/VG */
-	{ "" }	       /* Mandatory final entry ! */
-};
-MODULE_DEVICE_TABLE(eisa, hp100_eisa_tbl);
-
-static const struct pci_device_id hp100_pci_tbl[] = {
-	{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585A, PCI_ANY_ID, PCI_ANY_ID,},
-	{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2585B, PCI_ANY_ID, PCI_ANY_ID,},
-	{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2970A, PCI_ANY_ID, PCI_ANY_ID,},
-	{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_J2973A, PCI_ANY_ID, PCI_ANY_ID,},
-	{PCI_VENDOR_ID_COMPEX, PCI_DEVICE_ID_COMPEX_ENET100VG4, PCI_ANY_ID, PCI_ANY_ID,},
-	{PCI_VENDOR_ID_COMPEX2, PCI_DEVICE_ID_COMPEX2_100VG, PCI_ANY_ID, PCI_ANY_ID,},
-/*	{PCI_VENDOR_ID_KTI, PCI_DEVICE_ID_KTI_DP200, PCI_ANY_ID, PCI_ANY_ID }, */
-	{}			/* Terminating entry */
-};
-MODULE_DEVICE_TABLE(pci, hp100_pci_tbl);
-
-static int hp100_rx_ratio = HP100_DEFAULT_RX_RATIO;
-static int hp100_priority_tx = HP100_DEFAULT_PRIORITY_TX;
-static int hp100_mode = 1;
-
-module_param(hp100_rx_ratio, int, 0);
-module_param(hp100_priority_tx, int, 0);
-module_param(hp100_mode, int, 0);
-
-/*
- *  prototypes
- */
-
-static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus,
-			struct pci_dev *pci_dev);
-
-
-static int hp100_open(struct net_device *dev);
-static int hp100_close(struct net_device *dev);
-static netdev_tx_t hp100_start_xmit(struct sk_buff *skb,
-				    struct net_device *dev);
-static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb,
-				       struct net_device *dev);
-static void hp100_rx(struct net_device *dev);
-static struct net_device_stats *hp100_get_stats(struct net_device *dev);
-static void hp100_misc_interrupt(struct net_device *dev);
-static void hp100_update_stats(struct net_device *dev);
-static void hp100_clear_stats(struct hp100_private *lp, int ioaddr);
-static void hp100_set_multicast_list(struct net_device *dev);
-static irqreturn_t hp100_interrupt(int irq, void *dev_id);
-static void hp100_start_interface(struct net_device *dev);
-static void hp100_stop_interface(struct net_device *dev);
-static void hp100_load_eeprom(struct net_device *dev, u_short ioaddr);
-static int hp100_sense_lan(struct net_device *dev);
-static int hp100_login_to_vg_hub(struct net_device *dev,
-				 u_short force_relogin);
-static int hp100_down_vg_link(struct net_device *dev);
-static void hp100_cascade_reset(struct net_device *dev, u_short enable);
-static void hp100_BM_shutdown(struct net_device *dev);
-static void hp100_mmuinit(struct net_device *dev);
-static void hp100_init_pdls(struct net_device *dev);
-static int hp100_init_rxpdl(struct net_device *dev,
-			    register hp100_ring_t * ringptr,
-			    register u_int * pdlptr);
-static int hp100_init_txpdl(struct net_device *dev,
-			    register hp100_ring_t * ringptr,
-			    register u_int * pdlptr);
-static void hp100_rxfill(struct net_device *dev);
-static void hp100_hwinit(struct net_device *dev);
-static void hp100_clean_txring(struct net_device *dev);
-#ifdef HP100_DEBUG
-static void hp100_RegisterDump(struct net_device *dev);
-#endif
-
-/* Conversion to new PCI API :
- * Convert an address in a kernel buffer to a bus/phys/dma address.
- * This work *only* for memory fragments part of lp->page_vaddr,
- * because it was properly DMA allocated via pci_alloc_consistent(),
- * so we just need to "retrieve" the original mapping to bus/phys/dma
- * address - Jean II */
-static inline dma_addr_t virt_to_whatever(struct net_device *dev, u32 * ptr)
-{
-	struct hp100_private *lp = netdev_priv(dev);
-	return ((u_long) ptr) + lp->whatever_offset;
-}
-
-static inline u_int pdl_map_data(struct hp100_private *lp, void *data)
-{
-	return pci_map_single(lp->pci_dev, data,
-			      MAX_ETHER_SIZE, PCI_DMA_FROMDEVICE);
-}
-
-/* TODO: This function should not really be needed in a good design... */
-static void wait(void)
-{
-	mdelay(1);
-}
-
-/*
- *  probe functions
- *  These functions should - if possible - avoid doing write operations
- *  since this could cause problems when the card is not installed.
- */
-
-/*
- * Read board id and convert to string.
- * Effectively same code as decode_eisa_sig
- */
-static const char *hp100_read_id(int ioaddr)
-{
-	int i;
-	static char str[HP100_SIG_LEN];
-	unsigned char sig[4], sum;
-        unsigned short rev;
-
-	hp100_page(ID_MAC_ADDR);
-	sum = 0;
-	for (i = 0; i < 4; i++) {
-		sig[i] = hp100_inb(BOARD_ID + i);
-		sum += sig[i];
-	}
-
-	sum += hp100_inb(BOARD_ID + i);
-	if (sum != 0xff)
-		return NULL;	/* bad checksum */
-
-        str[0] = ((sig[0] >> 2) & 0x1f) + ('A' - 1);
-        str[1] = (((sig[0] & 3) << 3) | (sig[1] >> 5)) + ('A' - 1);
-        str[2] = (sig[1] & 0x1f) + ('A' - 1);
-        rev = (sig[2] << 8) | sig[3];
-        sprintf(str + 3, "%04X", rev);
-
-	return str;
-}
-
-#ifdef CONFIG_ISA
-static __init int hp100_isa_probe1(struct net_device *dev, int ioaddr)
-{
-	const char *sig;
-	int i;
-
-	if (!request_region(ioaddr, HP100_REGION_SIZE, "hp100"))
-		goto err;
-
-	if (hp100_inw(HW_ID) != HP100_HW_ID_CASCADE) {
-		release_region(ioaddr, HP100_REGION_SIZE);
-		goto err;
-	}
-
-	sig = hp100_read_id(ioaddr);
-	release_region(ioaddr, HP100_REGION_SIZE);
-
-	if (sig == NULL)
-		goto err;
-
-	for (i = 0; i < ARRAY_SIZE(hp100_isa_tbl); i++) {
-		if (!strcmp(hp100_isa_tbl[i], sig))
-			break;
-
-	}
-
-	if (i < ARRAY_SIZE(hp100_isa_tbl))
-		return hp100_probe1(dev, ioaddr, HP100_BUS_ISA, NULL);
- err:
-	return -ENODEV;
-
-}
-/*
- * Probe for ISA board.
- * EISA and PCI are handled by device infrastructure.
- */
-
-static int  __init hp100_isa_probe(struct net_device *dev, int addr)
-{
-	int err = -ENODEV;
-
-	/* Probe for a specific ISA address */
-	if (addr > 0xff && addr < 0x400)
-		err = hp100_isa_probe1(dev, addr);
-
-	else if (addr != 0)
-		err = -ENXIO;
-
-	else {
-		/* Probe all ISA possible port regions */
-		for (addr = 0x100; addr < 0x400; addr += 0x20) {
-			err = hp100_isa_probe1(dev, addr);
-			if (!err)
-				break;
-		}
-	}
-	return err;
-}
-#endif /* CONFIG_ISA */
-
-#if !defined(MODULE) && defined(CONFIG_ISA)
-struct net_device * __init hp100_probe(int unit)
-{
-	struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
-	int err;
-
-	if (!dev)
-		return ERR_PTR(-ENODEV);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4200, TRACE);
-	printk("hp100: %s: probe\n", dev->name);
-#endif
-
-	if (unit >= 0) {
-		sprintf(dev->name, "eth%d", unit);
-		netdev_boot_setup_check(dev);
-	}
-
-	err = hp100_isa_probe(dev, dev->base_addr);
-	if (err)
-		goto out;
-
-	return dev;
- out:
-	free_netdev(dev);
-	return ERR_PTR(err);
-}
-#endif /* !MODULE && CONFIG_ISA */
-
-static const struct net_device_ops hp100_bm_netdev_ops = {
-	.ndo_open		= hp100_open,
-	.ndo_stop		= hp100_close,
-	.ndo_start_xmit		= hp100_start_xmit_bm,
-	.ndo_get_stats 		= hp100_get_stats,
-	.ndo_set_rx_mode	= hp100_set_multicast_list,
-	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-static const struct net_device_ops hp100_netdev_ops = {
-	.ndo_open		= hp100_open,
-	.ndo_stop		= hp100_close,
-	.ndo_start_xmit		= hp100_start_xmit,
-	.ndo_get_stats 		= hp100_get_stats,
-	.ndo_set_rx_mode	= hp100_set_multicast_list,
-	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus,
-			struct pci_dev *pci_dev)
-{
-	int i;
-	int err = -ENODEV;
-	const char *eid;
-	u_int chip;
-	u_char uc;
-	u_int memory_size = 0, virt_memory_size = 0;
-	u_short local_mode, lsw;
-	short mem_mapped;
-	unsigned long mem_ptr_phys;
-	void __iomem *mem_ptr_virt;
-	struct hp100_private *lp;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4201, TRACE);
-	printk("hp100: %s: probe1\n", dev->name);
-#endif
-
-	/* memory region for programmed i/o */
-	if (!request_region(ioaddr, HP100_REGION_SIZE, "hp100"))
-		goto out1;
-
-	if (hp100_inw(HW_ID) != HP100_HW_ID_CASCADE)
-		goto out2;
-
-	chip = hp100_inw(PAGING) & HP100_CHIPID_MASK;
-#ifdef HP100_DEBUG
-	if (chip == HP100_CHIPID_SHASTA)
-		printk("hp100: %s: Shasta Chip detected. (This is a pre 802.12 chip)\n", dev->name);
-	else if (chip == HP100_CHIPID_RAINIER)
-		printk("hp100: %s: Rainier Chip detected. (This is a pre 802.12 chip)\n", dev->name);
-	else if (chip == HP100_CHIPID_LASSEN)
-		printk("hp100: %s: Lassen Chip detected.\n", dev->name);
-	else
-		printk("hp100: %s: Warning: Unknown CASCADE chip (id=0x%.4x).\n", dev->name, chip);
-#endif
-
-	dev->base_addr = ioaddr;
-
-	eid = hp100_read_id(ioaddr);
-	if (eid == NULL) {	/* bad checksum? */
-		printk(KERN_WARNING "%s: bad ID checksum at base port 0x%x\n",
-		       __func__, ioaddr);
-		goto out2;
-	}
-
-	hp100_page(ID_MAC_ADDR);
-	for (i = uc = 0; i < 7; i++)
-		uc += hp100_inb(LAN_ADDR + i);
-	if (uc != 0xff) {
-		printk(KERN_WARNING
-		       "%s: bad lan address checksum at port 0x%x)\n",
-		       __func__, ioaddr);
-		err = -EIO;
-		goto out2;
-	}
-
-	/* Make sure, that all registers are correctly updated... */
-
-	hp100_load_eeprom(dev, ioaddr);
-	wait();
-
-	/*
-	 * Determine driver operation mode
-	 *
-	 * Use the variable "hp100_mode" upon insmod or as kernel parameter to
-	 * force driver modes:
-	 * hp100_mode=1 -> default, use busmaster mode if configured.
-	 * hp100_mode=2 -> enable shared memory mode
-	 * hp100_mode=3 -> force use of i/o mapped mode.
-	 * hp100_mode=4 -> same as 1, but re-set the enable bit on the card.
-	 */
-
-	/*
-	 * LSW values:
-	 *   0x2278 -> J2585B, PnP shared memory mode
-	 *   0x2270 -> J2585B, shared memory mode, 0xdc000
-	 *   0xa23c -> J2585B, I/O mapped mode
-	 *   0x2240 -> EISA COMPEX, BusMaster (Shasta Chip)
-	 *   0x2220 -> EISA HP, I/O (Shasta Chip)
-	 *   0x2260 -> EISA HP, BusMaster (Shasta Chip)
-	 */
-
-#if 0
-	local_mode = 0x2270;
-	hp100_outw(0xfefe, OPTION_LSW);
-	hp100_outw(local_mode | HP100_SET_LB | HP100_SET_HB, OPTION_LSW);
-#endif
-
-	/* hp100_mode value maybe used in future by another card */
-	local_mode = hp100_mode;
-	if (local_mode < 1 || local_mode > 4)
-		local_mode = 1;	/* default */
-#ifdef HP100_DEBUG
-	printk("hp100: %s: original LSW = 0x%x\n", dev->name,
-	       hp100_inw(OPTION_LSW));
-#endif
-
-	if (local_mode == 3) {
-		hp100_outw(HP100_MEM_EN | HP100_RESET_LB, OPTION_LSW);
-		hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
-		hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
-		printk("hp100: IO mapped mode forced.\n");
-	} else if (local_mode == 2) {
-		hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
-		hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
-		hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
-		printk("hp100: Shared memory mode requested.\n");
-	} else if (local_mode == 4) {
-		if (chip == HP100_CHIPID_LASSEN) {
-			hp100_outw(HP100_BM_WRITE | HP100_BM_READ | HP100_SET_HB, OPTION_LSW);
-			hp100_outw(HP100_IO_EN | HP100_MEM_EN | HP100_RESET_LB, OPTION_LSW);
-			printk("hp100: Busmaster mode requested.\n");
-		}
-		local_mode = 1;
-	}
-
-	if (local_mode == 1) {	/* default behaviour */
-		lsw = hp100_inw(OPTION_LSW);
-
-		if ((lsw & HP100_IO_EN) && (~lsw & HP100_MEM_EN) &&
-		    (~lsw & (HP100_BM_WRITE | HP100_BM_READ))) {
-#ifdef HP100_DEBUG
-			printk("hp100: %s: IO_EN bit is set on card.\n", dev->name);
-#endif
-			local_mode = 3;
-		} else if (chip == HP100_CHIPID_LASSEN &&
-			   (lsw & (HP100_BM_WRITE | HP100_BM_READ)) == (HP100_BM_WRITE | HP100_BM_READ)) {
-			/* Conversion to new PCI API :
-			 * I don't have the doc, but I assume that the card
-			 * can map the full 32bit address space.
-			 * Also, we can have EISA Busmaster cards (not tested),
-			 * so beware !!! - Jean II */
-			if((bus == HP100_BUS_PCI) &&
-			   (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32)))) {
-				/* Gracefully fallback to shared memory */
-				goto busmasterfail;
-			}
-			printk("hp100: Busmaster mode enabled.\n");
-			hp100_outw(HP100_MEM_EN | HP100_IO_EN | HP100_RESET_LB, OPTION_LSW);
-		} else {
-		busmasterfail:
-#ifdef HP100_DEBUG
-			printk("hp100: %s: Card not configured for BM or BM not supported with this card.\n", dev->name);
-			printk("hp100: %s: Trying shared memory mode.\n", dev->name);
-#endif
-			/* In this case, try shared memory mode */
-			local_mode = 2;
-			hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
-			/* hp100_outw(HP100_IO_EN|HP100_RESET_LB, OPTION_LSW); */
-		}
-	}
-#ifdef HP100_DEBUG
-	printk("hp100: %s: new LSW = 0x%x\n", dev->name, hp100_inw(OPTION_LSW));
-#endif
-
-	/* Check for shared memory on the card, eventually remap it */
-	hp100_page(HW_MAP);
-	mem_mapped = ((hp100_inw(OPTION_LSW) & (HP100_MEM_EN)) != 0);
-	mem_ptr_phys = 0UL;
-	mem_ptr_virt = NULL;
-	memory_size = (8192 << ((hp100_inb(SRAM) >> 5) & 0x07));
-	virt_memory_size = 0;
-
-	/* For memory mapped or busmaster mode, we want the memory address */
-	if (mem_mapped || (local_mode == 1)) {
-		mem_ptr_phys = (hp100_inw(MEM_MAP_LSW) | (hp100_inw(MEM_MAP_MSW) << 16));
-		mem_ptr_phys &= ~0x1fff;	/* 8k alignment */
-
-		if (bus == HP100_BUS_ISA && (mem_ptr_phys & ~0xfffff) != 0) {
-			printk("hp100: Can only use programmed i/o mode.\n");
-			mem_ptr_phys = 0;
-			mem_mapped = 0;
-			local_mode = 3;	/* Use programmed i/o */
-		}
-
-		/* We do not need access to shared memory in busmaster mode */
-		/* However in slave mode we need to remap high (>1GB) card memory  */
-		if (local_mode != 1) {	/* = not busmaster */
-			/* We try with smaller memory sizes, if ioremap fails */
-			for (virt_memory_size = memory_size; virt_memory_size > 16383; virt_memory_size >>= 1) {
-				if ((mem_ptr_virt = ioremap((u_long) mem_ptr_phys, virt_memory_size)) == NULL) {
-#ifdef HP100_DEBUG
-					printk("hp100: %s: ioremap for 0x%x bytes high PCI memory at 0x%lx failed\n", dev->name, virt_memory_size, mem_ptr_phys);
-#endif
-				} else {
-#ifdef HP100_DEBUG
-					printk("hp100: %s: remapped 0x%x bytes high PCI memory at 0x%lx to %p.\n", dev->name, virt_memory_size, mem_ptr_phys, mem_ptr_virt);
-#endif
-					break;
-				}
-			}
-
-			if (mem_ptr_virt == NULL) {	/* all ioremap tries failed */
-				printk("hp100: Failed to ioremap the PCI card memory. Will have to use i/o mapped mode.\n");
-				local_mode = 3;
-				virt_memory_size = 0;
-			}
-		}
-	}
-
-	if (local_mode == 3) {	/* io mapped forced */
-		mem_mapped = 0;
-		mem_ptr_phys = 0;
-		mem_ptr_virt = NULL;
-		printk("hp100: Using (slow) programmed i/o mode.\n");
-	}
-
-	/* Initialise the "private" data structure for this card. */
-	lp = netdev_priv(dev);
-
-	spin_lock_init(&lp->lock);
-	strlcpy(lp->id, eid, HP100_SIG_LEN);
-	lp->chip = chip;
-	lp->mode = local_mode;
-	lp->bus = bus;
-	lp->pci_dev = pci_dev;
-	lp->priority_tx = hp100_priority_tx;
-	lp->rx_ratio = hp100_rx_ratio;
-	lp->mem_ptr_phys = mem_ptr_phys;
-	lp->mem_ptr_virt = mem_ptr_virt;
-	hp100_page(ID_MAC_ADDR);
-	lp->soft_model = hp100_inb(SOFT_MODEL);
-	lp->mac1_mode = HP100_MAC1MODE3;
-	lp->mac2_mode = HP100_MAC2MODE3;
-	memset(&lp->hash_bytes, 0x00, 8);
-
-	dev->base_addr = ioaddr;
-
-	lp->memory_size = memory_size;
-	lp->virt_memory_size = virt_memory_size;
-	lp->rx_ratio = hp100_rx_ratio;	/* can be conf'd with insmod */
-
-	if (lp->mode == 1)	/* busmaster */
-		dev->netdev_ops = &hp100_bm_netdev_ops;
-	else
-		dev->netdev_ops = &hp100_netdev_ops;
-
-	/* Ask the card for which IRQ line it is configured */
-	if (bus == HP100_BUS_PCI) {
-		dev->irq = pci_dev->irq;
-	} else {
-		hp100_page(HW_MAP);
-		dev->irq = hp100_inb(IRQ_CHANNEL) & HP100_IRQMASK;
-		if (dev->irq == 2)
-			dev->irq = 9;
-	}
-
-	if (lp->mode == 1)	/* busmaster */
-		dev->dma = 4;
-
-	/* Ask the card for its MAC address and store it for later use. */
-	hp100_page(ID_MAC_ADDR);
-	for (i = uc = 0; i < 6; i++)
-		dev->dev_addr[i] = hp100_inb(LAN_ADDR + i);
-
-	/* Reset statistics (counters) */
-	hp100_clear_stats(lp, ioaddr);
-
-	/* If busmaster mode is wanted, a dma-capable memory area is needed for
-	 * the rx and tx PDLs
-	 * PCI cards can access the whole PC memory. Therefore GFP_DMA is not
-	 * needed for the allocation of the memory area.
-	 */
-
-	/* TODO: We do not need this with old cards, where PDLs are stored
-	 * in the cards shared memory area. But currently, busmaster has been
-	 * implemented/tested only with the lassen chip anyway... */
-	if (lp->mode == 1) {	/* busmaster */
-		dma_addr_t page_baddr;
-		/* Get physically continuous memory for TX & RX PDLs    */
-		/* Conversion to new PCI API :
-		 * Pages are always aligned and zeroed, no need to it ourself.
-		 * Doc says should be OK for EISA bus as well - Jean II */
-		lp->page_vaddr_algn = pci_alloc_consistent(lp->pci_dev, MAX_RINGSIZE, &page_baddr);
-		if (!lp->page_vaddr_algn) {
-			err = -ENOMEM;
-			goto out_mem_ptr;
-		}
-		lp->whatever_offset = ((u_long) page_baddr) - ((u_long) lp->page_vaddr_algn);
-
-#ifdef HP100_DEBUG_BM
-		printk("hp100: %s: Reserved DMA memory from 0x%x to 0x%x\n", dev->name, (u_int) lp->page_vaddr_algn, (u_int) lp->page_vaddr_algn + MAX_RINGSIZE);
-#endif
-		lp->rxrcommit = lp->txrcommit = 0;
-		lp->rxrhead = lp->rxrtail = &(lp->rxring[0]);
-		lp->txrhead = lp->txrtail = &(lp->txring[0]);
-	}
-
-	/* Initialise the card. */
-	/* (I'm not really sure if it's a good idea to do this during probing, but
-	 * like this it's assured that the lan connection type can be sensed
-	 * correctly)
-	 */
-	hp100_hwinit(dev);
-
-	/* Try to find out which kind of LAN the card is connected to. */
-	lp->lan_type = hp100_sense_lan(dev);
-
-	/* Print out a message what about what we think we have probed. */
-	printk("hp100: at 0x%x, IRQ %d, ", ioaddr, dev->irq);
-	switch (bus) {
-	case HP100_BUS_EISA:
-		printk("EISA");
-		break;
-	case HP100_BUS_PCI:
-		printk("PCI");
-		break;
-	default:
-		printk("ISA");
-		break;
-	}
-	printk(" bus, %dk SRAM (rx/tx %d%%).\n", lp->memory_size >> 10, lp->rx_ratio);
-
-	if (lp->mode == 2) {	/* memory mapped */
-		printk("hp100: Memory area at 0x%lx-0x%lx", mem_ptr_phys,
-				(mem_ptr_phys + (mem_ptr_phys > 0x100000 ? (u_long) lp->memory_size : 16 * 1024)) - 1);
-		if (mem_ptr_virt)
-			printk(" (virtual base %p)", mem_ptr_virt);
-		printk(".\n");
-
-		/* Set for info when doing ifconfig */
-		dev->mem_start = mem_ptr_phys;
-		dev->mem_end = mem_ptr_phys + lp->memory_size;
-	}
-
-	printk("hp100: ");
-	if (lp->lan_type != HP100_LAN_ERR)
-		printk("Adapter is attached to ");
-	switch (lp->lan_type) {
-	case HP100_LAN_100:
-		printk("100Mb/s Voice Grade AnyLAN network.\n");
-		break;
-	case HP100_LAN_10:
-		printk("10Mb/s network (10baseT).\n");
-		break;
-	case HP100_LAN_COAX:
-		printk("10Mb/s network (coax).\n");
-		break;
-	default:
-		printk("Warning! Link down.\n");
-	}
-
-	err = register_netdev(dev);
-	if (err)
-		goto out3;
-
-	return 0;
-out3:
-	if (local_mode == 1)
-		pci_free_consistent(lp->pci_dev, MAX_RINGSIZE + 0x0f,
-				    lp->page_vaddr_algn,
-				    virt_to_whatever(dev, lp->page_vaddr_algn));
-out_mem_ptr:
-	if (mem_ptr_virt)
-		iounmap(mem_ptr_virt);
-out2:
-	release_region(ioaddr, HP100_REGION_SIZE);
-out1:
-	return err;
-}
-
-/* This procedure puts the card into a stable init state */
-static void hp100_hwinit(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4202, TRACE);
-	printk("hp100: %s: hwinit\n", dev->name);
-#endif
-
-	/* Initialise the card. -------------------------------------------- */
-
-	/* Clear all pending Ints and disable Ints */
-	hp100_page(PERFORMANCE);
-	hp100_outw(0xfefe, IRQ_MASK);	/* mask off all ints */
-	hp100_outw(0xffff, IRQ_STATUS);	/* clear all pending ints */
-
-	hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
-	hp100_outw(HP100_TRI_INT | HP100_SET_HB, OPTION_LSW);
-
-	if (lp->mode == 1) {
-		hp100_BM_shutdown(dev);	/* disables BM, puts cascade in reset */
-		wait();
-	} else {
-		hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
-		hp100_cascade_reset(dev, 1);
-		hp100_page(MAC_CTRL);
-		hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);
-	}
-
-	/* Initiate EEPROM reload */
-	hp100_load_eeprom(dev, 0);
-
-	wait();
-
-	/* Go into reset again. */
-	hp100_cascade_reset(dev, 1);
-
-	/* Set Option Registers to a safe state  */
-	hp100_outw(HP100_DEBUG_EN |
-		   HP100_RX_HDR |
-		   HP100_EE_EN |
-		   HP100_BM_WRITE |
-		   HP100_BM_READ | HP100_RESET_HB |
-		   HP100_FAKE_INT |
-		   HP100_INT_EN |
-		   HP100_MEM_EN |
-		   HP100_IO_EN | HP100_RESET_LB, OPTION_LSW);
-
-	hp100_outw(HP100_TRI_INT |
-		   HP100_MMAP_DIS | HP100_SET_HB, OPTION_LSW);
-
-	hp100_outb(HP100_PRIORITY_TX |
-		   HP100_ADV_NXT_PKT |
-		   HP100_TX_CMD | HP100_RESET_LB, OPTION_MSW);
-
-	/* TODO: Configure MMU for Ram Test. */
-	/* TODO: Ram Test. */
-
-	/* Re-check if adapter is still at same i/o location      */
-	/* (If the base i/o in eeprom has been changed but the    */
-	/* registers had not been changed, a reload of the eeprom */
-	/* would move the adapter to the address stored in eeprom */
-
-	/* TODO: Code to implement. */
-
-	/* Until here it was code from HWdiscover procedure. */
-	/* Next comes code from mmuinit procedure of SCO BM driver which is
-	 * called from HWconfigure in the SCO driver.  */
-
-	/* Initialise MMU, eventually switch on Busmaster Mode, initialise
-	 * multicast filter...
-	 */
-	hp100_mmuinit(dev);
-
-	/* We don't turn the interrupts on here - this is done by start_interface. */
-	wait();			/* TODO: Do we really need this? */
-
-	/* Enable Hardware (e.g. unreset) */
-	hp100_cascade_reset(dev, 0);
-
-	/* ------- initialisation complete ----------- */
-
-	/* Finally try to log in the Hub if there may be a VG connection. */
-	if ((lp->lan_type == HP100_LAN_100) || (lp->lan_type == HP100_LAN_ERR))
-		hp100_login_to_vg_hub(dev, 0);	/* relogin */
-
-}
-
-
-/*
- * mmuinit - Reinitialise Cascade MMU and MAC settings.
- * Note: Must already be in reset and leaves card in reset.
- */
-static void hp100_mmuinit(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-	int i;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4203, TRACE);
-	printk("hp100: %s: mmuinit\n", dev->name);
-#endif
-
-#ifdef HP100_DEBUG
-	if (0 != (hp100_inw(OPTION_LSW) & HP100_HW_RST)) {
-		printk("hp100: %s: Not in reset when entering mmuinit. Fix me.\n", dev->name);
-		return;
-	}
-#endif
-
-	/* Make sure IRQs are masked off and ack'ed. */
-	hp100_page(PERFORMANCE);
-	hp100_outw(0xfefe, IRQ_MASK);	/* mask off all ints */
-	hp100_outw(0xffff, IRQ_STATUS);	/* ack IRQ */
-
-	/*
-	 * Enable Hardware
-	 * - Clear Debug En, Rx Hdr Pipe, EE En, I/O En, Fake Int and Intr En
-	 * - Set Tri-State Int, Bus Master Rd/Wr, and Mem Map Disable
-	 * - Clear Priority, Advance Pkt and Xmit Cmd
-	 */
-
-	hp100_outw(HP100_DEBUG_EN |
-		   HP100_RX_HDR |
-		   HP100_EE_EN | HP100_RESET_HB |
-		   HP100_IO_EN |
-		   HP100_FAKE_INT |
-		   HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
-
-	hp100_outw(HP100_TRI_INT | HP100_SET_HB, OPTION_LSW);
-
-	if (lp->mode == 1) {	/* busmaster */
-		hp100_outw(HP100_BM_WRITE |
-			   HP100_BM_READ |
-			   HP100_MMAP_DIS | HP100_SET_HB, OPTION_LSW);
-	} else if (lp->mode == 2) {	/* memory mapped */
-		hp100_outw(HP100_BM_WRITE |
-			   HP100_BM_READ | HP100_RESET_HB, OPTION_LSW);
-		hp100_outw(HP100_MMAP_DIS | HP100_RESET_HB, OPTION_LSW);
-		hp100_outw(HP100_MEM_EN | HP100_SET_LB, OPTION_LSW);
-		hp100_outw(HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
-	} else if (lp->mode == 3) {	/* i/o mapped mode */
-		hp100_outw(HP100_MMAP_DIS | HP100_SET_HB |
-			   HP100_IO_EN | HP100_SET_LB, OPTION_LSW);
-	}
-
-	hp100_page(HW_MAP);
-	hp100_outb(0, EARLYRXCFG);
-	hp100_outw(0, EARLYTXCFG);
-
-	/*
-	 * Enable Bus Master mode
-	 */
-	if (lp->mode == 1) {	/* busmaster */
-		/* Experimental: Set some PCI configuration bits */
-		hp100_page(HW_MAP);
-		hp100_andb(~HP100_PDL_USE3, MODECTRL1);	/* BM engine read maximum */
-		hp100_andb(~HP100_TX_DUALQ, MODECTRL1);	/* No Queue for Priority TX */
-
-		/* PCI Bus failures should result in a Misc. Interrupt */
-		hp100_orb(HP100_EN_BUS_FAIL, MODECTRL2);
-
-		hp100_outw(HP100_BM_READ | HP100_BM_WRITE | HP100_SET_HB, OPTION_LSW);
-		hp100_page(HW_MAP);
-		/* Use Burst Mode and switch on PAGE_CK */
-		hp100_orb(HP100_BM_BURST_RD | HP100_BM_BURST_WR, BM);
-		if ((lp->chip == HP100_CHIPID_RAINIER) || (lp->chip == HP100_CHIPID_SHASTA))
-			hp100_orb(HP100_BM_PAGE_CK, BM);
-		hp100_orb(HP100_BM_MASTER, BM);
-	} else {		/* not busmaster */
-
-		hp100_page(HW_MAP);
-		hp100_andb(~HP100_BM_MASTER, BM);
-	}
-
-	/*
-	 * Divide card memory into regions for Rx, Tx and, if non-ETR chip, PDLs
-	 */
-	hp100_page(MMU_CFG);
-	if (lp->mode == 1) {	/* only needed for Busmaster */
-		int xmit_stop, recv_stop;
-
-		if ((lp->chip == HP100_CHIPID_RAINIER) ||
-		    (lp->chip == HP100_CHIPID_SHASTA)) {
-			int pdl_stop;
-
-			/*
-			 * Each pdl is 508 bytes long. (63 frags * 4 bytes for address and
-			 * 4 bytes for header). We will leave NUM_RXPDLS * 508 (rounded
-			 * to the next higher 1k boundary) bytes for the rx-pdl's
-			 * Note: For non-etr chips the transmit stop register must be
-			 * programmed on a 1k boundary, i.e. bits 9:0 must be zero.
-			 */
-			pdl_stop = lp->memory_size;
-			xmit_stop = (pdl_stop - 508 * (MAX_RX_PDL) - 16) & ~(0x03ff);
-			recv_stop = (xmit_stop * (lp->rx_ratio) / 100) & ~(0x03ff);
-			hp100_outw((pdl_stop >> 4) - 1, PDL_MEM_STOP);
-#ifdef HP100_DEBUG_BM
-			printk("hp100: %s: PDL_STOP = 0x%x\n", dev->name, pdl_stop);
-#endif
-		} else {
-			/* ETR chip (Lassen) in busmaster mode */
-			xmit_stop = (lp->memory_size) - 1;
-			recv_stop = ((lp->memory_size * lp->rx_ratio) / 100) & ~(0x03ff);
-		}
-
-		hp100_outw(xmit_stop >> 4, TX_MEM_STOP);
-		hp100_outw(recv_stop >> 4, RX_MEM_STOP);
-#ifdef HP100_DEBUG_BM
-		printk("hp100: %s: TX_STOP  = 0x%x\n", dev->name, xmit_stop >> 4);
-		printk("hp100: %s: RX_STOP  = 0x%x\n", dev->name, recv_stop >> 4);
-#endif
-	} else {
-		/* Slave modes (memory mapped and programmed io)  */
-		hp100_outw((((lp->memory_size * lp->rx_ratio) / 100) >> 4), RX_MEM_STOP);
-		hp100_outw(((lp->memory_size - 1) >> 4), TX_MEM_STOP);
-#ifdef HP100_DEBUG
-		printk("hp100: %s: TX_MEM_STOP: 0x%x\n", dev->name, hp100_inw(TX_MEM_STOP));
-		printk("hp100: %s: RX_MEM_STOP: 0x%x\n", dev->name, hp100_inw(RX_MEM_STOP));
-#endif
-	}
-
-	/* Write MAC address into page 1 */
-	hp100_page(MAC_ADDRESS);
-	for (i = 0; i < 6; i++)
-		hp100_outb(dev->dev_addr[i], MAC_ADDR + i);
-
-	/* Zero the multicast hash registers */
-	for (i = 0; i < 8; i++)
-		hp100_outb(0x0, HASH_BYTE0 + i);
-
-	/* Set up MAC defaults */
-	hp100_page(MAC_CTRL);
-
-	/* Go to LAN Page and zero all filter bits */
-	/* Zero accept error, accept multicast, accept broadcast and accept */
-	/* all directed packet bits */
-	hp100_andb(~(HP100_RX_EN |
-		     HP100_TX_EN |
-		     HP100_ACC_ERRORED |
-		     HP100_ACC_MC |
-		     HP100_ACC_BC | HP100_ACC_PHY), MAC_CFG_1);
-
-	hp100_outb(0x00, MAC_CFG_2);
-
-	/* Zero the frame format bit. This works around a training bug in the */
-	/* new hubs. */
-	hp100_outb(0x00, VG_LAN_CFG_2);	/* (use 802.3) */
-
-	if (lp->priority_tx)
-		hp100_outb(HP100_PRIORITY_TX | HP100_SET_LB, OPTION_MSW);
-	else
-		hp100_outb(HP100_PRIORITY_TX | HP100_RESET_LB, OPTION_MSW);
-
-	hp100_outb(HP100_ADV_NXT_PKT |
-		   HP100_TX_CMD | HP100_RESET_LB, OPTION_MSW);
-
-	/* If busmaster, initialize the PDLs */
-	if (lp->mode == 1)
-		hp100_init_pdls(dev);
-
-	/* Go to performance page and initialize isr and imr registers */
-	hp100_page(PERFORMANCE);
-	hp100_outw(0xfefe, IRQ_MASK);	/* mask off all ints */
-	hp100_outw(0xffff, IRQ_STATUS);	/* ack IRQ */
-}
-
-/*
- *  open/close functions
- */
-
-static int hp100_open(struct net_device *dev)
-{
-	struct hp100_private *lp = netdev_priv(dev);
-#ifdef HP100_DEBUG_B
-	int ioaddr = dev->base_addr;
-#endif
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4204, TRACE);
-	printk("hp100: %s: open\n", dev->name);
-#endif
-
-	/* New: if bus is PCI or EISA, interrupts might be shared interrupts */
-	if (request_irq(dev->irq, hp100_interrupt,
-			lp->bus == HP100_BUS_PCI || lp->bus ==
-			HP100_BUS_EISA ? IRQF_SHARED : 0,
-			dev->name, dev)) {
-		printk("hp100: %s: unable to get IRQ %d\n", dev->name, dev->irq);
-		return -EAGAIN;
-	}
-
-	netif_trans_update(dev); /* prevent tx timeout */
-	netif_start_queue(dev);
-
-	lp->lan_type = hp100_sense_lan(dev);
-	lp->mac1_mode = HP100_MAC1MODE3;
-	lp->mac2_mode = HP100_MAC2MODE3;
-	memset(&lp->hash_bytes, 0x00, 8);
-
-	hp100_stop_interface(dev);
-
-	hp100_hwinit(dev);
-
-	hp100_start_interface(dev);	/* sets mac modes, enables interrupts */
-
-	return 0;
-}
-
-/* The close function is called when the interface is to be brought down */
-static int hp100_close(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4205, TRACE);
-	printk("hp100: %s: close\n", dev->name);
-#endif
-
-	hp100_page(PERFORMANCE);
-	hp100_outw(0xfefe, IRQ_MASK);	/* mask off all IRQs */
-
-	hp100_stop_interface(dev);
-
-	if (lp->lan_type == HP100_LAN_100)
-		lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-
-	netif_stop_queue(dev);
-
-	free_irq(dev->irq, dev);
-
-#ifdef HP100_DEBUG
-	printk("hp100: %s: close LSW = 0x%x\n", dev->name,
-	       hp100_inw(OPTION_LSW));
-#endif
-
-	return 0;
-}
-
-
-/*
- * Configure the PDL Rx rings and LAN
- */
-static void hp100_init_pdls(struct net_device *dev)
-{
-	struct hp100_private *lp = netdev_priv(dev);
-	hp100_ring_t *ringptr;
-	u_int *pageptr;		/* Warning : increment by 4 - Jean II */
-	int i;
-
-#ifdef HP100_DEBUG_B
-	int ioaddr = dev->base_addr;
-#endif
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4206, TRACE);
-	printk("hp100: %s: init pdls\n", dev->name);
-#endif
-
-	if (!lp->page_vaddr_algn)
-		printk("hp100: %s: Warning: lp->page_vaddr_algn not initialised!\n", dev->name);
-	else {
-		/* pageptr shall point into the DMA accessible memory region  */
-		/* we use this pointer to status the upper limit of allocated */
-		/* memory in the allocated page. */
-		/* note: align the pointers to the pci cache line size */
-		memset(lp->page_vaddr_algn, 0, MAX_RINGSIZE);	/* Zero  Rx/Tx ring page */
-		pageptr = lp->page_vaddr_algn;
-
-		lp->rxrcommit = 0;
-		ringptr = lp->rxrhead = lp->rxrtail = &(lp->rxring[0]);
-
-		/* Initialise Rx Ring */
-		for (i = MAX_RX_PDL - 1; i >= 0; i--) {
-			lp->rxring[i].next = ringptr;
-			ringptr = &(lp->rxring[i]);
-			pageptr += hp100_init_rxpdl(dev, ringptr, pageptr);
-		}
-
-		/* Initialise Tx Ring */
-		lp->txrcommit = 0;
-		ringptr = lp->txrhead = lp->txrtail = &(lp->txring[0]);
-		for (i = MAX_TX_PDL - 1; i >= 0; i--) {
-			lp->txring[i].next = ringptr;
-			ringptr = &(lp->txring[i]);
-			pageptr += hp100_init_txpdl(dev, ringptr, pageptr);
-		}
-	}
-}
-
-
-/* These functions "format" the entries in the pdl structure   */
-/* They return how much memory the fragments need.            */
-static int hp100_init_rxpdl(struct net_device *dev,
-			    register hp100_ring_t * ringptr,
-			    register u32 * pdlptr)
-{
-	/* pdlptr is starting address for this pdl */
-
-	if (0 != (((unsigned long) pdlptr) & 0xf))
-		printk("hp100: %s: Init rxpdl: Unaligned pdlptr 0x%lx.\n",
-		       dev->name, (unsigned long) pdlptr);
-
-	ringptr->pdl = pdlptr + 1;
-	ringptr->pdl_paddr = virt_to_whatever(dev, pdlptr + 1);
-	ringptr->skb = NULL;
-
-	/*
-	 * Write address and length of first PDL Fragment (which is used for
-	 * storing the RX-Header
-	 * We use the 4 bytes _before_ the PDH in the pdl memory area to
-	 * store this information. (PDH is at offset 0x04)
-	 */
-	/* Note that pdlptr+1 and not pdlptr is the pointer to the PDH */
-
-	*(pdlptr + 2) = (u_int) virt_to_whatever(dev, pdlptr);	/* Address Frag 1 */
-	*(pdlptr + 3) = 4;	/* Length  Frag 1 */
-
-	return roundup(MAX_RX_FRAG * 2 + 2, 4);
-}
-
-
-static int hp100_init_txpdl(struct net_device *dev,
-			    register hp100_ring_t * ringptr,
-			    register u32 * pdlptr)
-{
-	if (0 != (((unsigned long) pdlptr) & 0xf))
-		printk("hp100: %s: Init txpdl: Unaligned pdlptr 0x%lx.\n", dev->name, (unsigned long) pdlptr);
-
-	ringptr->pdl = pdlptr;	/* +1; */
-	ringptr->pdl_paddr = virt_to_whatever(dev, pdlptr);	/* +1 */
-	ringptr->skb = NULL;
-
-	return roundup(MAX_TX_FRAG * 2 + 2, 4);
-}
-
-/*
- * hp100_build_rx_pdl allocates an skb_buff of maximum size plus two bytes
- * for possible odd word alignment rounding up to next dword and set PDL
- * address for fragment#2
- * Returns: 0 if unable to allocate skb_buff
- *          1 if successful
- */
-static int hp100_build_rx_pdl(hp100_ring_t * ringptr,
-			      struct net_device *dev)
-{
-#ifdef HP100_DEBUG_B
-	int ioaddr = dev->base_addr;
-#endif
-#ifdef HP100_DEBUG_BM
-	u_int *p;
-#endif
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4207, TRACE);
-	printk("hp100: %s: build rx pdl\n", dev->name);
-#endif
-
-	/* Allocate skb buffer of maximum size */
-	/* Note: This depends on the alloc_skb functions allocating more
-	 * space than requested, i.e. aligning to 16bytes */
-
-	ringptr->skb = netdev_alloc_skb(dev, roundup(MAX_ETHER_SIZE + 2, 4));
-
-	if (NULL != ringptr->skb) {
-		/*
-		 * Reserve 2 bytes at the head of the buffer to land the IP header
-		 * on a long word boundary (According to the Network Driver section
-		 * in the Linux KHG, this should help to increase performance.)
-		 */
-		skb_reserve(ringptr->skb, 2);
-
-		ringptr->skb->data = skb_put(ringptr->skb, MAX_ETHER_SIZE);
-
-		/* ringptr->pdl points to the beginning of the PDL, i.e. the PDH */
-		/* Note: 1st Fragment is used for the 4 byte packet status
-		 * (receive header). Its PDL entries are set up by init_rxpdl. So
-		 * here we only have to set up the PDL fragment entries for the data
-		 * part. Those 4 bytes will be stored in the DMA memory region
-		 * directly before the PDL.
-		 */
-#ifdef HP100_DEBUG_BM
-		printk("hp100: %s: build_rx_pdl: PDH@0x%x, skb->data (len %d) at 0x%x\n",
-				     dev->name, (u_int) ringptr->pdl,
-				     roundup(MAX_ETHER_SIZE + 2, 4),
-				     (unsigned int) ringptr->skb->data);
-#endif
-
-		/* Conversion to new PCI API : map skbuf data to PCI bus.
-		 * Doc says it's OK for EISA as well - Jean II */
-		ringptr->pdl[0] = 0x00020000;	/* Write PDH */
-		ringptr->pdl[3] = pdl_map_data(netdev_priv(dev),
-					       ringptr->skb->data);
-		ringptr->pdl[4] = MAX_ETHER_SIZE;	/* Length of Data */
-
-#ifdef HP100_DEBUG_BM
-		for (p = (ringptr->pdl); p < (ringptr->pdl + 5); p++)
-			printk("hp100: %s: Adr 0x%.8x = 0x%.8x\n", dev->name, (u_int) p, (u_int) * p);
-#endif
-		return 1;
-	}
-	/* else: */
-	/* alloc_skb failed (no memory) -> still can receive the header
-	 * fragment into PDL memory. make PDL safe by clearing msgptr and
-	 * making the PDL only 1 fragment (i.e. the 4 byte packet status)
-	 */
-#ifdef HP100_DEBUG_BM
-	printk("hp100: %s: build_rx_pdl: PDH@0x%x, No space for skb.\n", dev->name, (u_int) ringptr->pdl);
-#endif
-
-	ringptr->pdl[0] = 0x00010000;	/* PDH: Count=1 Fragment */
-
-	return 0;
-}
-
-/*
- *  hp100_rxfill - attempt to fill the Rx Ring will empty skb's
- *
- * Makes assumption that skb's are always contiguous memory areas and
- * therefore PDLs contain only 2 physical fragments.
- * -  While the number of Rx PDLs with buffers is less than maximum
- *      a.  Get a maximum packet size skb
- *      b.  Put the physical address of the buffer into the PDL.
- *      c.  Output physical address of PDL to adapter.
- */
-static void hp100_rxfill(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-
-	struct hp100_private *lp = netdev_priv(dev);
-	hp100_ring_t *ringptr;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4208, TRACE);
-	printk("hp100: %s: rxfill\n", dev->name);
-#endif
-
-	hp100_page(PERFORMANCE);
-
-	while (lp->rxrcommit < MAX_RX_PDL) {
-		/*
-		   ** Attempt to get a buffer and build a Rx PDL.
-		 */
-		ringptr = lp->rxrtail;
-		if (0 == hp100_build_rx_pdl(ringptr, dev)) {
-			return;	/* None available, return */
-		}
-
-		/* Hand this PDL over to the card */
-		/* Note: This needs performance page selected! */
-#ifdef HP100_DEBUG_BM
-		printk("hp100: %s: rxfill: Hand to card: pdl #%d @0x%x phys:0x%x, buffer: 0x%x\n",
-				     dev->name, lp->rxrcommit, (u_int) ringptr->pdl,
-				     (u_int) ringptr->pdl_paddr, (u_int) ringptr->pdl[3]);
-#endif
-
-		hp100_outl((u32) ringptr->pdl_paddr, RX_PDA);
-
-		lp->rxrcommit += 1;
-		lp->rxrtail = ringptr->next;
-	}
-}
-
-/*
- * BM_shutdown - shutdown bus mastering and leave chip in reset state
- */
-
-static void hp100_BM_shutdown(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-	unsigned long time;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4209, TRACE);
-	printk("hp100: %s: bm shutdown\n", dev->name);
-#endif
-
-	hp100_page(PERFORMANCE);
-	hp100_outw(0xfefe, IRQ_MASK);	/* mask off all ints */
-	hp100_outw(0xffff, IRQ_STATUS);	/* Ack all ints */
-
-	/* Ensure Interrupts are off */
-	hp100_outw(HP100_INT_EN | HP100_RESET_LB, OPTION_LSW);
-
-	/* Disable all MAC activity */
-	hp100_page(MAC_CTRL);
-	hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);	/* stop rx/tx */
-
-	/* If cascade MMU is not already in reset */
-	if (0 != (hp100_inw(OPTION_LSW) & HP100_HW_RST)) {
-		/* Wait 1.3ms (10Mb max packet time) to ensure MAC is idle so
-		 * MMU pointers will not be reset out from underneath
-		 */
-		hp100_page(MAC_CTRL);
-		for (time = 0; time < 5000; time++) {
-			if ((hp100_inb(MAC_CFG_1) & (HP100_TX_IDLE | HP100_RX_IDLE)) == (HP100_TX_IDLE | HP100_RX_IDLE))
-				break;
-		}
-
-		/* Shutdown algorithm depends on the generation of Cascade */
-		if (lp->chip == HP100_CHIPID_LASSEN) {	/* ETR shutdown/reset */
-			/* Disable Busmaster mode and wait for bit to go to zero. */
-			hp100_page(HW_MAP);
-			hp100_andb(~HP100_BM_MASTER, BM);
-			/* 100 ms timeout */
-			for (time = 0; time < 32000; time++) {
-				if (0 == (hp100_inb(BM) & HP100_BM_MASTER))
-					break;
-			}
-		} else {	/* Shasta or Rainier Shutdown/Reset */
-			/* To ensure all bus master inloading activity has ceased,
-			 * wait for no Rx PDAs or no Rx packets on card.
-			 */
-			hp100_page(PERFORMANCE);
-			/* 100 ms timeout */
-			for (time = 0; time < 10000; time++) {
-				/* RX_PDL: PDLs not executed. */
-				/* RX_PKT_CNT: RX'd packets on card. */
-				if ((hp100_inb(RX_PDL) == 0) && (hp100_inb(RX_PKT_CNT) == 0))
-					break;
-			}
-
-			if (time >= 10000)
-				printk("hp100: %s: BM shutdown error.\n", dev->name);
-
-			/* To ensure all bus master outloading activity has ceased,
-			 * wait until the Tx PDA count goes to zero or no more Tx space
-			 * available in the Tx region of the card.
-			 */
-			/* 100 ms timeout */
-			for (time = 0; time < 10000; time++) {
-				if ((0 == hp100_inb(TX_PKT_CNT)) &&
-				    (0 != (hp100_inb(TX_MEM_FREE) & HP100_AUTO_COMPARE)))
-					break;
-			}
-
-			/* Disable Busmaster mode */
-			hp100_page(HW_MAP);
-			hp100_andb(~HP100_BM_MASTER, BM);
-		}	/* end of shutdown procedure for non-etr parts */
-
-		hp100_cascade_reset(dev, 1);
-	}
-	hp100_page(PERFORMANCE);
-	/* hp100_outw( HP100_BM_READ | HP100_BM_WRITE | HP100_RESET_HB, OPTION_LSW ); */
-	/* Busmaster mode should be shut down now. */
-}
-
-static int hp100_check_lan(struct net_device *dev)
-{
-	struct hp100_private *lp = netdev_priv(dev);
-
-	if (lp->lan_type < 0) {	/* no LAN type detected yet? */
-		hp100_stop_interface(dev);
-		if ((lp->lan_type = hp100_sense_lan(dev)) < 0) {
-			printk("hp100: %s: no connection found - check wire\n", dev->name);
-			hp100_start_interface(dev);	/* 10Mb/s RX packets maybe handled */
-			return -EIO;
-		}
-		if (lp->lan_type == HP100_LAN_100)
-			lp->hub_status = hp100_login_to_vg_hub(dev, 0);	/* relogin */
-		hp100_start_interface(dev);
-	}
-	return 0;
-}
-
-/*
- *  transmit functions
- */
-
-/* tx function for busmaster mode */
-static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb,
-				       struct net_device *dev)
-{
-	unsigned long flags;
-	int i, ok_flag;
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-	hp100_ring_t *ringptr;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4210, TRACE);
-	printk("hp100: %s: start_xmit_bm\n", dev->name);
-#endif
-	if (skb->len <= 0)
-		goto drop;
-
-	if (lp->chip == HP100_CHIPID_SHASTA && skb_padto(skb, ETH_ZLEN))
-		return NETDEV_TX_OK;
-
-	/* Get Tx ring tail pointer */
-	if (lp->txrtail->next == lp->txrhead) {
-		/* No memory. */
-#ifdef HP100_DEBUG
-		printk("hp100: %s: start_xmit_bm: No TX PDL available.\n", dev->name);
-#endif
-		/* not waited long enough since last tx? */
-		if (time_before(jiffies, dev_trans_start(dev) + HZ))
-			goto drop;
-
-		if (hp100_check_lan(dev))
-			goto drop;
-
-		if (lp->lan_type == HP100_LAN_100 && lp->hub_status < 0) {
-			/* we have a 100Mb/s adapter but it isn't connected to hub */
-			printk("hp100: %s: login to 100Mb/s hub retry\n", dev->name);
-			hp100_stop_interface(dev);
-			lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-			hp100_start_interface(dev);
-		} else {
-			spin_lock_irqsave(&lp->lock, flags);
-			hp100_ints_off();	/* Useful ? Jean II */
-			i = hp100_sense_lan(dev);
-			hp100_ints_on();
-			spin_unlock_irqrestore(&lp->lock, flags);
-			if (i == HP100_LAN_ERR)
-				printk("hp100: %s: link down detected\n", dev->name);
-			else if (lp->lan_type != i) {	/* cable change! */
-				/* it's very hard - all network settings must be changed!!! */
-				printk("hp100: %s: cable change 10Mb/s <-> 100Mb/s detected\n", dev->name);
-				lp->lan_type = i;
-				hp100_stop_interface(dev);
-				if (lp->lan_type == HP100_LAN_100)
-					lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-				hp100_start_interface(dev);
-			} else {
-				printk("hp100: %s: interface reset\n", dev->name);
-				hp100_stop_interface(dev);
-				if (lp->lan_type == HP100_LAN_100)
-					lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-				hp100_start_interface(dev);
-			}
-		}
-
-		goto drop;
-	}
-
-	/*
-	 * we have to turn int's off before modifying this, otherwise
-	 * a tx_pdl_cleanup could occur at the same time
-	 */
-	spin_lock_irqsave(&lp->lock, flags);
-	ringptr = lp->txrtail;
-	lp->txrtail = ringptr->next;
-
-	/* Check whether packet has minimal packet size */
-	ok_flag = skb->len >= HP100_MIN_PACKET_SIZE;
-	i = ok_flag ? skb->len : HP100_MIN_PACKET_SIZE;
-
-	ringptr->skb = skb;
-	ringptr->pdl[0] = ((1 << 16) | i);	/* PDH: 1 Fragment & length */
-	if (lp->chip == HP100_CHIPID_SHASTA) {
-		/* TODO:Could someone who has the EISA card please check if this works? */
-		ringptr->pdl[2] = i;
-	} else {		/* Lassen */
-		/* In the PDL, don't use the padded size but the real packet size: */
-		ringptr->pdl[2] = skb->len;	/* 1st Frag: Length of frag */
-	}
-	/* Conversion to new PCI API : map skbuf data to PCI bus.
-	 * Doc says it's OK for EISA as well - Jean II */
-	ringptr->pdl[1] = ((u32) pci_map_single(lp->pci_dev, skb->data, ringptr->pdl[2], PCI_DMA_TODEVICE));	/* 1st Frag: Adr. of data */
-
-	/* Hand this PDL to the card. */
-	hp100_outl(ringptr->pdl_paddr, TX_PDA_L);	/* Low Prio. Queue */
-
-	lp->txrcommit++;
-
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
-
-	spin_unlock_irqrestore(&lp->lock, flags);
-
-	return NETDEV_TX_OK;
-
-drop:
-	dev_kfree_skb(skb);
-	return NETDEV_TX_OK;
-}
-
-
-/* clean_txring checks if packets have been sent by the card by reading
- * the TX_PDL register from the performance page and comparing it to the
- * number of committed packets. It then frees the skb's of the packets that
- * obviously have been sent to the network.
- *
- * Needs the PERFORMANCE page selected.
- */
-static void hp100_clean_txring(struct net_device *dev)
-{
-	struct hp100_private *lp = netdev_priv(dev);
-	int ioaddr = dev->base_addr;
-	int donecount;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4211, TRACE);
-	printk("hp100: %s: clean txring\n", dev->name);
-#endif
-
-	/* How many PDLs have been transmitted? */
-	donecount = (lp->txrcommit) - hp100_inb(TX_PDL);
-
-#ifdef HP100_DEBUG
-	if (donecount > MAX_TX_PDL)
-		printk("hp100: %s: Warning: More PDLs transmitted than committed to card???\n", dev->name);
-#endif
-
-	for (; 0 != donecount; donecount--) {
-#ifdef HP100_DEBUG_BM
-		printk("hp100: %s: Free skb: data @0x%.8x txrcommit=0x%x TXPDL=0x%x, done=0x%x\n",
-				dev->name, (u_int) lp->txrhead->skb->data,
-				lp->txrcommit, hp100_inb(TX_PDL), donecount);
-#endif
-		/* Conversion to new PCI API : NOP */
-		pci_unmap_single(lp->pci_dev, (dma_addr_t) lp->txrhead->pdl[1], lp->txrhead->pdl[2], PCI_DMA_TODEVICE);
-		dev_consume_skb_any(lp->txrhead->skb);
-		lp->txrhead->skb = NULL;
-		lp->txrhead = lp->txrhead->next;
-		lp->txrcommit--;
-	}
-}
-
-/* tx function for slave modes */
-static netdev_tx_t hp100_start_xmit(struct sk_buff *skb,
-				    struct net_device *dev)
-{
-	unsigned long flags;
-	int i, ok_flag;
-	int ioaddr = dev->base_addr;
-	u_short val;
-	struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4212, TRACE);
-	printk("hp100: %s: start_xmit\n", dev->name);
-#endif
-	if (skb->len <= 0)
-		goto drop;
-
-	if (hp100_check_lan(dev))
-		goto drop;
-
-	/* If there is not enough free memory on the card... */
-	i = hp100_inl(TX_MEM_FREE) & 0x7fffffff;
-	if (!(((i / 2) - 539) > (skb->len + 16) && (hp100_inb(TX_PKT_CNT) < 255))) {
-#ifdef HP100_DEBUG
-		printk("hp100: %s: start_xmit: tx free mem = 0x%x\n", dev->name, i);
-#endif
-		/* not waited long enough since last failed tx try? */
-		if (time_before(jiffies, dev_trans_start(dev) + HZ)) {
-#ifdef HP100_DEBUG
-			printk("hp100: %s: trans_start timing problem\n",
-			       dev->name);
-#endif
-			goto drop;
-		}
-		if (lp->lan_type == HP100_LAN_100 && lp->hub_status < 0) {
-			/* we have a 100Mb/s adapter but it isn't connected to hub */
-			printk("hp100: %s: login to 100Mb/s hub retry\n", dev->name);
-			hp100_stop_interface(dev);
-			lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-			hp100_start_interface(dev);
-		} else {
-			spin_lock_irqsave(&lp->lock, flags);
-			hp100_ints_off();	/* Useful ? Jean II */
-			i = hp100_sense_lan(dev);
-			hp100_ints_on();
-			spin_unlock_irqrestore(&lp->lock, flags);
-			if (i == HP100_LAN_ERR)
-				printk("hp100: %s: link down detected\n", dev->name);
-			else if (lp->lan_type != i) {	/* cable change! */
-				/* it's very hard - all network setting must be changed!!! */
-				printk("hp100: %s: cable change 10Mb/s <-> 100Mb/s detected\n", dev->name);
-				lp->lan_type = i;
-				hp100_stop_interface(dev);
-				if (lp->lan_type == HP100_LAN_100)
-					lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-				hp100_start_interface(dev);
-			} else {
-				printk("hp100: %s: interface reset\n", dev->name);
-				hp100_stop_interface(dev);
-				if (lp->lan_type == HP100_LAN_100)
-					lp->hub_status = hp100_login_to_vg_hub(dev, 0);
-				hp100_start_interface(dev);
-				mdelay(1);
-			}
-		}
-		goto drop;
-	}
-
-	for (i = 0; i < 6000 && (hp100_inb(OPTION_MSW) & HP100_TX_CMD); i++) {
-#ifdef HP100_DEBUG_TX
-		printk("hp100: %s: start_xmit: busy\n", dev->name);
-#endif
-	}
-
-	spin_lock_irqsave(&lp->lock, flags);
-	hp100_ints_off();
-	val = hp100_inw(IRQ_STATUS);
-	/* Ack / clear the interrupt TX_COMPLETE interrupt - this interrupt is set
-	 * when the current packet being transmitted on the wire is completed. */
-	hp100_outw(HP100_TX_COMPLETE, IRQ_STATUS);
-#ifdef HP100_DEBUG_TX
-	printk("hp100: %s: start_xmit: irq_status=0x%.4x, irqmask=0x%.4x, len=%d\n",
-			dev->name, val, hp100_inw(IRQ_MASK), (int) skb->len);
-#endif
-
-	ok_flag = skb->len >= HP100_MIN_PACKET_SIZE;
-	i = ok_flag ? skb->len : HP100_MIN_PACKET_SIZE;
-
-	hp100_outw(i, DATA32);	/* tell card the total packet length */
-	hp100_outw(i, FRAGMENT_LEN);	/* and first/only fragment length    */
-
-	if (lp->mode == 2) {	/* memory mapped */
-		/* Note: The J2585B needs alignment to 32bits here!  */
-		memcpy_toio(lp->mem_ptr_virt, skb->data, (skb->len + 3) & ~3);
-		if (!ok_flag)
-			memset_io(lp->mem_ptr_virt, 0, HP100_MIN_PACKET_SIZE - skb->len);
-	} else {		/* programmed i/o */
-		outsl(ioaddr + HP100_REG_DATA32, skb->data,
-		      (skb->len + 3) >> 2);
-		if (!ok_flag)
-			for (i = (skb->len + 3) & ~3; i < HP100_MIN_PACKET_SIZE; i += 4)
-				hp100_outl(0, DATA32);
-	}
-
-	hp100_outb(HP100_TX_CMD | HP100_SET_LB, OPTION_MSW);	/* send packet */
-
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
-	hp100_ints_on();
-	spin_unlock_irqrestore(&lp->lock, flags);
-
-	dev_consume_skb_any(skb);
-
-#ifdef HP100_DEBUG_TX
-	printk("hp100: %s: start_xmit: end\n", dev->name);
-#endif
-
-	return NETDEV_TX_OK;
-
-drop:
-	dev_kfree_skb(skb);
-	return NETDEV_TX_OK;
-
-}
-
-
-/*
- * Receive Function (Non-Busmaster mode)
- * Called when an "Receive Packet" interrupt occurs, i.e. the receive
- * packet counter is non-zero.
- * For non-busmaster, this function does the whole work of transferring
- * the packet to the host memory and then up to higher layers via skb
- * and netif_rx.
- */
-
-static void hp100_rx(struct net_device *dev)
-{
-	int packets, pkt_len;
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-	u_int header;
-	struct sk_buff *skb;
-
-#ifdef DEBUG_B
-	hp100_outw(0x4213, TRACE);
-	printk("hp100: %s: rx\n", dev->name);
-#endif
-
-	/* First get indication of received lan packet */
-	/* RX_PKT_CND indicates the number of packets which have been fully */
-	/* received onto the card but have not been fully transferred of the card */
-	packets = hp100_inb(RX_PKT_CNT);
-#ifdef HP100_DEBUG_RX
-	if (packets > 1)
-		printk("hp100: %s: rx: waiting packets = %d\n", dev->name, packets);
-#endif
-
-	while (packets-- > 0) {
-		/* If ADV_NXT_PKT is still set, we have to wait until the card has */
-		/* really advanced to the next packet. */
-		for (pkt_len = 0; pkt_len < 6000 && (hp100_inb(OPTION_MSW) & HP100_ADV_NXT_PKT); pkt_len++) {
-#ifdef HP100_DEBUG_RX
-			printk ("hp100: %s: rx: busy, remaining packets = %d\n", dev->name, packets);
-#endif
-		}
-
-		/* First we get the header, which contains information about the */
-		/* actual length of the received packet. */
-		if (lp->mode == 2) {	/* memory mapped mode */
-			header = readl(lp->mem_ptr_virt);
-		} else		/* programmed i/o */
-			header = hp100_inl(DATA32);
-
-		pkt_len = ((header & HP100_PKT_LEN_MASK) + 3) & ~3;
-
-#ifdef HP100_DEBUG_RX
-		printk("hp100: %s: rx: new packet - length=%d, errors=0x%x, dest=0x%x\n",
-				     dev->name, header & HP100_PKT_LEN_MASK,
-				     (header >> 16) & 0xfff8, (header >> 16) & 7);
-#endif
-
-		/* Now we allocate the skb and transfer the data into it. */
-		skb = netdev_alloc_skb(dev, pkt_len + 2);
-		if (skb == NULL) {	/* Not enough memory->drop packet */
-#ifdef HP100_DEBUG
-			printk("hp100: %s: rx: couldn't allocate a sk_buff of size %d\n",
-					     dev->name, pkt_len);
-#endif
-			dev->stats.rx_dropped++;
-		} else {	/* skb successfully allocated */
-
-			u_char *ptr;
-
-			skb_reserve(skb,2);
-
-			/* ptr to start of the sk_buff data area */
-			skb_put(skb, pkt_len);
-			ptr = skb->data;
-
-			/* Now transfer the data from the card into that area */
-			if (lp->mode == 2)
-				memcpy_fromio(ptr, lp->mem_ptr_virt,pkt_len);
-			else	/* io mapped */
-				insl(ioaddr + HP100_REG_DATA32, ptr, pkt_len >> 2);
-
-			skb->protocol = eth_type_trans(skb, dev);
-
-#ifdef HP100_DEBUG_RX
-			printk("hp100: %s: rx: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
-					dev->name, ptr[0], ptr[1], ptr[2], ptr[3],
-		 			ptr[4], ptr[5], ptr[6], ptr[7], ptr[8],
-					ptr[9], ptr[10], ptr[11]);
-#endif
-			netif_rx(skb);
-			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += pkt_len;
-		}
-
-		/* Indicate the card that we have got the packet */
-		hp100_outb(HP100_ADV_NXT_PKT | HP100_SET_LB, OPTION_MSW);
-
-		switch (header & 0x00070000) {
-		case (HP100_MULTI_ADDR_HASH << 16):
-		case (HP100_MULTI_ADDR_NO_HASH << 16):
-			dev->stats.multicast++;
-			break;
-		}
-	}			/* end of while(there are packets) loop */
-#ifdef HP100_DEBUG_RX
-	printk("hp100_rx: %s: end\n", dev->name);
-#endif
-}
-
-/*
- * Receive Function for Busmaster Mode
- */
-static void hp100_rx_bm(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-	hp100_ring_t *ptr;
-	u_int header;
-	int pkt_len;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4214, TRACE);
-	printk("hp100: %s: rx_bm\n", dev->name);
-#endif
-
-#ifdef HP100_DEBUG
-	if (0 == lp->rxrcommit) {
-		printk("hp100: %s: rx_bm called although no PDLs were committed to adapter?\n", dev->name);
-		return;
-	} else
-		/* RX_PKT_CNT states how many PDLs are currently formatted and available to
-		 * the cards BM engine */
-	if ((hp100_inw(RX_PKT_CNT) & 0x00ff) >= lp->rxrcommit) {
-		printk("hp100: %s: More packets received than committed? RX_PKT_CNT=0x%x, commit=0x%x\n",
-				     dev->name, hp100_inw(RX_PKT_CNT) & 0x00ff,
-				     lp->rxrcommit);
-		return;
-	}
-#endif
-
-	while ((lp->rxrcommit > hp100_inb(RX_PDL))) {
-		/*
-		 * The packet was received into the pdl pointed to by lp->rxrhead (
-		 * the oldest pdl in the ring
-		 */
-
-		/* First we get the header, which contains information about the */
-		/* actual length of the received packet. */
-
-		ptr = lp->rxrhead;
-
-		header = *(ptr->pdl - 1);
-		pkt_len = (header & HP100_PKT_LEN_MASK);
-
-		/* Conversion to new PCI API : NOP */
-		pci_unmap_single(lp->pci_dev, (dma_addr_t) ptr->pdl[3], MAX_ETHER_SIZE, PCI_DMA_FROMDEVICE);
-
-#ifdef HP100_DEBUG_BM
-		printk("hp100: %s: rx_bm: header@0x%x=0x%x length=%d, errors=0x%x, dest=0x%x\n",
-				dev->name, (u_int) (ptr->pdl - 1), (u_int) header,
-				pkt_len, (header >> 16) & 0xfff8, (header >> 16) & 7);
-		printk("hp100: %s: RX_PDL_COUNT:0x%x TX_PDL_COUNT:0x%x, RX_PKT_CNT=0x%x PDH=0x%x, Data@0x%x len=0x%x\n",
-		   		dev->name, hp100_inb(RX_PDL), hp100_inb(TX_PDL),
-				hp100_inb(RX_PKT_CNT), (u_int) * (ptr->pdl),
-				(u_int) * (ptr->pdl + 3), (u_int) * (ptr->pdl + 4));
-#endif
-
-		if ((pkt_len >= MIN_ETHER_SIZE) &&
-		    (pkt_len <= MAX_ETHER_SIZE)) {
-			if (ptr->skb == NULL) {
-				printk("hp100: %s: rx_bm: skb null\n", dev->name);
-				/* can happen if we only allocated room for the pdh due to memory shortage. */
-				dev->stats.rx_dropped++;
-			} else {
-				skb_trim(ptr->skb, pkt_len);	/* Shorten it */
-				ptr->skb->protocol =
-				    eth_type_trans(ptr->skb, dev);
-
-				netif_rx(ptr->skb);	/* Up and away... */
-
-				dev->stats.rx_packets++;
-				dev->stats.rx_bytes += pkt_len;
-			}
-
-			switch (header & 0x00070000) {
-			case (HP100_MULTI_ADDR_HASH << 16):
-			case (HP100_MULTI_ADDR_NO_HASH << 16):
-				dev->stats.multicast++;
-				break;
-			}
-		} else {
-#ifdef HP100_DEBUG
-			printk("hp100: %s: rx_bm: Received bad packet (length=%d)\n", dev->name, pkt_len);
-#endif
-			if (ptr->skb != NULL)
-				dev_kfree_skb_any(ptr->skb);
-			dev->stats.rx_errors++;
-		}
-
-		lp->rxrhead = lp->rxrhead->next;
-
-		/* Allocate a new rx PDL (so lp->rxrcommit stays the same) */
-		if (0 == hp100_build_rx_pdl(lp->rxrtail, dev)) {
-			/* No space for skb, header can still be received. */
-#ifdef HP100_DEBUG
-			printk("hp100: %s: rx_bm: No space for new PDL.\n", dev->name);
-#endif
-			return;
-		} else {	/* successfully allocated new PDL - put it in ringlist at tail. */
-			hp100_outl((u32) lp->rxrtail->pdl_paddr, RX_PDA);
-			lp->rxrtail = lp->rxrtail->next;
-		}
-
-	}
-}
-
-/*
- *  statistics
- */
-static struct net_device_stats *hp100_get_stats(struct net_device *dev)
-{
-	unsigned long flags;
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4215, TRACE);
-#endif
-
-	spin_lock_irqsave(&lp->lock, flags);
-	hp100_ints_off();	/* Useful ? Jean II */
-	hp100_update_stats(dev);
-	hp100_ints_on();
-	spin_unlock_irqrestore(&lp->lock, flags);
-	return &(dev->stats);
-}
-
-static void hp100_update_stats(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-	u_short val;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4216, TRACE);
-	printk("hp100: %s: update-stats\n", dev->name);
-#endif
-
-	/* Note: Statistics counters clear when read. */
-	hp100_page(MAC_CTRL);
-	val = hp100_inw(DROPPED) & 0x0fff;
-	dev->stats.rx_errors += val;
-	dev->stats.rx_over_errors += val;
-	val = hp100_inb(CRC);
-	dev->stats.rx_errors += val;
-	dev->stats.rx_crc_errors += val;
-	val = hp100_inb(ABORT);
-	dev->stats.tx_errors += val;
-	dev->stats.tx_aborted_errors += val;
-	hp100_page(PERFORMANCE);
-}
-
-static void hp100_misc_interrupt(struct net_device *dev)
-{
-#ifdef HP100_DEBUG_B
-	int ioaddr = dev->base_addr;
-#endif
-
-#ifdef HP100_DEBUG_B
-	int ioaddr = dev->base_addr;
-	hp100_outw(0x4216, TRACE);
-	printk("hp100: %s: misc_interrupt\n", dev->name);
-#endif
-
-	/* Note: Statistics counters clear when read. */
-	dev->stats.rx_errors++;
-	dev->stats.tx_errors++;
-}
-
-static void hp100_clear_stats(struct hp100_private *lp, int ioaddr)
-{
-	unsigned long flags;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4217, TRACE);
-	printk("hp100: %s: clear_stats\n", dev->name);
-#endif
-
-	spin_lock_irqsave(&lp->lock, flags);
-	hp100_page(MAC_CTRL);	/* get all statistics bytes */
-	hp100_inw(DROPPED);
-	hp100_inb(CRC);
-	hp100_inb(ABORT);
-	hp100_page(PERFORMANCE);
-	spin_unlock_irqrestore(&lp->lock, flags);
-}
-
-
-/*
- *  multicast setup
- */
-
-/*
- *  Set or clear the multicast filter for this adapter.
- */
-
-static void hp100_set_multicast_list(struct net_device *dev)
-{
-	unsigned long flags;
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4218, TRACE);
-	printk("hp100: %s: set_mc_list\n", dev->name);
-#endif
-
-	spin_lock_irqsave(&lp->lock, flags);
-	hp100_ints_off();
-	hp100_page(MAC_CTRL);
-	hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);	/* stop rx/tx */
-
-	if (dev->flags & IFF_PROMISC) {
-		lp->mac2_mode = HP100_MAC2MODE6;	/* promiscuous mode = get all good */
-		lp->mac1_mode = HP100_MAC1MODE6;	/* packets on the net */
-		memset(&lp->hash_bytes, 0xff, 8);
-	} else if (!netdev_mc_empty(dev) || (dev->flags & IFF_ALLMULTI)) {
-		lp->mac2_mode = HP100_MAC2MODE5;	/* multicast mode = get packets for */
-		lp->mac1_mode = HP100_MAC1MODE5;	/* me, broadcasts and all multicasts */
-#ifdef HP100_MULTICAST_FILTER	/* doesn't work!!! */
-		if (dev->flags & IFF_ALLMULTI) {
-			/* set hash filter to receive all multicast packets */
-			memset(&lp->hash_bytes, 0xff, 8);
-		} else {
-			int i, idx;
-			u_char *addrs;
-			struct netdev_hw_addr *ha;
-
-			memset(&lp->hash_bytes, 0x00, 8);
-#ifdef HP100_DEBUG
-			printk("hp100: %s: computing hash filter - mc_count = %i\n",
-			       dev->name, netdev_mc_count(dev));
-#endif
-			netdev_for_each_mc_addr(ha, dev) {
-				addrs = ha->addr;
-#ifdef HP100_DEBUG
-				printk("hp100: %s: multicast = %pM, ",
-					     dev->name, addrs);
-#endif
-				for (i = idx = 0; i < 6; i++) {
-					idx ^= *addrs++ & 0x3f;
-					printk(":%02x:", idx);
-				}
-#ifdef HP100_DEBUG
-				printk("idx = %i\n", idx);
-#endif
-				lp->hash_bytes[idx >> 3] |= (1 << (idx & 7));
-			}
-		}
-#else
-		memset(&lp->hash_bytes, 0xff, 8);
-#endif
-	} else {
-		lp->mac2_mode = HP100_MAC2MODE3;	/* normal mode = get packets for me */
-		lp->mac1_mode = HP100_MAC1MODE3;	/* and broadcasts */
-		memset(&lp->hash_bytes, 0x00, 8);
-	}
-
-	if (((hp100_inb(MAC_CFG_1) & 0x0f) != lp->mac1_mode) ||
-	    (hp100_inb(MAC_CFG_2) != lp->mac2_mode)) {
-		int i;
-
-		hp100_outb(lp->mac2_mode, MAC_CFG_2);
-		hp100_andb(HP100_MAC1MODEMASK, MAC_CFG_1);	/* clear mac1 mode bits */
-		hp100_orb(lp->mac1_mode, MAC_CFG_1);	/* and set the new mode */
-
-		hp100_page(MAC_ADDRESS);
-		for (i = 0; i < 8; i++)
-			hp100_outb(lp->hash_bytes[i], HASH_BYTE0 + i);
-#ifdef HP100_DEBUG
-		printk("hp100: %s: mac1 = 0x%x, mac2 = 0x%x, multicast hash = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-				     dev->name, lp->mac1_mode, lp->mac2_mode,
-				     lp->hash_bytes[0], lp->hash_bytes[1],
-				     lp->hash_bytes[2], lp->hash_bytes[3],
-				     lp->hash_bytes[4], lp->hash_bytes[5],
-				     lp->hash_bytes[6], lp->hash_bytes[7]);
-#endif
-
-		if (lp->lan_type == HP100_LAN_100) {
-#ifdef HP100_DEBUG
-			printk("hp100: %s: 100VG MAC settings have changed - relogin.\n", dev->name);
-#endif
-			lp->hub_status = hp100_login_to_vg_hub(dev, 1);	/* force a relogin to the hub */
-		}
-	} else {
-		int i;
-		u_char old_hash_bytes[8];
-
-		hp100_page(MAC_ADDRESS);
-		for (i = 0; i < 8; i++)
-			old_hash_bytes[i] = hp100_inb(HASH_BYTE0 + i);
-		if (memcmp(old_hash_bytes, &lp->hash_bytes, 8)) {
-			for (i = 0; i < 8; i++)
-				hp100_outb(lp->hash_bytes[i], HASH_BYTE0 + i);
-#ifdef HP100_DEBUG
-			printk("hp100: %s: multicast hash = %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-					dev->name, lp->hash_bytes[0],
-					lp->hash_bytes[1], lp->hash_bytes[2],
-					lp->hash_bytes[3], lp->hash_bytes[4],
-					lp->hash_bytes[5], lp->hash_bytes[6],
-					lp->hash_bytes[7]);
-#endif
-
-			if (lp->lan_type == HP100_LAN_100) {
-#ifdef HP100_DEBUG
-				printk("hp100: %s: 100VG MAC settings have changed - relogin.\n", dev->name);
-#endif
-				lp->hub_status = hp100_login_to_vg_hub(dev, 1);	/* force a relogin to the hub */
-			}
-		}
-	}
-
-	hp100_page(MAC_CTRL);
-	hp100_orb(HP100_RX_EN | HP100_RX_IDLE |	/* enable rx */
-		  HP100_TX_EN | HP100_TX_IDLE, MAC_CFG_1);	/* enable tx */
-
-	hp100_page(PERFORMANCE);
-	hp100_ints_on();
-	spin_unlock_irqrestore(&lp->lock, flags);
-}
-
-/*
- *  hardware interrupt handling
- */
-
-static irqreturn_t hp100_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev = (struct net_device *) dev_id;
-	struct hp100_private *lp = netdev_priv(dev);
-
-	int ioaddr;
-	u_int val;
-
-	if (dev == NULL)
-		return IRQ_NONE;
-	ioaddr = dev->base_addr;
-
-	spin_lock(&lp->lock);
-
-	hp100_ints_off();
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4219, TRACE);
-#endif
-
-	/*  hp100_page( PERFORMANCE ); */
-	val = hp100_inw(IRQ_STATUS);
-#ifdef HP100_DEBUG_IRQ
-	printk("hp100: %s: mode=%x,IRQ_STAT=0x%.4x,RXPKTCNT=0x%.2x RXPDL=0x%.2x TXPKTCNT=0x%.2x TXPDL=0x%.2x\n",
-			     dev->name, lp->mode, (u_int) val, hp100_inb(RX_PKT_CNT),
-			     hp100_inb(RX_PDL), hp100_inb(TX_PKT_CNT), hp100_inb(TX_PDL));
-#endif
-
-	if (val == 0) {		/* might be a shared interrupt */
-		spin_unlock(&lp->lock);
-		hp100_ints_on();
-		return IRQ_NONE;
-	}
-	/* We're only interested in those interrupts we really enabled. */
-	/* val &= hp100_inw( IRQ_MASK ); */
-
-	/*
-	 * RX_PDL_FILL_COMPL is set whenever a RX_PDL has been executed. A RX_PDL
-	 * is considered executed whenever the RX_PDL data structure is no longer
-	 * needed.
-	 */
-	if (val & HP100_RX_PDL_FILL_COMPL) {
-		if (lp->mode == 1)
-			hp100_rx_bm(dev);
-		else {
-			printk("hp100: %s: rx_pdl_fill_compl interrupt although not busmaster?\n", dev->name);
-		}
-	}
-
-	/*
-	 * The RX_PACKET interrupt is set, when the receive packet counter is
-	 * non zero. We use this interrupt for receiving in slave mode. In
-	 * busmaster mode, we use it to make sure we did not miss any rx_pdl_fill
-	 * interrupts. If rx_pdl_fill_compl is not set and rx_packet is set, then
-	 * we somehow have missed a rx_pdl_fill_compl interrupt.
-	 */
-
-	if (val & HP100_RX_PACKET) {	/* Receive Packet Counter is non zero */
-		if (lp->mode != 1)	/* non busmaster */
-			hp100_rx(dev);
-		else if (!(val & HP100_RX_PDL_FILL_COMPL)) {
-			/* Shouldn't happen - maybe we missed a RX_PDL_FILL Interrupt?  */
-			hp100_rx_bm(dev);
-		}
-	}
-
-	/*
-	 * Ack. that we have noticed the interrupt and thereby allow next one.
-	 * Note that this is now done after the slave rx function, since first
-	 * acknowledging and then setting ADV_NXT_PKT caused an extra interrupt
-	 * on the J2573.
-	 */
-	hp100_outw(val, IRQ_STATUS);
-
-	/*
-	 * RX_ERROR is set when a packet is dropped due to no memory resources on
-	 * the card or when a RCV_ERR occurs.
-	 * TX_ERROR is set when a TX_ABORT condition occurs in the MAC->exists
-	 * only in the 802.3 MAC and happens when 16 collisions occur during a TX
-	 */
-	if (val & (HP100_TX_ERROR | HP100_RX_ERROR)) {
-#ifdef HP100_DEBUG_IRQ
-		printk("hp100: %s: TX/RX Error IRQ\n", dev->name);
-#endif
-		hp100_update_stats(dev);
-		if (lp->mode == 1) {
-			hp100_rxfill(dev);
-			hp100_clean_txring(dev);
-		}
-	}
-
-	/*
-	 * RX_PDA_ZERO is set when the PDA count goes from non-zero to zero.
-	 */
-	if ((lp->mode == 1) && (val & (HP100_RX_PDA_ZERO)))
-		hp100_rxfill(dev);
-
-	/*
-	 * HP100_TX_COMPLETE interrupt occurs when packet transmitted on wire
-	 * is completed
-	 */
-	if ((lp->mode == 1) && (val & (HP100_TX_COMPLETE)))
-		hp100_clean_txring(dev);
-
-	/*
-	 * MISC_ERROR is set when either the LAN link goes down or a detected
-	 * bus error occurs.
-	 */
-	if (val & HP100_MISC_ERROR) {	/* New for J2585B */
-#ifdef HP100_DEBUG_IRQ
-		printk
-		    ("hp100: %s: Misc. Error Interrupt - Check cabling.\n",
-		     dev->name);
-#endif
-		if (lp->mode == 1) {
-			hp100_clean_txring(dev);
-			hp100_rxfill(dev);
-		}
-		hp100_misc_interrupt(dev);
-	}
-
-	spin_unlock(&lp->lock);
-	hp100_ints_on();
-	return IRQ_HANDLED;
-}
-
-/*
- *  some misc functions
- */
-
-static void hp100_start_interface(struct net_device *dev)
-{
-	unsigned long flags;
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4220, TRACE);
-	printk("hp100: %s: hp100_start_interface\n", dev->name);
-#endif
-
-	spin_lock_irqsave(&lp->lock, flags);
-
-	/* Ensure the adapter does not want to request an interrupt when */
-	/* enabling the IRQ line to be active on the bus (i.e. not tri-stated) */
-	hp100_page(PERFORMANCE);
-	hp100_outw(0xfefe, IRQ_MASK);	/* mask off all ints */
-	hp100_outw(0xffff, IRQ_STATUS);	/* ack all IRQs */
-	hp100_outw(HP100_FAKE_INT | HP100_INT_EN | HP100_RESET_LB,
-		   OPTION_LSW);
-	/* Un Tri-state int. TODO: Check if shared interrupts can be realised? */
-	hp100_outw(HP100_TRI_INT | HP100_RESET_HB, OPTION_LSW);
-
-	if (lp->mode == 1) {
-		/* Make sure BM bit is set... */
-		hp100_page(HW_MAP);
-		hp100_orb(HP100_BM_MASTER, BM);
-		hp100_rxfill(dev);
-	} else if (lp->mode == 2) {
-		/* Enable memory mapping. Note: Don't do this when busmaster. */
-		hp100_outw(HP100_MMAP_DIS | HP100_RESET_HB, OPTION_LSW);
-	}
-
-	hp100_page(PERFORMANCE);
-	hp100_outw(0xfefe, IRQ_MASK);	/* mask off all ints */
-	hp100_outw(0xffff, IRQ_STATUS);	/* ack IRQ */
-
-	/* enable a few interrupts: */
-	if (lp->mode == 1) {	/* busmaster mode */
-		hp100_outw(HP100_RX_PDL_FILL_COMPL |
-			   HP100_RX_PDA_ZERO | HP100_RX_ERROR |
-			   /* HP100_RX_PACKET    | */
-			   /* HP100_RX_EARLY_INT |  */ HP100_SET_HB |
-			   /* HP100_TX_PDA_ZERO  |  */
-			   HP100_TX_COMPLETE |
-			   /* HP100_MISC_ERROR   |  */
-			   HP100_TX_ERROR | HP100_SET_LB, IRQ_MASK);
-	} else {
-		hp100_outw(HP100_RX_PACKET |
-			   HP100_RX_ERROR | HP100_SET_HB |
-			   HP100_TX_ERROR | HP100_SET_LB, IRQ_MASK);
-	}
-
-	/* Note : before hp100_set_multicast_list(), because it will play with
-	 * spinlock itself... Jean II */
-	spin_unlock_irqrestore(&lp->lock, flags);
-
-	/* Enable MAC Tx and RX, set MAC modes, ... */
-	hp100_set_multicast_list(dev);
-}
-
-static void hp100_stop_interface(struct net_device *dev)
-{
-	struct hp100_private *lp = netdev_priv(dev);
-	int ioaddr = dev->base_addr;
-	u_int val;
-
-#ifdef HP100_DEBUG_B
-	printk("hp100: %s: hp100_stop_interface\n", dev->name);
-	hp100_outw(0x4221, TRACE);
-#endif
-
-	if (lp->mode == 1)
-		hp100_BM_shutdown(dev);
-	else {
-		/* Note: MMAP_DIS will be reenabled by start_interface */
-		hp100_outw(HP100_INT_EN | HP100_RESET_LB |
-			   HP100_TRI_INT | HP100_MMAP_DIS | HP100_SET_HB,
-			   OPTION_LSW);
-		val = hp100_inw(OPTION_LSW);
-
-		hp100_page(MAC_CTRL);
-		hp100_andb(~(HP100_RX_EN | HP100_TX_EN), MAC_CFG_1);
-
-		if (!(val & HP100_HW_RST))
-			return;	/* If reset, imm. return ... */
-		/* ... else: busy wait until idle */
-		for (val = 0; val < 6000; val++)
-			if ((hp100_inb(MAC_CFG_1) & (HP100_TX_IDLE | HP100_RX_IDLE)) == (HP100_TX_IDLE | HP100_RX_IDLE)) {
-				hp100_page(PERFORMANCE);
-				return;
-			}
-		printk("hp100: %s: hp100_stop_interface - timeout\n", dev->name);
-		hp100_page(PERFORMANCE);
-	}
-}
-
-static void hp100_load_eeprom(struct net_device *dev, u_short probe_ioaddr)
-{
-	int i;
-	int ioaddr = probe_ioaddr > 0 ? probe_ioaddr : dev->base_addr;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4222, TRACE);
-#endif
-
-	hp100_page(EEPROM_CTRL);
-	hp100_andw(~HP100_EEPROM_LOAD, EEPROM_CTRL);
-	hp100_orw(HP100_EEPROM_LOAD, EEPROM_CTRL);
-	for (i = 0; i < 10000; i++)
-		if (!(hp100_inb(OPTION_MSW) & HP100_EE_LOAD))
-			return;
-	printk("hp100: %s: hp100_load_eeprom - timeout\n", dev->name);
-}
-
-/*  Sense connection status.
- *  return values: LAN_10  - Connected to 10Mbit/s network
- *                 LAN_100 - Connected to 100Mbit/s network
- *                 LAN_ERR - not connected or 100Mbit/s Hub down
- */
-static int hp100_sense_lan(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-	u_short val_VG, val_10;
-	struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4223, TRACE);
-#endif
-
-	hp100_page(MAC_CTRL);
-	val_10 = hp100_inb(10_LAN_CFG_1);
-	val_VG = hp100_inb(VG_LAN_CFG_1);
-	hp100_page(PERFORMANCE);
-#ifdef HP100_DEBUG
-	printk("hp100: %s: sense_lan: val_VG = 0x%04x, val_10 = 0x%04x\n",
-	       dev->name, val_VG, val_10);
-#endif
-
-	if (val_10 & HP100_LINK_BEAT_ST)	/* 10Mb connection is active */
-		return HP100_LAN_10;
-
-	if (val_10 & HP100_AUI_ST) {	/* have we BNC or AUI onboard? */
-		/*
-		 * This can be overriden by dos utility, so if this has no effect,
-		 * perhaps you need to download that utility from HP and set card
-		 * back to "auto detect".
-		 */
-		val_10 |= HP100_AUI_SEL | HP100_LOW_TH;
-		hp100_page(MAC_CTRL);
-		hp100_outb(val_10, 10_LAN_CFG_1);
-		hp100_page(PERFORMANCE);
-		return HP100_LAN_COAX;
-	}
-
-	/* Those cards don't have a 100 Mbit connector */
-	if ( !strcmp(lp->id, "HWP1920")  ||
-	     (lp->pci_dev &&
-	      lp->pci_dev->vendor == PCI_VENDOR_ID &&
-	      (lp->pci_dev->device == PCI_DEVICE_ID_HP_J2970A ||
-	       lp->pci_dev->device == PCI_DEVICE_ID_HP_J2973A)))
-		return HP100_LAN_ERR;
-
-	if (val_VG & HP100_LINK_CABLE_ST)	/* Can hear the HUBs tone. */
-		return HP100_LAN_100;
-	return HP100_LAN_ERR;
-}
-
-static int hp100_down_vg_link(struct net_device *dev)
-{
-	struct hp100_private *lp = netdev_priv(dev);
-	int ioaddr = dev->base_addr;
-	unsigned long time;
-	long savelan, newlan;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4224, TRACE);
-	printk("hp100: %s: down_vg_link\n", dev->name);
-#endif
-
-	hp100_page(MAC_CTRL);
-	time = jiffies + (HZ / 4);
-	do {
-		if (hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST)
-			break;
-		if (!in_interrupt())
-			schedule_timeout_interruptible(1);
-	} while (time_after(time, jiffies));
-
-	if (time_after_eq(jiffies, time))	/* no signal->no logout */
-		return 0;
-
-	/* Drop the VG Link by clearing the link up cmd and load addr. */
-
-	hp100_andb(~(HP100_LOAD_ADDR | HP100_LINK_CMD), VG_LAN_CFG_1);
-	hp100_orb(HP100_VG_SEL, VG_LAN_CFG_1);
-
-	/* Conditionally stall for >250ms on Link-Up Status (to go down) */
-	time = jiffies + (HZ / 2);
-	do {
-		if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
-			break;
-		if (!in_interrupt())
-			schedule_timeout_interruptible(1);
-	} while (time_after(time, jiffies));
-
-#ifdef HP100_DEBUG
-	if (time_after_eq(jiffies, time))
-		printk("hp100: %s: down_vg_link: Link does not go down?\n", dev->name);
-#endif
-
-	/* To prevent condition where Rev 1 VG MAC and old hubs do not complete */
-	/* logout under traffic (even though all the status bits are cleared),  */
-	/* do this workaround to get the Rev 1 MAC in its idle state */
-	if (lp->chip == HP100_CHIPID_LASSEN) {
-		/* Reset VG MAC to insure it leaves the logoff state even if */
-		/* the Hub is still emitting tones */
-		hp100_andb(~HP100_VG_RESET, VG_LAN_CFG_1);
-		udelay(1500);	/* wait for >1ms */
-		hp100_orb(HP100_VG_RESET, VG_LAN_CFG_1);	/* Release Reset */
-		udelay(1500);
-	}
-
-	/* New: For lassen, switch to 10 Mbps mac briefly to clear training ACK */
-	/* to get the VG mac to full reset. This is not req.d with later chips */
-	/* Note: It will take the between 1 and 2 seconds for the VG mac to be */
-	/* selected again! This will be left to the connect hub function to */
-	/* perform if desired.  */
-	if (lp->chip == HP100_CHIPID_LASSEN) {
-		/* Have to write to 10 and 100VG control registers simultaneously */
-		savelan = newlan = hp100_inl(10_LAN_CFG_1);	/* read 10+100 LAN_CFG regs */
-		newlan &= ~(HP100_VG_SEL << 16);
-		newlan |= (HP100_DOT3_MAC) << 8;
-		hp100_andb(~HP100_AUTO_MODE, MAC_CFG_3);	/* Autosel off */
-		hp100_outl(newlan, 10_LAN_CFG_1);
-
-		/* Conditionally stall for 5sec on VG selected. */
-		time = jiffies + (HZ * 5);
-		do {
-			if (!(hp100_inb(MAC_CFG_4) & HP100_MAC_SEL_ST))
-				break;
-			if (!in_interrupt())
-				schedule_timeout_interruptible(1);
-		} while (time_after(time, jiffies));
-
-		hp100_orb(HP100_AUTO_MODE, MAC_CFG_3);	/* Autosel back on */
-		hp100_outl(savelan, 10_LAN_CFG_1);
-	}
-
-	time = jiffies + (3 * HZ);	/* Timeout 3s */
-	do {
-		if ((hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST) == 0)
-			break;
-		if (!in_interrupt())
-			schedule_timeout_interruptible(1);
-	} while (time_after(time, jiffies));
-
-	if (time_before_eq(time, jiffies)) {
-#ifdef HP100_DEBUG
-		printk("hp100: %s: down_vg_link: timeout\n", dev->name);
-#endif
-		return -EIO;
-	}
-
-	time = jiffies + (2 * HZ);	/* This seems to take a while.... */
-	do {
-		if (!in_interrupt())
-			schedule_timeout_interruptible(1);
-	} while (time_after(time, jiffies));
-
-	return 0;
-}
-
-static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin)
-{
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-	u_short val = 0;
-	unsigned long time;
-	int startst;
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4225, TRACE);
-	printk("hp100: %s: login_to_vg_hub\n", dev->name);
-#endif
-
-	/* Initiate a login sequence iff VG MAC is enabled and either Load Address
-	 * bit is zero or the force relogin flag is set (e.g. due to MAC address or
-	 * promiscuous mode change)
-	 */
-	hp100_page(MAC_CTRL);
-	startst = hp100_inb(VG_LAN_CFG_1);
-	if ((force_relogin == 1) || (hp100_inb(MAC_CFG_4) & HP100_MAC_SEL_ST)) {
-#ifdef HP100_DEBUG_TRAINING
-		printk("hp100: %s: Start training\n", dev->name);
-#endif
-
-		/* Ensure VG Reset bit is 1 (i.e., do not reset) */
-		hp100_orb(HP100_VG_RESET, VG_LAN_CFG_1);
-
-		/* If Lassen AND auto-select-mode AND VG tones were sensed on */
-		/* entry then temporarily put them into force 100Mbit mode */
-		if ((lp->chip == HP100_CHIPID_LASSEN) && (startst & HP100_LINK_CABLE_ST))
-			hp100_andb(~HP100_DOT3_MAC, 10_LAN_CFG_2);
-
-		/* Drop the VG link by zeroing Link Up Command and Load Address  */
-		hp100_andb(~(HP100_LINK_CMD /* |HP100_LOAD_ADDR */ ), VG_LAN_CFG_1);
-
-#ifdef HP100_DEBUG_TRAINING
-		printk("hp100: %s: Bring down the link\n", dev->name);
-#endif
-
-		/* Wait for link to drop */
-		time = jiffies + (HZ / 10);
-		do {
-			if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
-				break;
-			if (!in_interrupt())
-				schedule_timeout_interruptible(1);
-		} while (time_after(time, jiffies));
-
-		/* Start an addressed training and optionally request promiscuous port */
-		if ((dev->flags) & IFF_PROMISC) {
-			hp100_orb(HP100_PROM_MODE, VG_LAN_CFG_2);
-			if (lp->chip == HP100_CHIPID_LASSEN)
-				hp100_orw(HP100_MACRQ_PROMSC, TRAIN_REQUEST);
-		} else {
-			hp100_andb(~HP100_PROM_MODE, VG_LAN_CFG_2);
-			/* For ETR parts we need to reset the prom. bit in the training
-			 * register, otherwise promiscious mode won't be disabled.
-			 */
-			if (lp->chip == HP100_CHIPID_LASSEN) {
-				hp100_andw(~HP100_MACRQ_PROMSC, TRAIN_REQUEST);
-			}
-		}
-
-		/* With ETR parts, frame format request bits can be set. */
-		if (lp->chip == HP100_CHIPID_LASSEN)
-			hp100_orb(HP100_MACRQ_FRAMEFMT_EITHER, TRAIN_REQUEST);
-
-		hp100_orb(HP100_LINK_CMD | HP100_LOAD_ADDR | HP100_VG_RESET, VG_LAN_CFG_1);
-
-		/* Note: Next wait could be omitted for Hood and earlier chips under */
-		/* certain circumstances */
-		/* TODO: check if hood/earlier and skip wait. */
-
-		/* Wait for either short timeout for VG tones or long for login    */
-		/* Wait for the card hardware to signalise link cable status ok... */
-		hp100_page(MAC_CTRL);
-		time = jiffies + (1 * HZ);	/* 1 sec timeout for cable st */
-		do {
-			if (hp100_inb(VG_LAN_CFG_1) & HP100_LINK_CABLE_ST)
-				break;
-			if (!in_interrupt())
-				schedule_timeout_interruptible(1);
-		} while (time_before(jiffies, time));
-
-		if (time_after_eq(jiffies, time)) {
-#ifdef HP100_DEBUG_TRAINING
-			printk("hp100: %s: Link cable status not ok? Training aborted.\n", dev->name);
-#endif
-		} else {
-#ifdef HP100_DEBUG_TRAINING
-			printk
-			    ("hp100: %s: HUB tones detected. Trying to train.\n",
-			     dev->name);
-#endif
-
-			time = jiffies + (2 * HZ);	/* again a timeout */
-			do {
-				val = hp100_inb(VG_LAN_CFG_1);
-				if ((val & (HP100_LINK_UP_ST))) {
-#ifdef HP100_DEBUG_TRAINING
-					printk("hp100: %s: Passed training.\n", dev->name);
-#endif
-					break;
-				}
-				if (!in_interrupt())
-					schedule_timeout_interruptible(1);
-			} while (time_after(time, jiffies));
-		}
-
-		/* If LINK_UP_ST is set, then we are logged into the hub. */
-		if (time_before_eq(jiffies, time) && (val & HP100_LINK_UP_ST)) {
-#ifdef HP100_DEBUG_TRAINING
-			printk("hp100: %s: Successfully logged into the HUB.\n", dev->name);
-			if (lp->chip == HP100_CHIPID_LASSEN) {
-				val = hp100_inw(TRAIN_ALLOW);
-				printk("hp100: %s: Card supports 100VG MAC Version \"%s\" ",
-					     dev->name, (hp100_inw(TRAIN_REQUEST) & HP100_CARD_MACVER) ? "802.12" : "Pre");
-				printk("Driver will use MAC Version \"%s\"\n", (val & HP100_HUB_MACVER) ? "802.12" : "Pre");
-				printk("hp100: %s: Frame format is %s.\n", dev->name, (val & HP100_MALLOW_FRAMEFMT) ? "802.5" : "802.3");
-			}
-#endif
-		} else {
-			/* If LINK_UP_ST is not set, login was not successful */
-			printk("hp100: %s: Problem logging into the HUB.\n", dev->name);
-			if (lp->chip == HP100_CHIPID_LASSEN) {
-				/* Check allowed Register to find out why there is a problem. */
-				val = hp100_inw(TRAIN_ALLOW);	/* won't work on non-ETR card */
-#ifdef HP100_DEBUG_TRAINING
-				printk("hp100: %s: MAC Configuration requested: 0x%04x, HUB allowed: 0x%04x\n", dev->name, hp100_inw(TRAIN_REQUEST), val);
-#endif
-				if (val & HP100_MALLOW_ACCDENIED)
-					printk("hp100: %s: HUB access denied.\n", dev->name);
-				if (val & HP100_MALLOW_CONFIGURE)
-					printk("hp100: %s: MAC Configuration is incompatible with the Network.\n", dev->name);
-				if (val & HP100_MALLOW_DUPADDR)
-					printk("hp100: %s: Duplicate MAC Address on the Network.\n", dev->name);
-			}
-		}
-
-		/* If we have put the chip into forced 100 Mbit mode earlier, go back */
-		/* to auto-select mode */
-
-		if ((lp->chip == HP100_CHIPID_LASSEN) && (startst & HP100_LINK_CABLE_ST)) {
-			hp100_page(MAC_CTRL);
-			hp100_orb(HP100_DOT3_MAC, 10_LAN_CFG_2);
-		}
-
-		val = hp100_inb(VG_LAN_CFG_1);
-
-		/* Clear the MISC_ERROR Interrupt, which might be generated when doing the relogin */
-		hp100_page(PERFORMANCE);
-		hp100_outw(HP100_MISC_ERROR, IRQ_STATUS);
-
-		if (val & HP100_LINK_UP_ST)
-			return 0;	/* login was ok */
-		else {
-			printk("hp100: %s: Training failed.\n", dev->name);
-			hp100_down_vg_link(dev);
-			return -EIO;
-		}
-	}
-	/* no forced relogin & already link there->no training. */
-	return -EIO;
-}
-
-static void hp100_cascade_reset(struct net_device *dev, u_short enable)
-{
-	int ioaddr = dev->base_addr;
-	struct hp100_private *lp = netdev_priv(dev);
-
-#ifdef HP100_DEBUG_B
-	hp100_outw(0x4226, TRACE);
-	printk("hp100: %s: cascade_reset\n", dev->name);
-#endif
-
-	if (enable) {
-		hp100_outw(HP100_HW_RST | HP100_RESET_LB, OPTION_LSW);
-		if (lp->chip == HP100_CHIPID_LASSEN) {
-			/* Lassen requires a PCI transmit fifo reset */
-			hp100_page(HW_MAP);
-			hp100_andb(~HP100_PCI_RESET, PCICTRL2);
-			hp100_orb(HP100_PCI_RESET, PCICTRL2);
-			/* Wait for min. 300 ns */
-			/* we can't use jiffies here, because it may be */
-			/* that we have disabled the timer... */
-			udelay(400);
-			hp100_andb(~HP100_PCI_RESET, PCICTRL2);
-			hp100_page(PERFORMANCE);
-		}
-	} else {		/* bring out of reset */
-		hp100_outw(HP100_HW_RST | HP100_SET_LB, OPTION_LSW);
-		udelay(400);
-		hp100_page(PERFORMANCE);
-	}
-}
-
-#ifdef HP100_DEBUG
-void hp100_RegisterDump(struct net_device *dev)
-{
-	int ioaddr = dev->base_addr;
-	int Page;
-	int Register;
-
-	/* Dump common registers */
-	printk("hp100: %s: Cascade Register Dump\n", dev->name);
-	printk("hardware id #1: 0x%.2x\n", hp100_inb(HW_ID));
-	printk("hardware id #2/paging: 0x%.2x\n", hp100_inb(PAGING));
-	printk("option #1: 0x%.4x\n", hp100_inw(OPTION_LSW));
-	printk("option #2: 0x%.4x\n", hp100_inw(OPTION_MSW));
-
-	/* Dump paged registers */
-	for (Page = 0; Page < 8; Page++) {
-		/* Dump registers */
-		printk("page: 0x%.2x\n", Page);
-		outw(Page, ioaddr + 0x02);
-		for (Register = 0x8; Register < 0x22; Register += 2) {
-			/* Display Register contents except data port */
-			if (((Register != 0x10) && (Register != 0x12)) || (Page > 0)) {
-				printk("0x%.2x = 0x%.4x\n", Register, inw(ioaddr + Register));
-			}
-		}
-	}
-	hp100_page(PERFORMANCE);
-}
-#endif
-
-
-static void cleanup_dev(struct net_device *d)
-{
-	struct hp100_private *p = netdev_priv(d);
-
-	unregister_netdev(d);
-	release_region(d->base_addr, HP100_REGION_SIZE);
-
-	if (p->mode == 1)	/* busmaster */
-		pci_free_consistent(p->pci_dev, MAX_RINGSIZE + 0x0f,
-				    p->page_vaddr_algn,
-				    virt_to_whatever(d, p->page_vaddr_algn));
-	if (p->mem_ptr_virt)
-		iounmap(p->mem_ptr_virt);
-
-	free_netdev(d);
-}
-
-static int hp100_eisa_probe(struct device *gendev)
-{
-	struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
-	struct eisa_device *edev = to_eisa_device(gendev);
-	int err;
-
-	if (!dev)
-		return -ENOMEM;
-
-	SET_NETDEV_DEV(dev, &edev->dev);
-
-	err = hp100_probe1(dev, edev->base_addr + 0xC38, HP100_BUS_EISA, NULL);
-	if (err)
-		goto out1;
-
-#ifdef HP100_DEBUG
-	printk("hp100: %s: EISA adapter found at 0x%x\n", dev->name,
-	       dev->base_addr);
-#endif
-	dev_set_drvdata(gendev, dev);
-	return 0;
- out1:
-	free_netdev(dev);
-	return err;
-}
-
-static int hp100_eisa_remove(struct device *gendev)
-{
-	struct net_device *dev = dev_get_drvdata(gendev);
-	cleanup_dev(dev);
-	return 0;
-}
-
-static struct eisa_driver hp100_eisa_driver = {
-        .id_table = hp100_eisa_tbl,
-        .driver   = {
-                .name    = "hp100",
-                .probe   = hp100_eisa_probe,
-		.remove  = hp100_eisa_remove,
-        }
-};
-
-static int hp100_pci_probe(struct pci_dev *pdev,
-			   const struct pci_device_id *ent)
-{
-	struct net_device *dev;
-	int ioaddr;
-	u_short pci_command;
-	int err;
-
-	if (pci_enable_device(pdev))
-		return -ENODEV;
-
-	dev = alloc_etherdev(sizeof(struct hp100_private));
-	if (!dev) {
-		err = -ENOMEM;
-		goto out0;
-	}
-
-	SET_NETDEV_DEV(dev, &pdev->dev);
-
-	pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
-	if (!(pci_command & PCI_COMMAND_IO)) {
-#ifdef HP100_DEBUG
-		printk("hp100: %s: PCI I/O Bit has not been set. Setting...\n", dev->name);
-#endif
-		pci_command |= PCI_COMMAND_IO;
-		pci_write_config_word(pdev, PCI_COMMAND, pci_command);
-	}
-
-	if (!(pci_command & PCI_COMMAND_MASTER)) {
-#ifdef HP100_DEBUG
-		printk("hp100: %s: PCI Master Bit has not been set. Setting...\n", dev->name);
-#endif
-		pci_command |= PCI_COMMAND_MASTER;
-		pci_write_config_word(pdev, PCI_COMMAND, pci_command);
-	}
-
-	ioaddr = pci_resource_start(pdev, 0);
-	err = hp100_probe1(dev, ioaddr, HP100_BUS_PCI, pdev);
-	if (err)
-		goto out1;
-
-#ifdef HP100_DEBUG
-	printk("hp100: %s: PCI adapter found at 0x%x\n", dev->name, ioaddr);
-#endif
-	pci_set_drvdata(pdev, dev);
-	return 0;
- out1:
-	free_netdev(dev);
- out0:
-	pci_disable_device(pdev);
-	return err;
-}
-
-static void hp100_pci_remove(struct pci_dev *pdev)
-{
-	struct net_device *dev = pci_get_drvdata(pdev);
-
-	cleanup_dev(dev);
-	pci_disable_device(pdev);
-}
-
-
-static struct pci_driver hp100_pci_driver = {
-	.name		= "hp100",
-	.id_table	= hp100_pci_tbl,
-	.probe		= hp100_pci_probe,
-	.remove		= hp100_pci_remove,
-};
-
-/*
- *  module section
- */
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, "
-              "Siegfried \"Frieder\" Loeffler (dg1sek) <floeff@mathematik.uni-stuttgart.de>");
-MODULE_DESCRIPTION("HP CASCADE Architecture Driver for 100VG-AnyLan Network Adapters");
-
-/*
- * Note: to register three isa devices, use:
- * option hp100 hp100_port=0,0,0
- *        to register one card at io 0x280 as eth239, use:
- * option hp100 hp100_port=0x280
- */
-#if defined(MODULE) && defined(CONFIG_ISA)
-#define HP100_DEVICES 5
-/* Parameters set by insmod */
-static int hp100_port[HP100_DEVICES] = { 0, [1 ... (HP100_DEVICES-1)] = -1 };
-module_param_hw_array(hp100_port, int, ioport, NULL, 0);
-
-/* List of devices */
-static struct net_device *hp100_devlist[HP100_DEVICES];
-
-static int __init hp100_isa_init(void)
-{
-	struct net_device *dev;
-	int i, err, cards = 0;
-
-	/* Don't autoprobe ISA bus */
-	if (hp100_port[0] == 0)
-		return -ENODEV;
-
-	/* Loop on all possible base addresses */
-	for (i = 0; i < HP100_DEVICES && hp100_port[i] != -1; ++i) {
-		dev = alloc_etherdev(sizeof(struct hp100_private));
-		if (!dev) {
-			while (cards > 0)
-				cleanup_dev(hp100_devlist[--cards]);
-
-			return -ENOMEM;
-		}
-
-		err = hp100_isa_probe(dev, hp100_port[i]);
-		if (!err)
-			hp100_devlist[cards++] = dev;
-		else
-			free_netdev(dev);
-	}
-
-	return cards > 0 ? 0 : -ENODEV;
-}
-
-static void hp100_isa_cleanup(void)
-{
-	int i;
-
-	for (i = 0; i < HP100_DEVICES; i++) {
-		struct net_device *dev = hp100_devlist[i];
-		if (dev)
-			cleanup_dev(dev);
-	}
-}
-#else
-#define hp100_isa_init()	(0)
-#define hp100_isa_cleanup()	do { } while(0)
-#endif
-
-static int __init hp100_module_init(void)
-{
-	int err;
-
-	err = hp100_isa_init();
-	if (err && err != -ENODEV)
-		goto out;
-	err = eisa_driver_register(&hp100_eisa_driver);
-	if (err && err != -ENODEV)
-		goto out2;
-	err = pci_register_driver(&hp100_pci_driver);
-	if (err && err != -ENODEV)
-		goto out3;
- out:
-	return err;
- out3:
-	eisa_driver_unregister (&hp100_eisa_driver);
- out2:
-	hp100_isa_cleanup();
-	goto out;
-}
-
-
-static void __exit hp100_module_exit(void)
-{
-	hp100_isa_cleanup();
-	eisa_driver_unregister (&hp100_eisa_driver);
-	pci_unregister_driver (&hp100_pci_driver);
-}
-
-module_init(hp100_module_init)
-module_exit(hp100_module_exit)
diff --git a/drivers/net/ethernet/hp/hp100.h b/drivers/net/ethernet/hp/hp100.h
deleted file mode 100644
index 7239b94c9de5..000000000000
--- a/drivers/net/ethernet/hp/hp100.h
+++ /dev/null
@@ -1,611 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * hp100.h: Hewlett Packard HP10/100VG ANY LAN ethernet driver for Linux.
- *
- * $Id: hp100.h,v 1.51 1997/04/08 14:26:42 floeff Exp floeff $
- *
- * Authors:  Jaroslav Kysela, <perex@pf.jcu.cz>
- *           Siegfried Loeffler <floeff@tunix.mathematik.uni-stuttgart.de>
- *
- * This driver is based on the 'hpfepkt' crynwr packet driver.
- */
-
-/****************************************************************************
- *  Hardware Constants
- ****************************************************************************/
-
-/*
- * Page Identifiers
- * (Swap Paging Register, PAGING, bits 3:0, Offset 0x02)
- */
-
-#define HP100_PAGE_PERFORMANCE	0x0	/* Page 0 */
-#define HP100_PAGE_MAC_ADDRESS	0x1	/* Page 1 */
-#define HP100_PAGE_HW_MAP	0x2	/* Page 2 */
-#define HP100_PAGE_EEPROM_CTRL	0x3	/* Page 3 */
-#define HP100_PAGE_MAC_CTRL	0x4	/* Page 4 */
-#define HP100_PAGE_MMU_CFG	0x5	/* Page 5 */
-#define HP100_PAGE_ID_MAC_ADDR	0x6	/* Page 6 */
-#define HP100_PAGE_MMU_POINTER	0x7	/* Page 7 */
-
-
-/* Registers that are present on all pages  */
-
-#define HP100_REG_HW_ID		0x00	/* R:  (16) Unique card ID           */
-#define HP100_REG_TRACE		0x00	/* W:  (16) Used for debug output    */
-#define HP100_REG_PAGING	0x02	/* R:  (16),15:4 Card ID             */
-					/* W:  (16),3:0 Switch pages         */
-#define HP100_REG_OPTION_LSW	0x04	/* RW: (16) Select card functions    */
-#define HP100_REG_OPTION_MSW	0x06	/* RW: (16) Select card functions    */
-
-/*  Page 0 - Performance  */
-
-#define HP100_REG_IRQ_STATUS	0x08	/* RW: (16) Which ints are pending   */
-#define HP100_REG_IRQ_MASK	0x0a	/* RW: (16) Select ints to allow     */
-#define HP100_REG_FRAGMENT_LEN	0x0c	/* W: (16)12:0 Current fragment len */
-/* Note: For 32 bit systems, fragment len and offset registers are available */
-/*       at offset 0x28 and 0x2c, where they can be written as 32bit values. */
-#define HP100_REG_OFFSET	0x0e	/* RW: (16)12:0 Offset to start read */
-#define HP100_REG_DATA32	0x10	/* RW: (32) I/O mode data port       */
-#define HP100_REG_DATA16	0x12	/* RW: WORDs must be read from here  */
-#define HP100_REG_TX_MEM_FREE	0x14	/* RD: (32) Amount of free Tx mem    */
-#define HP100_REG_TX_PDA_L      0x14	/* W: (32) BM: Ptr to PDL, Low Pri  */
-#define HP100_REG_TX_PDA_H      0x1c	/* W: (32) BM: Ptr to PDL, High Pri */
-#define HP100_REG_RX_PKT_CNT	0x18	/* RD: (8) Rx count of pkts on card  */
-#define HP100_REG_TX_PKT_CNT	0x19	/* RD: (8) Tx count of pkts on card  */
-#define HP100_REG_RX_PDL        0x1a	/* R: (8) BM: # rx pdl not executed */
-#define HP100_REG_TX_PDL        0x1b	/* R: (8) BM: # tx pdl not executed */
-#define HP100_REG_RX_PDA        0x18	/* W: (32) BM: Up to 31 addresses */
-					/*             which point to a PDL */
-#define HP100_REG_SL_EARLY      0x1c	/*    (32) Enhanced Slave Early Rx */
-#define HP100_REG_STAT_DROPPED  0x20	/* R (12) Dropped Packet Counter */
-#define HP100_REG_STAT_ERRORED  0x22	/* R (8) Errored Packet Counter */
-#define HP100_REG_STAT_ABORT    0x23	/* R (8) Abort Counter/OW Coll. Flag */
-#define HP100_REG_RX_RING       0x24	/* W (32) Slave: RX Ring Pointers */
-#define HP100_REG_32_FRAGMENT_LEN 0x28	/* W (13) Slave: Fragment Length Reg */
-#define HP100_REG_32_OFFSET     0x2c	/* W (16) Slave: Offset Register */
-
-/*  Page 1 - MAC Address/Hash Table  */
-
-#define HP100_REG_MAC_ADDR	0x08	/* RW: (8) Cards MAC address         */
-#define HP100_REG_HASH_BYTE0	0x10	/* RW: (8) Cards multicast filter    */
-
-/*  Page 2 - Hardware Mapping  */
-
-#define HP100_REG_MEM_MAP_LSW	0x08	/* RW: (16) LSW of cards mem addr    */
-#define HP100_REG_MEM_MAP_MSW	0x0a	/* RW: (16) MSW of cards mem addr    */
-#define HP100_REG_IO_MAP	0x0c	/* RW: (8) Cards I/O address         */
-#define HP100_REG_IRQ_CHANNEL	0x0d	/* RW: (8) IRQ and edge/level int    */
-#define HP100_REG_SRAM		0x0e	/* RW: (8) How much RAM on card      */
-#define HP100_REG_BM		0x0f	/* RW: (8) Controls BM functions     */
-
-/* New on Page 2 for ETR chips: */
-#define HP100_REG_MODECTRL1     0x10	/* RW: (8) Mode Control 1 */
-#define HP100_REG_MODECTRL2     0x11	/* RW: (8) Mode Control 2 */
-#define HP100_REG_PCICTRL1      0x12	/* RW: (8) PCI Cfg 1 */
-#define HP100_REG_PCICTRL2      0x13	/* RW: (8) PCI Cfg 2 */
-#define HP100_REG_PCIBUSMLAT    0x15	/* RW: (8) PCI Bus Master Latency */
-#define HP100_REG_EARLYTXCFG    0x16	/* RW: (16) Early TX Cfg/Cntrl Reg */
-#define HP100_REG_EARLYRXCFG    0x18	/* RW: (8) Early RX Cfg/Cntrl Reg */
-#define HP100_REG_ISAPNPCFG1    0x1a	/* RW: (8) ISA PnP Cfg/Cntrl Reg 1 */
-#define HP100_REG_ISAPNPCFG2    0x1b	/* RW: (8) ISA PnP Cfg/Cntrl Reg 2 */
-
-/*  Page 3 - EEPROM/Boot ROM  */
-
-#define HP100_REG_EEPROM_CTRL	0x08	/* RW: (16) Used to load EEPROM      */
-#define HP100_REG_BOOTROM_CTRL  0x0a
-
-/*  Page 4 - LAN Configuration  (MAC_CTRL) */
-
-#define HP100_REG_10_LAN_CFG_1	0x08	/* RW: (8) Set 10M XCVR functions   */
-#define HP100_REG_10_LAN_CFG_2  0x09	/* RW: (8)     10M XCVR functions   */
-#define HP100_REG_VG_LAN_CFG_1	0x0a	/* RW: (8) Set 100M XCVR functions  */
-#define HP100_REG_VG_LAN_CFG_2  0x0b	/* RW: (8) 100M LAN Training cfgregs */
-#define HP100_REG_MAC_CFG_1	0x0c	/* RW: (8) Types of pkts to accept   */
-#define HP100_REG_MAC_CFG_2	0x0d	/* RW: (8) Misc MAC functions        */
-#define HP100_REG_MAC_CFG_3     0x0e	/* RW: (8) Misc MAC functions */
-#define HP100_REG_MAC_CFG_4     0x0f	/* R:  (8) Misc MAC states */
-#define HP100_REG_DROPPED	0x10	/* R:  (16),11:0 Pkts can't fit in mem */
-#define HP100_REG_CRC		0x12	/* R:  (8) Pkts with CRC             */
-#define HP100_REG_ABORT		0x13	/* R:  (8) Aborted Tx pkts           */
-#define HP100_REG_TRAIN_REQUEST 0x14	/* RW: (16) Endnode MAC register. */
-#define HP100_REG_TRAIN_ALLOW   0x16	/* R:  (16) Hub allowed register */
-
-/*  Page 5 - MMU  */
-
-#define HP100_REG_RX_MEM_STOP	0x0c	/* RW: (16) End of Rx ring addr      */
-#define HP100_REG_TX_MEM_STOP	0x0e	/* RW: (16) End of Tx ring addr      */
-#define HP100_REG_PDL_MEM_STOP  0x10	/* Not used by 802.12 devices */
-#define HP100_REG_ECB_MEM_STOP  0x14	/* I've no idea what this is */
-
-/*  Page 6 - Card ID/Physical LAN Address  */
-
-#define HP100_REG_BOARD_ID	0x08	/* R:  (8) EISA/ISA card ID          */
-#define HP100_REG_BOARD_IO_CHCK 0x0c	/* R:  (8) Added to ID to get FFh    */
-#define HP100_REG_SOFT_MODEL	0x0d	/* R:  (8) Config program defined    */
-#define HP100_REG_LAN_ADDR	0x10	/* R:  (8) MAC addr of card          */
-#define HP100_REG_LAN_ADDR_CHCK 0x16	/* R:  (8) Added to addr to get FFh  */
-
-/*  Page 7 - MMU Current Pointers  */
-
-#define HP100_REG_PTR_RXSTART	0x08	/* R:  (16) Current begin of Rx ring */
-#define HP100_REG_PTR_RXEND	0x0a	/* R:  (16) Current end of Rx ring   */
-#define HP100_REG_PTR_TXSTART	0x0c	/* R:  (16) Current begin of Tx ring */
-#define HP100_REG_PTR_TXEND	0x0e	/* R:  (16) Current end of Rx ring   */
-#define HP100_REG_PTR_RPDLSTART 0x10
-#define HP100_REG_PTR_RPDLEND   0x12
-#define HP100_REG_PTR_RINGPTRS  0x14
-#define HP100_REG_PTR_MEMDEBUG  0x1a
-/* ------------------------------------------------------------------------ */
-
-
-/*
- * Hardware ID Register I (Always available, HW_ID, Offset 0x00)
- */
-#define HP100_HW_ID_CASCADE     0x4850	/* Identifies Cascade Chip */
-
-/*
- * Hardware ID Register 2 & Paging Register
- * (Always available, PAGING, Offset 0x02)
- * Bits 15:4 are for the Chip ID
- */
-#define HP100_CHIPID_MASK        0xFFF0
-#define HP100_CHIPID_SHASTA      0x5350	/* Not 802.12 compliant */
-					 /* EISA BM/SL, MCA16/32 SL, ISA SL */
-#define HP100_CHIPID_RAINIER     0x5360	/* Not 802.12 compliant EISA BM, */
-					 /* PCI SL, MCA16/32 SL, ISA SL */
-#define HP100_CHIPID_LASSEN      0x5370	/* 802.12 compliant PCI BM, PCI SL */
-					 /* LRF supported */
-
-/*
- *  Option Registers I and II
- * (Always available, OPTION_LSW, Offset 0x04-0x05)
- */
-#define HP100_DEBUG_EN		0x8000	/* 0:Dis., 1:Enable Debug Dump Ptr. */
-#define HP100_RX_HDR		0x4000	/* 0:Dis., 1:Enable putting pkt into */
-					/*   system mem. before Rx interrupt */
-#define HP100_MMAP_DIS		0x2000	/* 0:Enable, 1:Disable mem.mapping. */
-					/*   MMAP_DIS must be 0 and MEM_EN */
-					/*   must be 1 for memory-mapped */
-					/*   mode to be enabled */
-#define HP100_EE_EN		0x1000	/* 0:Disable,1:Enable EEPROM writing */
-#define HP100_BM_WRITE		0x0800	/* 0:Slave, 1:Bus Master for Tx data */
-#define HP100_BM_READ		0x0400	/* 0:Slave, 1:Bus Master for Rx data */
-#define HP100_TRI_INT		0x0200	/* 0:Don't, 1:Do tri-state the int */
-#define HP100_MEM_EN		0x0040	/* Config program set this to */
-					/*   0:Disable, 1:Enable mem map. */
-					/*   See MMAP_DIS. */
-#define HP100_IO_EN		0x0020	/* 1:Enable I/O transfers */
-#define HP100_BOOT_EN		0x0010	/* 1:Enable boot ROM access */
-#define HP100_FAKE_INT		0x0008	/* 1:int */
-#define HP100_INT_EN		0x0004	/* 1:Enable ints from card */
-#define HP100_HW_RST		0x0002	/* 0:Reset, 1:Out of reset */
-					/* NIC reset on 0 to 1 transition */
-
-/*
- *  Option Register III
- * (Always available, OPTION_MSW, Offset 0x06)
- */
-#define HP100_PRIORITY_TX	0x0080	/* 1:Do all Tx pkts as priority */
-#define HP100_EE_LOAD		0x0040	/* 1:EEPROM loading, 0 when done */
-#define HP100_ADV_NXT_PKT	0x0004	/* 1:Advance to next pkt in Rx queue */
-					/*   h/w will set to 0 when done */
-#define HP100_TX_CMD		0x0002	/* 1:Tell h/w download done, h/w */
-					/*   will set to 0 when done */
-
-/*
- * Interrupt Status Registers I and II
- * (Page PERFORMANCE, IRQ_STATUS, Offset 0x08-0x09)
- * Note: With old chips, these Registers will clear when 1 is written to them
- *       with new chips this depends on setting of CLR_ISMODE
- */
-#define HP100_RX_EARLY_INT      0x2000
-#define HP100_RX_PDA_ZERO       0x1000
-#define HP100_RX_PDL_FILL_COMPL 0x0800
-#define HP100_RX_PACKET		0x0400	/* 0:No, 1:Yes pkt has been Rx */
-#define HP100_RX_ERROR		0x0200	/* 0:No, 1:Yes Rx pkt had error */
-#define HP100_TX_PDA_ZERO       0x0020	/* 1 when PDA count goes to zero */
-#define HP100_TX_SPACE_AVAIL	0x0010	/* 0:<8192, 1:>=8192 Tx free bytes */
-#define HP100_TX_COMPLETE	0x0008	/* 0:No, 1:Yes a Tx has completed */
-#define HP100_MISC_ERROR        0x0004	/* 0:No, 1:Lan Link down or bus error */
-#define HP100_TX_ERROR		0x0002	/* 0:No, 1:Yes Tx pkt had error */
-
-/*
- * Xmit Memory Free Count
- * (Page PERFORMANCE, TX_MEM_FREE, Offset 0x14) (Read only, 32bit)
- */
-#define HP100_AUTO_COMPARE	0x80000000	/* Tx Space avail & pkts<255 */
-#define HP100_FREE_SPACE	0x7fffffe0	/* Tx free memory */
-
-/*
- *  IRQ Channel
- * (Page HW_MAP, IRQ_CHANNEL, Offset 0x0d)
- */
-#define HP100_ZERO_WAIT_EN	0x80	/* 0:No, 1:Yes asserts NOWS signal */
-#define HP100_IRQ_SCRAMBLE      0x40
-#define HP100_BOND_HP           0x20
-#define HP100_LEVEL_IRQ		0x10	/* 0:Edge, 1:Level type interrupts. */
-					/* (Only valid on EISA cards) */
-#define HP100_IRQMASK		0x0F	/* Isolate the IRQ bits */
-
-/*
- * SRAM Parameters
- * (Page HW_MAP, SRAM, Offset 0x0e)
- */
-#define HP100_RAM_SIZE_MASK	0xe0	/* AND to get SRAM size index */
-#define HP100_RAM_SIZE_SHIFT	0x05	/* Shift count(put index in lwr bits) */
-
-/*
- * Bus Master Register
- * (Page HW_MAP, BM, Offset 0x0f)
- */
-#define HP100_BM_BURST_RD       0x01	/* EISA only: 1=Use burst trans. fm system */
-					/* memory to chip (tx) */
-#define HP100_BM_BURST_WR       0x02	/* EISA only: 1=Use burst trans. fm system */
-					/* memory to chip (rx) */
-#define HP100_BM_MASTER		0x04	/* 0:Slave, 1:BM mode */
-#define HP100_BM_PAGE_CK        0x08	/* This bit should be set whenever in */
-					/* an EISA system */
-#define HP100_BM_PCI_8CLK       0x40	/* ... cycles 8 clocks apart */
-
-
-/*
- * Mode Control Register I
- * (Page HW_MAP, MODECTRL1, Offset0x10)
- */
-#define HP100_TX_DUALQ          0x10
-   /* If set and BM -> dual tx pda queues */
-#define HP100_ISR_CLRMODE       0x02	/* If set ISR will clear all pending */
-				       /* interrupts on read (etr only?) */
-#define HP100_EE_NOLOAD         0x04	/* Status whether res will be loaded */
-				       /* from the eeprom */
-#define HP100_TX_CNT_FLG        0x08	/* Controls Early TX Reg Cnt Field */
-#define HP100_PDL_USE3          0x10	/* If set BM engine will read only */
-				       /* first three data elements of a PDL */
-				       /* on the first access. */
-#define HP100_BUSTYPE_MASK      0xe0	/* Three bit bus type info */
-
-/*
- * Mode Control Register II
- * (Page HW_MAP, MODECTRL2, Offset0x11)
- */
-#define HP100_EE_MASK           0x0f	/* Tell EEPROM circuit not to load */
-				       /* certain resources */
-#define HP100_DIS_CANCEL        0x20	/* For tx dualq mode operation */
-#define HP100_EN_PDL_WB         0x40	/* 1: Status of PDL completion may be */
-				       /* written back to system mem */
-#define HP100_EN_BUS_FAIL       0x80	/* Enables bus-fail portion of misc */
-				       /* interrupt */
-
-/*
- * PCI Configuration and Control Register I
- * (Page HW_MAP, PCICTRL1, Offset 0x12)
- */
-#define HP100_LO_MEM            0x01	/* 1: Mapped Mem requested below 1MB */
-#define HP100_NO_MEM            0x02	/* 1: Disables Req for sysmem to PCI */
-				       /* bios */
-#define HP100_USE_ISA           0x04	/* 1: isa type decodes will occur */
-				       /* simultaneously with PCI decodes */
-#define HP100_IRQ_HI_MASK       0xf0	/* pgmed by pci bios */
-#define HP100_PCI_IRQ_HI_MASK   0x78	/* Isolate 4 bits for PCI IRQ  */
-
-/*
- * PCI Configuration and Control Register II
- * (Page HW_MAP, PCICTRL2, Offset 0x13)
- */
-#define HP100_RD_LINE_PDL       0x01	/* 1: PCI command Memory Read Line en */
-#define HP100_RD_TX_DATA_MASK   0x06	/* choose PCI memread cmds for TX */
-#define HP100_MWI               0x08	/* 1: en. PCI memory write invalidate */
-#define HP100_ARB_MODE          0x10	/* Select PCI arbitor type */
-#define HP100_STOP_EN           0x20	/* Enables PCI state machine to issue */
-				       /* pci stop if cascade not ready */
-#define HP100_IGNORE_PAR        0x40	/* 1: PCI state machine ignores parity */
-#define HP100_PCI_RESET         0x80	/* 0->1: Reset PCI block */
-
-/*
- * Early TX Configuration and Control Register
- * (Page HW_MAP, EARLYTXCFG, Offset 0x16)
- */
-#define HP100_EN_EARLY_TX       0x8000	/* 1=Enable Early TX */
-#define HP100_EN_ADAPTIVE       0x4000	/* 1=Enable adaptive mode */
-#define HP100_EN_TX_UR_IRQ      0x2000	/* reserved, must be 0 */
-#define HP100_EN_LOW_TX         0x1000	/* reserved, must be 0 */
-#define HP100_ET_CNT_MASK       0x0fff	/* bits 11..0: ET counters */
-
-/*
- * Early RX Configuration and Control Register
- * (Page HW_MAP, EARLYRXCFG, Offset 0x18)
- */
-#define HP100_EN_EARLY_RX       0x80	/* 1=Enable Early RX */
-#define HP100_EN_LOW_RX         0x40	/* reserved, must be 0 */
-#define HP100_RX_TRIP_MASK      0x1f	/* bits 4..0: threshold at which the
-					 * early rx circuit will start the
-					 * dma of received packet into system
-					 * memory for BM */
-
-/*
- *  Serial Devices Control Register
- * (Page EEPROM_CTRL, EEPROM_CTRL, Offset 0x08)
- */
-#define HP100_EEPROM_LOAD	0x0001	/* 0->1 loads EEPROM into registers. */
-					/* When it goes back to 0, load is   */
-					/* complete. This should take ~600us. */
-
-/*
- * 10MB LAN Control and Configuration Register I
- * (Page MAC_CTRL, 10_LAN_CFG_1, Offset 0x08)
- */
-#define HP100_MAC10_SEL		0xc0	/* Get bits to indicate MAC */
-#define HP100_AUI_SEL		0x20	/* Status of AUI selection */
-#define HP100_LOW_TH		0x10	/* 0:No, 1:Yes allow better cabling */
-#define HP100_LINK_BEAT_DIS	0x08	/* 0:Enable, 1:Disable link beat */
-#define HP100_LINK_BEAT_ST	0x04	/* 0:No, 1:Yes link beat being Rx */
-#define HP100_R_ROL_ST		0x02	/* 0:No, 1:Yes Rx twisted pair has */
-					/*             been reversed */
-#define HP100_AUI_ST		0x01	/* 0:No, 1:Yes use AUI on TP card */
-
-/*
- * 10 MB LAN Control and Configuration Register II
- * (Page MAC_CTRL, 10_LAN_CFG_2, Offset 0x09)
- */
-#define HP100_SQU_ST		0x01	/* 0:No, 1:Yes collision signal sent */
-					/*       after Tx.Only used for AUI. */
-#define HP100_FULLDUP           0x02	/* 1: LXT901 XCVR fullduplx enabled */
-#define HP100_DOT3_MAC          0x04	/* 1: DOT 3 Mac sel. unless Autosel */
-
-/*
- * MAC Selection, use with MAC10_SEL bits
- */
-#define HP100_AUTO_SEL_10	0x0	/* Auto select */
-#define HP100_XCVR_LXT901_10	0x1	/* LXT901 10BaseT transceiver */
-#define HP100_XCVR_7213		0x2	/* 7213 transceiver */
-#define HP100_XCVR_82503	0x3	/* 82503 transceiver */
-
-/*
- *  100MB LAN Training Register
- * (Page MAC_CTRL, VG_LAN_CFG_2, Offset 0x0b) (old, pre 802.12)
- */
-#define HP100_FRAME_FORMAT	0x08	/* 0:802.3, 1:802.5 frames */
-#define HP100_BRIDGE		0x04	/* 0:No, 1:Yes tell hub i am a bridge */
-#define HP100_PROM_MODE		0x02	/* 0:No, 1:Yes tell hub card is */
-					/*         promiscuous */
-#define HP100_REPEATER		0x01	/* 0:No, 1:Yes tell hub MAC wants to */
-					/*         be a cascaded repeater */
-
-/*
- * 100MB LAN Control and Configuration Register
- * (Page MAC_CTRL, VG_LAN_CFG_1, Offset 0x0a)
- */
-#define HP100_VG_SEL	        0x80	/* 0:No, 1:Yes use 100 Mbit MAC */
-#define HP100_LINK_UP_ST	0x40	/* 0:No, 1:Yes endnode logged in */
-#define HP100_LINK_CABLE_ST	0x20	/* 0:No, 1:Yes cable can hear tones */
-					/*         from  hub */
-#define HP100_LOAD_ADDR		0x10	/* 0->1 card addr will be sent  */
-					/* 100ms later the link status  */
-					/* bits are valid */
-#define HP100_LINK_CMD		0x08	/* 0->1 link will attempt to log in. */
-					/* 100ms later the link status */
-					/* bits are valid */
-#define HP100_TRN_DONE          0x04	/* NEW ETR-Chips only: Will be reset */
-					/* after LinkUp Cmd is given and set */
-					/* when training has completed. */
-#define HP100_LINK_GOOD_ST	0x02	/* 0:No, 1:Yes cable passed training */
-#define HP100_VG_RESET		0x01	/* 0:Yes, 1:No reset the 100VG MAC */
-
-
-/*
- *  MAC Configuration Register I
- * (Page MAC_CTRL, MAC_CFG_1, Offset 0x0c)
- */
-#define HP100_RX_IDLE		0x80	/* 0:Yes, 1:No currently receiving pkts */
-#define HP100_TX_IDLE		0x40	/* 0:Yes, 1:No currently Txing pkts */
-#define HP100_RX_EN		0x20	/* 1: allow receiving of pkts */
-#define HP100_TX_EN		0x10	/* 1: allow transmitting of pkts */
-#define HP100_ACC_ERRORED	0x08	/* 0:No, 1:Yes allow Rx of errored pkts */
-#define HP100_ACC_MC		0x04	/* 0:No, 1:Yes allow Rx of multicast pkts */
-#define HP100_ACC_BC		0x02	/* 0:No, 1:Yes allow Rx of broadcast pkts */
-#define HP100_ACC_PHY		0x01	/* 0:No, 1:Yes allow Rx of ALL phys. pkts */
-#define HP100_MAC1MODEMASK	0xf0	/* Hide ACC bits */
-#define HP100_MAC1MODE1		0x00	/* Receive nothing, must also disable RX */
-#define HP100_MAC1MODE2		0x00
-#define HP100_MAC1MODE3		HP100_MAC1MODE2 | HP100_ACC_BC
-#define HP100_MAC1MODE4		HP100_MAC1MODE3 | HP100_ACC_MC
-#define HP100_MAC1MODE5		HP100_MAC1MODE4	/* set mc hash to all ones also */
-#define HP100_MAC1MODE6		HP100_MAC1MODE5 | HP100_ACC_PHY	/* Promiscuous */
-/* Note MODE6 will receive all GOOD packets on the LAN. This really needs
-   a mode 7 defined to be LAN Analyzer mode, which will receive errored and
-   runt packets, and keep the CRC bytes. */
-#define HP100_MAC1MODE7		HP100_MAC1MODE6 | HP100_ACC_ERRORED
-
-/*
- *  MAC Configuration Register II
- * (Page MAC_CTRL, MAC_CFG_2, Offset 0x0d)
- */
-#define HP100_TR_MODE		0x80	/* 0:No, 1:Yes support Token Ring formats */
-#define HP100_TX_SAME		0x40	/* 0:No, 1:Yes Tx same packet continuous */
-#define HP100_LBK_XCVR		0x20	/* 0:No, 1:Yes loopback through MAC & */
-					/*   transceiver */
-#define HP100_LBK_MAC		0x10	/* 0:No, 1:Yes loopback through MAC */
-#define HP100_CRC_I		0x08	/* 0:No, 1:Yes inhibit CRC on Tx packets */
-#define HP100_ACCNA             0x04	/* 1: For 802.5: Accept only token ring
-					 * group addr that maches NA mask */
-#define HP100_KEEP_CRC		0x02	/* 0:No, 1:Yes keep CRC on Rx packets. */
-					/*   The length will reflect this. */
-#define HP100_ACCFA             0x01	/* 1: For 802.5: Accept only functional
-					 * addrs that match FA mask (page1) */
-#define HP100_MAC2MODEMASK	0x02
-#define HP100_MAC2MODE1		0x00
-#define HP100_MAC2MODE2		0x00
-#define HP100_MAC2MODE3		0x00
-#define HP100_MAC2MODE4		0x00
-#define HP100_MAC2MODE5		0x00
-#define HP100_MAC2MODE6		0x00
-#define HP100_MAC2MODE7		KEEP_CRC
-
-/*
- * MAC Configuration Register III
- * (Page MAC_CTRL, MAC_CFG_3, Offset 0x0e)
- */
-#define HP100_PACKET_PACE       0x03	/* Packet Pacing:
-					 * 00: No packet pacing
-					 * 01: 8 to 16 uS delay
-					 * 10: 16 to 32 uS delay
-					 * 11: 32 to 64 uS delay
-					 */
-#define HP100_LRF_EN            0x04	/* 1: External LAN Rcv Filter and
-					 * TCP/IP Checksumming enabled. */
-#define HP100_AUTO_MODE         0x10	/* 1: AutoSelect between 10/100 */
-
-/*
- * MAC Configuration Register IV
- * (Page MAC_CTRL, MAC_CFG_4, Offset 0x0f)
- */
-#define HP100_MAC_SEL_ST        0x01	/* (R): Status of external VGSEL
-					 * Signal, 1=100VG, 0=10Mbit sel. */
-#define HP100_LINK_FAIL_ST      0x02	/* (R): Status of Link Fail portion
-					 * of the Misc. Interrupt */
-
-/*
- *  100 MB LAN Training Request/Allowed Registers
- * (Page MAC_CTRL, TRAIN_REQUEST and TRAIN_ALLOW, Offset 0x14-0x16)(ETR parts only)
- */
-#define HP100_MACRQ_REPEATER         0x0001	/* 1: MAC tells HUB it wants to be
-						 *    a cascaded repeater
-						 * 0: ... wants to be a DTE */
-#define HP100_MACRQ_PROMSC           0x0006	/* 2 bits: Promiscious mode
-						 * 00: Rcv only unicast packets
-						 *     specifically addr to this
-						 *     endnode
-						 * 10: Rcv all pckts fwded by
-						 *     the local repeater */
-#define HP100_MACRQ_FRAMEFMT_EITHER  0x0018	/* 11: either format allowed */
-#define HP100_MACRQ_FRAMEFMT_802_3   0x0000	/* 00: 802.3 is requested */
-#define HP100_MACRQ_FRAMEFMT_802_5   0x0010	/* 10: 802.5 format is requested */
-#define HP100_CARD_MACVER            0xe000	/* R: 3 bit Cards 100VG MAC version */
-#define HP100_MALLOW_REPEATER        0x0001	/* If reset, requested access as an
-						 * end node is allowed */
-#define HP100_MALLOW_PROMSC          0x0004	/* 2 bits: Promiscious mode
-						 * 00: Rcv only unicast packets
-						 *     specifically addr to this
-						 *     endnode
-						 * 10: Rcv all pckts fwded by
-						 *     the local repeater */
-#define HP100_MALLOW_FRAMEFMT        0x00e0	/* 2 bits: Frame Format
-						 * 00: 802.3 format will be used
-						 * 10: 802.5 format will be used */
-#define HP100_MALLOW_ACCDENIED       0x0400	/* N bit */
-#define HP100_MALLOW_CONFIGURE       0x0f00	/* C bit */
-#define HP100_MALLOW_DUPADDR         0x1000	/* D bit */
-#define HP100_HUB_MACVER             0xe000	/* R: 3 bit 802.12 MAC/RMAC training */
-					     /*    protocol of repeater */
-
-/* ****************************************************************************** */
-
-/*
- *  Set/Reset bits
- */
-#define HP100_SET_HB		0x0100	/* 0:Set fields to 0 whose mask is 1 */
-#define HP100_SET_LB		0x0001	/* HB sets upper byte, LB sets lower byte */
-#define HP100_RESET_HB		0x0000	/* For readability when resetting bits */
-#define HP100_RESET_LB		0x0000	/* For readability when resetting bits */
-
-/*
- *  Misc. Constants
- */
-#define HP100_LAN_100		100	/* lan_type value for VG */
-#define HP100_LAN_10		10	/* lan_type value for 10BaseT */
-#define HP100_LAN_COAX		9	/* lan_type value for Coax */
-#define HP100_LAN_ERR		(-1)	/* lan_type value for link down */
-
-/*
- * Bus Master Data Structures  ----------------------------------------------
- */
-
-#define MAX_RX_PDL              30	/* Card limit = 31 */
-#define MAX_RX_FRAG             2	/* Don't need more... */
-#define MAX_TX_PDL              29
-#define MAX_TX_FRAG             2	/* Limit = 31 */
-
-/* Define total PDL area size in bytes (should be 4096) */
-/* This is the size of kernel (dma) memory that will be allocated. */
-#define MAX_RINGSIZE ((MAX_RX_FRAG*8+4+4)*MAX_RX_PDL+(MAX_TX_FRAG*8+4+4)*MAX_TX_PDL)+16
-
-/* Ethernet Packet Sizes */
-#define MIN_ETHER_SIZE          60
-#define MAX_ETHER_SIZE          1514	/* Needed for preallocation of */
-					/* skb buffer when busmastering */
-
-/* Tx or Rx Ring Entry */
-typedef struct hp100_ring {
-	u_int *pdl;		/* Address of PDLs PDH, dword before
-				 * this address is used for rx hdr */
-	u_int pdl_paddr;	/* Physical address of PDL */
-	struct sk_buff *skb;
-	struct hp100_ring *next;
-} hp100_ring_t;
-
-
-
-/* Mask for Header Descriptor */
-#define HP100_PKT_LEN_MASK	0x1FFF	/* AND with RxLength to get length */
-
-
-/* Receive Packet Status.  Note, the error bits are only valid if ACC_ERRORED
-   bit in the MAC Configuration Register 1 is set. */
-#define HP100_RX_PRI		0x8000	/* 0:No, 1:Yes packet is priority */
-#define HP100_SDF_ERR		0x4000	/* 0:No, 1:Yes start of frame error */
-#define HP100_SKEW_ERR		0x2000	/* 0:No, 1:Yes skew out of range */
-#define HP100_BAD_SYMBOL_ERR	0x1000	/* 0:No, 1:Yes invalid symbol received */
-#define HP100_RCV_IPM_ERR	0x0800	/* 0:No, 1:Yes pkt had an invalid packet */
-					/*   marker */
-#define HP100_SYMBOL_BAL_ERR	0x0400	/* 0:No, 1:Yes symbol balance error */
-#define HP100_VG_ALN_ERR	0x0200	/* 0:No, 1:Yes non-octet received */
-#define HP100_TRUNC_ERR		0x0100	/* 0:No, 1:Yes the packet was truncated */
-#define HP100_RUNT_ERR		0x0040	/* 0:No, 1:Yes pkt length < Min Pkt */
-					/*   Length Reg. */
-#define HP100_ALN_ERR		0x0010	/* 0:No, 1:Yes align error. */
-#define HP100_CRC_ERR		0x0008	/* 0:No, 1:Yes CRC occurred. */
-
-/* The last three bits indicate the type of destination address */
-
-#define HP100_MULTI_ADDR_HASH	0x0006	/* 110: Addr multicast, matched hash */
-#define HP100_BROADCAST_ADDR	0x0003	/* x11: Addr broadcast */
-#define HP100_MULTI_ADDR_NO_HASH 0x0002	/* 010: Addr multicast, didn't match hash */
-#define HP100_PHYS_ADDR_MATCH	0x0001	/* x01: Addr was physical and mine */
-#define HP100_PHYS_ADDR_NO_MATCH 0x0000	/* x00: Addr was physical but not mine */
-
-/*
- *  macros
- */
-
-#define hp100_inb( reg ) \
-        inb( ioaddr + HP100_REG_##reg )
-#define hp100_inw( reg ) \
-	inw( ioaddr + HP100_REG_##reg )
-#define hp100_inl( reg ) \
-	inl( ioaddr + HP100_REG_##reg )
-#define hp100_outb( data, reg ) \
-	outb( data, ioaddr + HP100_REG_##reg )
-#define hp100_outw( data, reg ) \
-	outw( data, ioaddr + HP100_REG_##reg )
-#define hp100_outl( data, reg ) \
-	outl( data, ioaddr + HP100_REG_##reg )
-#define hp100_orb( data, reg ) \
-	outb( inb( ioaddr + HP100_REG_##reg ) | (data), ioaddr + HP100_REG_##reg )
-#define hp100_orw( data, reg ) \
-	outw( inw( ioaddr + HP100_REG_##reg ) | (data), ioaddr + HP100_REG_##reg )
-#define hp100_andb( data, reg ) \
-	outb( inb( ioaddr + HP100_REG_##reg ) & (data), ioaddr + HP100_REG_##reg )
-#define hp100_andw( data, reg ) \
-	outw( inw( ioaddr + HP100_REG_##reg ) & (data), ioaddr + HP100_REG_##reg )
-
-#define hp100_page( page ) \
-	outw( HP100_PAGE_##page, ioaddr + HP100_REG_PAGING )
-#define hp100_ints_off() \
-	outw( HP100_INT_EN | HP100_RESET_LB, ioaddr + HP100_REG_OPTION_LSW )
-#define hp100_ints_on() \
-	outw( HP100_INT_EN | HP100_SET_LB, ioaddr + HP100_REG_OPTION_LSW )
-#define hp100_mem_map_enable() \
-	outw( HP100_MMAP_DIS | HP100_RESET_HB, ioaddr + HP100_REG_OPTION_LSW )
-#define hp100_mem_map_disable() \
-	outw( HP100_MMAP_DIS | HP100_SET_HB, ioaddr + HP100_REG_OPTION_LSW )
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
index 6e70658d50c4..db45373ea31c 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c
@@ -670,13 +670,10 @@ int ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages)
 
 static int ehea_is_hugepage(unsigned long pfn)
 {
-	int page_order;
-
 	if (pfn & EHEA_HUGEPAGE_PFN_MASK)
 		return 0;
 
-	page_order = compound_order(pfn_to_page(pfn));
-	if (page_order + PAGE_SHIFT != EHEA_HUGEPAGESHIFT)
+	if (page_shift(pfn_to_page(pfn)) != EHEA_HUGEPAGESHIFT)
 		return 0;
 
 	return 1;
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 9e43c9ace9c2..2e40425d8a34 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2849,6 +2849,7 @@ static int emac_init_config(struct emac_instance *dev)
 {
 	struct device_node *np = dev->ofdev->dev.of_node;
 	const void *p;
+	int err;
 
 	/* Read config from device-tree */
 	if (emac_read_uint_prop(np, "mal-device", &dev->mal_ph, 1))
@@ -2897,8 +2898,8 @@ static int emac_init_config(struct emac_instance *dev)
 		dev->mal_burst_size = 256;
 
 	/* PHY mode needs some decoding */
-	dev->phy_mode = of_get_phy_mode(np);
-	if (dev->phy_mode < 0)
+	err = of_get_phy_mode(np, &dev->phy_mode);
+	if (err)
 		dev->phy_mode = PHY_INTERFACE_MODE_NA;
 
 	/* Check EMAC version */
diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
index e9cda024cbf6..89a1b0fea158 100644
--- a/drivers/net/ethernet/ibm/emac/core.h
+++ b/drivers/net/ethernet/ibm/emac/core.h
@@ -171,7 +171,7 @@ struct emac_instance {
 	struct mal_commac		commac;
 
 	/* PHY infos */
-	int				phy_mode;
+	phy_interface_t			phy_mode;
 	u32				phy_map;
 	u32				phy_address;
 	u32				phy_feat_exc;
diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c
index b9e821de2ac6..57a25c7a9e70 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.c
+++ b/drivers/net/ethernet/ibm/emac/zmii.c
@@ -78,7 +78,8 @@ static inline u32 zmii_mode_mask(int mode, int input)
 	}
 }
 
-int zmii_attach(struct platform_device *ofdev, int input, int *mode)
+int zmii_attach(struct platform_device *ofdev, int input,
+		phy_interface_t *mode)
 {
 	struct zmii_instance *dev = platform_get_drvdata(ofdev);
 	struct zmii_regs __iomem *p = dev->base;
diff --git a/drivers/net/ethernet/ibm/emac/zmii.h b/drivers/net/ethernet/ibm/emac/zmii.h
index 41d46e9b87ba..65daedc78594 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.h
+++ b/drivers/net/ethernet/ibm/emac/zmii.h
@@ -50,7 +50,8 @@ struct zmii_instance {
 
 int zmii_init(void);
 void zmii_exit(void);
-int zmii_attach(struct platform_device *ofdev, int input, int *mode);
+int zmii_attach(struct platform_device *ofdev, int input,
+		phy_interface_t *mode);
 void zmii_detach(struct platform_device *ofdev, int input);
 void zmii_get_mdio(struct platform_device *ofdev, int input);
 void zmii_put_mdio(struct platform_device *ofdev, int input);
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 86493fea56e4..416da9619928 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -3565,8 +3565,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
 	     (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE)))
 		adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
 
-	pr_info("%s changing MTU from %d to %d\n",
-		netdev->name, netdev->mtu, new_mtu);
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
 
 	if (netif_running(netdev))
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index de8c5818a305..adce7e319b9e 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -894,8 +894,9 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
 	case e1000_pch2lan:
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
-		/* fall through */
 	case e1000_pch_cnp:
+		/* fall through */
+	case e1000_pch_tgp:
 		mask |= BIT(18);
 		break;
 	default:
@@ -1559,6 +1560,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
 	switch (hw->mac.type) {
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+	case e1000_pch_tgp:
 		fext_nvm11 = er32(FEXTNVM11);
 		fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX;
 		ew32(FEXTNVM11, fext_nvm11);
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index eff75bd8a8f0..f556163481cb 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -86,6 +86,17 @@ struct e1000_hw;
 #define E1000_DEV_ID_PCH_ICP_I219_V8		0x15E0
 #define E1000_DEV_ID_PCH_ICP_I219_LM9		0x15E1
 #define E1000_DEV_ID_PCH_ICP_I219_V9		0x15E2
+#define E1000_DEV_ID_PCH_CMP_I219_LM10		0x0D4E
+#define E1000_DEV_ID_PCH_CMP_I219_V10		0x0D4F
+#define E1000_DEV_ID_PCH_CMP_I219_LM11		0x0D4C
+#define E1000_DEV_ID_PCH_CMP_I219_V11		0x0D4D
+#define E1000_DEV_ID_PCH_CMP_I219_LM12		0x0D53
+#define E1000_DEV_ID_PCH_CMP_I219_V12		0x0D55
+#define E1000_DEV_ID_PCH_TGP_I219_LM13		0x15FB
+#define E1000_DEV_ID_PCH_TGP_I219_V13		0x15FC
+#define E1000_DEV_ID_PCH_TGP_I219_LM14		0x15F9
+#define E1000_DEV_ID_PCH_TGP_I219_V14		0x15FA
+#define E1000_DEV_ID_PCH_TGP_I219_LM15		0x15F4
 
 #define E1000_REVISION_4	4
 
@@ -109,6 +120,7 @@ enum e1000_mac_type {
 	e1000_pch_lpt,
 	e1000_pch_spt,
 	e1000_pch_cnp,
+	e1000_pch_tgp,
 };
 
 enum e1000_media_type {
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index a1fab77b2096..b4135c50e905 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -316,6 +316,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+	case e1000_pch_tgp:
 		if (e1000_phy_is_accessible_pchlan(hw))
 			break;
 
@@ -458,6 +459,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
 		case e1000_pch_lpt:
 		case e1000_pch_spt:
 		case e1000_pch_cnp:
+		case e1000_pch_tgp:
 			/* In case the PHY needs to be in mdio slow mode,
 			 * set slow mode and try to get the PHY id again.
 			 */
@@ -700,6 +702,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+	case e1000_pch_tgp:
 	case e1000_pchlan:
 		/* check management mode */
 		mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan;
@@ -1638,6 +1641,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+	case e1000_pch_tgp:
 		rc = e1000_init_phy_params_pchlan(hw);
 		break;
 	default:
@@ -2090,6 +2094,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+	case e1000_pch_tgp:
 		sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M;
 		break;
 	default:
@@ -3127,6 +3132,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
 	switch (hw->mac.type) {
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+	case e1000_pch_tgp:
 		bank1_offset = nvm->flash_bank_size;
 		act_offset = E1000_ICH_NVM_SIG_WORD;
 
@@ -4070,6 +4076,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+	case e1000_pch_tgp:
 		word = NVM_COMPAT;
 		valid_csum_mask = NVM_COMPAT_VALID_CSUM;
 		break;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index d7d56e42a6aa..fe7997c18a10 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3538,6 +3538,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
 		adapter->cc.shift = shift;
 		break;
 	case e1000_pch_cnp:
+	case e1000_pch_tgp:
 		if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) {
 			/* Stable 24MHz frequency */
 			incperiod = INCPERIOD_24MHZ;
@@ -4049,6 +4050,8 @@ void e1000e_reset(struct e1000_adapter *adapter)
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+		/* fall-through */
+	case e1000_pch_tgp:
 		fc->refresh_time = 0xFFFF;
 		fc->pause_time = 0xFFFF;
 
@@ -4715,12 +4718,12 @@ int e1000e_close(struct net_device *netdev)
 
 	pm_runtime_get_sync(&pdev->dev);
 
-	if (!test_bit(__E1000_DOWN, &adapter->state)) {
+	if (netif_device_present(netdev)) {
 		e1000e_down(adapter, true);
 		e1000_free_irq(adapter);
 
 		/* Link status message must follow this format */
-		pr_info("%s NIC Link is Down\n", adapter->netdev->name);
+		pr_info("%s NIC Link is Down\n", netdev->name);
 	}
 
 	napi_disable(&adapter->napi);
@@ -6028,7 +6031,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
 		usleep_range(1000, 1100);
 	/* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */
 	adapter->max_frame_size = max_frame;
-	e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu);
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
 
 	pm_runtime_get_sync(netdev->dev.parent);
@@ -6294,14 +6298,188 @@ fl_out:
 	pm_runtime_put_sync(netdev->dev.parent);
 }
 
+#ifdef CONFIG_PM_SLEEP
+/* S0ix implementation */
+static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 mac_data;
+	u16 phy_data;
+
+	/* Disable the periodic inband message,
+	 * don't request PCIe clock in K1 page770_17[10:9] = 10b
+	 */
+	e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+	phy_data &= ~HV_PM_CTRL_K1_CLK_REQ;
+	phy_data |= BIT(10);
+	e1e_wphy(hw, HV_PM_CTRL, phy_data);
+
+	/* Make sure we don't exit K1 every time a new packet arrives
+	 * 772_29[5] = 1 CS_Mode_Stay_In_K1
+	 */
+	e1e_rphy(hw, I217_CGFREG, &phy_data);
+	phy_data |= BIT(5);
+	e1e_wphy(hw, I217_CGFREG, phy_data);
+
+	/* Change the MAC/PHY interface to SMBus
+	 * Force the SMBus in PHY page769_23[0] = 1
+	 * Force the SMBus in MAC CTRL_EXT[11] = 1
+	 */
+	e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+	phy_data |= CV_SMB_CTRL_FORCE_SMBUS;
+	e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+	mac_data = er32(CTRL_EXT);
+	mac_data |= E1000_CTRL_EXT_FORCE_SMBUS;
+	ew32(CTRL_EXT, mac_data);
+
+	/* DFT control: PHY bit: page769_20[0] = 1
+	 * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1
+	 */
+	e1e_rphy(hw, I82579_DFT_CTRL, &phy_data);
+	phy_data |= BIT(0);
+	e1e_wphy(hw, I82579_DFT_CTRL, phy_data);
+
+	mac_data = er32(EXTCNF_CTRL);
+	mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+	ew32(EXTCNF_CTRL, mac_data);
+
+	/* Check MAC Tx/Rx packet buffer pointers.
+	 * Reset MAC Tx/Rx packet buffer pointers to suppress any
+	 * pending traffic indication that would prevent power gating.
+	 */
+	mac_data = er32(TDFH);
+	if (mac_data)
+		ew32(TDFH, 0);
+	mac_data = er32(TDFT);
+	if (mac_data)
+		ew32(TDFT, 0);
+	mac_data = er32(TDFHS);
+	if (mac_data)
+		ew32(TDFHS, 0);
+	mac_data = er32(TDFTS);
+	if (mac_data)
+		ew32(TDFTS, 0);
+	mac_data = er32(TDFPC);
+	if (mac_data)
+		ew32(TDFPC, 0);
+	mac_data = er32(RDFH);
+	if (mac_data)
+		ew32(RDFH, 0);
+	mac_data = er32(RDFT);
+	if (mac_data)
+		ew32(RDFT, 0);
+	mac_data = er32(RDFHS);
+	if (mac_data)
+		ew32(RDFHS, 0);
+	mac_data = er32(RDFTS);
+	if (mac_data)
+		ew32(RDFTS, 0);
+	mac_data = er32(RDFPC);
+	if (mac_data)
+		ew32(RDFPC, 0);
+
+	/* Enable the Dynamic Power Gating in the MAC */
+	mac_data = er32(FEXTNVM7);
+	mac_data |= BIT(22);
+	ew32(FEXTNVM7, mac_data);
+
+	/* Disable the time synchronization clock */
+	mac_data = er32(FEXTNVM7);
+	mac_data |= BIT(31);
+	mac_data &= ~BIT(0);
+	ew32(FEXTNVM7, mac_data);
+
+	/* Dynamic Power Gating Enable */
+	mac_data = er32(CTRL_EXT);
+	mac_data |= BIT(3);
+	ew32(CTRL_EXT, mac_data);
+
+	/* Enable the Dynamic Clock Gating in the DMA and MAC */
+	mac_data = er32(CTRL_EXT);
+	mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
+	ew32(CTRL_EXT, mac_data);
+
+	/* No MAC DPG gating SLP_S0 in modern standby
+	 * Switch the logic of the lanphypc to use PMC counter
+	 */
+	mac_data = er32(FEXTNVM5);
+	mac_data |= BIT(7);
+	ew32(FEXTNVM5, mac_data);
+}
+
+static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 mac_data;
+	u16 phy_data;
+
+	/* Disable the Dynamic Power Gating in the MAC */
+	mac_data = er32(FEXTNVM7);
+	mac_data &= 0xFFBFFFFF;
+	ew32(FEXTNVM7, mac_data);
+
+	/* Enable the time synchronization clock */
+	mac_data = er32(FEXTNVM7);
+	mac_data |= BIT(0);
+	ew32(FEXTNVM7, mac_data);
+
+	/* Disable Dynamic Power Gating */
+	mac_data = er32(CTRL_EXT);
+	mac_data &= 0xFFFFFFF7;
+	ew32(CTRL_EXT, mac_data);
+
+	/* Disable the Dynamic Clock Gating in the DMA and MAC */
+	mac_data = er32(CTRL_EXT);
+	mac_data &= 0xFFF7FFFF;
+	ew32(CTRL_EXT, mac_data);
+
+	/* Revert the lanphypc logic to use the internal Gbe counter
+	 * and not the PMC counter
+	 */
+	mac_data = er32(FEXTNVM5);
+	mac_data &= 0xFFFFFF7F;
+	ew32(FEXTNVM5, mac_data);
+
+	/* Enable the periodic inband message,
+	 * Request PCIe clock in K1 page770_17[10:9] =01b
+	 */
+	e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+	phy_data &= 0xFBFF;
+	phy_data |= HV_PM_CTRL_K1_CLK_REQ;
+	e1e_wphy(hw, HV_PM_CTRL, phy_data);
+
+	/* Return back configuration
+	 * 772_29[5] = 0 CS_Mode_Stay_In_K1
+	 */
+	e1e_rphy(hw, I217_CGFREG, &phy_data);
+	phy_data &= 0xFFDF;
+	e1e_wphy(hw, I217_CGFREG, phy_data);
+
+	/* Change the MAC/PHY interface to Kumeran
+	 * Unforce the SMBus in PHY page769_23[0] = 0
+	 * Unforce the SMBus in MAC CTRL_EXT[11] = 0
+	 */
+	e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+	phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS;
+	e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+	mac_data = er32(CTRL_EXT);
+	mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+	ew32(CTRL_EXT, mac_data);
+}
+#endif /* CONFIG_PM_SLEEP */
+
 static int e1000e_pm_freeze(struct device *dev)
 {
 	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
+	bool present;
+
+	rtnl_lock();
 
+	present = netif_device_present(netdev);
 	netif_device_detach(netdev);
 
-	if (netif_running(netdev)) {
+	if (present && netif_running(netdev)) {
 		int count = E1000_CHECK_RESET_COUNT;
 
 		while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
@@ -6313,6 +6491,8 @@ static int e1000e_pm_freeze(struct device *dev)
 		e1000e_down(adapter, false);
 		e1000_free_irq(adapter);
 	}
+	rtnl_unlock();
+
 	e1000e_reset_interrupt_capability(adapter);
 
 	/* Allow time for pending master requests to run */
@@ -6560,6 +6740,30 @@ static void e1000e_disable_aspm_locked(struct pci_dev *pdev, u16 state)
 	__e1000e_disable_aspm(pdev, state, 1);
 }
 
+static int e1000e_pm_thaw(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	int rc = 0;
+
+	e1000e_set_interrupt_capability(adapter);
+
+	rtnl_lock();
+	if (netif_running(netdev)) {
+		rc = e1000_request_irq(adapter);
+		if (rc)
+			goto err_irq;
+
+		e1000e_up(adapter);
+	}
+
+	netif_device_attach(netdev);
+err_irq:
+	rtnl_unlock();
+
+	return rc;
+}
+
 #ifdef CONFIG_PM
 static int __e1000_resume(struct pci_dev *pdev)
 {
@@ -6627,29 +6831,12 @@ static int __e1000_resume(struct pci_dev *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int e1000e_pm_thaw(struct device *dev)
-{
-	struct net_device *netdev = dev_get_drvdata(dev);
-	struct e1000_adapter *adapter = netdev_priv(netdev);
-
-	e1000e_set_interrupt_capability(adapter);
-	if (netif_running(netdev)) {
-		u32 err = e1000_request_irq(adapter);
-
-		if (err)
-			return err;
-
-		e1000e_up(adapter);
-	}
-
-	netif_device_attach(netdev);
-
-	return 0;
-}
-
 static int e1000e_pm_suspend(struct device *dev)
 {
+	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct e1000_hw *hw = &adapter->hw;
 	int rc;
 
 	e1000e_flush_lpic(pdev);
@@ -6660,14 +6847,25 @@ static int e1000e_pm_suspend(struct device *dev)
 	if (rc)
 		e1000e_pm_thaw(dev);
 
+	/* Introduce S0ix implementation */
+	if (hw->mac.type >= e1000_pch_cnp)
+		e1000e_s0ix_entry_flow(adapter);
+
 	return rc;
 }
 
 static int e1000e_pm_resume(struct device *dev)
 {
+	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct e1000_hw *hw = &adapter->hw;
 	int rc;
 
+	/* Introduce S0ix implementation */
+	if (hw->mac.type >= e1000_pch_cnp)
+		e1000e_s0ix_exit_flow(adapter);
+
 	rc = __e1000_resume(pdev);
 	if (rc)
 		return rc;
@@ -6818,16 +7016,11 @@ static void e1000_netpoll(struct net_device *netdev)
 static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
 						pci_channel_state_t state)
 {
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct e1000_adapter *adapter = netdev_priv(netdev);
-
-	netif_device_detach(netdev);
+	e1000e_pm_freeze(&pdev->dev);
 
 	if (state == pci_channel_io_perm_failure)
 		return PCI_ERS_RESULT_DISCONNECT;
 
-	if (netif_running(netdev))
-		e1000e_down(adapter, true);
 	pci_disable_device(pdev);
 
 	/* Request a slot slot reset. */
@@ -6893,10 +7086,7 @@ static void e1000_io_resume(struct pci_dev *pdev)
 
 	e1000_init_manageability_pt(adapter);
 
-	if (netif_running(netdev))
-		e1000e_up(adapter);
-
-	netif_device_attach(netdev);
+	e1000e_pm_thaw(&pdev->dev);
 
 	/* If the controller has AMT, do not set DRV_LOAD until the interface
 	 * is up.  For all other cases, let the f/w know that the h/w is now
@@ -7407,15 +7597,13 @@ static void e1000_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
-	bool down = test_bit(__E1000_DOWN, &adapter->state);
 
 	e1000e_ptp_remove(adapter);
 
 	/* The timers may be rescheduled, so explicitly disable them
 	 * from being rescheduled.
 	 */
-	if (!down)
-		set_bit(__E1000_DOWN, &adapter->state);
+	set_bit(__E1000_DOWN, &adapter->state);
 	del_timer_sync(&adapter->phy_info_timer);
 
 	cancel_work_sync(&adapter->reset_task);
@@ -7435,9 +7623,6 @@ static void e1000_remove(struct pci_dev *pdev)
 		}
 	}
 
-	/* Don't lie to e1000_close() down the road. */
-	if (!down)
-		clear_bit(__E1000_DOWN, &adapter->state);
 	unregister_netdev(netdev);
 
 	if (pci_dev_run_wake(pdev))
@@ -7567,6 +7752,17 @@ static const struct pci_device_id e1000_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V8), board_pch_cnp },
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_LM9), board_pch_cnp },
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V9), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM10), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V10), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM11), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V11), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM12), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V12), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp },
 
 	{ 0, 0, 0, 0, 0, 0, 0 }	/* terminate list */
 };
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 1a4c65d9feb4..eaa5a0fb99f0 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -295,6 +295,8 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
 	case e1000_pch_lpt:
 	case e1000_pch_spt:
 	case e1000_pch_cnp:
+		/* fall-through */
+	case e1000_pch_tgp:
 		if ((hw->mac.type < e1000_pch_lpt) ||
 		    (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) {
 			adapter->ptp_clock_info.max_adj = 24000000 - 1;
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
index 47f5ca793970..df59fd1d660c 100644
--- a/drivers/net/ethernet/intel/e1000e/regs.h
+++ b/drivers/net/ethernet/intel/e1000e/regs.h
@@ -18,6 +18,7 @@
 #define E1000_FEXTNVM	0x00028	/* Future Extended NVM - RW */
 #define E1000_FEXTNVM3	0x0003C	/* Future Extended NVM 3 - RW */
 #define E1000_FEXTNVM4	0x00024	/* Future Extended NVM 4 - RW */
+#define E1000_FEXTNVM5	0x00014	/* Future Extended NVM 5 - RW */
 #define E1000_FEXTNVM6	0x00010	/* Future Extended NVM 6 - RW */
 #define E1000_FEXTNVM7	0x000E4	/* Future Extended NVM 7 - RW */
 #define E1000_FEXTNVM9	0x5BB4	/* Future Extended NVM 9 - RW */
@@ -234,4 +235,7 @@
 #define E1000_RXMTRL	0x0B634	/* Time sync Rx EtherType and Msg Type - RW */
 #define E1000_RXUDP	0x0B638	/* Time Sync Rx UDP Port - RW */
 
+/* PHY registers */
+#define I82579_DFT_CTRL	PHY_REG(769, 20)
+
 #endif
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index b14441944b4b..f306084ca12c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -534,6 +534,7 @@ void fm10k_iov_suspend(struct pci_dev *pdev);
 int fm10k_iov_resume(struct pci_dev *pdev);
 void fm10k_iov_disable(struct pci_dev *pdev);
 int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs);
+void fm10k_iov_update_stats(struct fm10k_intfc *interface);
 s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
 int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
@@ -542,6 +543,8 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
 			int __always_unused min_rate, int max_rate);
 int fm10k_ndo_get_vf_config(struct net_device *netdev,
 			    int vf_idx, struct ifla_vf_info *ivi);
+int fm10k_ndo_get_vf_stats(struct net_device *netdev,
+			   int vf_idx, struct ifla_vf_stats *stats);
 
 /* DebugFS */
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index afe1fafd2447..8c50a128df29 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -520,6 +520,27 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
 	return num_vfs;
 }
 
+/**
+ * fm10k_iov_update_stats - Update stats for all VFs
+ * @interface: device private structure
+ *
+ * Updates the VF statistics for all enabled VFs. Expects to be called by
+ * fm10k_update_stats and assumes that locking via the __FM10K_UPDATING_STATS
+ * bit is already handled.
+ */
+void fm10k_iov_update_stats(struct fm10k_intfc *interface)
+{
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+	int i;
+
+	if (!iov_data)
+		return;
+
+	for (i = 0; i < iov_data->num_vfs; i++)
+		hw->iov.ops.update_stats(hw, iov_data->vf_info[i].stats, i);
+}
+
 static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface,
 				       struct fm10k_vf_info *vf_info)
 {
@@ -650,3 +671,30 @@ int fm10k_ndo_get_vf_config(struct net_device *netdev,
 
 	return 0;
 }
+
+int fm10k_ndo_get_vf_stats(struct net_device *netdev,
+			   int vf_idx, struct ifla_vf_stats *stats)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_hw_stats_q *hw_stats;
+	u32 idx, qpp;
+
+	/* verify SR-IOV is active and that vf idx is valid */
+	if (!iov_data || vf_idx >= iov_data->num_vfs)
+		return -EINVAL;
+
+	qpp = fm10k_queues_per_pool(hw);
+	hw_stats = iov_data->vf_info[vf_idx].stats;
+
+	for (idx = 0; idx < qpp; idx++) {
+		stats->rx_packets += hw_stats[idx].rx_packets.count;
+		stats->tx_packets += hw_stats[idx].tx_packets.count;
+		stats->rx_bytes += hw_stats[idx].rx_bytes.count;
+		stats->tx_bytes += hw_stats[idx].tx_bytes.count;
+		stats->rx_dropped += hw_stats[idx].rx_drops.count;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 2be9222510e7..17738b0a9873 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -11,7 +11,7 @@
 
 #include "fm10k.h"
 
-#define DRV_VERSION	"0.26.1-k"
+#define DRV_VERSION	"0.27.1-k"
 #define DRV_SUMMARY	"Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 09f7a246e134..68baee04dc58 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1643,6 +1643,7 @@ static const struct net_device_ops fm10k_netdev_ops = {
 	.ndo_set_vf_vlan	= fm10k_ndo_set_vf_vlan,
 	.ndo_set_vf_rate	= fm10k_ndo_set_vf_bw,
 	.ndo_get_vf_config	= fm10k_ndo_get_vf_config,
+	.ndo_get_vf_stats	= fm10k_ndo_get_vf_stats,
 	.ndo_udp_tunnel_add	= fm10k_udp_tunnel_add,
 	.ndo_udp_tunnel_del	= fm10k_udp_tunnel_del,
 	.ndo_dfwd_add_station	= fm10k_dfwd_add_station,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index bb236fa44048..d122d0087191 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -630,6 +630,9 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
 	net_stats->rx_errors = rx_errors;
 	net_stats->rx_dropped = interface->stats.nodesc_drop.count;
 
+	/* Update VF statistics */
+	fm10k_iov_update_stats(interface);
+
 	clear_bit(__FM10K_UPDATING_STATS, interface->state);
 }
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
index 160bc5b78f99..ceb9b791f799 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
 
 #ifndef _FM10K_TLV_H_
 #define _FM10K_TLV_H_
@@ -76,8 +76,8 @@ struct fm10k_tlv_attr {
 #define FM10K_TLV_ATTR_S32(id)		    { id, FM10K_TLV_SIGNED, 4 }
 #define FM10K_TLV_ATTR_S64(id)		    { id, FM10K_TLV_SIGNED, 8 }
 #define FM10K_TLV_ATTR_LE_STRUCT(id, len)   { id, FM10K_TLV_LE_STRUCT, len }
-#define FM10K_TLV_ATTR_NESTED(id)	    { id, FM10K_TLV_NESTED }
-#define FM10K_TLV_ATTR_LAST		    { FM10K_TLV_ERROR }
+#define FM10K_TLV_ATTR_NESTED(id)	    { id, FM10K_TLV_NESTED, 0 }
+#define FM10K_TLV_ATTR_LAST		    { FM10K_TLV_ERROR, 0, 0 }
 
 struct fm10k_msg_data {
 	unsigned int		    id;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index 15ac1c7885bc..63968c5d7c5d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -581,6 +581,7 @@ struct fm10k_vf_info {
 	 * at the same offset as the mailbox
 	 */
 	struct fm10k_mbx_info	mbx;		/* PF side of VF mailbox */
+	struct fm10k_hw_stats_q	stats[FM10K_MAX_QUEUES_POOL];
 	int			rate;		/* Tx BW cap as defined by OS */
 	u16			glort;		/* resource tag for this VF */
 	u16			sw_vid;		/* Switch API assigned VLAN */
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 2af9f6308f84..cb6367334ca7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1118,6 +1118,7 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
 					    const u8 *macaddr);
 int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr);
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
+int i40e_count_filters(struct i40e_vsi *vsi);
 struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
 void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
 #ifdef CONFIG_I40E_DCB
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 72c04881d290..9f0a4e92a231 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -508,6 +508,59 @@ shutdown_arq_out:
 }
 
 /**
+ *  i40e_set_hw_flags - set HW flags
+ *  @hw: pointer to the hardware structure
+ **/
+static void i40e_set_hw_flags(struct i40e_hw *hw)
+{
+	struct i40e_adminq_info *aq = &hw->aq;
+
+	hw->flags = 0;
+
+	switch (hw->mac.type) {
+	case I40E_MAC_XL710:
+		if (aq->api_maj_ver > 1 ||
+		    (aq->api_maj_ver == 1 &&
+		     aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710)) {
+			hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+			hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+			/* The ability to RX (not drop) 802.1ad frames */
+			hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
+		}
+		break;
+	case I40E_MAC_X722:
+		hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE |
+			     I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
+
+		if (aq->api_maj_ver > 1 ||
+		    (aq->api_maj_ver == 1 &&
+		     aq->api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722))
+			hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+		/* fall through */
+	default:
+		break;
+	}
+
+	/* Newer versions of firmware require lock when reading the NVM */
+	if (aq->api_maj_ver > 1 ||
+	    (aq->api_maj_ver == 1 &&
+	     aq->api_min_ver >= 5))
+		hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
+
+	if (aq->api_maj_ver > 1 ||
+	    (aq->api_maj_ver == 1 &&
+	     aq->api_min_ver >= 8)) {
+		hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT;
+		hw->flags |= I40E_HW_FLAG_DROP_MODE;
+	}
+
+	if (aq->api_maj_ver > 1 ||
+	    (aq->api_maj_ver == 1 &&
+	     aq->api_min_ver >= 9))
+		hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED;
+}
+
+/**
  *  i40e_init_adminq - main initialization routine for Admin Queue
  *  @hw: pointer to the hardware structure
  *
@@ -571,6 +624,11 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw)
 	if (ret_code != I40E_SUCCESS)
 		goto init_adminq_free_arq;
 
+	/* Some features were introduced in different FW API version
+	 * for different MAC type.
+	 */
+	i40e_set_hw_flags(hw);
+
 	/* get the NVM version info */
 	i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION,
 			   &hw->nvm.version);
@@ -596,25 +654,12 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw)
 		hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
 	}
 
-	/* Newer versions of firmware require lock when reading the NVM */
-	if (hw->aq.api_maj_ver > 1 ||
-	    (hw->aq.api_maj_ver == 1 &&
-	     hw->aq.api_min_ver >= 5))
-		hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
-
 	/* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */
 	if (hw->aq.api_maj_ver > 1 ||
 	    (hw->aq.api_maj_ver == 1 &&
 	     hw->aq.api_min_ver >= 7))
 		hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
 
-	if (hw->aq.api_maj_ver > 1 ||
-	    (hw->aq.api_maj_ver == 1 &&
-	     hw->aq.api_min_ver >= 8)) {
-		hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT;
-		hw->flags |= I40E_HW_FLAG_DROP_MODE;
-	}
-
 	if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
 		ret_code = I40E_ERR_FIRMWARE_API_VERSION;
 		goto init_adminq_free_arq;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 69a2daaca5c5..aa5f1c0aa721 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -2251,7 +2251,13 @@ struct i40e_aqc_phy_register_access {
 #define I40E_AQ_PHY_REG_ACCESS_EXTERNAL	1
 #define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE	2
 	u8	dev_address;
-	u8	reserved1[2];
+	u8	cmd_flags;
+#define I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE	0x01
+#define I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER	0x02
+#define I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT	2
+#define I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK	(0x3 << \
+		I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT)
+	u8	reserved1;
 	__le32	reg_address;
 	__le32	reg_value;
 	u8	reserved2[4];
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 7560f06768e0..d4055037af89 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -29,6 +29,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw)
 		case I40E_DEV_ID_QSFP_C:
 		case I40E_DEV_ID_10G_BASE_T:
 		case I40E_DEV_ID_10G_BASE_T4:
+		case I40E_DEV_ID_10G_BASE_T_BC:
 		case I40E_DEV_ID_10G_B:
 		case I40E_DEV_ID_10G_SFP:
 		case I40E_DEV_ID_20G_KR2:
@@ -933,10 +934,6 @@ i40e_status i40e_init_shared_code(struct i40e_hw *hw)
 	else
 		hw->pf_id = (u8)(func_rid & 0x7);
 
-	if (hw->mac.type == I40E_MAC_X722)
-		hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE |
-			     I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
-
 	status = i40e_init_nvm(hw);
 	return status;
 }
@@ -1441,9 +1438,9 @@ static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx)
 	u32 gpio_val = 0;
 	u32 port;
 
-	if (!hw->func_caps.led[idx])
+	if (!I40E_IS_X710TL_DEVICE(hw->device_id) &&
+	    !hw->func_caps.led[idx])
 		return 0;
-
 	gpio_val = rd32(hw, I40E_GLGEN_GPIO_CTL(idx));
 	port = (gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK) >>
 		I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT;
@@ -1462,8 +1459,15 @@ static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx)
 #define I40E_FILTER_ACTIVITY 0xE
 #define I40E_LINK_ACTIVITY 0xC
 #define I40E_MAC_ACTIVITY 0xD
+#define I40E_FW_LED BIT(4)
+#define I40E_LED_MODE_VALID (I40E_GLGEN_GPIO_CTL_LED_MODE_MASK >> \
+			     I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT)
+
 #define I40E_LED0 22
 
+#define I40E_PIN_FUNC_SDP 0x0
+#define I40E_PIN_FUNC_LED 0x1
+
 /**
  * i40e_led_get - return current on/off mode
  * @hw: pointer to the hw struct
@@ -1508,8 +1512,10 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
 {
 	int i;
 
-	if (mode & 0xfffffff0)
+	if (mode & ~I40E_LED_MODE_VALID) {
 		hw_dbg(hw, "invalid mode passed in %X\n", mode);
+		return;
+	}
 
 	/* as per the documentation GPIO 22-29 are the LED
 	 * GPIO pins named LED0..LED7
@@ -1519,6 +1525,20 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
 
 		if (!gpio_val)
 			continue;
+
+		if (I40E_IS_X710TL_DEVICE(hw->device_id)) {
+			u32 pin_func = 0;
+
+			if (mode & I40E_FW_LED)
+				pin_func = I40E_PIN_FUNC_SDP;
+			else
+				pin_func = I40E_PIN_FUNC_LED;
+
+			gpio_val &= ~I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK;
+			gpio_val |= ((pin_func <<
+				     I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) &
+				     I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK);
+		}
 		gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK;
 		/* this & is a bit of paranoia, but serves as a range check */
 		gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
@@ -2571,9 +2591,16 @@ noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw)
 		if (status)
 			return status;
 
-		hw->phy.link_info.req_fec_info =
-			abilities.fec_cfg_curr_mod_ext_info &
-			(I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS);
+		if (abilities.fec_cfg_curr_mod_ext_info &
+		    I40E_AQ_ENABLE_FEC_AUTO)
+			hw->phy.link_info.req_fec_info =
+				(I40E_AQ_REQUEST_FEC_KR |
+				 I40E_AQ_REQUEST_FEC_RS);
+		else
+			hw->phy.link_info.req_fec_info =
+				abilities.fec_cfg_curr_mod_ext_info &
+				(I40E_AQ_REQUEST_FEC_KR |
+				 I40E_AQ_REQUEST_FEC_RS);
 
 		memcpy(hw->phy.link_info.module_type, &abilities.module_type,
 		       sizeof(hw->phy.link_info.module_type));
@@ -4885,6 +4912,7 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw,
 		break;
 	case I40E_DEV_ID_10G_BASE_T:
 	case I40E_DEV_ID_10G_BASE_T4:
+	case I40E_DEV_ID_10G_BASE_T_BC:
 	case I40E_DEV_ID_10G_BASE_T_X722:
 	case I40E_DEV_ID_25G_B:
 	case I40E_DEV_ID_25G_SFP28:
@@ -5044,7 +5072,7 @@ static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
 		status =
 		       i40e_aq_get_phy_register(hw,
 						I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-						I40E_PHY_COM_REG_PAGE,
+						I40E_PHY_COM_REG_PAGE, true,
 						I40E_PHY_LED_PROV_REG_1,
 						reg_val, NULL);
 	} else {
@@ -5077,7 +5105,7 @@ static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
 		status =
 		       i40e_aq_set_phy_register(hw,
 						I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-						I40E_PHY_COM_REG_PAGE,
+						I40E_PHY_COM_REG_PAGE, true,
 						I40E_PHY_LED_PROV_REG_1,
 						reg_val, NULL);
 	} else {
@@ -5116,7 +5144,7 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
 		status =
 		      i40e_aq_get_phy_register(hw,
 					       I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-					       I40E_PHY_COM_REG_PAGE,
+					       I40E_PHY_COM_REG_PAGE, true,
 					       I40E_PHY_LED_PROV_REG_1,
 					       &reg_val_aq, NULL);
 		if (status == I40E_SUCCESS)
@@ -5321,20 +5349,49 @@ do_retry:
 }
 
 /**
- * i40e_aq_set_phy_register
+ * i40e_mdio_if_number_selection - MDIO I/F number selection
+ * @hw: pointer to the hw struct
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
+ * @cmd: pointer to PHY Register command structure
+ **/
+static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio,
+					  u8 mdio_num,
+					  struct i40e_aqc_phy_register_access *cmd)
+{
+	if (set_mdio && cmd->phy_interface == I40E_AQ_PHY_REG_ACCESS_EXTERNAL) {
+		if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED)
+			cmd->cmd_flags |=
+				I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER |
+				((mdio_num <<
+				I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT) &
+				I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK);
+		else
+			i40e_debug(hw, I40E_DEBUG_PHY,
+				   "MDIO I/F number selection not supported by current FW version.\n");
+	}
+}
+
+/**
+ * i40e_aq_set_phy_register_ext
  * @hw: pointer to the hw struct
  * @phy_select: select which phy should be accessed
  * @dev_addr: PHY device address
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
  * @reg_addr: PHY register address
  * @reg_val: new register value
  * @cmd_details: pointer to command details structure or NULL
  *
  * Write the external PHY register.
+ * NOTE: In common cases MDIO I/F number should not be changed, thats why you
+ * may use simple wrapper i40e_aq_set_phy_register.
  **/
-i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
-				     u8 phy_select, u8 dev_addr,
-				     u32 reg_addr, u32 reg_val,
-				     struct i40e_asq_cmd_details *cmd_details)
+enum i40e_status_code i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+			     u8 phy_select, u8 dev_addr, bool page_change,
+			     bool set_mdio, u8 mdio_num,
+			     u32 reg_addr, u32 reg_val,
+			     struct i40e_asq_cmd_details *cmd_details)
 {
 	struct i40e_aq_desc desc;
 	struct i40e_aqc_phy_register_access *cmd =
@@ -5349,26 +5406,36 @@ i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
 	cmd->reg_address = cpu_to_le32(reg_addr);
 	cmd->reg_value = cpu_to_le32(reg_val);
 
+	i40e_mdio_if_number_selection(hw, set_mdio, mdio_num, cmd);
+
+	if (!page_change)
+		cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE;
+
 	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 
 	return status;
 }
 
 /**
- * i40e_aq_get_phy_register
+ * i40e_aq_get_phy_register_ext
  * @hw: pointer to the hw struct
  * @phy_select: select which phy should be accessed
  * @dev_addr: PHY device address
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
  * @reg_addr: PHY register address
  * @reg_val: read register value
  * @cmd_details: pointer to command details structure or NULL
  *
  * Read the external PHY register.
+ * NOTE: In common cases MDIO I/F number should not be changed, thats why you
+ * may use simple wrapper i40e_aq_get_phy_register.
  **/
-i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
-				     u8 phy_select, u8 dev_addr,
-				     u32 reg_addr, u32 *reg_val,
-				     struct i40e_asq_cmd_details *cmd_details)
+enum i40e_status_code i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+			     u8 phy_select, u8 dev_addr, bool page_change,
+			     bool set_mdio, u8 mdio_num,
+			     u32 reg_addr, u32 *reg_val,
+			     struct i40e_asq_cmd_details *cmd_details)
 {
 	struct i40e_aq_desc desc;
 	struct i40e_aqc_phy_register_access *cmd =
@@ -5382,6 +5449,11 @@ i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
 	cmd->dev_address = dev_addr;
 	cmd->reg_address = cpu_to_le32(reg_addr);
 
+	i40e_mdio_if_number_selection(hw, set_mdio, mdio_num, cmd);
+
+	if (!page_change)
+		cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE;
+
 	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 	if (!status)
 		*reg_val = le32_to_cpu(cmd->reg_value);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 200a1cb3b536..9de503c5f99b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -889,7 +889,9 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
 
 		ret = i40e_read_nvm_module_data(hw,
 						I40E_SR_EMP_SR_SETTINGS_PTR,
-						offset, 1,
+						offset,
+						I40E_LLDP_CURRENT_STATUS_OFFSET,
+						I40E_LLDP_CURRENT_STATUS_SIZE,
 						&lldp_cfg.adminstatus);
 	} else {
 		ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index 2a80c5daa376..ba86ad833bee 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -32,6 +32,9 @@
 #define I40E_CEE_MAX_FEAT_TYPE		3
 #define I40E_LLDP_CURRENT_STATUS_XL710_OFFSET	0x2B
 #define I40E_LLDP_CURRENT_STATUS_X722_OFFSET	0x31
+#define I40E_LLDP_CURRENT_STATUS_OFFSET		1
+#define I40E_LLDP_CURRENT_STATUS_SIZE		1
+
 /* Defines for LLDP TLV header */
 #define I40E_LLDP_TLV_LEN_SHIFT		0
 #define I40E_LLDP_TLV_LEN_MASK		(0x01FF << I40E_LLDP_TLV_LEN_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index bac4da031f9b..bf15a868292f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -23,6 +23,8 @@
 #define I40E_DEV_ID_10G_BASE_T_BC	0x15FF
 #define I40E_DEV_ID_10G_B		0x104F
 #define I40E_DEV_ID_10G_SFP		0x104E
+#define I40E_IS_X710TL_DEVICE(d) \
+	((d) == I40E_DEV_ID_10G_BASE_T_BC)
 #define I40E_DEV_ID_KX_X722		0x37CE
 #define I40E_DEV_ID_QSFP_X722		0x37CF
 #define I40E_DEV_ID_SFP_X722		0x37D0
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 41e1240acaea..d24d8731bef0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -722,7 +722,14 @@ static void i40e_get_settings_link_up_fec(u8 req_fec_info,
 	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
 	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
 
-	if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) {
+	if ((I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) &&
+	    (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info)) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_NONE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_BASER);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+	} else if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) {
 		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
 	} else if (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info) {
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
@@ -730,12 +737,6 @@ static void i40e_get_settings_link_up_fec(u8 req_fec_info,
 	} else {
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     FEC_NONE);
-		if (I40E_AQ_SET_FEC_AUTO & req_fec_info) {
-			ethtool_link_ksettings_add_link_mode(ks, advertising,
-							     FEC_RS);
-			ethtool_link_ksettings_add_link_mode(ks, advertising,
-							     FEC_BASER);
-		}
 	}
 }
 
@@ -1437,6 +1438,7 @@ static int i40e_get_fec_param(struct net_device *netdev,
 	struct i40e_hw *hw = &pf->hw;
 	i40e_status status = 0;
 	int err = 0;
+	u8 fec_cfg;
 
 	/* Get the current phy config */
 	memset(&abilities, 0, sizeof(abilities));
@@ -1448,18 +1450,16 @@ static int i40e_get_fec_param(struct net_device *netdev,
 	}
 
 	fecparam->fec = 0;
-	if (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_AUTO)
+	fec_cfg = abilities.fec_cfg_curr_mod_ext_info;
+	if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
 		fecparam->fec |= ETHTOOL_FEC_AUTO;
-	if ((abilities.fec_cfg_curr_mod_ext_info &
-	     I40E_AQ_SET_FEC_REQUEST_RS) ||
-	    (abilities.fec_cfg_curr_mod_ext_info &
-	     I40E_AQ_SET_FEC_ABILITY_RS))
+	else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_RS |
+		 I40E_AQ_SET_FEC_ABILITY_RS))
 		fecparam->fec |= ETHTOOL_FEC_RS;
-	if ((abilities.fec_cfg_curr_mod_ext_info &
-	     I40E_AQ_SET_FEC_REQUEST_KR) ||
-	    (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_ABILITY_KR))
+	else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_KR |
+		 I40E_AQ_SET_FEC_ABILITY_KR))
 		fecparam->fec |= ETHTOOL_FEC_BASER;
-	if (abilities.fec_cfg_curr_mod_ext_info == 0)
+	if (fec_cfg == 0)
 		fecparam->fec |= ETHTOOL_FEC_OFF;
 
 	if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA)
@@ -5112,7 +5112,7 @@ static int i40e_get_module_info(struct net_device *netdev,
 	case I40E_MODULE_TYPE_SFP:
 		status = i40e_aq_get_phy_register(hw,
 				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-				I40E_I2C_EEPROM_DEV_ADDR,
+				I40E_I2C_EEPROM_DEV_ADDR, true,
 				I40E_MODULE_SFF_8472_COMP,
 				&sff8472_comp, NULL);
 		if (status)
@@ -5120,7 +5120,7 @@ static int i40e_get_module_info(struct net_device *netdev,
 
 		status = i40e_aq_get_phy_register(hw,
 				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-				I40E_I2C_EEPROM_DEV_ADDR,
+				I40E_I2C_EEPROM_DEV_ADDR, true,
 				I40E_MODULE_SFF_8472_SWAP,
 				&sff8472_swap, NULL);
 		if (status)
@@ -5152,7 +5152,7 @@ static int i40e_get_module_info(struct net_device *netdev,
 		/* Read from memory page 0. */
 		status = i40e_aq_get_phy_register(hw,
 				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-				0,
+				0, true,
 				I40E_MODULE_REVISION_ADDR,
 				&sff8636_rev, NULL);
 		if (status)
@@ -5223,7 +5223,7 @@ static int i40e_get_module_eeprom(struct net_device *netdev,
 
 		status = i40e_aq_get_phy_register(hw,
 				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-				addr, offset, &value, NULL);
+				true, addr, offset, &value, NULL);
 		if (status)
 			return -EIO;
 		data[i] = value;
@@ -5242,6 +5242,7 @@ static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
 }
 
 static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = {
+	.get_drvinfo		= i40e_get_drvinfo,
 	.set_eeprom		= i40e_set_eeprom,
 	.get_eeprom_len		= i40e_get_eeprom_len,
 	.get_eeprom		= i40e_get_eeprom,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 6031223eafab..1ccabeafa44c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1110,6 +1110,25 @@ void i40e_update_stats(struct i40e_vsi *vsi)
 }
 
 /**
+ * i40e_count_filters - counts VSI mac filters
+ * @vsi: the VSI to be searched
+ *
+ * Returns count of mac filters
+ **/
+int i40e_count_filters(struct i40e_vsi *vsi)
+{
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	int bkt;
+	int cnt = 0;
+
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+		++cnt;
+
+	return cnt;
+}
+
+/**
  * i40e_find_filter - Search VSI filter list for specific mac/vlan filter
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
@@ -2645,8 +2664,8 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
 			return -EINVAL;
 	}
 
-	netdev_info(netdev, "changing MTU from %d to %d\n",
-		    netdev->mtu, new_mtu);
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
 	if (netif_running(netdev))
 		i40e_vsi_reinit_locked(vsi);
@@ -3534,14 +3553,14 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 		q_vector->rx.target_itr =
 			ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
 		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
-		     q_vector->rx.target_itr);
+		     q_vector->rx.target_itr >> 1);
 		q_vector->rx.current_itr = q_vector->rx.target_itr;
 
 		q_vector->tx.next_update = jiffies + 1;
 		q_vector->tx.target_itr =
 			ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
 		wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
-		     q_vector->tx.target_itr);
+		     q_vector->tx.target_itr >> 1);
 		q_vector->tx.current_itr = q_vector->tx.target_itr;
 
 		wr32(hw, I40E_PFINT_RATEN(vector - 1),
@@ -3646,11 +3665,11 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
 	/* set the ITR configuration */
 	q_vector->rx.next_update = jiffies + 1;
 	q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
-	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
+	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr >> 1);
 	q_vector->rx.current_itr = q_vector->rx.target_itr;
 	q_vector->tx.next_update = jiffies + 1;
 	q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
-	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
+	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr >> 1);
 	q_vector->tx.current_itr = q_vector->tx.target_itr;
 
 	i40e_enable_misc_int_causes(pf);
@@ -7168,6 +7187,7 @@ static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
 		ch->num_queue_pairs = qcnt;
 		if (!i40e_setup_channel(pf, vsi, ch)) {
 			ret = -EINVAL;
+			kfree(ch);
 			goto err_free;
 		}
 		ch->parent_vsi = vsi;
@@ -11396,7 +11416,7 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
 
 	/* associate no queues to the misc vector */
 	wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST);
-	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K);
+	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1);
 
 	i40e_flush(hw);
 
@@ -12850,6 +12870,7 @@ static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_set_features	= i40e_set_features,
 	.ndo_set_vf_mac		= i40e_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= i40e_ndo_set_vf_port_vlan,
+	.ndo_get_vf_stats	= i40e_get_vf_stats,
 	.ndo_set_vf_rate	= i40e_ndo_set_vf_bw,
 	.ndo_get_vf_config	= i40e_ndo_get_vf_config,
 	.ndo_set_vf_link_state	= i40e_ndo_set_vf_link_state,
@@ -12911,6 +12932,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 			  NETIF_F_GSO_IPXIP6		|
 			  NETIF_F_GSO_UDP_TUNNEL	|
 			  NETIF_F_GSO_UDP_TUNNEL_CSUM	|
+			  NETIF_F_GSO_UDP_L4		|
 			  NETIF_F_SCTP_CRC		|
 			  NETIF_F_RXHASH		|
 			  NETIF_F_RXCSUM		|
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index e4d8d20baf3b..7164f4ad8120 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -323,20 +323,24 @@ i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 
 /**
  * i40e_read_nvm_module_data - Reads NVM Buffer to specified memory location
- * @hw: pointer to the HW structure
+ * @hw: Pointer to the HW structure
  * @module_ptr: Pointer to module in words with respect to NVM beginning
- * @offset: offset in words from module start
+ * @module_offset: Offset in words from module start
+ * @data_offset: Offset in words from reading data area start
  * @words_data_size: Words to read from NVM
  * @data_ptr: Pointer to memory location where resulting buffer will be stored
  **/
-i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
-				      u8 module_ptr, u16 offset,
-				      u16 words_data_size,
-				      u16 *data_ptr)
+enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw,
+						u8 module_ptr,
+						u16 module_offset,
+						u16 data_offset,
+						u16 words_data_size,
+						u16 *data_ptr)
 {
 	i40e_status status;
+	u16 specific_ptr = 0;
 	u16 ptr_value = 0;
-	u32 flat_offset;
+	u32 offset = 0;
 
 	if (module_ptr != 0) {
 		status = i40e_read_nvm_word(hw, module_ptr, &ptr_value);
@@ -352,36 +356,35 @@ i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
 
 	/* Pointer not initialized */
 	if (ptr_value == I40E_NVM_INVALID_PTR_VAL ||
-	    ptr_value == I40E_NVM_INVALID_VAL)
+	    ptr_value == I40E_NVM_INVALID_VAL) {
+		i40e_debug(hw, I40E_DEBUG_ALL, "Pointer not initialized.\n");
 		return I40E_ERR_BAD_PTR;
+	}
 
 	/* Check whether the module is in SR mapped area or outside */
 	if (ptr_value & I40E_PTR_TYPE) {
 		/* Pointer points outside of the Shared RAM mapped area */
-		ptr_value &= ~I40E_PTR_TYPE;
+		i40e_debug(hw, I40E_DEBUG_ALL,
+			   "Reading nvm data failed. Pointer points outside of the Shared RAM mapped area.\n");
 
-		/* PtrValue in 4kB units, need to convert to words */
-		ptr_value /= 2;
-		flat_offset = ((u32)ptr_value * 0x1000) + (u32)offset;
-		status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
-		if (!status) {
-			status = i40e_aq_read_nvm(hw, 0, 2 * flat_offset,
-						  2 * words_data_size,
-						  data_ptr, true, NULL);
-			i40e_release_nvm(hw);
-			if (status) {
-				i40e_debug(hw, I40E_DEBUG_ALL,
-					   "Reading nvm aq failed.Error code: %d.\n",
-					   status);
-				return I40E_ERR_NVM;
-			}
-		} else {
-			return I40E_ERR_NVM;
-		}
+		return I40E_ERR_PARAM;
 	} else {
 		/* Read from the Shadow RAM */
-		status = i40e_read_nvm_buffer(hw, ptr_value + offset,
-					      &words_data_size, data_ptr);
+
+		status = i40e_read_nvm_word(hw, ptr_value + module_offset,
+					    &specific_ptr);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Reading nvm word failed.Error code: %d.\n",
+				   status);
+			return I40E_ERR_NVM;
+		}
+
+		offset = ptr_value + module_offset + specific_ptr +
+			data_offset;
+
+		status = i40e_read_nvm_buffer(hw, offset, &words_data_size,
+					      data_ptr);
 		if (status) {
 			i40e_debug(hw, I40E_DEBUG_ALL,
 				   "Reading nvm buffer failed.Error code: %d.\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 5250441bf75b..bbb478f09093 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -315,10 +315,12 @@ i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
 void i40e_release_nvm(struct i40e_hw *hw);
 i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 					 u16 *data);
-i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw,
-				      u8 module_ptr, u16 offset,
-				      u16 words_data_size,
-				      u16 *data_ptr);
+enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw,
+						u8 module_ptr,
+						u16 module_offset,
+						u16 data_offset,
+						u16 words_data_size,
+						u16 *data_ptr);
 i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
 				 u16 *words, u16 *data);
 i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw);
@@ -409,14 +411,24 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
 				u32 reg_addr, u32 reg_val,
 				struct i40e_asq_cmd_details *cmd_details);
 void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
-i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
-				     u8 phy_select, u8 dev_addr,
-				     u32 reg_addr, u32 reg_val,
-				     struct i40e_asq_cmd_details *cmd_details);
-i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
-				     u8 phy_select, u8 dev_addr,
-				     u32 reg_addr, u32 *reg_val,
-				     struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code
+i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+			     u8 phy_select, u8 dev_addr, bool page_change,
+			     bool set_mdio, u8 mdio_num,
+			     u32 reg_addr, u32 reg_val,
+			     struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code
+i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+			     u8 phy_select, u8 dev_addr, bool page_change,
+			     bool set_mdio, u8 mdio_num,
+			     u32 reg_addr, u32 *reg_val,
+			     struct i40e_asq_cmd_details *cmd_details);
+
+/* Convenience wrappers for most common use case */
+#define i40e_aq_set_phy_register(hw, ps, da, pc, ra, rv, cd)		\
+	i40e_aq_set_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd)
+#define i40e_aq_get_phy_register(hw, ps, da, pc, ra, rv, cd)		\
+	i40e_aq_get_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd)
 
 i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
 					    u16 reg, u8 phy_addr, u16 *value);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index e3f29dc8b290..b8496037ef7f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2960,10 +2960,16 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
 
 	/* remove payload length from inner checksum */
 	paylen = skb->len - l4_offset;
-	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
 
-	/* compute length of segmentation header */
-	*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		csum_replace_by_diff(&l4.udp->check, (__force __wsum)htonl(paylen));
+		/* compute length of segmentation header */
+		*hdr_len = sizeof(*l4.udp) + l4_offset;
+	} else {
+		csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+		/* compute length of segmentation header */
+		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+	}
 
 	/* pull values out of skb_shinfo */
 	gso_size = skb_shinfo(skb)->gso_size;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index b43ec94a0f29..6ea2867ff60f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -624,6 +624,7 @@ struct i40e_hw {
 #define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3)
 #define I40E_HW_FLAG_FW_LLDP_STOPPABLE      BIT_ULL(4)
 #define I40E_HW_FLAG_FW_LLDP_PERSISTENT     BIT_ULL(5)
+#define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6)
 #define I40E_HW_FLAG_DROP_MODE              BIT_ULL(7)
 	u64 flags;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 3d2440838822..6a3f0fc56c3b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -955,7 +955,6 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
 		i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]);
 		vf->lan_vsi_idx = 0;
 		vf->lan_vsi_id = 0;
-		vf->num_mac = 0;
 	}
 
 	/* do the accounting and remove additional ADq VSI's */
@@ -2548,20 +2547,12 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
 					   struct virtchnl_ether_addr_list *al)
 {
 	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+	int mac2add_cnt = 0;
 	int i;
 
-	/* If this VF is not privileged, then we can't add more than a limited
-	 * number of addresses. Check to make sure that the additions do not
-	 * push us over the limit.
-	 */
-	if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
-	    (vf->num_mac + al->num_elements) > I40E_VC_MAX_MAC_ADDR_PER_VF) {
-		dev_err(&pf->pdev->dev,
-			"Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
-		return -EPERM;
-	}
-
 	for (i = 0; i < al->num_elements; i++) {
+		struct i40e_mac_filter *f;
 		u8 *addr = al->list[i].addr;
 
 		if (is_broadcast_ether_addr(addr) ||
@@ -2585,8 +2576,24 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
 				"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
 			return -EPERM;
 		}
+
+		/*count filters that really will be added*/
+		f = i40e_find_mac(vsi, addr);
+		if (!f)
+			++mac2add_cnt;
 	}
 
+	/* If this VF is not privileged, then we can't add more than a limited
+	 * number of addresses. Check to make sure that the additions do not
+	 * push us over the limit.
+	 */
+	if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+	    (i40e_count_filters(vsi) + mac2add_cnt) >
+		    I40E_VC_MAX_MAC_ADDR_PER_VF) {
+		dev_err(&pf->pdev->dev,
+			"Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+		return -EPERM;
+	}
 	return 0;
 }
 
@@ -2640,8 +2647,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 				ret = I40E_ERR_PARAM;
 				spin_unlock_bh(&vsi->mac_filter_hash_lock);
 				goto error_param;
-			} else {
-				vf->num_mac++;
 			}
 		}
 	}
@@ -2689,16 +2694,6 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 			ret = I40E_ERR_INVALID_MAC_ADDR;
 			goto error_param;
 		}
-
-		if (vf->pf_set_mac &&
-		    ether_addr_equal(al->list[i].addr,
-				     vf->default_lan_addr.addr)) {
-			dev_err(&pf->pdev->dev,
-				"MAC addr %pM has been set by PF, cannot delete it for VF %d, reset VF to change MAC addr\n",
-				vf->default_lan_addr.addr, vf->vf_id);
-			ret = I40E_ERR_PARAM;
-			goto error_param;
-		}
 	}
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
@@ -2709,8 +2704,6 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 			ret = I40E_ERR_INVALID_MAC_ADDR;
 			spin_unlock_bh(&vsi->mac_filter_hash_lock);
 			goto error_param;
-		} else {
-			vf->num_mac--;
 		}
 
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
@@ -4531,3 +4524,51 @@ out:
 	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
 	return ret;
 }
+
+/**
+ * i40e_get_vf_stats - populate some stats for the VF
+ * @netdev: the netdev of the PF
+ * @vf_id: the host OS identifier (0-127)
+ * @vf_stats: pointer to the OS memory to be initialized
+ */
+int i40e_get_vf_stats(struct net_device *netdev, int vf_id,
+		      struct ifla_vf_stats *vf_stats)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_eth_stats *stats;
+	struct i40e_vsi *vsi;
+	struct i40e_vf *vf;
+
+	/* validate the request */
+	if (i40e_validate_vf(pf, vf_id))
+		return -EINVAL;
+
+	vf = &pf->vf[vf_id];
+	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
+		dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+		return -EBUSY;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi)
+		return -EINVAL;
+
+	i40e_update_eth_stats(vsi);
+	stats = &vsi->eth_stats;
+
+	memset(vf_stats, 0, sizeof(*vf_stats));
+
+	vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
+		stats->rx_multicast;
+	vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
+		stats->tx_multicast;
+	vf_stats->rx_bytes   = stats->rx_bytes;
+	vf_stats->tx_bytes   = stats->tx_bytes;
+	vf_stats->broadcast  = stats->rx_broadcast;
+	vf_stats->multicast  = stats->rx_multicast;
+	vf_stats->rx_dropped = stats->rx_discards;
+	vf_stats->tx_dropped = stats->tx_discards;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 7164b9bb294f..631248c0981a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -101,7 +101,6 @@ struct i40e_vf {
 	bool link_up;		/* only valid if VF link is forced */
 	bool queues_enabled;	/* true if the VF queues are enabled */
 	bool spoofchk;
-	u16 num_mac;
 	u16 num_vlan;
 
 	/* ADq related variables */
@@ -139,5 +138,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable);
 
 void i40e_vc_notify_link_state(struct i40e_pf *pf);
 void i40e_vc_notify_reset(struct i40e_pf *pf);
+int i40e_get_vf_stats(struct net_device *netdev, int vf_id,
+		      struct ifla_vf_stats *vf_stats);
 
 #endif /* _I40E_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 9edde960b4f2..7cb829132d28 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -13,9 +13,12 @@ ice-y := ice_main.o	\
 	 ice_nvm.o	\
 	 ice_switch.o	\
 	 ice_sched.o	\
+	 ice_base.o	\
 	 ice_lib.o	\
+	 ice_txrx_lib.o	\
 	 ice_txrx.o	\
 	 ice_flex_pipe.o	\
 	 ice_ethtool.o
 ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
-ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
+ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 45e100666049..8d7e8fc55585 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -29,10 +29,13 @@
 #include <linux/ip.h>
 #include <linux/sctp.h>
 #include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
 #include <linux/if_bridge.h>
 #include <linux/ctype.h>
+#include <linux/bpf.h>
 #include <linux/avf/virtchnl.h>
 #include <net/ipv6.h>
+#include <net/xdp_sock.h>
 #include "ice_devids.h"
 #include "ice_type.h"
 #include "ice_txrx.h"
@@ -42,6 +45,7 @@
 #include "ice_sched.h"
 #include "ice_virtchnl_pf.h"
 #include "ice_sriov.h"
+#include "ice_xsk.h"
 
 extern const char ice_drv_ver[];
 #define ICE_BAR0		0
@@ -78,8 +82,7 @@ extern const char ice_drv_ver[];
 
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-#define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
-			(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)))
+#define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
 
 #define ICE_UP_TABLE_TRANSLATE(val, i) \
 		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
@@ -127,6 +130,14 @@ extern const char ice_drv_ver[];
 				     ICE_PROMISC_VLAN_TX  | \
 				     ICE_PROMISC_VLAN_RX)
 
+struct ice_txq_meta {
+	u32 q_teid;	/* Tx-scheduler element identifier */
+	u16 q_id;	/* Entry in VSI's txq_map bitmap */
+	u16 q_handle;	/* Relative index of Tx queue within TC */
+	u16 vsi_idx;	/* VSI index that Tx queue belongs to */
+	u8 tc;		/* TC number that Tx queue belongs to */
+};
+
 struct ice_tc_info {
 	u16 qoffset;
 	u16 qcount_tx;
@@ -169,6 +180,7 @@ enum ice_state {
 	__ICE_NEEDS_RESTART,
 	__ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
 	__ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
+	__ICE_DCBNL_DEVRESET,		/* set by dcbnl devreset */
 	__ICE_PFR_REQ,			/* set by driver and peers */
 	__ICE_CORER_REQ,		/* set by driver and peers */
 	__ICE_GLOBR_REQ,		/* set by driver and peers */
@@ -274,6 +286,13 @@ struct ice_vsi {
 	u16 num_rx_desc;
 	u16 num_tx_desc;
 	struct ice_tc_cfg tc_cfg;
+	struct bpf_prog *xdp_prog;
+	struct ice_ring **xdp_rings;	 /* XDP ring array */
+	u16 num_xdp_txq;		 /* Used XDP queues */
+	u8 xdp_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	struct xdp_umem **xsk_umems;
+	u16 num_xsk_umems_used;
+	u16 num_xsk_umems;
 } ____cacheline_internodealigned_in_smp;
 
 /* struct that defines an interrupt vector */
@@ -313,6 +332,7 @@ enum ice_pf_flags {
 	ICE_FLAG_NO_MEDIA,
 	ICE_FLAG_FW_LLDP_AGENT,
 	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
+	ICE_FLAG_LEGACY_RX,
 	ICE_PF_FLAGS_NBITS		/* must be last */
 };
 
@@ -346,6 +366,7 @@ struct ice_pf {
 	struct work_struct serv_task;
 	struct mutex avail_q_mutex;	/* protects access to avail_[rx|tx]qs */
 	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
+	struct mutex tc_mutex;		/* lock to protect TC changes */
 	u32 msg_enable;
 	u32 hw_csum_rx_error;
 	u32 oicr_idx;		/* Other interrupt cause MSIX vector index */
@@ -417,6 +438,37 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
 	return np->vsi->back;
 }
 
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+	return !!vsi->xdp_prog;
+}
+
+static inline void ice_set_ring_xdp(struct ice_ring *ring)
+{
+	ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
+/**
+ * ice_xsk_umem - get XDP UMEM bound to a ring
+ * @ring - ring to use
+ *
+ * Returns a pointer to xdp_umem structure if there is an UMEM present,
+ * NULL otherwise.
+ */
+static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
+{
+	struct xdp_umem **umems = ring->vsi->xsk_umems;
+	int qid = ring->q_index;
+
+	if (ice_ring_is_xdp(ring))
+		qid -= ring->vsi->num_xdp_txq;
+
+	if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
+		return NULL;
+
+	return umems[qid];
+}
+
 /**
  * ice_get_main_vsi - Get the PF VSI
  * @pf: PF instance
@@ -443,14 +495,15 @@ int ice_up(struct ice_vsi *vsi);
 int ice_down(struct ice_vsi *vsi);
 int ice_vsi_cfg(struct ice_vsi *vsi);
 struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+	     u32 flags);
 int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
-#ifdef CONFIG_DCB
-int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked);
-void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked);
-#endif /* CONFIG_DCB */
 int ice_open(struct net_device *netdev);
 int ice_stop(struct net_device *netdev);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 023e3d2fee5f..5421fc413f94 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -742,6 +742,10 @@ struct ice_aqc_add_elem {
 	struct ice_aqc_txsched_elem_data generic[1];
 };
 
+struct ice_aqc_conf_elem {
+	struct ice_aqc_txsched_elem_data generic[1];
+};
+
 struct ice_aqc_get_elem {
 	struct ice_aqc_txsched_elem_data generic[1];
 };
@@ -783,6 +787,44 @@ struct ice_aqc_port_ets_elem {
 	__le32 tc_node_teid[8]; /* Used for response, reserved in command */
 };
 
+/* Rate limiting profile for
+ * Add RL profile (indirect 0x0410)
+ * Query RL profile (indirect 0x0411)
+ * Remove RL profile (indirect 0x0415)
+ * These indirect commands acts on single or multiple
+ * RL profiles with specified data.
+ */
+struct ice_aqc_rl_profile {
+	__le16 num_profiles;
+	__le16 num_processed; /* Only for response. Reserved in Command. */
+	u8 reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_rl_profile_elem {
+	u8 level;
+	u8 flags;
+#define ICE_AQC_RL_PROFILE_TYPE_S	0x0
+#define ICE_AQC_RL_PROFILE_TYPE_M	(0x3 << ICE_AQC_RL_PROFILE_TYPE_S)
+#define ICE_AQC_RL_PROFILE_TYPE_CIR	0
+#define ICE_AQC_RL_PROFILE_TYPE_EIR	1
+#define ICE_AQC_RL_PROFILE_TYPE_SRL	2
+/* The following flag is used for Query RL Profile Data */
+#define ICE_AQC_RL_PROFILE_INVAL_S	0x7
+#define ICE_AQC_RL_PROFILE_INVAL_M	(0x1 << ICE_AQC_RL_PROFILE_INVAL_S)
+
+	__le16 profile_id;
+	__le16 max_burst_size;
+	__le16 rl_multiply;
+	__le16 wake_up_calc;
+	__le16 rl_encode;
+};
+
+struct ice_aqc_rl_profile_generic_elem {
+	struct ice_aqc_rl_profile_elem generic[1];
+};
+
 /* Query Scheduler Resource Allocation (indirect 0x0412)
  * This indirect command retrieves the scheduler resources allocated by
  * EMP Firmware to the given PF.
@@ -1044,6 +1086,10 @@ struct ice_aqc_get_link_status_data {
 #define ICE_AQ_LINK_TOPO_CONFLICT	BIT(0)
 #define ICE_AQ_LINK_MEDIA_CONFLICT	BIT(1)
 #define ICE_AQ_LINK_TOPO_CORRUPT	BIT(2)
+#define ICE_AQ_LINK_TOPO_UNREACH_PRT	BIT(4)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT	BIT(5)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA	BIT(6)
+#define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA	BIT(7)
 	u8 reserved1;
 	u8 link_info;
 #define ICE_AQ_LINK_UP			BIT(0)	/* Link Status */
@@ -1147,6 +1193,33 @@ struct ice_aqc_set_port_id_led {
 	u8 rsvd[13];
 };
 
+/* Read/Write SFF EEPROM command (indirect 0x06EE) */
+struct ice_aqc_sff_eeprom {
+	u8 lport_num;
+	u8 lport_num_valid;
+#define ICE_AQC_SFF_PORT_NUM_VALID	BIT(0)
+	__le16 i2c_bus_addr;
+#define ICE_AQC_SFF_I2CBUS_7BIT_M	0x7F
+#define ICE_AQC_SFF_I2CBUS_10BIT_M	0x3FF
+#define ICE_AQC_SFF_I2CBUS_TYPE_M	BIT(10)
+#define ICE_AQC_SFF_I2CBUS_TYPE_7BIT	0
+#define ICE_AQC_SFF_I2CBUS_TYPE_10BIT	ICE_AQC_SFF_I2CBUS_TYPE_M
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_S	11
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_M	(0x3 << ICE_AQC_SFF_SET_EEPROM_PAGE_S)
+#define ICE_AQC_SFF_NO_PAGE_CHANGE	0
+#define ICE_AQC_SFF_SET_23_ON_MISMATCH	1
+#define ICE_AQC_SFF_SET_22_ON_MISMATCH	2
+#define ICE_AQC_SFF_IS_WRITE		BIT(15)
+	__le16 i2c_mem_addr;
+	__le16 eeprom_page;
+#define  ICE_AQC_SFF_EEPROM_BANK_S 0
+#define  ICE_AQC_SFF_EEPROM_BANK_M (0xFF << ICE_AQC_SFF_EEPROM_BANK_S)
+#define  ICE_AQC_SFF_EEPROM_PAGE_S 8
+#define  ICE_AQC_SFF_EEPROM_PAGE_M (0xFF << ICE_AQC_SFF_EEPROM_PAGE_S)
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
 /* NVM Read command (indirect 0x0701)
  * NVM Erase commands (direct 0x0702)
  * NVM Update commands (indirect 0x0703)
@@ -1618,6 +1691,7 @@ struct ice_aq_desc {
 		struct ice_aqc_get_phy_caps get_phy;
 		struct ice_aqc_set_phy_cfg set_phy;
 		struct ice_aqc_restart_an restart_an;
+		struct ice_aqc_sff_eeprom read_write_sff_param;
 		struct ice_aqc_set_port_id_led set_port_id_led;
 		struct ice_aqc_get_sw_cfg get_sw_conf;
 		struct ice_aqc_sw_rules sw_rules;
@@ -1625,6 +1699,7 @@ struct ice_aq_desc {
 		struct ice_aqc_sched_elem_cmd sched_elem_cmd;
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_query_port_ets port_ets;
+		struct ice_aqc_rl_profile rl_profile;
 		struct ice_aqc_nvm nvm;
 		struct ice_aqc_nvm_checksum nvm_checksum;
 		struct ice_aqc_pf_vf_msg virt;
@@ -1726,12 +1801,15 @@ enum ice_adminq_opc {
 	/* transmit scheduler commands */
 	ice_aqc_opc_get_dflt_topo			= 0x0400,
 	ice_aqc_opc_add_sched_elems			= 0x0401,
+	ice_aqc_opc_cfg_sched_elems			= 0x0403,
 	ice_aqc_opc_get_sched_elems			= 0x0404,
 	ice_aqc_opc_suspend_sched_elems			= 0x0409,
 	ice_aqc_opc_resume_sched_elems			= 0x040A,
 	ice_aqc_opc_query_port_ets			= 0x040E,
 	ice_aqc_opc_delete_sched_elems			= 0x040F,
+	ice_aqc_opc_add_rl_profiles			= 0x0410,
 	ice_aqc_opc_query_sched_res			= 0x0412,
+	ice_aqc_opc_remove_rl_profiles			= 0x0415,
 
 	/* PHY commands */
 	ice_aqc_opc_get_phy_caps			= 0x0600,
@@ -1741,6 +1819,7 @@ enum ice_adminq_opc {
 	ice_aqc_opc_set_event_mask			= 0x0613,
 	ice_aqc_opc_set_mac_lb				= 0x0620,
 	ice_aqc_opc_set_port_id_led			= 0x06E9,
+	ice_aqc_opc_sff_eeprom				= 0x06EE,
 
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
new file mode 100644
index 000000000000..69d2da14fe5c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -0,0 +1,857 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_base.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+	int offset, i;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+					    0, qs_cfg->q_count, 0);
+	if (offset >= qs_cfg->pf_map_size) {
+		mutex_unlock(qs_cfg->qs_mutex);
+		return -ENOMEM;
+	}
+
+	bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+	for (i = 0; i < qs_cfg->q_count; i++)
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+	int i, index = 0;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	for (i = 0; i < qs_cfg->q_count; i++) {
+		index = find_next_zero_bit(qs_cfg->pf_map,
+					   qs_cfg->pf_map_size, index);
+		if (index >= qs_cfg->pf_map_size)
+			goto err_scatter;
+		set_bit(index, qs_cfg->pf_map);
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+err_scatter:
+	for (index = 0; index < i; index++) {
+		clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+		qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+	int i;
+
+	for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+		if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+			      QRX_CTRL_QENA_STAT_M))
+			return 0;
+
+		usleep_range(20, 40);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+
+	/* allocate q_vector */
+	q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	q_vector->vsi = vsi;
+	q_vector->v_idx = v_idx;
+	if (vsi->type == ICE_VSI_VF)
+		goto out;
+	/* only set affinity_mask if the CPU is online */
+	if (cpu_online(v_idx))
+		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+	/* This will not be called in the driver load path because the netdev
+	 * will not be created yet. All other cases with register the NAPI
+	 * handler here (i.e. resume, reset/rebuild, etc.)
+	 */
+	if (vsi->netdev)
+		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+			       NAPI_POLL_WEIGHT);
+
+out:
+	/* tie q_vector and VSI together */
+	vsi->q_vectors[v_idx] = q_vector;
+
+	return 0;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_q_vector *q_vector;
+	struct ice_pf *pf = vsi->back;
+	struct ice_ring *ring;
+
+	if (!vsi->q_vectors[v_idx]) {
+		dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
+			v_idx);
+		return;
+	}
+	q_vector = vsi->q_vectors[v_idx];
+
+	ice_for_each_ring(ring, q_vector->tx)
+		ring->q_vector = NULL;
+	ice_for_each_ring(ring, q_vector->rx)
+		ring->q_vector = NULL;
+
+	/* only VSI with an associated netdev is set up with NAPI */
+	if (vsi->netdev)
+		netif_napi_del(&q_vector->napi);
+
+	devm_kfree(&pf->pdev->dev, q_vector);
+	vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+	u32 regval = rd32(hw, GLINT_CTL);
+
+	/* no need to update global register if ITR gran is already set */
+	if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+	     GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+	     GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+	     GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+	      GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+		return;
+
+	regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+		  GLINT_CTL_ITR_GRAN_200_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+		  GLINT_CTL_ITR_GRAN_100_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+		  GLINT_CTL_ITR_GRAN_50_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+		  GLINT_CTL_ITR_GRAN_25_M);
+	wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_calc_q_handle - calculate the queue handle
+ * @vsi: VSI that ring belongs to
+ * @ring: ring to get the absolute queue index
+ * @tc: traffic class number
+ */
+static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
+{
+	WARN_ONCE(ice_ring_is_xdp(ring) && tc,
+		  "XDP ring can't belong to TC other than 0");
+
+	/* Idea here for calculation is that we subtract the number of queue
+	 * count from TC that ring belongs to from it's absolute queue index
+	 * and as a result we get the queue's index within TC.
+	 */
+	return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	struct ice_hw *hw = &vsi->back->hw;
+
+	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+	tlan_ctx->port_num = vsi->port_info->lport;
+
+	/* Transmit Queue Length */
+	tlan_ctx->qlen = ring->count;
+
+	ice_set_cgd_num(tlan_ctx, ring);
+
+	/* PF number */
+	tlan_ctx->pf_num = hw->pf_id;
+
+	/* queue belongs to a specific VSI type
+	 * VF / VM index should be programmed per vmvf_type setting:
+	 * for vmvf_type = VF, it is VF number between 0-256
+	 * for vmvf_type = VM, it is VM number between 0-767
+	 * for PF or EMP this field should be set to zero
+	 */
+	switch (vsi->type) {
+	case ICE_VSI_LB:
+		/* fall through */
+	case ICE_VSI_PF:
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		/* Firmware expects vmvf_num to be absolute VF ID */
+		tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+		break;
+	default:
+		return;
+	}
+
+	/* make sure the context is associated with the right VSI */
+	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+	tlan_ctx->tso_ena = ICE_TX_LEGACY;
+	tlan_ctx->tso_qnum = pf_q;
+
+	/* Legacy or Advanced Host Interface:
+	 * 0: Advanced Host Interface
+	 * 1: Legacy Host Interface
+	 */
+	tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+	int chain_len = ICE_MAX_CHAINED_RX_BUFS;
+	struct ice_vsi *vsi = ring->vsi;
+	u32 rxdid = ICE_RXDID_FLEX_NIC;
+	struct ice_rlan_ctx rlan_ctx;
+	struct ice_hw *hw;
+	u32 regval;
+	u16 pf_q;
+	int err;
+
+	hw = &vsi->back->hw;
+
+	/* what is Rx queue number in global space of 2K Rx queues */
+	pf_q = vsi->rxq_map[ring->q_index];
+
+	/* clear the context structure first */
+	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+	ring->rx_buf_len = vsi->rx_buf_len;
+
+	if (ring->vsi->type == ICE_VSI_PF) {
+		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+			xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->q_index);
+
+		ring->xsk_umem = ice_xsk_umem(ring);
+		if (ring->xsk_umem) {
+			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+			ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+					   XDP_PACKET_HEADROOM;
+			/* For AF_XDP ZC, we disallow packets to span on
+			 * multiple buffers, thus letting us skip that
+			 * handling in the fast-path.
+			 */
+			chain_len = 1;
+			ring->zca.free = ice_zca_free;
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_ZERO_COPY,
+							 &ring->zca);
+			if (err)
+				return err;
+
+			dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+				 ring->q_index);
+		} else {
+			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+				xdp_rxq_info_reg(&ring->xdp_rxq,
+						 ring->netdev,
+						 ring->q_index);
+
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_PAGE_SHARED,
+							 NULL);
+			if (err)
+				return err;
+		}
+	}
+	/* Receive Queue Base Address.
+	 * Indicates the starting address of the descriptor queue defined in
+	 * 128 Byte units.
+	 */
+	rlan_ctx.base = ring->dma >> 7;
+
+	rlan_ctx.qlen = ring->count;
+
+	/* Receive Packet Data Buffer Size.
+	 * The Packet Data Buffer Size is defined in 128 byte units.
+	 */
+	rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+
+	/* use 32 byte descriptors */
+	rlan_ctx.dsize = 1;
+
+	/* Strip the Ethernet CRC bytes before the packet is posted to host
+	 * memory.
+	 */
+	rlan_ctx.crcstrip = 1;
+
+	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+	rlan_ctx.l2tsel = 1;
+
+	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+	/* This controls whether VLAN is stripped from inner headers
+	 * The VLAN in the inner L2 header is stripped to the receive
+	 * descriptor if enabled by this flag.
+	 */
+	rlan_ctx.showiv = 0;
+
+	/* Max packet size for this queue - must not be set to a larger value
+	 * than 5 x DBUF
+	 */
+	rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+			       chain_len * ring->rx_buf_len);
+
+	/* Rx queue threshold in units of 64 */
+	rlan_ctx.lrxqthresh = 1;
+
+	 /* Enable Flexible Descriptors in the queue context which
+	  * allows this driver to select a specific receive descriptor format
+	  */
+	if (vsi->type != ICE_VSI_VF) {
+		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+			QRXFLXP_CNTXT_RXDID_IDX_M;
+
+		/* increasing context priority to pick up profile ID;
+		 * default is 0x01; setting to 0x03 to ensure profile
+		 * is programming if prev context is of same priority
+		 */
+		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+			QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+	}
+
+	/* Absolute queue number out of 2K needs to be passed */
+	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+	if (err) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+			pf_q, err);
+		return -EIO;
+	}
+
+	if (vsi->type == ICE_VSI_VF)
+		return 0;
+
+	/* configure Rx buffer alignment */
+	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		ice_clear_ring_build_skb_ena(ring);
+	else
+		ice_set_ring_build_skb_ena(ring);
+
+	/* init queue specific tail register */
+	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+	writel(0, ring->tail);
+
+	err = ring->xsk_umem ?
+	      ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
+	      ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+	if (err)
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+			 ring->xsk_umem ? "UMEM enabled " : "",
+			 ring->q_index, pf_q);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+	int ret = 0;
+
+	ret = __ice_vsi_get_qs_contig(qs_cfg);
+	if (ret) {
+		/* contig failed, so try with scatter approach */
+		qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+		qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
+					qs_cfg->scatter_count);
+		ret = __ice_vsi_get_qs_sc(qs_cfg);
+	}
+	return ret;
+}
+
+/**
+ * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ * @rxq_idx: Rx queue index
+ */
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+	int pf_q = vsi->rxq_map[rxq_idx];
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int ret = 0;
+	u32 rx_reg;
+
+	rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+	/* Skip if the queue is already in the requested state */
+	if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+		return 0;
+
+	/* turn on/off the queue */
+	if (ena)
+		rx_reg |= QRX_CTRL_QENA_REQ_M;
+	else
+		rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+	wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+	/* wait for the change to finish */
+	ret = ice_pf_rxq_wait(pf, pf_q, ena);
+	if (ret)
+		dev_err(&pf->pdev->dev,
+			"VSI idx %d Rx ring %d %sable timeout\n",
+			vsi->idx, pf_q, (ena ? "en" : "dis"));
+
+	return ret;
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int v_idx = 0, num_q_vectors;
+	int err;
+
+	if (vsi->q_vectors[0]) {
+		dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
+			vsi->vsi_num);
+		return -EEXIST;
+	}
+
+	num_q_vectors = vsi->num_q_vectors;
+
+	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+		err = ice_vsi_alloc_q_vector(vsi, v_idx);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	while (v_idx--)
+		ice_free_q_vector(vsi, v_idx);
+
+	dev_err(&pf->pdev->dev,
+		"Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+		vsi->num_q_vectors, vsi->vsi_num, err);
+	vsi->num_q_vectors = 0;
+	return err;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+	int q_vectors = vsi->num_q_vectors;
+	int tx_rings_rem, rx_rings_rem;
+	int v_id;
+
+	/* initially assigning remaining rings count to VSIs num queue value */
+	tx_rings_rem = vsi->num_txq;
+	rx_rings_rem = vsi->num_rxq;
+
+	for (v_id = 0; v_id < q_vectors; v_id++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+		int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+
+		/* Tx rings mapping to vector */
+		tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+		q_vector->num_ring_tx = tx_rings_per_v;
+		q_vector->tx.ring = NULL;
+		q_vector->tx.itr_idx = ICE_TX_ITR;
+		q_base = vsi->num_txq - tx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+			struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+			tx_ring->q_vector = q_vector;
+			tx_ring->next = q_vector->tx.ring;
+			q_vector->tx.ring = tx_ring;
+		}
+		tx_rings_rem -= tx_rings_per_v;
+
+		/* Rx rings mapping to vector */
+		rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+		q_vector->num_ring_rx = rx_rings_per_v;
+		q_vector->rx.ring = NULL;
+		q_vector->rx.itr_idx = ICE_RX_ITR;
+		q_base = vsi->num_rxq - rx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+			struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+			rx_ring->q_vector = q_vector;
+			rx_ring->next = q_vector->rx.ring;
+			q_vector->rx.ring = rx_ring;
+		}
+		rx_rings_rem -= rx_rings_per_v;
+	}
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @qg_buf: queue group buffer
+ */
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+		struct ice_aqc_add_tx_qgrp *qg_buf)
+{
+	struct ice_tlan_ctx tlan_ctx = { 0 };
+	struct ice_aqc_add_txqs_perq *txq;
+	struct ice_pf *pf = vsi->back;
+	u8 buf_len = sizeof(*qg_buf);
+	enum ice_status status;
+	u16 pf_q;
+	u8 tc;
+
+	pf_q = ring->reg_idx;
+	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+	/* copy context contents into the qg_buf */
+	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+	ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+		    ice_tlan_ctx_info);
+
+	/* init queue specific tail reg. It is referred as
+	 * transmit comm scheduler queue doorbell.
+	 */
+	ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+
+	if (IS_ENABLED(CONFIG_DCB))
+		tc = ring->dcb_tc;
+	else
+		tc = 0;
+
+	/* Add unique software queue handle of the Tx queue per
+	 * TC into the VSI Tx ring
+	 */
+	ring->q_handle = ice_calc_q_handle(vsi, ring, tc);
+
+	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
+				 1, qg_buf, buf_len, NULL);
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"Failed to set LAN Tx queue context, error: %d\n",
+			status);
+		return -ENODEV;
+	}
+
+	/* Add Tx Queue TEID into the VSI Tx ring from the
+	 * response. This will complete configuring and
+	 * enabling the queue.
+	 */
+	txq = &qg_buf->txqs[0];
+	if (pf_q == le16_to_cpu(txq->txq_id))
+		ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+	return 0;
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	ice_cfg_itr_gran(hw);
+
+	if (q_vector->num_ring_rx) {
+		struct ice_ring_container *rc = &q_vector->rx;
+
+		/* if this value is set then don't overwrite with default */
+		if (!rc->itr_setting)
+			rc->itr_setting = ICE_DFLT_RX_ITR;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+	}
+
+	if (q_vector->num_ring_tx) {
+		struct ice_ring_container *rc = &q_vector->tx;
+
+		/* if this value is set then don't overwrite with default */
+		if (!rc->itr_setting)
+			rc->itr_setting = ICE_DFLT_TX_ITR;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+	}
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+	val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+		wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+		     val);
+	}
+	ice_flush(hw);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+	val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+	     (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+	     GLINT_DYN_CTL_SWINT_TRIG_M |
+	     GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u32 val;
+
+	/* clear cause_ena bit for disabled queues */
+	val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+	val &= ~QINT_TQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+	/* software is expected to wait for 100 ns */
+	ndelay(100);
+
+	/* trigger a software interrupt for the vector
+	 * associated to the queue to schedule NAPI handler
+	 */
+	q_vector = ring->q_vector;
+	if (q_vector)
+		ice_trigger_sw_intr(hw, q_vector);
+
+	status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+				 txq_meta->tc, 1, &txq_meta->q_handle,
+				 &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+				 rel_vmvf_num, NULL);
+
+	/* if the disable queue command was exercised during an
+	 * active reset flow, ICE_ERR_RESET_ONGOING is returned.
+	 * This is not an error as the reset operation disables
+	 * queues at the hardware level anyway.
+	 */
+	if (status == ICE_ERR_RESET_ONGOING) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Reset in progress. LAN Tx queues already disabled\n");
+	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"LAN Tx queues do not exist, nothing to disable\n");
+	} else if (status) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to disable LAN Tx queues, error: %d\n", status);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		  struct ice_txq_meta *txq_meta)
+{
+	u8 tc;
+
+	if (IS_ENABLED(CONFIG_DCB))
+		tc = ring->dcb_tc;
+	else
+		tc = 0;
+
+	txq_meta->q_id = ring->reg_idx;
+	txq_meta->q_teid = ring->txq_teid;
+	txq_meta->q_handle = ring->q_handle;
+	txq_meta->vsi_idx = vsi->idx;
+	txq_meta->tc = tc;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
new file mode 100644
index 000000000000..407995e8e944
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_BASE_H_
+#define _ICE_BASE_H_
+
+#include "ice.h"
+
+int ice_setup_rx_ctx(struct ice_ring *ring);
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+		struct ice_aqc_add_tx_qgrp *qg_buf);
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta);
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		  struct ice_txq_meta *txq_meta);
+#endif /* _ICE_BASE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 3a6b3950eb0e..36be501ae623 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -855,6 +855,9 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 		goto err_unroll_sched;
 	}
 	INIT_LIST_HEAD(&hw->agg_list);
+	/* Initialize max burst size */
+	if (!hw->max_burst_size)
+		ice_cfg_rl_burst_size(hw, ICE_SCHED_DFLT_BURST_SIZE);
 
 	status = ice_init_fltr_mgmt_struct(hw);
 	if (status)
@@ -1067,6 +1070,72 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req)
 }
 
 /**
+ * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA
+ * @hw: pointer to hardware structure
+ * @module_tlv: pointer to module TLV to return
+ * @module_tlv_len: pointer to module TLV length to return
+ * @module_type: module type requested
+ *
+ * Finds the requested sub module TLV type from the Preserved Field
+ * Area (PFA) and returns the TLV pointer and length. The caller can
+ * use these to read the variable length TLV value.
+ */
+enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+		       u16 module_type)
+{
+	enum ice_status status;
+	u16 pfa_len, pfa_ptr;
+	u16 next_tlv;
+
+	status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n");
+		return status;
+	}
+	status = ice_read_sr_word(hw, pfa_ptr, &pfa_len);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n");
+		return status;
+	}
+	/* Starting with first TLV after PFA length, iterate through the list
+	 * of TLVs to find the requested one.
+	 */
+	next_tlv = pfa_ptr + 1;
+	while (next_tlv < pfa_ptr + pfa_len) {
+		u16 tlv_sub_module_type;
+		u16 tlv_len;
+
+		/* Read TLV type */
+		status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n");
+			break;
+		}
+		/* Read TLV length */
+		status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n");
+			break;
+		}
+		if (tlv_sub_module_type == module_type) {
+			if (tlv_len) {
+				*module_tlv = next_tlv;
+				*module_tlv_len = tlv_len;
+				return 0;
+			}
+			return ICE_ERR_INVAL_SIZE;
+		}
+		/* Check next TLV, i.e. current TLV pointer + length + 2 words
+		 * (for current TLV's type and length)
+		 */
+		next_tlv = next_tlv + tlv_len + 2;
+	}
+	/* Module does not exist */
+	return ICE_ERR_DOES_NOT_EXIST;
+}
+
+/**
  * ice_copy_rxq_ctx_to_hw
  * @hw: pointer to the hardware structure
  * @ice_rxq_ctx: pointer to the rxq context
@@ -1182,56 +1251,6 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
 	{ 0 }
 };
 
-/**
- * ice_debug_cq
- * @hw: pointer to the hardware structure
- * @mask: debug mask
- * @desc: pointer to control queue descriptor
- * @buf: pointer to command buffer
- * @buf_len: max length of buf
- *
- * Dumps debug log about control command with descriptor contents.
- */
-void
-ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, void *buf,
-	     u16 buf_len)
-{
-	struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
-	u16 len;
-
-#ifndef CONFIG_DYNAMIC_DEBUG
-	if (!(mask & hw->debug_mask))
-		return;
-#endif
-
-	if (!desc)
-		return;
-
-	len = le16_to_cpu(cq_desc->datalen);
-
-	ice_debug(hw, mask,
-		  "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
-		  le16_to_cpu(cq_desc->opcode),
-		  le16_to_cpu(cq_desc->flags),
-		  le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
-	ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
-		  le32_to_cpu(cq_desc->cookie_high),
-		  le32_to_cpu(cq_desc->cookie_low));
-	ice_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
-		  le32_to_cpu(cq_desc->params.generic.param0),
-		  le32_to_cpu(cq_desc->params.generic.param1));
-	ice_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
-		  le32_to_cpu(cq_desc->params.generic.addr_high),
-		  le32_to_cpu(cq_desc->params.generic.addr_low));
-	if (buf && cq_desc->datalen != 0) {
-		ice_debug(hw, mask, "Buffer:\n");
-		if (buf_len < len)
-			len = buf_len;
-
-		ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len);
-	}
-}
-
 /* FW Admin Queue command wrappers */
 
 /* Software lock/mutex that is meant to be held while the Global Config Lock
@@ -2556,6 +2575,52 @@ ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
 }
 
 /**
+ * ice_aq_sff_eeprom
+ * @hw: pointer to the HW struct
+ * @lport: bits [7:0] = logical port, bit [8] = logical port valid
+ * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default)
+ * @mem_addr: I2C offset. lower 8 bits for address, 8 upper bits zero padding.
+ * @page: QSFP page
+ * @set_page: set or ignore the page
+ * @data: pointer to data buffer to be read/written to the I2C device.
+ * @length: 1-16 for read, 1 for write.
+ * @write: 0 read, 1 for write.
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read/Write SFF EEPROM (0x06EE)
+ */
+enum ice_status
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+		  bool write, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_sff_eeprom *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (!data || (mem_addr & 0xff00))
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom);
+	cmd = &desc.params.read_write_sff_param;
+	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF);
+	cmd->lport_num = (u8)(lport & 0xff);
+	cmd->lport_num_valid = (u8)((lport >> 8) & 0x01);
+	cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) &
+					 ICE_AQC_SFF_I2CBUS_7BIT_M) |
+					((set_page <<
+					  ICE_AQC_SFF_SET_EEPROM_PAGE_S) &
+					 ICE_AQC_SFF_SET_EEPROM_PAGE_M));
+	cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff);
+	cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S);
+	if (write)
+		cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE);
+
+	status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+	return status;
+}
+
+/**
  * __ice_aq_get_set_rss_lut
  * @hw: pointer to the hardware structure
  * @vsi_id: VSI FW index
@@ -3148,7 +3213,7 @@ ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
  * @tc: TC number
  * @q_handle: software queue handle
  */
-static struct ice_q_ctx *
+struct ice_q_ctx *
 ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle)
 {
 	struct ice_vsi_ctx *vsi;
@@ -3245,9 +3310,12 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
 	node.node_teid = buf->txqs[0].q_teid;
 	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
 	q_ctx->q_handle = q_handle;
+	q_ctx->q_teid = le32_to_cpu(node.node_teid);
 
-	/* add a leaf node into schduler tree queue layer */
+	/* add a leaf node into scheduler tree queue layer */
 	status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
+	if (!status)
+		status = ice_sched_replay_q_bw(pi, q_ctx);
 
 ena_txq_exit:
 	mutex_unlock(&pi->sched_lock);
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index c3df92f57777..b22aa561e253 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -6,16 +6,18 @@
 
 #include "ice.h"
 #include "ice_type.h"
+#include "ice_nvm.h"
 #include "ice_flex_pipe.h"
 #include "ice_switch.h"
 #include <linux/avf/virtchnl.h>
 
 enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw);
 
-void
-ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len);
 enum ice_status ice_init_hw(struct ice_hw *hw);
 void ice_deinit_hw(struct ice_hw *hw);
+enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+		       u16 module_type);
 enum ice_status ice_check_reset(struct ice_hw *hw);
 enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
 enum ice_status ice_create_all_ctrlq(struct ice_hw *hw);
@@ -117,6 +119,10 @@ ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
 enum ice_status
 ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
 		       struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+		  bool write, struct ice_sq_cd *cd);
 
 enum ice_status
 ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
@@ -133,6 +139,8 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
 enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
 void ice_replay_post(struct ice_hw *hw);
 void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
 void
 ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
 		  u64 *prev_stat, u64 *cur_stat);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 2353166c654e..dd946866d7b8 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -810,6 +810,52 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
 }
 
 /**
+ * ice_debug_cq
+ * @hw: pointer to the hardware structure
+ * @desc: pointer to control queue descriptor
+ * @buf: pointer to command buffer
+ * @buf_len: max length of buf
+ *
+ * Dumps debug log about control command with descriptor contents.
+ */
+static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
+{
+	struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
+	u16 len;
+
+	if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) &&
+	    !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask))
+		return;
+
+	if (!desc)
+		return;
+
+	len = le16_to_cpu(cq_desc->datalen);
+
+	ice_debug(hw, ICE_DBG_AQ_DESC,
+		  "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+		  le16_to_cpu(cq_desc->opcode),
+		  le16_to_cpu(cq_desc->flags),
+		  le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->cookie_high),
+		  le32_to_cpu(cq_desc->cookie_low));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1)  0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.param0),
+		  le32_to_cpu(cq_desc->params.generic.param1));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l)   0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.addr_high),
+		  le32_to_cpu(cq_desc->params.generic.addr_low));
+	if (buf && cq_desc->datalen != 0) {
+		ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n");
+		if (buf_len < len)
+			len = buf_len;
+
+		ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len);
+	}
+}
+
+/**
  * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
  * @hw: pointer to the HW struct
  * @cq: pointer to the specific Control queue
@@ -934,10 +980,10 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	}
 
 	/* Debug desc and buffer */
-	ice_debug(hw, ICE_DBG_AQ_MSG,
+	ice_debug(hw, ICE_DBG_AQ_DESC,
 		  "ATQ: Control Send queue desc and buffer:\n");
 
-	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size);
+	ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size);
 
 	(cq->sq.next_to_use)++;
 	if (cq->sq.next_to_use == cq->sq.count)
@@ -948,7 +994,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		if (ice_sq_done(hw, cq))
 			break;
 
-		mdelay(1);
+		udelay(ICE_CTL_Q_SQ_CMD_USEC);
 		total_delay++;
 	} while (total_delay < cq->sq_cmd_timeout);
 
@@ -971,7 +1017,8 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		retval = le16_to_cpu(desc->retval);
 		if (retval) {
 			ice_debug(hw, ICE_DBG_AQ_MSG,
-				  "Control Send Queue command completed with error 0x%x\n",
+				  "Control Send Queue command 0x%04X completed with error 0x%X\n",
+				  le16_to_cpu(desc->opcode),
 				  retval);
 
 			/* strip off FW internal code */
@@ -986,7 +1033,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	ice_debug(hw, ICE_DBG_AQ_MSG,
 		  "ATQ: desc and buffer writeback:\n");
 
-	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size);
+	ice_debug_cq(hw, (void *)desc, buf, buf_size);
 
 	/* save writeback AQ if requested */
 	if (details->wb_desc)
@@ -1075,7 +1122,8 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	if (flags & ICE_AQ_FLAG_ERR) {
 		ret_code = ICE_ERR_AQ_ERROR;
 		ice_debug(hw, ICE_DBG_AQ_MSG,
-			  "Control Receive Queue Event received with error 0x%x\n",
+			  "Control Receive Queue Event 0x%04X received with error 0x%X\n",
+			  le16_to_cpu(desc->opcode),
 			  cq->rq_last_status);
 	}
 	memcpy(&e->desc, desc, sizeof(e->desc));
@@ -1084,10 +1132,9 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	if (e->msg_buf && e->msg_len)
 		memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
 
-	ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n");
+	ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n");
 
-	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf,
-		     cq->rq_buf_size);
+	ice_debug_cq(hw, (void *)desc, e->msg_buf, cq->rq_buf_size);
 
 	/* Restore the original datalen and buffer address in the desc,
 	 * FW updates datalen to indicate the event message size
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index 44945c2165d8..4df9da359135 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -31,8 +31,9 @@ enum ice_ctl_q {
 	ICE_CTL_Q_MAILBOX,
 };
 
-/* Control Queue default settings */
-#define ICE_CTL_Q_SQ_CMD_TIMEOUT	250  /* msecs */
+/* Control Queue timeout settings - max delay 250ms */
+#define ICE_CTL_Q_SQ_CMD_TIMEOUT	2500  /* Count 2500 times */
+#define ICE_CTL_Q_SQ_CMD_USEC		100   /* Check every 100usec */
 
 struct ice_ctl_q_ring {
 	void *dma_head;			/* Virtual address to DMA head */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index dd7efff121bd..713e8a892e14 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -965,9 +965,9 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
 	    pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
 		/* Get current DCBX configuration */
 		ret = ice_get_dcb_cfg(pi);
-		pi->is_sw_lldp = (hw->adminq.sq_last_status == ICE_AQ_RC_EPERM);
 		if (ret)
 			return ret;
+		pi->is_sw_lldp = false;
 	} else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) {
 		return ICE_ERR_NOT_READY;
 	}
@@ -975,8 +975,8 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
 	/* Configure the LLDP MIB change event */
 	if (enable_mib_change) {
 		ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
-		if (!ret)
-			pi->is_sw_lldp = false;
+		if (ret)
+			pi->is_sw_lldp = true;
 	}
 
 	return ret;
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index dd47869c4ad4..1150dbd98d0b 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -2,6 +2,9 @@
 /* Copyright (c) 2019, Intel Corporation. */
 
 #include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+
+static void ice_pf_dcb_recfg(struct ice_pf *pf);
 
 /**
  * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
@@ -100,6 +103,16 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
 }
 
 /**
+ * ice_dcb_get_tc - Get the TC associated with the queue
+ * @vsi: ptr to the VSI
+ * @queue_index: queue number associated with VSI
+ */
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index)
+{
+	return vsi->tx_rings[queue_index]->dcb_tc;
+}
+
+/**
  * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
  * @vsi: VSI owner of rings being updated
  */
@@ -138,56 +151,24 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
 }
 
 /**
- * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
- * @pf: pointer to the PF struct
- *
- * Assumed caller has already disabled all VSIs before
- * calling this function. Reconfiguring DCB based on
- * local_dcbx_cfg.
- */
-static void ice_pf_dcb_recfg(struct ice_pf *pf)
-{
-	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
-	u8 tc_map = 0;
-	int v, ret;
-
-	/* Update each VSI */
-	ice_for_each_vsi(pf, v) {
-		if (!pf->vsi[v])
-			continue;
-
-		if (pf->vsi[v]->type == ICE_VSI_PF)
-			tc_map = ice_dcb_get_ena_tc(dcbcfg);
-		else
-			tc_map = ICE_DFLT_TRAFFIC_CLASS;
-
-		ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map);
-		if (ret) {
-			dev_err(&pf->pdev->dev,
-				"Failed to config TC for VSI index: %d\n",
-				pf->vsi[v]->idx);
-			continue;
-		}
-
-		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
-	}
-}
-
-/**
  * ice_pf_dcb_cfg - Apply new DCB configuration
  * @pf: pointer to the PF struct
  * @new_cfg: DCBX config to apply
  * @locked: is the RTNL held
  */
-static
 int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 {
-	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
 	struct ice_aqc_port_ets_elem buf = { 0 };
-	int ret = 0;
+	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
+	int ret = ICE_DCB_NO_HW_CHG;
+	struct ice_vsi *pf_vsi;
 
 	curr_cfg = &pf->hw.port_info->local_dcbx_cfg;
 
+	/* FW does not care if change happened */
+	if (!pf->hw.port_info->is_sw_lldp)
+		ret = ICE_DCB_HW_CHG_RST;
+
 	/* Enable DCB tagging only when more than one TC */
 	if (ice_dcb_get_num_tc(new_cfg) > 1) {
 		dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
@@ -203,18 +184,28 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 	}
 
 	/* Store old config in case FW config fails */
-	old_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*old_cfg), GFP_KERNEL);
-	memcpy(old_cfg, curr_cfg, sizeof(*old_cfg));
+	old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL);
+	if (!old_cfg)
+		return -ENOMEM;
+
+	dev_info(&pf->pdev->dev, "Commit DCB Configuration to the hardware\n");
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi) {
+		dev_dbg(&pf->pdev->dev, "PF VSI doesn't exist\n");
+		ret = -EINVAL;
+		goto free_cfg;
+	}
 
 	/* avoid race conditions by holding the lock while disabling and
 	 * re-enabling the VSI
 	 */
 	if (!locked)
 		rtnl_lock();
-	ice_pf_dis_all_vsi(pf, true);
+	ice_dis_vsi(pf_vsi, true);
 
 	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
 	memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+	memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
 
 	/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
 	 * the new config came from the HW in the first place.
@@ -238,10 +229,11 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 	ice_pf_dcb_recfg(pf);
 
 out:
-	ice_pf_ena_all_vsi(pf, true);
+	ice_ena_vsi(pf_vsi, true);
 	if (!locked)
 		rtnl_unlock();
-	devm_kfree(&pf->pdev->dev, old_cfg);
+free_cfg:
+	kfree(old_cfg);
 	return ret;
 }
 
@@ -437,9 +429,10 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
 /**
  * ice_dcb_sw_default_config - Apply a default DCB config
  * @pf: PF to apply config to
+ * @ets_willing: configure ets willing
  * @locked: was this function called with RTNL held
  */
-static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
+static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
 {
 	struct ice_aqc_port_ets_elem buf = { 0 };
 	struct ice_dcbx_cfg *dcbcfg;
@@ -454,7 +447,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
 	memset(dcbcfg, 0, sizeof(*dcbcfg));
 	memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg));
 
-	dcbcfg->etscfg.willing = 1;
+	dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
 	dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
 	dcbcfg->etscfg.tcbwtable[0] = 100;
 	dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
@@ -480,6 +473,104 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
 }
 
 /**
+ * ice_dcb_tc_contig - Check that TCs are contiguous
+ * @prio_table: pointer to priority table
+ *
+ * Check if TCs begin with TC0 and are contiguous
+ */
+static bool ice_dcb_tc_contig(u8 *prio_table)
+{
+	u8 max_tc = 0;
+	int i;
+
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
+		u8 cur_tc = prio_table[i];
+
+		if (cur_tc > max_tc)
+			return false;
+		else if (cur_tc == max_tc)
+			max_tc++;
+	}
+
+	return true;
+}
+
+/**
+ * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs
+ * @pf: pointer to the PF struct
+ *
+ * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing
+ */
+static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+	int ret;
+
+	/* Configure SW DCB default with ETS non-willing */
+	ret = ice_dcb_sw_dflt_cfg(pf, false, true);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"Failed to set local DCB config %d\n", ret);
+		return ret;
+	}
+
+	/* Reconfigure with ETS willing so that FW will send LLDP MIB event */
+	dcbcfg->etscfg.willing = 1;
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret)
+		dev_err(&pf->pdev->dev, "Failed to set DCB to unwilling\n");
+
+	return ret;
+}
+
+/**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
+ *
+ * Assumed caller has already disabled all VSIs before
+ * calling this function. Reconfiguring DCB based on
+ * local_dcbx_cfg.
+ */
+static void ice_pf_dcb_recfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+	u8 tc_map = 0;
+	int v, ret;
+
+	/* Update each VSI */
+	ice_for_each_vsi(pf, v) {
+		if (!pf->vsi[v])
+			continue;
+
+		if (pf->vsi[v]->type == ICE_VSI_PF) {
+			tc_map = ice_dcb_get_ena_tc(dcbcfg);
+
+			/* If DCBX request non-contiguous TC, then configure
+			 * default TC
+			 */
+			if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) {
+				tc_map = ICE_DFLT_TRAFFIC_CLASS;
+				ice_dcb_noncontig_cfg(pf);
+			}
+		} else {
+			tc_map = ICE_DFLT_TRAFFIC_CLASS;
+		}
+
+		ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map);
+		if (ret) {
+			dev_err(&pf->pdev->dev,
+				"Failed to config TC for VSI index: %d\n",
+				pf->vsi[v]->idx);
+			continue;
+		}
+
+		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+		if (pf->vsi[v]->type == ICE_VSI_PF)
+			ice_dcbnl_set_all(pf->vsi[v]);
+	}
+}
+
+/**
  * ice_init_pf_dcb - initialize DCB for a PF
  * @pf: PF to initialize DCB for
  * @locked: Was function called with RTNL held
@@ -503,11 +594,13 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
 		 "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
 		 pf->hw.func_caps.common_cap.maxtc);
 	if (err) {
+		struct ice_vsi *pf_vsi;
+
 		/* FW LLDP is disabled, activate SW DCBX/LLDP mode */
 		dev_info(&pf->pdev->dev,
 			 "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
 		clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
-		err = ice_dcb_sw_dflt_cfg(pf, locked);
+		err = ice_dcb_sw_dflt_cfg(pf, true, locked);
 		if (err) {
 			dev_err(&pf->pdev->dev,
 				"Failed to set local DCB config %d\n", err);
@@ -515,6 +608,19 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
 			goto dcb_init_err;
 		}
 
+		/* If the FW DCBX engine is not running then Rx LLDP packets
+		 * need to be redirected up the stack.
+		 */
+		pf_vsi = ice_get_main_vsi(pf);
+		if (!pf_vsi) {
+			dev_err(&pf->pdev->dev,
+				"Failed to set local DCB config\n");
+			err = -EIO;
+			goto dcb_init_err;
+		}
+
+		ice_cfg_sw_lldp(pf_vsi, false, true);
+
 		pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
 		return 0;
 	}
@@ -627,6 +733,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 	struct ice_dcbx_cfg tmp_dcbx_cfg;
 	bool need_reconfig = false;
 	struct ice_port_info *pi;
+	struct ice_vsi *pf_vsi;
 	u8 type;
 	int ret;
 
@@ -686,6 +793,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 
 	need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
 					   &pi->local_dcbx_cfg);
+	ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg);
 	if (!need_reconfig)
 		return;
 
@@ -698,8 +806,14 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
 	}
 
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi) {
+		dev_dbg(&pf->pdev->dev, "PF VSI doesn't exist\n");
+		return;
+	}
+
 	rtnl_lock();
-	ice_pf_dis_all_vsi(pf, true);
+	ice_dis_vsi(pf_vsi, true);
 
 	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
 	if (ret) {
@@ -711,6 +825,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 	/* changes in configuration update VSI */
 	ice_pf_dcb_recfg(pf);
 
-	ice_pf_ena_all_vsi(pf, true);
+	ice_ena_vsi(pf_vsi, true);
 	rtnl_unlock();
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 661a6f7bca64..e90e25b7da77 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -5,14 +5,21 @@
 #define _ICE_DCB_LIB_H_
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 
 #ifdef CONFIG_DCB
-#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */
+#define ICE_TC_MAX_BW		100 /* Default Max BW percentage */
+#define ICE_DCB_HW_CHG_RST	0 /* DCB configuration changed with reset */
+#define ICE_DCB_NO_HW_CHG	1 /* DCB configuration did not change */
+#define ICE_DCB_HW_CHG		2 /* DCB configuration changed, no reset */
 
 void ice_dcb_rebuild(struct ice_pf *pf);
 u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
 u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
+int
+ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
 void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
 int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
 void ice_update_dcb_stats(struct ice_pf *pf);
@@ -41,6 +48,13 @@ static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
 	return 1;
 }
 
+static inline u8
+ice_dcb_get_tc(struct ice_vsi __always_unused *vsi,
+	       int __always_unused queue_index)
+{
+	return 0;
+}
+
 static inline int
 ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
 {
@@ -49,6 +63,14 @@ ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
 }
 
 static inline int
+ice_pf_dcb_cfg(struct ice_pf __always_unused *pf,
+	       struct ice_dcbx_cfg __always_unused *new_cfg,
+	       bool __always_unused locked)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
 ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring,
 			      struct ice_tx_buf __always_unused *first)
 {
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
new file mode 100644
index 000000000000..3c90fc0a3feb
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -0,0 +1,933 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_dcb.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include <net/dcbnl.h>
+
+#define ICE_APP_PROT_ID_ROCE	0x8915
+
+/**
+ * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info
+ * @netdev: device associated with interface that needs reset
+ */
+static void ice_dcbnl_devreset(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	while (ice_is_reset_in_progress(pf->state))
+		usleep_range(1000, 2000);
+
+	set_bit(__ICE_DCBNL_DEVRESET, pf->state);
+	dev_close(netdev);
+	netdev_state_change(netdev);
+	dev_open(netdev, NULL);
+	netdev_state_change(netdev);
+	clear_bit(__ICE_DCBNL_DEVRESET, pf->state);
+}
+
+/**
+ * ice_dcbnl_getets - retrieve local ETS configuration
+ * @netdev: the relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets)
+{
+	struct ice_dcbx_cfg *dcbxcfg;
+	struct ice_port_info *pi;
+	struct ice_pf *pf;
+
+	pf = ice_netdev_to_pf(netdev);
+	pi = pf->hw.port_info;
+	dcbxcfg = &pi->local_dcbx_cfg;
+
+	ets->willing = dcbxcfg->etscfg.willing;
+	ets->ets_cap = dcbxcfg->etscfg.maxtcs;
+	ets->cbs = dcbxcfg->etscfg.cbs;
+	memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw));
+	memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_rx_bw));
+	memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable, sizeof(ets->tc_tsa));
+	memcpy(ets->prio_tc, dcbxcfg->etscfg.prio_table, sizeof(ets->prio_tc));
+	memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable,
+	       sizeof(ets->tc_reco_bw));
+	memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable,
+	       sizeof(ets->tc_reco_tsa));
+	memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prio_table,
+	       sizeof(ets->reco_prio_tc));
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_setets - set IEEE ETS configuration
+ * @netdev: pointer to relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int bwcfg = 0, bwrec = 0;
+	int err, i, max_tc = 0;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg->etscfg.willing = ets->willing;
+	new_cfg->etscfg.cbs = ets->cbs;
+	ice_for_each_traffic_class(i) {
+		new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i];
+		bwcfg += ets->tc_tx_bw[i];
+		new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i];
+		new_cfg->etscfg.prio_table[i] = ets->prio_tc[i];
+		if (ets->prio_tc[i] > max_tc)
+			max_tc = ets->prio_tc[i];
+		new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i];
+		bwrec += ets->tc_reco_bw[i];
+		new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i];
+		new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i];
+	}
+
+	/* max_tc is a 1-8 value count of number of TC's, not a 0-7 value
+	 * for the TC's index number.  Add one to value if not zero, and
+	 * for zero set it to the FW's default value
+	 */
+	if (max_tc)
+		max_tc++;
+	else
+		max_tc = IEEE_8021QAZ_MAX_TCS;
+
+	new_cfg->etscfg.maxtcs = max_tc;
+
+	if (!bwcfg)
+		new_cfg->etscfg.tcbwtable[0] = 100;
+
+	if (!bwrec)
+		new_cfg->etsrec.tcbwtable[0] = 100;
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (err == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (err == ICE_DCB_NO_HW_CHG)
+		err = ICE_DCB_HW_CHG_RST;
+
+	mutex_unlock(&pf->tc_mutex);
+	return err;
+}
+
+/**
+ * ice_dcbnl_getnumtcs - Get max number of traffic classes supported
+ * @dev: pointer to netdev struct
+ * @tcid: TC ID
+ * @num: total number of TCs supported by the adapter
+ *
+ * Return the total number of TCs supported
+ */
+static int
+ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return -EINVAL;
+
+	*num = IEEE_8021QAZ_MAX_TCS;
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getdcbx - retrieve current DCBX capability
+ * @netdev: pointer to the netdev struct
+ */
+static u8 ice_dcbnl_getdcbx(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	return pf->dcbx_cap;
+}
+
+/**
+ * ice_dcbnl_setdcbx - set required DCBX capability
+ * @netdev: the corresponding netdev
+ * @mode: required mode
+ */
+static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	/* No support for LLD_MANAGED modes or CEE+IEEE */
+	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
+	    !(mode & DCB_CAP_DCBX_HOST))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* Already set to the given mode no change */
+	if (mode == pf->dcbx_cap)
+		return ICE_DCB_NO_HW_CHG;
+
+	pf->dcbx_cap = mode;
+	if (mode & DCB_CAP_DCBX_VER_CEE)
+		pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
+	else
+		pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
+
+	dev_info(&pf->pdev->dev, "DCBx mode = 0x%x\n", mode);
+	return ICE_DCB_HW_CHG_RST;
+}
+
+/**
+ * ice_dcbnl_get_perm_hw_addr - MAC address used by DCBX
+ * @netdev: pointer to netdev struct
+ * @perm_addr: buffer to return permanent MAC address
+ */
+static void ice_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+	int i, j;
+
+	memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+	for (i = 0; i < netdev->addr_len; i++)
+		perm_addr[i] = pi->mac.perm_addr[i];
+
+	for (j = 0; j < netdev->addr_len; j++, i++)
+		perm_addr[i] = pi->mac.perm_addr[j];
+}
+
+/**
+ * ice_get_pfc_delay - Retrieve PFC Link Delay
+ * @hw: pointer to HW struct
+ * @delay: holds the PFC Link Delay value
+ */
+static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay)
+{
+	u32 val;
+
+	val = rd32(hw, PRTDCB_GENC);
+	*delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S);
+}
+
+/**
+ * ice_dcbnl_getpfc - retrieve local IEEE PFC config
+ * @netdev: pointer to netdev struct
+ * @pfc: struct to hold PFC info
+ */
+static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+	struct ice_dcbx_cfg *dcbxcfg;
+	int i;
+
+	dcbxcfg = &pi->local_dcbx_cfg;
+	pfc->pfc_cap = dcbxcfg->pfc.pfccap;
+	pfc->pfc_en = dcbxcfg->pfc.pfcena;
+	pfc->mbc = dcbxcfg->pfc.mbc;
+	ice_get_pfc_delay(&pf->hw, &pfc->delay);
+
+	ice_for_each_traffic_class(i) {
+		pfc->requests[i] = pf->stats.priority_xoff_tx[i];
+		pfc->indications[i] = pf->stats.priority_xoff_rx[i];
+	}
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_setpfc - set local IEEE PFC config
+ * @netdev: pointer to relevant netdev
+ * @pfc: pointer to struct holding PFC config
+ */
+static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int err;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	if (pfc->pfc_cap)
+		new_cfg->pfc.pfccap = pfc->pfc_cap;
+	else
+		new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+
+	new_cfg->pfc.pfcena = pfc->pfc_en;
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+	if (err == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (err == ICE_DCB_NO_HW_CHG)
+		err = ICE_DCB_HW_CHG_RST;
+	mutex_unlock(&pf->tc_mutex);
+	return err;
+}
+
+/**
+ * ice_dcbnl_get_pfc_cfg - Get CEE PFC config
+ * @netdev: pointer to netdev struct
+ * @prio: corresponding user priority
+ * @setting: the PFC setting for given priority
+ */
+static void
+ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
+	dev_dbg(&pf->pdev->dev,
+		"Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
+		prio, *setting, pi->local_dcbx_cfg.pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_set_pfc_cfg - Set CEE PFC config
+ * @netdev: the corresponding netdev
+ * @prio: User Priority
+ * @set: PFC setting to apply
+ */
+static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+	if (set)
+		new_cfg->pfc.pfcena |= BIT(prio);
+	else
+		new_cfg->pfc.pfcena &= ~BIT(prio);
+
+	dev_dbg(&pf->pdev->dev, "Set PFC config UP:%d set:%d pfcena:0x%x\n",
+		prio, set, new_cfg->pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_getpfcstate - get CEE PFC mode
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	/* Return enabled if any UP enabled for PFC */
+	if (pi->local_dcbx_cfg.pfc.pfcena)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getstate - get DCB enabled state
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getstate(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	u8 state = 0;
+
+	state = test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+
+	dev_dbg(&pf->pdev->dev, "DCB enabled state = %d\n", state);
+	return state;
+}
+
+/**
+ * ice_dcbnl_setstate - Set CEE DCB state
+ * @netdev: pointer to relevant netdev
+ * @state: state value to set
+ */
+static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* Nothing to do */
+	if (!!state == test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return ICE_DCB_NO_HW_CHG;
+
+	if (state) {
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+		memcpy(&pf->hw.port_info->desired_dcbx_cfg,
+		       &pf->hw.port_info->local_dcbx_cfg,
+		       sizeof(struct ice_dcbx_cfg));
+	} else {
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	return ICE_DCB_HW_CHG;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_tx - get CEE PG Tx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: traffic priority type
+ * @pgid: the BW group ID the traffic class belongs to
+ * @bw_pct: BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
+			   u8 __always_unused *prio_type, u8 *pgid,
+			   u8 __always_unused *bw_pct,
+			   u8 __always_unused *up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+	dev_dbg(&pf->pdev->dev,
+		"Get PG config prio=%d tc=%d\n", prio, *pgid);
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_tx - set CEE PG Tx config
+ * @netdev: pointer to relevant netdev
+ * @tc: the corresponding traffic class
+ * @prio_type: the traffic priority type
+ * @bwg_id: the BW group ID the TC belongs to
+ * @bw_pct: the BW perventage for the BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+			   u8 __always_unused prio_type,
+			   u8 __always_unused bwg_id,
+			   u8 __always_unused bw_pct, u8 up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int i;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (tc >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	/* prio_type, bwg_id and bw_pct per UP are not supported */
+
+	ice_for_each_traffic_class(i) {
+		if (up_map & BIT(i))
+			new_cfg->etscfg.prio_table[i] = tc;
+	}
+	new_cfg->etscfg.tsatable[tc] = ICE_IEEE_TSA_ETS;
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_tx - Get CEE PGBW config
+ * @netdev: pointer to the netdev struct
+ * @pgid: corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	*bw_pct = pi->local_dcbx_cfg.etscfg.tcbwtable[pgid];
+	dev_dbg(&pf->pdev->dev, "Get PG BW config tc=%d bw_pct=%d\n",
+		pgid, *bw_pct);
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_tx - set CEE PG Tx BW config
+ * @netdev: the corresponding netdev
+ * @pgid: Correspongind traffic class
+ * @bw_pct: the BW percentage for the specified TC
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	new_cfg->etscfg.tcbwtable[pgid] = bw_pct;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
+			   u8 __always_unused *prio_type, u8 *pgid,
+			   u8 __always_unused *bw_pct,
+			   u8 __always_unused *up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config
+ * @netdev: pointer to netdev struct
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+			    u8 *bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	*bw_pct = 0;
+}
+
+/**
+ * ice_dcbnl_get_cap - Get DCBX capabilities of adapter
+ * @netdev: pointer to netdev struct
+ * @capid: the capability type
+ * @cap: the capability value
+ */
+static u8 ice_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+		return ICE_DCB_NO_HW_CHG;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_UP2TC:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_GSP:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_BCN:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = pf->dcbx_cap;
+		break;
+	default:
+		*cap = false;
+		break;
+	}
+
+	dev_dbg(&pf->pdev->dev, "DCBX Get Capability cap=%d capval=0x%x\n",
+		capid, *cap);
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getapp - get CEE APP
+ * @netdev: pointer to netdev struct
+ * @idtype: the App selector
+ * @id: the App ethtype or port number
+ */
+static int ice_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct dcb_app app = {
+				.selector = idtype,
+				.protocol = id,
+			     };
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return -EINVAL;
+
+	return dcb_getapp(netdev, &app);
+}
+
+/**
+ * ice_dcbnl_find_app - Search for APP in given DCB config
+ * @cfg: struct to hold DCBX config
+ * @app: struct to hold app data to look for
+ */
+static bool
+ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg,
+		   struct ice_dcb_app_priority_table *app)
+{
+	int i;
+
+	for (i = 0; i < cfg->numapps; i++) {
+		if (app->selector == cfg->app[i].selector &&
+		    app->prot_id == cfg->app[i].prot_id &&
+		    app->priority == cfg->app[i].priority)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_dcbnl_setapp - set local IEEE App config
+ * @netdev: relevant netdev struct
+ * @app: struct to hold app config info
+ */
+static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcb_app_priority_table new_app;
+	struct ice_dcbx_cfg *old_cfg, *new_cfg;
+	int ret;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+
+	if (old_cfg->numapps == ICE_DCBX_MAX_APPS) {
+		ret = -EINVAL;
+		goto setapp_out;
+	}
+
+	ret = dcb_ieee_setapp(netdev, app);
+	if (ret)
+		goto setapp_out;
+
+	new_app.selector = app->selector;
+	new_app.prot_id = app->protocol;
+	new_app.priority = app->priority;
+	if (ice_dcbnl_find_app(old_cfg, &new_app)) {
+		ret = 0;
+		goto setapp_out;
+	}
+
+	new_cfg->app[new_cfg->numapps++] = new_app;
+	ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (ret == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (ret == ICE_DCB_NO_HW_CHG)
+		ret = ICE_DCB_HW_CHG_RST;
+
+setapp_out:
+	mutex_unlock(&pf->tc_mutex);
+	return ret;
+}
+
+/**
+ * ice_dcbnl_delapp - Delete local IEEE App config
+ * @netdev: relevant netdev
+ * @app: struct to hold app too delete
+ *
+ * Will not delete first application required by the FW
+ */
+static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *old_cfg, *new_cfg;
+	int i, j, ret = 0;
+
+	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+	ret = dcb_ieee_delapp(netdev, app);
+	if (ret)
+		goto delapp_out;
+
+	old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+
+	if (old_cfg->numapps == 1)
+		goto delapp_out;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	for (i = 1; i < new_cfg->numapps; i++) {
+		if (app->selector == new_cfg->app[i].selector &&
+		    app->protocol == new_cfg->app[i].prot_id &&
+		    app->priority == new_cfg->app[i].priority) {
+			new_cfg->app[i].selector = 0;
+			new_cfg->app[i].prot_id = 0;
+			new_cfg->app[i].priority = 0;
+			break;
+		}
+	}
+
+	/* Did not find DCB App */
+	if (i == new_cfg->numapps) {
+		ret = -EINVAL;
+		goto delapp_out;
+	}
+
+	new_cfg->numapps--;
+
+	for (j = i; j < new_cfg->numapps; j++) {
+		new_cfg->app[i].selector = old_cfg->app[j + 1].selector;
+		new_cfg->app[i].prot_id = old_cfg->app[j + 1].prot_id;
+		new_cfg->app[i].priority = old_cfg->app[j + 1].priority;
+	}
+
+	ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (ret == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (ret == ICE_DCB_NO_HW_CHG)
+		ret = ICE_DCB_HW_CHG_RST;
+
+delapp_out:
+	mutex_unlock(&pf->tc_mutex);
+	return ret;
+}
+
+/**
+ * ice_dcbnl_cee_set_all - Commit CEE DCB settings to HW
+ * @netdev: the corresponding netdev
+ */
+static u8 ice_dcbnl_cee_set_all(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int err;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return ICE_DCB_NO_HW_CHG;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	mutex_lock(&pf->tc_mutex);
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+
+	mutex_unlock(&pf->tc_mutex);
+	return (err != ICE_DCB_HW_CHG_RST) ? ICE_DCB_NO_HW_CHG : err;
+}
+
+static const struct dcbnl_rtnl_ops dcbnl_ops = {
+	/* IEEE 802.1Qaz std */
+	.ieee_getets = ice_dcbnl_getets,
+	.ieee_setets = ice_dcbnl_setets,
+	.ieee_getpfc = ice_dcbnl_getpfc,
+	.ieee_setpfc = ice_dcbnl_setpfc,
+	.ieee_setapp = ice_dcbnl_setapp,
+	.ieee_delapp = ice_dcbnl_delapp,
+
+	/* CEE std */
+	.getstate = ice_dcbnl_getstate,
+	.setstate = ice_dcbnl_setstate,
+	.getpermhwaddr = ice_dcbnl_get_perm_hw_addr,
+	.setpgtccfgtx = ice_dcbnl_set_pg_tc_cfg_tx,
+	.setpgbwgcfgtx = ice_dcbnl_set_pg_bwg_cfg_tx,
+	.getpgtccfgtx = ice_dcbnl_get_pg_tc_cfg_tx,
+	.getpgbwgcfgtx = ice_dcbnl_get_pg_bwg_cfg_tx,
+	.getpgtccfgrx = ice_dcbnl_get_pg_tc_cfg_rx,
+	.getpgbwgcfgrx = ice_dcbnl_get_pg_bwg_cfg_rx,
+	.setpfccfg = ice_dcbnl_set_pfc_cfg,
+	.getpfccfg = ice_dcbnl_get_pfc_cfg,
+	.setall = ice_dcbnl_cee_set_all,
+	.getcap = ice_dcbnl_get_cap,
+	.getnumtcs = ice_dcbnl_getnumtcs,
+	.getpfcstate = ice_dcbnl_getpfcstate,
+	.getapp = ice_dcbnl_getapp,
+
+	/* DCBX configuration */
+	.getdcbx = ice_dcbnl_getdcbx,
+	.setdcbx = ice_dcbnl_setdcbx,
+};
+
+/**
+ * ice_dcbnl_set_all - set all the apps and ieee data from DCBX config
+ * @vsi: pointer to VSI struct
+ */
+void ice_dcbnl_set_all(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_dcbx_cfg *dcbxcfg;
+	struct ice_port_info *pi;
+	struct dcb_app sapp;
+	struct ice_pf *pf;
+	int i;
+
+	if (!netdev)
+		return;
+
+	pf = ice_netdev_to_pf(netdev);
+	pi = pf->hw.port_info;
+
+	/* SW DCB taken care of by SW Default Config */
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST)
+		return;
+
+	/* DCB not enabled */
+	if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return;
+
+	dcbxcfg = &pi->local_dcbx_cfg;
+
+	for (i = 0; i < dcbxcfg->numapps; i++) {
+		u8 prio, tc_map;
+
+		prio = dcbxcfg->app[i].priority;
+		tc_map = BIT(dcbxcfg->etscfg.prio_table[prio]);
+
+		/* Add APP only if the TC is enabled for this VSI */
+		if (tc_map & vsi->tc_cfg.ena_tc) {
+			sapp.selector = dcbxcfg->app[i].selector;
+			sapp.protocol = dcbxcfg->app[i].prot_id;
+			sapp.priority = prio;
+			dcb_ieee_setapp(netdev, &sapp);
+		}
+	}
+	/* Notify user-space of the changes */
+	dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0);
+}
+
+/**
+ * ice_dcbnl_vsi_del_app - Delete APP on all VSIs
+ * @vsi: pointer to the main VSI
+ * @app: APP to delete
+ *
+ * Delete given APP from all the VSIs for given PF
+ */
+static void
+ice_dcbnl_vsi_del_app(struct ice_vsi *vsi,
+		      struct ice_dcb_app_priority_table *app)
+{
+	struct dcb_app sapp;
+	int err;
+
+	sapp.selector = app->selector;
+	sapp.protocol = app->prot_id;
+	sapp.priority = app->priority;
+	err = ice_dcbnl_delapp(vsi->netdev, &sapp);
+	dev_dbg(&vsi->back->pdev->dev,
+		"Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n",
+		vsi->idx, err, app->selector, app->prot_id, app->priority);
+}
+
+/**
+ * ice_dcbnl_flush_apps - Delete all removed APPs
+ * @pf: the corresponding PF
+ * @old_cfg: old DCBX configuration data
+ * @new_cfg: new DCBX configuration data
+ *
+ * Find and delete all APPS that are not present in the passed
+ * DCB configuration
+ */
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg)
+{
+	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+	int i;
+
+	if (!main_vsi)
+		return;
+
+	for (i = 0; i < old_cfg->numapps; i++) {
+		struct ice_dcb_app_priority_table app = old_cfg->app[i];
+
+		/* The APP is not available anymore delete it */
+		if (!ice_dcbnl_find_app(new_cfg, &app))
+			ice_dcbnl_vsi_del_app(main_vsi, &app);
+	}
+}
+
+/**
+ * ice_dcbnl_setup - setup DCBNL
+ * @vsi: VSI to get associated netdev from
+ */
+void ice_dcbnl_setup(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_pf *pf;
+
+	pf = ice_netdev_to_pf(netdev);
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return;
+
+	netdev->dcbnl_ops = &dcbnl_ops;
+	ice_dcbnl_set_all(vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
new file mode 100644
index 000000000000..6c630a362293
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_NL_H_
+#define _ICE_DCB_NL_H_
+
+#ifdef CONFIG_DCB
+void ice_dcbnl_setup(struct ice_vsi *vsi);
+void ice_dcbnl_set_all(struct ice_vsi *vsi);
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg);
+#else
+#define ice_dcbnl_setup(vsi) do {} while (0)
+#define ice_dcbnl_set_all(vsi) do {} while (0)
+#define ice_dcbnl_flush_apps(pf, old_cfg, new_cfg) do {} while (0)
+#endif /* CONFIG_DCB */
+
+#endif /* _ICE_DCB_NL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 7e23034df955..1f00091f7906 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -156,6 +156,7 @@ struct ice_priv_flag {
 static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
 	ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
 	ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+	ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
 };
 
 #define ICE_PRIV_FLAG_ARRAY_SIZE	ARRAY_SIZE(ice_gstrings_priv_flags)
@@ -623,7 +624,7 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
 			continue;
 
 		rx_buf = &rx_ring->rx_buf[i];
-		received_buf = page_address(rx_buf->page);
+		received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
 
 		if (ice_lbtest_check_frame(received_buf))
 			valid_frames++;
@@ -1205,11 +1206,6 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			status = ice_init_pf_dcb(pf, true);
 			if (status)
 				dev_warn(&pf->pdev->dev, "Fail to init DCB\n");
-
-			/* Forward LLDP packets to default VSI so that they
-			 * are passed up the stack
-			 */
-			ice_cfg_sw_lldp(vsi, false, true);
 		} else {
 			enum ice_status status;
 			bool dcbx_agent_status;
@@ -1256,6 +1252,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 					"Fail to enable MIB change events\n");
 		}
 	}
+	if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+		/* down and up VSI so that changes of Rx cfg are reflected. */
+		ice_down(vsi);
+		ice_up(vsi);
+	}
 	clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
 	return ret;
 }
@@ -2577,6 +2578,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 {
 	struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
 	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_ring *xdp_rings = NULL;
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
 	int i, timeout = 50, err = 0;
@@ -2611,6 +2613,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 		return 0;
 	}
 
+	/* If there is a AF_XDP UMEM attached to any of Rx rings,
+	 * disallow changing the number of descriptors -- regardless
+	 * if the netdev is running or not.
+	 */
+	if (ice_xsk_any_rx_ring_ena(vsi))
+		return -EBUSY;
+
 	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
 		timeout--;
 		if (!timeout)
@@ -2624,6 +2633,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 			vsi->tx_rings[i]->count = new_tx_cnt;
 		for (i = 0; i < vsi->alloc_rxq; i++)
 			vsi->rx_rings[i]->count = new_rx_cnt;
+		if (ice_is_xdp_ena_vsi(vsi))
+			for (i = 0; i < vsi->num_xdp_txq; i++)
+				vsi->xdp_rings[i]->count = new_tx_cnt;
+		vsi->num_tx_desc = new_tx_cnt;
+		vsi->num_rx_desc = new_rx_cnt;
 		netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
 		goto done;
 	}
@@ -2635,14 +2649,14 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 	netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
 		    vsi->tx_rings[0]->count, new_tx_cnt);
 
-	tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+	tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_txq,
 				sizeof(*tx_rings), GFP_KERNEL);
 	if (!tx_rings) {
 		err = -ENOMEM;
 		goto done;
 	}
 
-	for (i = 0; i < vsi->alloc_txq; i++) {
+	ice_for_each_txq(vsi, i) {
 		/* clone ring and setup updated count */
 		tx_rings[i] = *vsi->tx_rings[i];
 		tx_rings[i].count = new_tx_cnt;
@@ -2650,15 +2664,43 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 		tx_rings[i].tx_buf = NULL;
 		err = ice_setup_tx_ring(&tx_rings[i]);
 		if (err) {
-			while (i) {
-				i--;
+			while (i--)
 				ice_clean_tx_ring(&tx_rings[i]);
-			}
 			devm_kfree(&pf->pdev->dev, tx_rings);
 			goto done;
 		}
 	}
 
+	if (!ice_is_xdp_ena_vsi(vsi))
+		goto process_rx;
+
+	/* alloc updated XDP resources */
+	netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+		    vsi->xdp_rings[0]->count, new_tx_cnt);
+
+	xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
+				 sizeof(*xdp_rings), GFP_KERNEL);
+	if (!xdp_rings) {
+		err = -ENOMEM;
+		goto free_tx;
+	}
+
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		/* clone ring and setup updated count */
+		xdp_rings[i] = *vsi->xdp_rings[i];
+		xdp_rings[i].count = new_tx_cnt;
+		xdp_rings[i].desc = NULL;
+		xdp_rings[i].tx_buf = NULL;
+		err = ice_setup_tx_ring(&xdp_rings[i]);
+		if (err) {
+			while (i--)
+				ice_clean_tx_ring(&xdp_rings[i]);
+			devm_kfree(&pf->pdev->dev, xdp_rings);
+			goto free_tx;
+		}
+		ice_set_ring_xdp(&xdp_rings[i]);
+	}
+
 process_rx:
 	if (new_rx_cnt == vsi->rx_rings[0]->count)
 		goto process_link;
@@ -2667,14 +2709,14 @@ process_rx:
 	netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
 		    vsi->rx_rings[0]->count, new_rx_cnt);
 
-	rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+	rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_rxq,
 				sizeof(*rx_rings), GFP_KERNEL);
 	if (!rx_rings) {
 		err = -ENOMEM;
 		goto done;
 	}
 
-	for (i = 0; i < vsi->alloc_rxq; i++) {
+	ice_for_each_rxq(vsi, i) {
 		/* clone ring and setup updated count */
 		rx_rings[i] = *vsi->rx_rings[i];
 		rx_rings[i].count = new_rx_cnt;
@@ -2712,7 +2754,7 @@ process_link:
 		ice_down(vsi);
 
 		if (tx_rings) {
-			for (i = 0; i < vsi->alloc_txq; i++) {
+			ice_for_each_txq(vsi, i) {
 				ice_free_tx_ring(vsi->tx_rings[i]);
 				*vsi->tx_rings[i] = tx_rings[i];
 			}
@@ -2720,7 +2762,7 @@ process_link:
 		}
 
 		if (rx_rings) {
-			for (i = 0; i < vsi->alloc_rxq; i++) {
+			ice_for_each_rxq(vsi, i) {
 				ice_free_rx_ring(vsi->rx_rings[i]);
 				/* copy the real tail offset */
 				rx_rings[i].tail = vsi->rx_rings[i]->tail;
@@ -2737,6 +2779,16 @@ process_link:
 			devm_kfree(&pf->pdev->dev, rx_rings);
 		}
 
+		if (xdp_rings) {
+			for (i = 0; i < vsi->num_xdp_txq; i++) {
+				ice_free_tx_ring(vsi->xdp_rings[i]);
+				*vsi->xdp_rings[i] = xdp_rings[i];
+			}
+			devm_kfree(&pf->pdev->dev, xdp_rings);
+		}
+
+		vsi->num_tx_desc = new_tx_cnt;
+		vsi->num_rx_desc = new_rx_cnt;
 		ice_up(vsi);
 	}
 	goto done;
@@ -2744,7 +2796,7 @@ process_link:
 free_tx:
 	/* error cleanup if the Rx allocations failed after getting Tx */
 	if (tx_rings) {
-		for (i = 0; i < vsi->alloc_txq; i++)
+		ice_for_each_txq(vsi, i)
 			ice_free_tx_ring(&tx_rings[i]);
 		devm_kfree(&pf->pdev->dev, tx_rings);
 	}
@@ -3398,6 +3450,151 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
 	return __ice_set_coalesce(netdev, ec, q_num);
 }
 
+#define ICE_I2C_EEPROM_DEV_ADDR		0xA0
+#define ICE_I2C_EEPROM_DEV_ADDR2	0xA2
+#define ICE_MODULE_TYPE_SFP		0x03
+#define ICE_MODULE_TYPE_QSFP_PLUS	0x0D
+#define ICE_MODULE_TYPE_QSFP28		0x11
+#define ICE_MODULE_SFF_ADDR_MODE	0x04
+#define ICE_MODULE_SFF_DIAG_CAPAB	0x40
+#define ICE_MODULE_REVISION_ADDR	0x01
+#define ICE_MODULE_SFF_8472_COMP	0x5E
+#define ICE_MODULE_SFF_8472_SWAP	0x5C
+#define ICE_MODULE_QSFP_MAX_LEN		640
+
+/**
+ * ice_get_module_info - get SFF module type and revision information
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ */
+static int
+ice_get_module_info(struct net_device *netdev,
+		    struct ethtool_modinfo *modinfo)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u8 sff8472_comp = 0;
+	u8 sff8472_swap = 0;
+	u8 sff8636_rev = 0;
+	u8 value = 0;
+
+	status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
+				   0, &value, 1, 0, NULL);
+	if (status)
+		return -EIO;
+
+	switch (value) {
+	case ICE_MODULE_TYPE_SFP:
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_SFF_8472_COMP, 0x00, 0,
+					   &sff8472_comp, 1, 0, NULL);
+		if (status)
+			return -EIO;
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
+					   &sff8472_swap, 1, 0, NULL);
+		if (status)
+			return -EIO;
+
+		if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else if (sff8472_comp &&
+			   (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) {
+			modinfo->type = ETH_MODULE_SFF_8472;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		}
+		break;
+	case ICE_MODULE_TYPE_QSFP_PLUS:
+	case ICE_MODULE_TYPE_QSFP28:
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_REVISION_ADDR, 0x00, 0,
+					   &sff8636_rev, 1, 0, NULL);
+		if (status)
+			return -EIO;
+		/* Check revision compliance */
+		if (sff8636_rev > 0x02) {
+			/* Module is SFF-8636 compliant */
+			modinfo->type = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+		}
+		break;
+	default:
+		netdev_warn(netdev,
+			    "SFF Module Type not recognized.\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * ice_get_module_eeprom - fill buffer with SFF EEPROM contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ */
+static int
+ice_get_module_eeprom(struct net_device *netdev,
+		      struct ethtool_eeprom *ee, u8 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	bool is_sfp = false;
+	u16 offset = 0;
+	u8 value = 0;
+	u8 page = 0;
+	int i;
+
+	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0,
+				   &value, 1, 0, NULL);
+	if (status)
+		return -EIO;
+
+	if (!ee || !ee->len || !data)
+		return -EINVAL;
+
+	if (value == ICE_MODULE_TYPE_SFP)
+		is_sfp = true;
+
+	for (i = 0; i < ee->len; i++) {
+		offset = i + ee->offset;
+
+		/* Check if we need to access the other memory page */
+		if (is_sfp) {
+			if (offset >= ETH_MODULE_SFF_8079_LEN) {
+				offset -= ETH_MODULE_SFF_8079_LEN;
+				addr = ICE_I2C_EEPROM_DEV_ADDR2;
+			}
+		} else {
+			while (offset >= ETH_MODULE_SFF_8436_LEN) {
+				/* Compute memory page number and offset. */
+				offset -= ETH_MODULE_SFF_8436_LEN / 2;
+				page++;
+			}
+		}
+
+		status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp,
+					   &value, 1, 0, NULL);
+		if (status)
+			value = 0;
+		data[i] = value;
+	}
+	return 0;
+}
+
 static const struct ethtool_ops ice_ethtool_ops = {
 	.get_link_ksettings	= ice_get_link_ksettings,
 	.set_link_ksettings	= ice_set_link_ksettings,
@@ -3433,6 +3630,8 @@ static const struct ethtool_ops ice_ethtool_ops = {
 	.set_per_queue_coalesce = ice_set_per_q_coalesce,
 	.get_fecparam		= ice_get_fecparam,
 	.set_fecparam		= ice_set_fecparam,
+	.get_module_info	= ice_get_module_info,
+	.get_module_eeprom	= ice_get_module_eeprom,
 };
 
 static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 152fbd556e9b..e8f32350fed2 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -52,6 +52,9 @@
 #define PF_MBX_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
 #define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
 #define PF_MBX_ATQT				0x0022E300
+#define PRTDCB_GENC				0x00083000
+#define PRTDCB_GENC_PFCLDA_S			16
+#define PRTDCB_GENC_PFCLDA_M			ICE_M(0xFFFF, 16)
 #define PRTDCB_GENS				0x00083020
 #define PRTDCB_GENS_DCBX_STATUS_S		0
 #define PRTDCB_GENS_DCBX_STATUS_M		ICE_M(0x7, 0)
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 2aac8f13daeb..ad34f22d44ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -211,7 +211,7 @@ enum ice_flex_rx_mdid {
 /* Rx/Tx Flag64 packet flag bits */
 enum ice_flg64_bits {
 	ICE_FLG_PKT_DSI		= 0,
-	ICE_FLG_EVLAN_x8100	= 15,
+	ICE_FLG_EVLAN_x8100	= 14,
 	ICE_FLG_EVLAN_x9100,
 	ICE_FLG_VLAN_x8100,
 	ICE_FLG_TNL_MAC		= 22,
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index cc755382df25..d71f7ce0a265 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2,232 +2,26 @@
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
 
 /**
- * ice_setup_rx_ctx - Configure a receive ring context
- * @ring: The Rx ring to configure
- *
- * Configure the Rx descriptor ring in RLAN context.
- */
-static int ice_setup_rx_ctx(struct ice_ring *ring)
-{
-	struct ice_vsi *vsi = ring->vsi;
-	struct ice_hw *hw = &vsi->back->hw;
-	u32 rxdid = ICE_RXDID_FLEX_NIC;
-	struct ice_rlan_ctx rlan_ctx;
-	u32 regval;
-	u16 pf_q;
-	int err;
-
-	/* what is Rx queue number in global space of 2K Rx queues */
-	pf_q = vsi->rxq_map[ring->q_index];
-
-	/* clear the context structure first */
-	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
-
-	rlan_ctx.base = ring->dma >> 7;
-
-	rlan_ctx.qlen = ring->count;
-
-	/* Receive Packet Data Buffer Size.
-	 * The Packet Data Buffer Size is defined in 128 byte units.
-	 */
-	rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
-
-	/* use 32 byte descriptors */
-	rlan_ctx.dsize = 1;
-
-	/* Strip the Ethernet CRC bytes before the packet is posted to host
-	 * memory.
-	 */
-	rlan_ctx.crcstrip = 1;
-
-	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
-	rlan_ctx.l2tsel = 1;
-
-	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
-	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
-	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
-
-	/* This controls whether VLAN is stripped from inner headers
-	 * The VLAN in the inner L2 header is stripped to the receive
-	 * descriptor if enabled by this flag.
-	 */
-	rlan_ctx.showiv = 0;
-
-	/* Max packet size for this queue - must not be set to a larger value
-	 * than 5 x DBUF
-	 */
-	rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
-			       ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
-
-	/* Rx queue threshold in units of 64 */
-	rlan_ctx.lrxqthresh = 1;
-
-	 /* Enable Flexible Descriptors in the queue context which
-	  * allows this driver to select a specific receive descriptor format
-	  */
-	if (vsi->type != ICE_VSI_VF) {
-		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
-		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
-			QRXFLXP_CNTXT_RXDID_IDX_M;
-
-		/* increasing context priority to pick up profile ID;
-		 * default is 0x01; setting to 0x03 to ensure profile
-		 * is programming if prev context is of same priority
-		 */
-		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
-			QRXFLXP_CNTXT_RXDID_PRIO_M;
-
-		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
-	}
-
-	/* Absolute queue number out of 2K needs to be passed */
-	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
-	if (err) {
-		dev_err(&vsi->back->pdev->dev,
-			"Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
-			pf_q, err);
-		return -EIO;
-	}
-
-	if (vsi->type == ICE_VSI_VF)
-		return 0;
-
-	/* init queue specific tail register */
-	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
-	writel(0, ring->tail);
-	ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
-
-	return 0;
-}
-
-/**
- * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
- * @ring: The Tx ring to configure
- * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
- * @pf_q: queue index in the PF space
- *
- * Configure the Tx descriptor ring in TLAN context.
+ * ice_vsi_type_str - maps VSI type enum to string equivalents
+ * @type: VSI type enum
  */
-static void
-ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+const char *ice_vsi_type_str(enum ice_vsi_type type)
 {
-	struct ice_vsi *vsi = ring->vsi;
-	struct ice_hw *hw = &vsi->back->hw;
-
-	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
-
-	tlan_ctx->port_num = vsi->port_info->lport;
-
-	/* Transmit Queue Length */
-	tlan_ctx->qlen = ring->count;
-
-	ice_set_cgd_num(tlan_ctx, ring);
-
-	/* PF number */
-	tlan_ctx->pf_num = hw->pf_id;
-
-	/* queue belongs to a specific VSI type
-	 * VF / VM index should be programmed per vmvf_type setting:
-	 * for vmvf_type = VF, it is VF number between 0-256
-	 * for vmvf_type = VM, it is VM number between 0-767
-	 * for PF or EMP this field should be set to zero
-	 */
-	switch (vsi->type) {
-	case ICE_VSI_LB:
-		/* fall through */
+	switch (type) {
 	case ICE_VSI_PF:
-		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
-		break;
+		return "ICE_VSI_PF";
 	case ICE_VSI_VF:
-		/* Firmware expects vmvf_num to be absolute VF ID */
-		tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
-		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
-		break;
+		return "ICE_VSI_VF";
+	case ICE_VSI_LB:
+		return "ICE_VSI_LB";
 	default:
-		return;
+		return "unknown";
 	}
-
-	/* make sure the context is associated with the right VSI */
-	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
-
-	tlan_ctx->tso_ena = ICE_TX_LEGACY;
-	tlan_ctx->tso_qnum = pf_q;
-
-	/* Legacy or Advanced Host Interface:
-	 * 0: Advanced Host Interface
-	 * 1: Legacy Host Interface
-	 */
-	tlan_ctx->legacy_int = ICE_TX_LEGACY;
-}
-
-/**
- * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
- * @pf: the PF being configured
- * @pf_q: the PF queue
- * @ena: enable or disable state of the queue
- *
- * This routine will wait for the given Rx queue of the PF to reach the
- * enabled or disabled state.
- * Returns -ETIMEDOUT in case of failing to reach the requested state after
- * multiple retries; else will return 0 in case of success.
- */
-static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
-{
-	int i;
-
-	for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
-		if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
-			      QRX_CTRL_QENA_STAT_M))
-			return 0;
-
-		usleep_range(20, 40);
-	}
-
-	return -ETIMEDOUT;
-}
-
-/**
- * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
- * @vsi: the VSI being configured
- * @ena: start or stop the Rx rings
- * @rxq_idx: Rx queue index
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
-{
-	int pf_q = vsi->rxq_map[rxq_idx];
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	int ret = 0;
-	u32 rx_reg;
-
-	rx_reg = rd32(hw, QRX_CTRL(pf_q));
-
-	/* Skip if the queue is already in the requested state */
-	if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
-		return 0;
-
-	/* turn on/off the queue */
-	if (ena)
-		rx_reg |= QRX_CTRL_QENA_REQ_M;
-	else
-		rx_reg &= ~QRX_CTRL_QENA_REQ_M;
-	wr32(hw, QRX_CTRL(pf_q), rx_reg);
-
-	/* wait for the change to finish */
-	ret = ice_pf_rxq_wait(pf, pf_q, ena);
-	if (ret)
-		dev_err(&pf->pdev->dev,
-			"VSI idx %d Rx ring %d %sable timeout\n",
-			vsi->idx, pf_q, (ena ? "en" : "dis"));
-
-	return ret;
 }
 
 /**
@@ -270,7 +64,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 	if (!vsi->rx_rings)
 		goto err_rings;
 
-	vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+	/* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
+	vsi->txq_map = devm_kcalloc(&pf->pdev->dev, (2 * vsi->alloc_txq),
 				    sizeof(*vsi->txq_map), GFP_KERNEL);
 
 	if (!vsi->txq_map)
@@ -281,7 +76,6 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 	if (!vsi->rxq_map)
 		goto err_rxq_map;
 
-
 	/* There is no need to allocate q_vectors for a loopback VSI. */
 	if (vsi->type == ICE_VSI_LB)
 		return 0;
@@ -606,88 +400,6 @@ unlock_pf:
 }
 
 /**
- * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
- * @qs_cfg: gathered variables needed for PF->VSI queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
-{
-	int offset, i;
-
-	mutex_lock(qs_cfg->qs_mutex);
-	offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
-					    0, qs_cfg->q_count, 0);
-	if (offset >= qs_cfg->pf_map_size) {
-		mutex_unlock(qs_cfg->qs_mutex);
-		return -ENOMEM;
-	}
-
-	bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
-	for (i = 0; i < qs_cfg->q_count; i++)
-		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
-	mutex_unlock(qs_cfg->qs_mutex);
-
-	return 0;
-}
-
-/**
- * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
-{
-	int i, index = 0;
-
-	mutex_lock(qs_cfg->qs_mutex);
-	for (i = 0; i < qs_cfg->q_count; i++) {
-		index = find_next_zero_bit(qs_cfg->pf_map,
-					   qs_cfg->pf_map_size, index);
-		if (index >= qs_cfg->pf_map_size)
-			goto err_scatter;
-		set_bit(index, qs_cfg->pf_map);
-		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
-	}
-	mutex_unlock(qs_cfg->qs_mutex);
-
-	return 0;
-err_scatter:
-	for (index = 0; index < i; index++) {
-		clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
-		qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
-	}
-	mutex_unlock(qs_cfg->qs_mutex);
-
-	return -ENOMEM;
-}
-
-/**
- * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * This function first tries to find contiguous space. If it is not successful,
- * it tries with the scatter approach.
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
-{
-	int ret = 0;
-
-	ret = __ice_vsi_get_qs_contig(qs_cfg);
-	if (ret) {
-		/* contig failed, so try with scatter approach */
-		qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
-		qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
-					qs_cfg->scatter_count);
-		ret = __ice_vsi_get_qs_sc(qs_cfg);
-	}
-	return ret;
-}
-
-/**
  * ice_vsi_get_qs - Assign queues from PF to VSI
  * @vsi: the VSI to assign queues to
  *
@@ -1006,7 +718,8 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
 		break;
 	case ICE_VSI_LB:
-		dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type);
+		dev_dbg(&pf->pdev->dev, "Unsupported VSI type %s\n",
+			ice_vsi_type_str(vsi->type));
 		return;
 	default:
 		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
@@ -1098,129 +811,6 @@ static int ice_vsi_init(struct ice_vsi *vsi)
 }
 
 /**
- * ice_free_q_vector - Free memory allocated for a specific interrupt vector
- * @vsi: VSI having the memory freed
- * @v_idx: index of the vector to be freed
- */
-static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
-{
-	struct ice_q_vector *q_vector;
-	struct ice_pf *pf = vsi->back;
-	struct ice_ring *ring;
-
-	if (!vsi->q_vectors[v_idx]) {
-		dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
-			v_idx);
-		return;
-	}
-	q_vector = vsi->q_vectors[v_idx];
-
-	ice_for_each_ring(ring, q_vector->tx)
-		ring->q_vector = NULL;
-	ice_for_each_ring(ring, q_vector->rx)
-		ring->q_vector = NULL;
-
-	/* only VSI with an associated netdev is set up with NAPI */
-	if (vsi->netdev)
-		netif_napi_del(&q_vector->napi);
-
-	devm_kfree(&pf->pdev->dev, q_vector);
-	vsi->q_vectors[v_idx] = NULL;
-}
-
-/**
- * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
- * @vsi: the VSI having memory freed
- */
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
-{
-	int v_idx;
-
-	ice_for_each_q_vector(vsi, v_idx)
-		ice_free_q_vector(vsi, v_idx);
-}
-
-/**
- * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
- * @vsi: the VSI being configured
- * @v_idx: index of the vector in the VSI struct
- *
- * We allocate one q_vector. If allocation fails we return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_q_vector *q_vector;
-
-	/* allocate q_vector */
-	q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
-	if (!q_vector)
-		return -ENOMEM;
-
-	q_vector->vsi = vsi;
-	q_vector->v_idx = v_idx;
-	if (vsi->type == ICE_VSI_VF)
-		goto out;
-	/* only set affinity_mask if the CPU is online */
-	if (cpu_online(v_idx))
-		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
-
-	/* This will not be called in the driver load path because the netdev
-	 * will not be created yet. All other cases with register the NAPI
-	 * handler here (i.e. resume, reset/rebuild, etc.)
-	 */
-	if (vsi->netdev)
-		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
-			       NAPI_POLL_WEIGHT);
-
-out:
-	/* tie q_vector and VSI together */
-	vsi->q_vectors[v_idx] = q_vector;
-
-	return 0;
-}
-
-/**
- * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
- * @vsi: the VSI being configured
- *
- * We allocate one q_vector per queue interrupt. If allocation fails we
- * return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int v_idx = 0, num_q_vectors;
-	int err;
-
-	if (vsi->q_vectors[0]) {
-		dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
-			vsi->vsi_num);
-		return -EEXIST;
-	}
-
-	num_q_vectors = vsi->num_q_vectors;
-
-	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
-		err = ice_vsi_alloc_q_vector(vsi, v_idx);
-		if (err)
-			goto err_out;
-	}
-
-	return 0;
-
-err_out:
-	while (v_idx--)
-		ice_free_q_vector(vsi, v_idx);
-
-	dev_err(&pf->pdev->dev,
-		"Failed to allocate %d q_vector for VSI %d, ret=%d\n",
-		vsi->num_q_vectors, vsi->vsi_num, err);
-	vsi->num_q_vectors = 0;
-	return err;
-}
-
-/**
  * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
  * @vsi: ptr to the VSI
  *
@@ -1341,66 +931,6 @@ err_out:
 }
 
 /**
- * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
- * @vsi: the VSI being configured
- *
- * This function maps descriptor rings to the queue-specific vectors allotted
- * through the MSI-X enabling code. On a constrained vector budget, we map Tx
- * and Rx rings to the vector as "efficiently" as possible.
- */
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#else
-static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#endif /* CONFIG_DCB */
-{
-	int q_vectors = vsi->num_q_vectors;
-	int tx_rings_rem, rx_rings_rem;
-	int v_id;
-
-	/* initially assigning remaining rings count to VSIs num queue value */
-	tx_rings_rem = vsi->num_txq;
-	rx_rings_rem = vsi->num_rxq;
-
-	for (v_id = 0; v_id < q_vectors; v_id++) {
-		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
-		int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
-
-		/* Tx rings mapping to vector */
-		tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
-		q_vector->num_ring_tx = tx_rings_per_v;
-		q_vector->tx.ring = NULL;
-		q_vector->tx.itr_idx = ICE_TX_ITR;
-		q_base = vsi->num_txq - tx_rings_rem;
-
-		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
-			struct ice_ring *tx_ring = vsi->tx_rings[q_id];
-
-			tx_ring->q_vector = q_vector;
-			tx_ring->next = q_vector->tx.ring;
-			q_vector->tx.ring = tx_ring;
-		}
-		tx_rings_rem -= tx_rings_per_v;
-
-		/* Rx rings mapping to vector */
-		rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
-		q_vector->num_ring_rx = rx_rings_per_v;
-		q_vector->rx.ring = NULL;
-		q_vector->rx.itr_idx = ICE_RX_ITR;
-		q_base = vsi->num_rxq - rx_rings_rem;
-
-		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
-			struct ice_ring *rx_ring = vsi->rx_rings[q_id];
-
-			rx_ring->q_vector = q_vector;
-			rx_ring->next = q_vector->rx.ring;
-			q_vector->rx.ring = rx_ring;
-		}
-		rx_rings_rem -= rx_rings_per_v;
-	}
-}
-
-/**
  * ice_vsi_manage_rss_lut - disable/enable RSS
  * @vsi: the VSI being changed
  * @ena: boolean value indicating if this is an enable or disable request
@@ -1674,6 +1204,31 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
 }
 
 /**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+		vsi->rx_buf_len = ICE_RXBUF_2048;
+#if (PAGE_SIZE < 8192)
+	} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+		   (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+		vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+		vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+	} else {
+		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+#if (PAGE_SIZE < 8192)
+		vsi->rx_buf_len = ICE_RXBUF_3072;
+#else
+		vsi->rx_buf_len = ICE_RXBUF_2048;
+#endif
+	}
+}
+
+/**
  * ice_vsi_cfg_rxqs - Configure the VSI for Rx
  * @vsi: the VSI being configured
  *
@@ -1687,13 +1242,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
 	if (vsi->type == ICE_VSI_VF)
 		goto setup_rings;
 
-	if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
-		vsi->max_frame = vsi->netdev->mtu +
-			ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-	else
-		vsi->max_frame = ICE_RXBUF_2048;
-
-	vsi->rx_buf_len = ICE_RXBUF_2048;
+	ice_vsi_cfg_frame_size(vsi);
 setup_rings:
 	/* set up individual rings */
 	for (i = 0; i < vsi->num_rxq; i++) {
@@ -1712,101 +1261,34 @@ setup_rings:
 }
 
 /**
- * ice_vsi_cfg_txq - Configure single Tx queue
- * @vsi: the VSI that queue belongs to
- * @ring: Tx ring to be configured
- * @tc_q_idx: queue index within given TC
- * @qg_buf: queue group buffer
- * @tc: TC that Tx ring belongs to
- */
-static int
-ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 tc_q_idx,
-		struct ice_aqc_add_tx_qgrp *qg_buf, u8 tc)
-{
-	struct ice_tlan_ctx tlan_ctx = { 0 };
-	struct ice_aqc_add_txqs_perq *txq;
-	struct ice_pf *pf = vsi->back;
-	u8 buf_len = sizeof(*qg_buf);
-	enum ice_status status;
-	u16 pf_q;
-
-	pf_q = ring->reg_idx;
-	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
-	/* copy context contents into the qg_buf */
-	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
-	ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
-		    ice_tlan_ctx_info);
-
-	/* init queue specific tail reg. It is referred as
-	 * transmit comm scheduler queue doorbell.
-	 */
-	ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
-
-	/* Add unique software queue handle of the Tx queue per
-	 * TC into the VSI Tx ring
-	 */
-	ring->q_handle = tc_q_idx;
-
-	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
-				 1, qg_buf, buf_len, NULL);
-	if (status) {
-		dev_err(&pf->pdev->dev,
-			"Failed to set LAN Tx queue context, error: %d\n",
-			status);
-		return -ENODEV;
-	}
-
-	/* Add Tx Queue TEID into the VSI Tx ring from the
-	 * response. This will complete configuring and
-	 * enabling the queue.
-	 */
-	txq = &qg_buf->txqs[0];
-	if (pf_q == le16_to_cpu(txq->txq_id))
-		ring->txq_teid = le32_to_cpu(txq->q_teid);
-
-	return 0;
-}
-
-/**
  * ice_vsi_cfg_txqs - Configure the VSI for Tx
  * @vsi: the VSI being configured
  * @rings: Tx ring array to be configured
- * @offset: offset within vsi->txq_map
  *
  * Return 0 on success and a negative value on error
  * Configure the Tx VSI for operation.
  */
 static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
 {
 	struct ice_aqc_add_tx_qgrp *qg_buf;
-	struct ice_pf *pf = vsi->back;
-	u16 q_idx = 0, i;
+	u16 q_idx = 0;
 	int err = 0;
-	u8 tc;
 
-	qg_buf = devm_kzalloc(&pf->pdev->dev, sizeof(*qg_buf), GFP_KERNEL);
+	qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
 	if (!qg_buf)
 		return -ENOMEM;
 
 	qg_buf->num_txqs = 1;
 
-	/* set up and configure the Tx queues for each enabled TC */
-	ice_for_each_traffic_class(tc) {
-		if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
-			break;
-
-		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-			err = ice_vsi_cfg_txq(vsi, rings[q_idx], i + offset,
-					      qg_buf, tc);
-			if (err)
-				goto err_cfg_txqs;
-
-			q_idx++;
-		}
+	for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+		err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+		if (err)
+			goto err_cfg_txqs;
 	}
+
 err_cfg_txqs:
-	devm_kfree(&pf->pdev->dev, qg_buf);
+	kfree(qg_buf);
 	return err;
 }
 
@@ -1819,159 +1301,46 @@ err_cfg_txqs:
  */
 int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
 {
-	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0);
-}
-
-/**
- * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
- * @intrl: interrupt rate limit in usecs
- * @gran: interrupt rate limit granularity in usecs
- *
- * This function converts a decimal interrupt rate limit in usecs to the format
- * expected by firmware.
- */
-u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
-{
-	u32 val = intrl / gran;
-
-	if (val)
-		return val | GLINT_RATE_INTRL_ENA_M;
-	return 0;
-}
-
-/**
- * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
- * @hw: board specific structure
- */
-static void ice_cfg_itr_gran(struct ice_hw *hw)
-{
-	u32 regval = rd32(hw, GLINT_CTL);
-
-	/* no need to update global register if ITR gran is already set */
-	if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
-	    (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
-	     GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
-	    (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
-	     GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
-	    (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
-	     GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
-	    (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
-	      GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
-		return;
-
-	regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
-		  GLINT_CTL_ITR_GRAN_200_M) |
-		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
-		  GLINT_CTL_ITR_GRAN_100_M) |
-		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
-		  GLINT_CTL_ITR_GRAN_50_M) |
-		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
-		  GLINT_CTL_ITR_GRAN_25_M);
-	wr32(hw, GLINT_CTL, regval);
+	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
 }
 
 /**
- * ice_cfg_itr - configure the initial interrupt throttle values
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector that's being configured
- *
- * Configure interrupt throttling values for the ring containers that are
- * associated with the interrupt vector passed in.
- */
-static void
-ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
-	ice_cfg_itr_gran(hw);
-
-	if (q_vector->num_ring_rx) {
-		struct ice_ring_container *rc = &q_vector->rx;
-
-		/* if this value is set then don't overwrite with default */
-		if (!rc->itr_setting)
-			rc->itr_setting = ICE_DFLT_RX_ITR;
-
-		rc->target_itr = ITR_TO_REG(rc->itr_setting);
-		rc->next_update = jiffies + 1;
-		rc->current_itr = rc->target_itr;
-		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
-	}
-
-	if (q_vector->num_ring_tx) {
-		struct ice_ring_container *rc = &q_vector->tx;
-
-		/* if this value is set then don't overwrite with default */
-		if (!rc->itr_setting)
-			rc->itr_setting = ICE_DFLT_TX_ITR;
-
-		rc->target_itr = ITR_TO_REG(rc->itr_setting);
-		rc->next_update = jiffies + 1;
-		rc->current_itr = rc->target_itr;
-		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
-	}
-}
-
-/**
- * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
  * @vsi: the VSI being configured
- * @txq: Tx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
  *
- * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
- * within the function space.
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
  */
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
 {
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	u32 val;
+	int ret;
+	int i;
 
-	itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+	ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+	if (ret)
+		return ret;
 
-	val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
-	      ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);
 
-	wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+	return ret;
 }
 
 /**
- * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
- * @vsi: the VSI being configured
- * @rxq: Rx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
  *
- * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
- * within the function space.
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
  */
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
 {
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	u32 val;
-
-	itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
-
-	val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
-	      ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
-
-	wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+	u32 val = intrl / gran;
 
-	ice_flush(hw);
+	if (val)
+		return val | GLINT_RATE_INTRL_ENA_M;
+	return 0;
 }
 
 /**
@@ -2134,109 +1503,6 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
 }
 
 /**
- * ice_trigger_sw_intr - trigger a software interrupt
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector to trigger the software interrupt for
- */
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
-	wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
-	     (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
-	     GLINT_DYN_CTL_SWINT_TRIG_M |
-	     GLINT_DYN_CTL_INTENA_M);
-}
-
-/**
- * ice_vsi_stop_tx_ring - Disable single Tx ring
- * @vsi: the VSI being configured
- * @rst_src: reset source
- * @rel_vmvf_num: Relative ID of VF/VM
- * @ring: Tx ring to be stopped
- * @txq_meta: Meta data of Tx ring to be stopped
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-		     u16 rel_vmvf_num, struct ice_ring *ring,
-		     struct ice_txq_meta *txq_meta)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_q_vector *q_vector;
-	struct ice_hw *hw = &pf->hw;
-	enum ice_status status;
-	u32 val;
-
-	/* clear cause_ena bit for disabled queues */
-	val = rd32(hw, QINT_TQCTL(ring->reg_idx));
-	val &= ~QINT_TQCTL_CAUSE_ENA_M;
-	wr32(hw, QINT_TQCTL(ring->reg_idx), val);
-
-	/* software is expected to wait for 100 ns */
-	ndelay(100);
-
-	/* trigger a software interrupt for the vector
-	 * associated to the queue to schedule NAPI handler
-	 */
-	q_vector = ring->q_vector;
-	if (q_vector)
-		ice_trigger_sw_intr(hw, q_vector);
-
-	status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
-				 txq_meta->tc, 1, &txq_meta->q_handle,
-				 &txq_meta->q_id, &txq_meta->q_teid, rst_src,
-				 rel_vmvf_num, NULL);
-
-	/* if the disable queue command was exercised during an
-	 * active reset flow, ICE_ERR_RESET_ONGOING is returned.
-	 * This is not an error as the reset operation disables
-	 * queues at the hardware level anyway.
-	 */
-	if (status == ICE_ERR_RESET_ONGOING) {
-		dev_dbg(&vsi->back->pdev->dev,
-			"Reset in progress. LAN Tx queues already disabled\n");
-	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
-		dev_dbg(&vsi->back->pdev->dev,
-			"LAN Tx queues do not exist, nothing to disable\n");
-	} else if (status) {
-		dev_err(&vsi->back->pdev->dev,
-			"Failed to disable LAN Tx queues, error: %d\n", status);
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-/**
- * ice_fill_txq_meta - Prepare the Tx queue's meta data
- * @vsi: VSI that ring belongs to
- * @ring: ring that txq_meta will be based on
- * @txq_meta: a helper struct that wraps Tx queue's information
- *
- * Set up a helper struct that will contain all the necessary fields that
- * are needed for stopping Tx queue
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-void
-ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
-		  struct ice_txq_meta *txq_meta)
-{
-	u8 tc = 0;
-
-#ifdef CONFIG_DCB
-	tc = ring->dcb_tc;
-#endif /* CONFIG_DCB */
-	txq_meta->q_id = ring->reg_idx;
-	txq_meta->q_teid = ring->txq_teid;
-	txq_meta->q_handle = ring->q_handle;
-	txq_meta->vsi_idx = vsi->idx;
-	txq_meta->tc = tc;
-}
-
-/**
  * ice_vsi_stop_tx_rings - Disable Tx rings
  * @vsi: the VSI being configured
  * @rst_src: reset source
@@ -2247,34 +1513,24 @@ static int
 ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 		      u16 rel_vmvf_num, struct ice_ring **rings)
 {
-	u16 i, q_idx = 0;
-	int status;
-	u8 tc;
+	u16 q_idx;
 
 	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
 		return -EINVAL;
 
-	/* set up the Tx queue list to be disabled for each enabled TC */
-	ice_for_each_traffic_class(tc) {
-		if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
-			break;
-
-		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-			struct ice_txq_meta txq_meta = { };
+	for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+		struct ice_txq_meta txq_meta = { };
+		int status;
 
-			if (!rings || !rings[q_idx])
-				return -EINVAL;
+		if (!rings || !rings[q_idx])
+			return -EINVAL;
 
-			ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
-			status = ice_vsi_stop_tx_ring(vsi, rst_src,
-						      rel_vmvf_num,
-						      rings[q_idx], &txq_meta);
+		ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+		status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
+					      rings[q_idx], &txq_meta);
 
-			if (status)
-				return status;
-
-			q_idx++;
-		}
+		if (status)
+			return status;
 	}
 
 	return 0;
@@ -2294,6 +1550,15 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 }
 
 /**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+}
+
+/**
  * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
  * @vsi: VSI to enable or disable VLAN pruning on
  * @ena: set to true to enable VLAN pruning and false to disable it
@@ -2635,23 +1900,17 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	 * out PAUSE or PFC frames. If enabled, FW can still send FC frames.
 	 * The rule is added once for PF VSI in order to create appropriate
 	 * recipe, since VSI/VSI list is ignored with drop action...
-	 * Also add rules to handle LLDP Tx and Rx packets.  Tx LLDP packets
-	 * need to be dropped so that VFs cannot send LLDP packets to reconfig
-	 * DCB settings in the HW.  Also, if the FW DCBX engine is not running
-	 * then Rx LLDP packets need to be redirected up the stack.
+	 * Also add rules to handle LLDP Tx packets.  Tx LLDP packets need to
+	 * be dropped so that VFs cannot send LLDP packets to reconfig DCB
+	 * settings in the HW.
 	 */
-	if (!ice_is_safe_mode(pf)) {
+	if (!ice_is_safe_mode(pf))
 		if (vsi->type == ICE_VSI_PF) {
 			ice_vsi_add_rem_eth_mac(vsi, true);
 
 			/* Tx LLDP packets */
 			ice_cfg_sw_lldp(vsi, true, true);
-
-			/* Rx LLDP packets */
-			if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
-				ice_cfg_sw_lldp(vsi, false, true);
 		}
-	}
 
 	return vsi;
 
@@ -2690,6 +1949,11 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
 		wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
 		for (q = 0; q < q_vector->num_ring_tx; q++) {
 			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+			if (ice_is_xdp_ena_vsi(vsi)) {
+				u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+				wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+			}
 			txq++;
 		}
 
@@ -2790,6 +2054,62 @@ void ice_vsi_close(struct ice_vsi *vsi)
 }
 
 /**
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resume
+ * @locked: is the rtnl_lock already held
+ */
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
+{
+	int err = 0;
+
+	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
+		return 0;
+
+	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			err = ice_open(vsi->netdev);
+
+			if (!locked)
+				rtnl_unlock();
+		}
+	}
+
+	return err;
+}
+
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ * @locked: is the rtnl_lock already held
+ */
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
+{
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	set_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			ice_stop(vsi->netdev);
+
+			if (!locked)
+				rtnl_unlock();
+		} else {
+			ice_vsi_close(vsi);
+		}
+	}
+}
+
+/**
  * ice_free_res - free a block of resources
  * @res: pointer to the resource
  * @index: starting index previously returned by ice_get_res
@@ -3064,6 +2384,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 		vsi->base_vector = 0;
 	}
 
+	if (ice_is_xdp_ena_vsi(vsi))
+		/* return value check can be skipped here, it always returns
+		 * 0 if reset is in progress
+		 */
+		ice_destroy_xdp_rings(vsi);
 	ice_vsi_put_qs(vsi);
 	ice_vsi_clear_rings(vsi);
 	ice_vsi_free_arrays(vsi);
@@ -3085,7 +2410,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 	if (ret < 0)
 		goto err_vsi;
 
-
 	switch (vsi->type) {
 	case ICE_VSI_PF:
 		ret = ice_vsi_alloc_q_vectors(vsi);
@@ -3105,6 +2429,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 			goto err_vectors;
 
 		ice_vsi_map_rings_to_vectors(vsi);
+		if (ice_is_xdp_ena_vsi(vsi)) {
+			vsi->num_xdp_txq = vsi->alloc_txq;
+			ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+			if (ret)
+				goto err_vectors;
+		}
 		/* Do not exit if configuring RSS had an issue, at least
 		 * receive traffic on first queue. Hence no need to capture
 		 * return value
@@ -3131,9 +2461,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 	}
 
 	/* configure VSI nodes based on number of queues and TC's */
-	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+	for (i = 0; i < vsi->tc_cfg.numtc; i++) {
 		max_txqs[i] = vsi->alloc_txq;
 
+		if (ice_is_xdp_ena_vsi(vsi))
+			max_txqs[i] += vsi->num_xdp_txq;
+	}
+
 	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
 				 max_txqs);
 	if (status) {
@@ -3166,6 +2500,7 @@ err_vsi:
 bool ice_is_reset_in_progress(unsigned long *state)
 {
 	return test_bit(__ICE_RESET_OICR_RECV, state) ||
+	       test_bit(__ICE_DCBNL_DEVRESET, state) ||
 	       test_bit(__ICE_PFR_REQ, state) ||
 	       test_bit(__ICE_CORER_REQ, state) ||
 	       test_bit(__ICE_GLOBR_REQ, state);
@@ -3271,6 +2606,51 @@ char *ice_nvm_version_str(struct ice_hw *hw)
 }
 
 /**
+ * ice_update_ring_stats - Update ring statistics
+ * @ring: ring to update
+ * @cont: used to increment per-vector counters
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void
+ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
+		      u64 pkts, u64 bytes)
+{
+	ring->stats.bytes += bytes;
+	ring->stats.pkts += pkts;
+	cont->total_bytes += bytes;
+	cont->total_pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
+{
+	u64_stats_update_begin(&tx_ring->syncp);
+	ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
+	u64_stats_update_end(&tx_ring->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
+{
+	u64_stats_update_begin(&rx_ring->syncp);
+	ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
+	u64_stats_update_end(&rx_ring->syncp);
+}
+
+/**
  * ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
  * @vsi: the VSI being configured MAC filter
  * @macaddr: the MAC address to be added.
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 47bc033fff20..e86aa60c0254 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -6,18 +6,7 @@
 
 #include "ice.h"
 
-struct ice_txq_meta {
-	/* Tx-scheduler element identifier */
-	u32 q_teid;
-	/* Entry in VSI's txq_map bitmap */
-	u16 q_id;
-	/* Relative index of Tx queue within TC */
-	u16 q_handle;
-	/* VSI index that Tx queue belongs to */
-	u16 vsi_idx;
-	/* TC number that Tx queue belongs to */
-	u8 tc;
-};
+const char *ice_vsi_type_str(enum ice_vsi_type type);
 
 int
 ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
@@ -33,24 +22,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
 
 void ice_vsi_cfg_msix(struct ice_vsi *vsi);
 
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
-
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
-
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-		     u16 rel_vmvf_num, struct ice_ring *ring,
-		     struct ice_txq_meta *txq_meta);
-
-void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
-		       struct ice_txq_meta *txq_meta);
-
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
-#endif /* CONFIG_PCI_IOV */
-
 int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
 
 int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
@@ -67,6 +38,10 @@ int
 ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 			  u16 rel_vmvf_num);
 
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
 int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
 
 void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
@@ -89,6 +64,10 @@ int ice_vsi_release(struct ice_vsi *vsi);
 
 void ice_vsi_close(struct ice_vsi *vsi);
 
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked);
+
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked);
+
 int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id);
 
 int
@@ -98,16 +77,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi);
 
 bool ice_is_reset_in_progress(unsigned long *state);
 
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
-
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
-
 void ice_vsi_put_qs(struct ice_vsi *vsi);
 
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
-#endif /* CONFIG_DCB */
-
 void ice_vsi_dis_irq(struct ice_vsi *vsi);
 
 void ice_vsi_free_irq(struct ice_vsi *vsi);
@@ -118,6 +89,12 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
 
 int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
 
+void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
+
 u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
 
 char *ice_nvm_version_str(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 214cd6eca405..5681e3be81f2 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6,8 +6,10 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
 
 #define DRV_VERSION_MAJOR 0
 #define DRV_VERSION_MINOR 8
@@ -435,42 +437,11 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf)
 }
 
 /**
- * ice_dis_vsi - pause a VSI
- * @vsi: the VSI being paused
- * @locked: is the rtnl_lock already held
- */
-static void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
-{
-	if (test_bit(__ICE_DOWN, vsi->state))
-		return;
-
-	set_bit(__ICE_NEEDS_RESTART, vsi->state);
-
-	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
-		if (netif_running(vsi->netdev)) {
-			if (!locked)
-				rtnl_lock();
-
-			ice_stop(vsi->netdev);
-
-			if (!locked)
-				rtnl_unlock();
-		} else {
-			ice_vsi_close(vsi);
-		}
-	}
-}
-
-/**
  * ice_pf_dis_all_vsi - Pause all VSIs on a PF
  * @pf: the PF
  * @locked: is the rtnl_lock already held
  */
-#ifdef CONFIG_DCB
-void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
-#else
 static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
-#endif /* CONFIG_DCB */
 {
 	int v;
 
@@ -636,8 +607,14 @@ static void ice_print_topo_conflict(struct ice_vsi *vsi)
 	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
 	case ICE_AQ_LINK_TOPO_CONFLICT:
 	case ICE_AQ_LINK_MEDIA_CONFLICT:
+	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
+	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
+	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
 		netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n");
 		break;
+	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
+		netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+		break;
 	default:
 		break;
 	}
@@ -1661,6 +1638,324 @@ free_q_irqs:
 }
 
 /**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	int i;
+
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		u16 xdp_q_idx = vsi->alloc_txq + i;
+		struct ice_ring *xdp_ring;
+
+		xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+
+		if (!xdp_ring)
+			goto free_xdp_rings;
+
+		xdp_ring->q_index = xdp_q_idx;
+		xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+		xdp_ring->ring_active = false;
+		xdp_ring->vsi = vsi;
+		xdp_ring->netdev = NULL;
+		xdp_ring->dev = dev;
+		xdp_ring->count = vsi->num_tx_desc;
+		vsi->xdp_rings[i] = xdp_ring;
+		if (ice_setup_tx_ring(xdp_ring))
+			goto free_xdp_rings;
+		ice_set_ring_xdp(xdp_ring);
+		xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+	}
+
+	return 0;
+
+free_xdp_rings:
+	for (; i >= 0; i--)
+		if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+			ice_free_tx_ring(vsi->xdp_rings[i]);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+	struct bpf_prog *old_prog;
+	int i;
+
+	old_prog = xchg(&vsi->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	ice_for_each_rxq(vsi, i)
+		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	int xdp_rings_rem = vsi->num_xdp_txq;
+	struct ice_pf *pf = vsi->back;
+	struct ice_qs_cfg xdp_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_txqs,
+		.pf_map_size = pf->max_pf_txqs,
+		.q_count = vsi->num_xdp_txq,
+		.scatter_count = ICE_MAX_SCATTER_TXQS,
+		.vsi_map = vsi->txq_map,
+		.vsi_map_offset = vsi->alloc_txq,
+		.mapping_mode = ICE_VSI_MAP_CONTIG
+	};
+	enum ice_status status;
+	int i, v_idx;
+
+	vsi->xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
+				      sizeof(*vsi->xdp_rings), GFP_KERNEL);
+	if (!vsi->xdp_rings)
+		return -ENOMEM;
+
+	vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+	if (__ice_vsi_get_qs(&xdp_qs_cfg))
+		goto err_map_xdp;
+
+	if (ice_xdp_alloc_setup_rings(vsi))
+		goto clear_xdp_rings;
+
+	/* follow the logic from ice_vsi_map_rings_to_vectors */
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		int xdp_rings_per_v, q_id, q_base;
+
+		xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+					       vsi->num_q_vectors - v_idx);
+		q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+			struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+			xdp_ring->q_vector = q_vector;
+			xdp_ring->next = q_vector->tx.ring;
+			q_vector->tx.ring = xdp_ring;
+		}
+		xdp_rings_rem -= xdp_rings_per_v;
+	}
+
+	/* omit the scheduler update if in reset path; XDP queues will be
+	 * taken into account at the end of ice_vsi_rebuild, where
+	 * ice_cfg_vsi_lan is being called
+	 */
+	if (ice_is_reset_in_progress(pf->state))
+		return 0;
+
+	/* tell the Tx scheduler that right now we have
+	 * additional queues
+	 */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"Failed VSI LAN queue config for XDP, error:%d\n",
+			status);
+		goto clear_xdp_rings;
+	}
+	ice_vsi_assign_bpf_prog(vsi, prog);
+
+	return 0;
+clear_xdp_rings:
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		if (vsi->xdp_rings[i]) {
+			kfree_rcu(vsi->xdp_rings[i], rcu);
+			vsi->xdp_rings[i] = NULL;
+		}
+
+err_map_xdp:
+	mutex_lock(&pf->avail_q_mutex);
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
+	return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_pf *pf = vsi->back;
+	int i, v_idx;
+
+	/* q_vectors are freed in reset path so there's no point in detaching
+	 * rings; in case of rebuild being triggered not from reset reset bits
+	 * in pf->state won't be set, so additionally check first q_vector
+	 * against NULL
+	 */
+	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+		goto free_qmap;
+
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		struct ice_ring *ring;
+
+		ice_for_each_ring(ring, q_vector->tx)
+			if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+				break;
+
+		/* restore the value of last node prior to XDP setup */
+		q_vector->tx.ring = ring;
+	}
+
+free_qmap:
+	mutex_lock(&pf->avail_q_mutex);
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		if (vsi->xdp_rings[i]) {
+			if (vsi->xdp_rings[i]->desc)
+				ice_free_tx_ring(vsi->xdp_rings[i]);
+			kfree_rcu(vsi->xdp_rings[i], rcu);
+			vsi->xdp_rings[i] = NULL;
+		}
+
+	devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
+	vsi->xdp_rings = NULL;
+
+	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+		return 0;
+
+	ice_vsi_assign_bpf_prog(vsi, NULL);
+
+	/* notify Tx scheduler that we destroyed XDP queues and bring
+	 * back the old number of child nodes
+	 */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq;
+
+	return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+			       max_txqs);
+}
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+		   struct netlink_ext_ack *extack)
+{
+	int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+	bool if_running = netif_running(vsi->netdev);
+	int ret = 0, xdp_ring_err = 0;
+
+	if (frame_size > vsi->rx_buf_len) {
+		NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
+		return -EOPNOTSUPP;
+	}
+
+	/* need to stop netdev while setting up the program for Rx rings */
+	if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
+		ret = ice_down(vsi);
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Preparing device for XDP attach failed");
+			return ret;
+		}
+	}
+
+	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+		vsi->num_xdp_txq = vsi->alloc_txq;
+		xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Setting up XDP Tx resources failed");
+	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+		xdp_ring_err = ice_destroy_xdp_rings(vsi);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Freeing XDP Tx resources failed");
+	} else {
+		ice_vsi_assign_bpf_prog(vsi, prog);
+	}
+
+	if (if_running)
+		ret = ice_up(vsi);
+
+	if (!ret && prog && vsi->xsk_umems) {
+		int i;
+
+		ice_for_each_rxq(vsi, i) {
+			struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+			if (rx_ring->xsk_umem)
+				napi_schedule(&rx_ring->q_vector->napi);
+		}
+	}
+
+	return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (vsi->type != ICE_VSI_PF) {
+		NL_SET_ERR_MSG_MOD(xdp->extack,
+				   "XDP can be loaded only on PF VSI");
+		return -EINVAL;
+	}
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+	case XDP_QUERY_PROG:
+		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
+		return 0;
+	case XDP_SETUP_XSK_UMEM:
+		return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
+					  xdp->xsk.queue_id);
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
  * ice_ena_misc_vector - enable the non-queue interrupts
  * @pf: board private structure
  */
@@ -2219,6 +2514,11 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 		status = -ENODEV;
 		goto unroll_vsi_setup;
 	}
+	/* netdev has to be configured before setting frame size */
+	ice_vsi_cfg_frame_size(vsi);
+
+	/* Setup DCB netlink interface */
+	ice_dcbnl_setup(vsi);
 
 	/* registering the NAPI handler requires both the queues and
 	 * netdev to be created, which are done in ice_pf_vsi_setup()
@@ -2300,6 +2600,7 @@ static void ice_deinit_pf(struct ice_pf *pf)
 {
 	ice_service_task_stop(pf);
 	mutex_destroy(&pf->sw_mutex);
+	mutex_destroy(&pf->tc_mutex);
 	mutex_destroy(&pf->avail_q_mutex);
 
 	if (pf->avail_txqs) {
@@ -2349,6 +2650,7 @@ static int ice_init_pf(struct ice_pf *pf)
 	ice_set_pf_caps(pf);
 
 	mutex_init(&pf->sw_mutex);
+	mutex_init(&pf->tc_mutex);
 
 	/* setup service timer and periodic service task */
 	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
@@ -2598,7 +2900,7 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
 				ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
 		break;
 	case ICE_ERR_AQ_ERROR:
-		switch (hw->adminq.sq_last_status) {
+		switch (hw->pkg_dwnld_status) {
 		case ICE_AQ_RC_ENOSEC:
 		case ICE_AQ_RC_EBADSIG:
 			dev_err(dev,
@@ -2831,6 +3133,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	hw = &pf->hw;
 	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+	pci_save_state(pdev);
+
 	hw->back = pf;
 	hw->vendor_id = pdev->vendor;
 	hw->device_id = pdev->device;
@@ -2976,6 +3280,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 		ice_cfg_lldp_mib_change(&pf->hw, true);
 	}
 
+	/* print PCI link speed and width */
+	pcie_print_link_status(pf->pdev);
+
 	return 0;
 
 err_alloc_sw_unroll:
@@ -3027,12 +3334,13 @@ static void ice_remove(struct pci_dev *pdev)
 	}
 	ice_deinit_pf(pf);
 	ice_deinit_hw(&pf->hw);
-	ice_clear_interrupt_scheme(pf);
 	/* Issue a PFR as part of the prescribed driver unload flow.  Do not
 	 * do it via ice_schedule_reset() since there is no need to rebuild
 	 * and the service task is already stopped.
 	 */
 	ice_reset(&pf->hw, ICE_RESET_PFR);
+	pci_wait_for_pending_transaction(pdev);
+	ice_clear_interrupt_scheme(pf);
 	pci_disable_pcie_error_reporting(pdev);
 }
 
@@ -3347,6 +3655,48 @@ static void ice_set_rx_mode(struct net_device *netdev)
 }
 
 /**
+ * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
+ * @netdev: network interface device structure
+ * @queue_index: Queue ID
+ * @maxrate: maximum bandwidth in Mbps
+ */
+static int
+ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	enum ice_status status;
+	u16 q_handle;
+	u8 tc;
+
+	/* Validate maxrate requested is within permitted range */
+	if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
+		netdev_err(netdev,
+			   "Invalid max rate %d specified for the queue %d\n",
+			   maxrate, queue_index);
+		return -EINVAL;
+	}
+
+	q_handle = vsi->tx_rings[queue_index]->q_handle;
+	tc = ice_dcb_get_tc(vsi, queue_index);
+
+	/* Set BW back to default, when user set maxrate to 0 */
+	if (!maxrate)
+		status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
+					       q_handle, ICE_MAX_BW);
+	else
+		status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
+					  q_handle, ICE_MAX_BW, maxrate * 1000);
+	if (status) {
+		netdev_err(netdev,
+			   "Unable to set Tx max rate, error %d\n", status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
  * ice_fdb_add - add an entry to the hardware database
  * @ndm: the input from the stack
  * @tb: pointer to array of nladdr (unused)
@@ -3426,6 +3776,7 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
 	int ret = 0;
 
 	/* Don't set any netdev advanced features with device in Safe Mode */
@@ -3435,6 +3786,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
 		return ret;
 	}
 
+	/* Do not change setting during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(&vsi->back->pdev->dev,
+			"Device is resetting, changing advanced netdev features temporarily unavailable.\n");
+		return -EBUSY;
+	}
+
 	/* Multiple features can be changed in one call so keep features in
 	 * separate if/else statements to guarantee each feature is checked
 	 */
@@ -3505,6 +3863,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
 	ice_vsi_cfg_dcb_rings(vsi);
 
 	err = ice_vsi_cfg_lan_txqs(vsi);
+	if (!err && ice_is_xdp_ena_vsi(vsi))
+		err = ice_vsi_cfg_xdp_txqs(vsi);
 	if (!err)
 		err = ice_vsi_cfg_rxqs(vsi);
 
@@ -3920,6 +4280,13 @@ int ice_down(struct ice_vsi *vsi)
 		netdev_err(vsi->netdev,
 			   "Failed stop Tx rings, VSI %d error %d\n",
 			   vsi->vsi_num, tx_err);
+	if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+		tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+		if (tx_err)
+			netdev_err(vsi->netdev,
+				   "Failed stop XDP rings, VSI %d error %d\n",
+				   vsi->vsi_num, tx_err);
+	}
 
 	rx_err = ice_vsi_stop_rx_rings(vsi);
 	if (rx_err)
@@ -3970,8 +4337,13 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
 	}
 
 	ice_for_each_txq(vsi, i) {
-		vsi->tx_rings[i]->netdev = vsi->netdev;
-		err = ice_setup_tx_ring(vsi->tx_rings[i]);
+		struct ice_ring *ring = vsi->tx_rings[i];
+
+		if (!ring)
+			return -EINVAL;
+
+		ring->netdev = vsi->netdev;
+		err = ice_setup_tx_ring(ring);
 		if (err)
 			break;
 	}
@@ -3996,8 +4368,13 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
 	}
 
 	ice_for_each_rxq(vsi, i) {
-		vsi->rx_rings[i]->netdev = vsi->netdev;
-		err = ice_setup_rx_ring(vsi->rx_rings[i]);
+		struct ice_ring *ring = vsi->rx_rings[i];
+
+		if (!ring)
+			return -EINVAL;
+
+		ring->netdev = vsi->netdev;
+		err = ice_setup_rx_ring(ring);
 		if (err)
 			break;
 	}
@@ -4089,54 +4466,6 @@ static void ice_vsi_release_all(struct ice_pf *pf)
 }
 
 /**
- * ice_ena_vsi - resume a VSI
- * @vsi: the VSI being resume
- * @locked: is the rtnl_lock already held
- */
-static int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
-{
-	int err = 0;
-
-	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
-		return 0;
-
-	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
-
-	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
-		if (netif_running(vsi->netdev)) {
-			if (!locked)
-				rtnl_lock();
-
-			err = ice_open(vsi->netdev);
-
-			if (!locked)
-				rtnl_unlock();
-		}
-	}
-
-	return err;
-}
-
-/**
- * ice_pf_ena_all_vsi - Resume all VSIs on a PF
- * @pf: the PF
- * @locked: is the rtnl_lock already held
- */
-#ifdef CONFIG_DCB
-int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
-{
-	int v;
-
-	ice_for_each_vsi(pf, v)
-		if (pf->vsi[v])
-			if (ice_ena_vsi(pf->vsi[v], locked))
-				return -EIO;
-
-	return 0;
-}
-#endif /* CONFIG_DCB */
-
-/**
  * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
  * @pf: pointer to the PF instance
  * @type: VSI type to rebuild
@@ -4158,8 +4487,8 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 		err = ice_vsi_rebuild(vsi);
 		if (err) {
 			dev_err(&pf->pdev->dev,
-				"rebuild VSI failed, err %d, VSI index %d, type %d\n",
-				err, vsi->idx, type);
+				"rebuild VSI failed, err %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
 			return err;
 		}
 
@@ -4167,8 +4496,8 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 		status = ice_replay_vsi(&pf->hw, vsi->idx);
 		if (status) {
 			dev_err(&pf->pdev->dev,
-				"replay VSI failed, status %d, VSI index %d, type %d\n",
-				status, vsi->idx, type);
+				"replay VSI failed, status %d, VSI index %d, type %s\n",
+				status, vsi->idx, ice_vsi_type_str(type));
 			return -EIO;
 		}
 
@@ -4181,13 +4510,13 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 		err = ice_ena_vsi(vsi, false);
 		if (err) {
 			dev_err(&pf->pdev->dev,
-				"enable VSI failed, err %d, VSI index %d, type %d\n",
-				err, vsi->idx, type);
+				"enable VSI failed, err %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
 			return err;
 		}
 
-		dev_info(&pf->pdev->dev, "VSI rebuilt. VSI index %d, type %d\n",
-			 vsi->idx, type);
+		dev_info(&pf->pdev->dev, "VSI rebuilt. VSI index %d, type %s\n",
+			 vsi->idx, ice_vsi_type_str(type));
 	}
 
 	return 0;
@@ -4329,6 +4658,18 @@ clear_recovery:
 }
 
 /**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+	if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
+	else
+		return ICE_RXBUF_3072;
+}
+
+/**
  * ice_change_mtu - NDO callback to change the MTU
  * @netdev: network interface device structure
  * @new_mtu: new value for maximum frame size
@@ -4347,6 +4688,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 		return 0;
 	}
 
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		int frame_size = ice_max_xdp_frame_size(vsi);
+
+		if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+			netdev_err(netdev, "max MTU for XDP usage is %d\n",
+				   frame_size - ICE_ETH_PKT_HDR_PAD);
+			return -EINVAL;
+		}
+	}
+
 	if (new_mtu < netdev->min_mtu) {
 		netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
 			   netdev->min_mtu);
@@ -4864,6 +5215,7 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_change_mtu = ice_change_mtu,
 	.ndo_get_stats64 = ice_get_stats64,
+	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
 	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
 	.ndo_set_vf_mac = ice_set_vf_mac,
 	.ndo_get_vf_config = ice_get_vf_cfg,
@@ -4878,4 +5230,7 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_fdb_add = ice_fdb_add,
 	.ndo_fdb_del = ice_fdb_del,
 	.ndo_tx_timeout = ice_tx_timeout,
+	.ndo_bpf = ice_xdp,
+	.ndo_xdp_xmit = ice_xdp_xmit,
+	.ndo_xsk_wakeup = ice_xsk_wakeup,
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index bcb431f1bd92..57c73f613f32 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -219,8 +219,7 @@ static void ice_release_nvm(struct ice_hw *hw)
  *
  * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
  */
-static enum ice_status
-ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
 {
 	enum ice_status status;
 
@@ -242,9 +241,10 @@ ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
  */
 enum ice_status ice_init_nvm(struct ice_hw *hw)
 {
+	u16 oem_hi, oem_lo, boot_cfg_tlv, boot_cfg_tlv_len;
 	struct ice_nvm_info *nvm = &hw->nvm;
 	u16 eetrack_lo, eetrack_hi;
-	enum ice_status status = 0;
+	enum ice_status status;
 	u32 fla, gens_stat;
 	u8 sr_size;
 
@@ -261,15 +261,15 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
 	fla = rd32(hw, GLNVM_FLA);
 	if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */
 		nvm->blank_nvm_mode = false;
-	} else { /* Blank programming mode */
+	} else {
+		/* Blank programming mode */
 		nvm->blank_nvm_mode = true;
-		status = ICE_ERR_NVM_BLANK_MODE;
 		ice_debug(hw, ICE_DBG_NVM,
 			  "NVM init error: unsupported blank mode.\n");
-		return status;
+		return ICE_ERR_NVM_BLANK_MODE;
 	}
 
-	status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver);
+	status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &nvm->ver);
 	if (status) {
 		ice_debug(hw, ICE_DBG_INIT,
 			  "Failed to read DEV starter version.\n");
@@ -287,9 +287,42 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
 		return status;
 	}
 
-	hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
+	nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
 
-	return status;
+	status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len,
+					ICE_SR_BOOT_CFG_PTR);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Failed to read Boot Configuration Block TLV.\n");
+		return status;
+	}
+
+	/* Boot Configuration Block must have length at least 2 words
+	 * (Combo Image Version High and Combo Image Version Low)
+	 */
+	if (boot_cfg_tlv_len < 2) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Invalid Boot Configuration Block TLV size.\n");
+		return ICE_ERR_INVAL_SIZE;
+	}
+
+	status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF),
+				  &oem_hi);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER hi.\n");
+		return status;
+	}
+
+	status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF + 1),
+				  &oem_lo);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER lo.\n");
+		return status;
+	}
+
+	nvm->oem_ver = ((u32)oem_hi << 16) | oem_lo;
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
new file mode 100644
index 000000000000..a9fa011c22c6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_NVM_H_
+#define _ICE_NVM_H_
+
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data);
+#endif /* _ICE_NVM_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 2fde9653a608..84f609996ed5 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -411,6 +411,27 @@ ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
 }
 
 /**
+ * ice_aq_cfg_sched_elems - configures scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to configure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_cfgd: returns total number of elements configured
+ * @cd: pointer to command details structure or NULL
+ *
+ * Configure scheduling elements (0x0403)
+ */
+static enum ice_status
+ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req,
+		       struct ice_aqc_conf_elem *buf, u16 buf_size,
+		       u16 *elems_cfgd, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_cfg_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_cfgd, cd);
+}
+
+/**
  * ice_aq_suspend_sched_elems - suspend scheduler elements
  * @hw: pointer to the HW struct
  * @elems_req: number of elements to suspend
@@ -557,6 +578,149 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
 }
 
 /**
+ * ice_aq_rl_profile - performs a rate limiting task
+ * @hw: pointer to the HW struct
+ * @opcode:opcode for add, query, or remove profile(s)
+ * @num_profiles: the number of profiles
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_processed: number of processed add or remove profile(s) to return
+ * @cd: pointer to command details structure
+ *
+ * RL profile function to add, query, or remove profile(s)
+ */
+static enum ice_status
+ice_aq_rl_profile(struct ice_hw *hw, enum ice_adminq_opc opcode,
+		  u16 num_profiles, struct ice_aqc_rl_profile_generic_elem *buf,
+		  u16 buf_size, u16 *num_processed, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_rl_profile *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.rl_profile;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->num_profiles = cpu_to_le16(num_profiles);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && num_processed)
+		*num_processed = le16_to_cpu(cmd->num_processed);
+	return status;
+}
+
+/**
+ * ice_aq_add_rl_profile - adds rate limiting profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to be add
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_added: total number of profiles added to return
+ * @cd: pointer to command details structure
+ *
+ * Add RL profile (0x0410)
+ */
+static enum ice_status
+ice_aq_add_rl_profile(struct ice_hw *hw, u16 num_profiles,
+		      struct ice_aqc_rl_profile_generic_elem *buf,
+		      u16 buf_size, u16 *num_profiles_added,
+		      struct ice_sq_cd *cd)
+{
+	return ice_aq_rl_profile(hw, ice_aqc_opc_add_rl_profiles,
+				 num_profiles, buf,
+				 buf_size, num_profiles_added, cd);
+}
+
+/**
+ * ice_aq_remove_rl_profile - removes RL profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to remove
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_removed: total number of profiles removed to return
+ * @cd: pointer to command details structure or NULL
+ *
+ * Remove RL profile (0x0415)
+ */
+static enum ice_status
+ice_aq_remove_rl_profile(struct ice_hw *hw, u16 num_profiles,
+			 struct ice_aqc_rl_profile_generic_elem *buf,
+			 u16 buf_size, u16 *num_profiles_removed,
+			 struct ice_sq_cd *cd)
+{
+	return ice_aq_rl_profile(hw, ice_aqc_opc_remove_rl_profiles,
+				 num_profiles, buf,
+				 buf_size, num_profiles_removed, cd);
+}
+
+/**
+ * ice_sched_del_rl_profile - remove RL profile
+ * @hw: pointer to the HW struct
+ * @rl_info: rate limit profile information
+ *
+ * If the profile ID is not referenced anymore, it removes profile ID with
+ * its associated parameters from HW DB,and locally. The caller needs to
+ * hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_del_rl_profile(struct ice_hw *hw,
+			 struct ice_aqc_rl_profile_info *rl_info)
+{
+	struct ice_aqc_rl_profile_generic_elem *buf;
+	u16 num_profiles_removed;
+	enum ice_status status;
+	u16 num_profiles = 1;
+
+	if (rl_info->prof_id_ref != 0)
+		return ICE_ERR_IN_USE;
+
+	/* Safe to remove profile ID */
+	buf = (struct ice_aqc_rl_profile_generic_elem *)
+		&rl_info->profile;
+	status = ice_aq_remove_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+					  &num_profiles_removed, NULL);
+	if (status || num_profiles_removed != num_profiles)
+		return ICE_ERR_CFG;
+
+	/* Delete stale entry now */
+	list_del(&rl_info->list_entry);
+	devm_kfree(ice_hw_to_dev(hw), rl_info);
+	return status;
+}
+
+/**
+ * ice_sched_clear_rl_prof - clears RL prof entries
+ * @pi: port information structure
+ *
+ * This function removes all RL profile from HW as well as from SW DB.
+ */
+static void ice_sched_clear_rl_prof(struct ice_port_info *pi)
+{
+	u16 ln;
+
+	for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+		struct ice_aqc_rl_profile_info *rl_prof_elem;
+		struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+		list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+					 &pi->rl_prof_list[ln], list_entry) {
+			struct ice_hw *hw = pi->hw;
+			enum ice_status status;
+
+			rl_prof_elem->prof_id_ref = 0;
+			status = ice_sched_del_rl_profile(hw, rl_prof_elem);
+			if (status) {
+				ice_debug(hw, ICE_DBG_SCHED,
+					  "Remove rl profile failed\n");
+				/* On error, free mem required */
+				list_del(&rl_prof_elem->list_entry);
+				devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+			}
+		}
+	}
+}
+
+/**
  * ice_sched_clear_agg - clears the aggregator related information
  * @hw: pointer to the hardware structure
  *
@@ -592,6 +756,8 @@ static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
 {
 	if (!pi)
 		return;
+	/* remove RL profiles related lists */
+	ice_sched_clear_rl_prof(pi);
 	if (pi->root) {
 		ice_free_sched_node(pi, pi->root);
 		pi->root = NULL;
@@ -1014,6 +1180,8 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi)
 	/* initialize the port for handling the scheduler tree */
 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
 	mutex_init(&pi->sched_lock);
+	for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+		INIT_LIST_HEAD(&pi->rl_prof_list[i]);
 
 err_init_port:
 	if (status && pi->root) {
@@ -1062,8 +1230,8 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
 	 * and so on. This array will be populated from root (index 0) to
 	 * qgroup layer 7. Leaf node has no children.
 	 */
-	for (i = 0; i < hw->num_tx_sched_layers; i++) {
-		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
+	for (i = 0; i < hw->num_tx_sched_layers - 1; i++) {
+		max_sibl = buf->layer_props[i + 1].max_sibl_grp_sz;
 		hw->max_children[i] = le16_to_cpu(max_sibl);
 	}
 
@@ -1670,3 +1838,1095 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
 {
 	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
 }
+
+/**
+ * ice_sched_rm_unused_rl_prof - remove unused RL profile
+ * @pi: port information structure
+ *
+ * This function removes unused rate limit profiles from the HW and
+ * SW DB. The caller needs to hold scheduler lock.
+ */
+static void ice_sched_rm_unused_rl_prof(struct ice_port_info *pi)
+{
+	u16 ln;
+
+	for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+		struct ice_aqc_rl_profile_info *rl_prof_elem;
+		struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+		list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+					 &pi->rl_prof_list[ln], list_entry) {
+			if (!ice_sched_del_rl_profile(pi->hw, rl_prof_elem))
+				ice_debug(pi->hw, ICE_DBG_SCHED,
+					  "Removed rl profile\n");
+		}
+	}
+}
+
+/**
+ * ice_sched_update_elem - update element
+ * @hw: pointer to the HW struct
+ * @node: pointer to node
+ * @info: node info to update
+ *
+ * It updates the HW DB, and local SW DB of node. It updates the scheduling
+ * parameters of node from argument info data buffer (Info->data buf) and
+ * returns success or error on config sched element failure. The caller
+ * needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node,
+		      struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_aqc_conf_elem buf;
+	enum ice_status status;
+	u16 elem_cfgd = 0;
+	u16 num_elems = 1;
+
+	buf.generic[0] = *info;
+	/* Parent TEID is reserved field in this aq call */
+	buf.generic[0].parent_teid = 0;
+	/* Element type is reserved field in this aq call */
+	buf.generic[0].data.elem_type = 0;
+	/* Flags is reserved field in this aq call */
+	buf.generic[0].data.flags = 0;
+
+	/* Update HW DB */
+	/* Configure element node */
+	status = ice_aq_cfg_sched_elems(hw, num_elems, &buf, sizeof(buf),
+					&elem_cfgd, NULL);
+	if (status || elem_cfgd != num_elems) {
+		ice_debug(hw, ICE_DBG_SCHED, "Config sched elem error\n");
+		return ICE_ERR_CFG;
+	}
+
+	/* Config success case */
+	/* Now update local SW DB */
+	/* Only copy the data portion of info buffer */
+	node->info.data = info->data;
+	return status;
+}
+
+/**
+ * ice_sched_cfg_node_bw_alloc - configure node BW weight/alloc params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @bw_alloc: BW weight/allocation
+ *
+ * This function configures node element's BW allocation.
+ */
+static enum ice_status
+ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
+			    enum ice_rl_type rl_type, u8 bw_alloc)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+	enum ice_status status;
+
+	buf = node->info;
+	data = &buf.data;
+	if (rl_type == ICE_MIN_BW) {
+		data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+		data->cir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+	} else if (rl_type == ICE_MAX_BW) {
+		data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+		data->eir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+	} else {
+		return ICE_ERR_PARAM;
+	}
+
+	/* Configure element */
+	status = ice_sched_update_elem(hw, node, &buf);
+	return status;
+}
+
+/**
+ * ice_set_clear_cir_bw - set or clear CIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear CIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->cir_bw.bw = 0;
+	} else {
+		/* Save type of BW information */
+		set_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->cir_bw.bw = bw;
+	}
+}
+
+/**
+ * ice_set_clear_eir_bw - set or clear EIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear EIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = 0;
+	} else {
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element.
+		 * First clear earlier saved shared BW information.
+		 */
+		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = 0;
+		/* save EIR BW information */
+		set_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = bw;
+	}
+}
+
+/**
+ * ice_set_clear_shared_bw - set or clear shared BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear shared bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = 0;
+	} else {
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element.
+		 * First clear earlier saved EIR BW information.
+		 */
+		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = 0;
+		/* save shared BW information */
+		set_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = bw;
+	}
+}
+
+/**
+ * ice_sched_calc_wakeup - calculate RL profile wakeup parameter
+ * @bw: bandwidth in Kbps
+ *
+ * This function calculates the wakeup parameter of RL profile.
+ */
+static u16 ice_sched_calc_wakeup(s32 bw)
+{
+	s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f;
+	s32 wakeup_f_int;
+	u16 wakeup = 0;
+
+	/* Get the wakeup integer value */
+	bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+	wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec);
+	if (wakeup_int > 63) {
+		wakeup = (u16)((1 << 15) | wakeup_int);
+	} else {
+		/* Calculate fraction value up to 4 decimals
+		 * Convert Integer value to a constant multiplier
+		 */
+		wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int;
+		wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER *
+					   ICE_RL_PROF_FREQUENCY,
+				      bytes_per_sec);
+
+		/* Get Fraction value */
+		wakeup_f = wakeup_a - wakeup_b;
+
+		/* Round up the Fractional value via Ceil(Fractional value) */
+		if (wakeup_f > div64_long(ICE_RL_PROF_MULTIPLIER, 2))
+			wakeup_f += 1;
+
+		wakeup_f_int = (s32)div64_long(wakeup_f * ICE_RL_PROF_FRACTION,
+					       ICE_RL_PROF_MULTIPLIER);
+		wakeup |= (u16)(wakeup_int << 9);
+		wakeup |= (u16)(0x1ff & wakeup_f_int);
+	}
+
+	return wakeup;
+}
+
+/**
+ * ice_sched_bw_to_rl_profile - convert BW to profile parameters
+ * @bw: bandwidth in Kbps
+ * @profile: profile parameters to return
+ *
+ * This function converts the BW to profile structure format.
+ */
+static enum ice_status
+ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	s64 bytes_per_sec, ts_rate, mv_tmp;
+	bool found = false;
+	s32 encode = 0;
+	s64 mv = 0;
+	s32 i;
+
+	/* Bw settings range is from 0.5Mb/sec to 100Gb/sec */
+	if (bw < ICE_SCHED_MIN_BW || bw > ICE_SCHED_MAX_BW)
+		return status;
+
+	/* Bytes per second from Kbps */
+	bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+
+	/* encode is 6 bits but really useful are 5 bits */
+	for (i = 0; i < 64; i++) {
+		u64 pow_result = BIT_ULL(i);
+
+		ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY,
+				     pow_result * ICE_RL_PROF_TS_MULTIPLIER);
+		if (ts_rate <= 0)
+			continue;
+
+		/* Multiplier value */
+		mv_tmp = div64_long(bytes_per_sec * ICE_RL_PROF_MULTIPLIER,
+				    ts_rate);
+
+		/* Round to the nearest ICE_RL_PROF_MULTIPLIER */
+		mv = round_up_64bit(mv_tmp, ICE_RL_PROF_MULTIPLIER);
+
+		/* First multiplier value greater than the given
+		 * accuracy bytes
+		 */
+		if (mv > ICE_RL_PROF_ACCURACY_BYTES) {
+			encode = i;
+			found = true;
+			break;
+		}
+	}
+	if (found) {
+		u16 wm;
+
+		wm = ice_sched_calc_wakeup(bw);
+		profile->rl_multiply = cpu_to_le16(mv);
+		profile->wake_up_calc = cpu_to_le16(wm);
+		profile->rl_encode = cpu_to_le16(encode);
+		status = 0;
+	} else {
+		status = ICE_ERR_DOES_NOT_EXIST;
+	}
+
+	return status;
+}
+
+/**
+ * ice_sched_add_rl_profile - add RL profile
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: specifies in which layer to create profile
+ *
+ * This function first checks the existing list for corresponding BW
+ * parameter. If it exists, it returns the associated profile otherwise
+ * it creates a new rate limit profile for requested BW, and adds it to
+ * the HW DB and local list. It returns the new profile or null on error.
+ * The caller needs to hold the scheduler lock.
+ */
+static struct ice_aqc_rl_profile_info *
+ice_sched_add_rl_profile(struct ice_port_info *pi,
+			 enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+	struct ice_aqc_rl_profile_generic_elem *buf;
+	struct ice_aqc_rl_profile_info *rl_prof_elem;
+	u16 profiles_added = 0, num_profiles = 1;
+	enum ice_status status;
+	struct ice_hw *hw;
+	u8 profile_type;
+
+	if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+		return NULL;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+		break;
+	case ICE_MAX_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+		break;
+	case ICE_SHARED_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+		break;
+	default:
+		return NULL;
+	}
+
+	if (!pi)
+		return NULL;
+	hw = pi->hw;
+	list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+			    list_entry)
+		if (rl_prof_elem->profile.flags == profile_type &&
+		    rl_prof_elem->bw == bw)
+			/* Return existing profile ID info */
+			return rl_prof_elem;
+
+	/* Create new profile ID */
+	rl_prof_elem = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rl_prof_elem),
+				    GFP_KERNEL);
+
+	if (!rl_prof_elem)
+		return NULL;
+
+	status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile);
+	if (status)
+		goto exit_add_rl_prof;
+
+	rl_prof_elem->bw = bw;
+	/* layer_num is zero relative, and fw expects level from 1 to 9 */
+	rl_prof_elem->profile.level = layer_num + 1;
+	rl_prof_elem->profile.flags = profile_type;
+	rl_prof_elem->profile.max_burst_size = cpu_to_le16(hw->max_burst_size);
+
+	/* Create new entry in HW DB */
+	buf = (struct ice_aqc_rl_profile_generic_elem *)
+		&rl_prof_elem->profile;
+	status = ice_aq_add_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+				       &profiles_added, NULL);
+	if (status || profiles_added != num_profiles)
+		goto exit_add_rl_prof;
+
+	/* Good entry - add in the list */
+	rl_prof_elem->prof_id_ref = 0;
+	list_add(&rl_prof_elem->list_entry, &pi->rl_prof_list[layer_num]);
+	return rl_prof_elem;
+
+exit_add_rl_prof:
+	devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+	return NULL;
+}
+
+/**
+ * ice_sched_cfg_node_bw_lmt - configure node sched params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @rl_prof_id: rate limit profile ID
+ *
+ * This function configures node element's BW limit.
+ */
+static enum ice_status
+ice_sched_cfg_node_bw_lmt(struct ice_hw *hw, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u16 rl_prof_id)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+
+	buf = node->info;
+	data = &buf.data;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+		data->cir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+		break;
+	case ICE_MAX_BW:
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element
+		 */
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+			return ICE_ERR_CFG;
+		data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+		data->eir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+		break;
+	case ICE_SHARED_BW:
+		/* Check for removing shared BW */
+		if (rl_prof_id == ICE_SCHED_NO_SHARED_RL_PROF_ID) {
+			/* remove shared profile */
+			data->valid_sections &= ~ICE_AQC_ELEM_VALID_SHARED;
+			data->srl_id = 0; /* clear SRL field */
+
+			/* enable back EIR to default profile */
+			data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+			data->eir_bw.bw_profile_idx =
+				cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+			break;
+		}
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element
+		 */
+		if ((data->valid_sections & ICE_AQC_ELEM_VALID_EIR) &&
+		    (le16_to_cpu(data->eir_bw.bw_profile_idx) !=
+			    ICE_SCHED_DFLT_RL_PROF_ID))
+			return ICE_ERR_CFG;
+		/* EIR BW is set to default, disable it */
+		data->valid_sections &= ~ICE_AQC_ELEM_VALID_EIR;
+		/* Okay to enable shared BW now */
+		data->valid_sections |= ICE_AQC_ELEM_VALID_SHARED;
+		data->srl_id = cpu_to_le16(rl_prof_id);
+		break;
+	default:
+		/* Unknown rate limit type */
+		return ICE_ERR_PARAM;
+	}
+
+	/* Configure element */
+	return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_sched_get_node_rl_prof_id - get node's rate limit profile ID
+ * @node: sched node
+ * @rl_type: rate limit type
+ *
+ * If existing profile matches, it returns the corresponding rate
+ * limit profile ID, otherwise it returns an invalid ID as error.
+ */
+static u16
+ice_sched_get_node_rl_prof_id(struct ice_sched_node *node,
+			      enum ice_rl_type rl_type)
+{
+	u16 rl_prof_id = ICE_SCHED_INVAL_PROF_ID;
+	struct ice_aqc_txsched_elem *data;
+
+	data = &node->info.data;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_CIR)
+			rl_prof_id = le16_to_cpu(data->cir_bw.bw_profile_idx);
+		break;
+	case ICE_MAX_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_EIR)
+			rl_prof_id = le16_to_cpu(data->eir_bw.bw_profile_idx);
+		break;
+	case ICE_SHARED_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+			rl_prof_id = le16_to_cpu(data->srl_id);
+		break;
+	default:
+		break;
+	}
+
+	return rl_prof_id;
+}
+
+/**
+ * ice_sched_get_rl_prof_layer - selects rate limit profile creation layer
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @layer_index: layer index
+ *
+ * This function returns requested profile creation layer.
+ */
+static u8
+ice_sched_get_rl_prof_layer(struct ice_port_info *pi, enum ice_rl_type rl_type,
+			    u8 layer_index)
+{
+	struct ice_hw *hw = pi->hw;
+
+	if (layer_index >= hw->num_tx_sched_layers)
+		return ICE_SCHED_INVAL_LAYER_NUM;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		if (hw->layer_info[layer_index].max_cir_rl_profiles)
+			return layer_index;
+		break;
+	case ICE_MAX_BW:
+		if (hw->layer_info[layer_index].max_eir_rl_profiles)
+			return layer_index;
+		break;
+	case ICE_SHARED_BW:
+		/* if current layer doesn't support SRL profile creation
+		 * then try a layer up or down.
+		 */
+		if (hw->layer_info[layer_index].max_srl_profiles)
+			return layer_index;
+		else if (layer_index < hw->num_tx_sched_layers - 1 &&
+			 hw->layer_info[layer_index + 1].max_srl_profiles)
+			return layer_index + 1;
+		else if (layer_index > 0 &&
+			 hw->layer_info[layer_index - 1].max_srl_profiles)
+			return layer_index - 1;
+		break;
+	default:
+		break;
+	}
+	return ICE_SCHED_INVAL_LAYER_NUM;
+}
+
+/**
+ * ice_sched_get_srl_node - get shared rate limit node
+ * @node: tree node
+ * @srl_layer: shared rate limit layer
+ *
+ * This function returns SRL node to be used for shared rate limit purpose.
+ * The caller needs to hold scheduler lock.
+ */
+static struct ice_sched_node *
+ice_sched_get_srl_node(struct ice_sched_node *node, u8 srl_layer)
+{
+	if (srl_layer > node->tx_sched_layer)
+		return node->children[0];
+	else if (srl_layer < node->tx_sched_layer)
+		/* Node can't be created without a parent. It will always
+		 * have a valid parent except root node.
+		 */
+		return node->parent;
+	else
+		return node;
+}
+
+/**
+ * ice_sched_rm_rl_profile - remove RL profile ID
+ * @pi: port information structure
+ * @layer_num: layer number where profiles are saved
+ * @profile_type: profile type like EIR, CIR, or SRL
+ * @profile_id: profile ID to remove
+ *
+ * This function removes rate limit profile from layer 'layer_num' of type
+ * 'profile_type' and profile ID as 'profile_id'. The caller needs to hold
+ * scheduler lock.
+ */
+static enum ice_status
+ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type,
+			u16 profile_id)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_elem;
+	enum ice_status status = 0;
+
+	if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+		return ICE_ERR_PARAM;
+	/* Check the existing list for RL profile */
+	list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+			    list_entry)
+		if (rl_prof_elem->profile.flags == profile_type &&
+		    le16_to_cpu(rl_prof_elem->profile.profile_id) ==
+		    profile_id) {
+			if (rl_prof_elem->prof_id_ref)
+				rl_prof_elem->prof_id_ref--;
+
+			/* Remove old profile ID from database */
+			status = ice_sched_del_rl_profile(pi->hw, rl_prof_elem);
+			if (status && status != ICE_ERR_IN_USE)
+				ice_debug(pi->hw, ICE_DBG_SCHED,
+					  "Remove rl profile failed\n");
+			break;
+		}
+	if (status == ICE_ERR_IN_USE)
+		status = 0;
+	return status;
+}
+
+/**
+ * ice_sched_set_node_bw_dflt - set node's bandwidth limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ * @layer_num: layer number where RL profiles are saved
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_node_bw_dflt(struct ice_port_info *pi,
+			   struct ice_sched_node *node,
+			   enum ice_rl_type rl_type, u8 layer_num)
+{
+	enum ice_status status;
+	struct ice_hw *hw;
+	u8 profile_type;
+	u16 rl_prof_id;
+	u16 old_id;
+
+	hw = pi->hw;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+		rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+		break;
+	case ICE_MAX_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+		rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+		break;
+	case ICE_SHARED_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+		/* No SRL is configured for default case */
+		rl_prof_id = ICE_SCHED_NO_SHARED_RL_PROF_ID;
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+	/* Save existing RL prof ID for later clean up */
+	old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+	/* Configure BW scheduling parameters */
+	status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+	if (status)
+		return status;
+
+	/* Remove stale RL profile ID */
+	if (old_id == ICE_SCHED_DFLT_RL_PROF_ID ||
+	    old_id == ICE_SCHED_INVAL_PROF_ID)
+		return 0;
+
+	return ice_sched_rm_rl_profile(pi, layer_num, profile_type, old_id);
+}
+
+/**
+ * ice_sched_set_eir_srl_excl - set EIR/SRL exclusiveness
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @layer_num: layer number where rate limit profiles are saved
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth value
+ *
+ * This function prepares node element's bandwidth to SRL or EIR exclusively.
+ * EIR BW and Shared BW profiles are mutually exclusive and hence only one of
+ * them may be set for any given element. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_eir_srl_excl(struct ice_port_info *pi,
+			   struct ice_sched_node *node,
+			   u8 layer_num, enum ice_rl_type rl_type, u32 bw)
+{
+	if (rl_type == ICE_SHARED_BW) {
+		/* SRL node passed in this case, it may be different node */
+		if (bw == ICE_SCHED_DFLT_BW)
+			/* SRL being removed, ice_sched_cfg_node_bw_lmt()
+			 * enables EIR to default. EIR is not set in this
+			 * case, so no additional action is required.
+			 */
+			return 0;
+
+		/* SRL being configured, set EIR to default here.
+		 * ice_sched_cfg_node_bw_lmt() disables EIR when it
+		 * configures SRL
+		 */
+		return ice_sched_set_node_bw_dflt(pi, node, ICE_MAX_BW,
+						  layer_num);
+	} else if (rl_type == ICE_MAX_BW &&
+		   node->info.data.valid_sections & ICE_AQC_ELEM_VALID_SHARED) {
+		/* Remove Shared profile. Set default shared BW call
+		 * removes shared profile for a node.
+		 */
+		return ice_sched_set_node_bw_dflt(pi, node,
+						  ICE_SHARED_BW,
+						  layer_num);
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_set_node_bw - set node's bandwidth
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: layer number
+ *
+ * This function adds new profile corresponding to requested BW, configures
+ * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile
+ * ID from local database. The caller needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node,
+		      enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_info;
+	enum ice_status status = ICE_ERR_PARAM;
+	struct ice_hw *hw = pi->hw;
+	u16 old_id, rl_prof_id;
+
+	rl_prof_info = ice_sched_add_rl_profile(pi, rl_type, bw, layer_num);
+	if (!rl_prof_info)
+		return status;
+
+	rl_prof_id = le16_to_cpu(rl_prof_info->profile.profile_id);
+
+	/* Save existing RL prof ID for later clean up */
+	old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+	/* Configure BW scheduling parameters */
+	status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+	if (status)
+		return status;
+
+	/* New changes has been applied */
+	/* Increment the profile ID reference count */
+	rl_prof_info->prof_id_ref++;
+
+	/* Check for old ID removal */
+	if ((old_id == ICE_SCHED_DFLT_RL_PROF_ID && rl_type != ICE_SHARED_BW) ||
+	    old_id == ICE_SCHED_INVAL_PROF_ID || old_id == rl_prof_id)
+		return 0;
+
+	return ice_sched_rm_rl_profile(pi, layer_num,
+				       rl_prof_info->profile.flags,
+				       old_id);
+}
+
+/**
+ * ice_sched_set_node_bw_lmt - set node's BW limit
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * It updates node's BW limit parameters like BW RL profile ID of type CIR,
+ * EIR, or SRL. The caller needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u32 bw)
+{
+	struct ice_sched_node *cfg_node = node;
+	enum ice_status status;
+
+	struct ice_hw *hw;
+	u8 layer_num;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	hw = pi->hw;
+	/* Remove unused RL profile IDs from HW and SW DB */
+	ice_sched_rm_unused_rl_prof(pi);
+	layer_num = ice_sched_get_rl_prof_layer(pi, rl_type,
+						node->tx_sched_layer);
+	if (layer_num >= hw->num_tx_sched_layers)
+		return ICE_ERR_PARAM;
+
+	if (rl_type == ICE_SHARED_BW) {
+		/* SRL node may be different */
+		cfg_node = ice_sched_get_srl_node(node, layer_num);
+		if (!cfg_node)
+			return ICE_ERR_CFG;
+	}
+	/* EIR BW and Shared BW profiles are mutually exclusive and
+	 * hence only one of them may be set for any given element
+	 */
+	status = ice_sched_set_eir_srl_excl(pi, cfg_node, layer_num, rl_type,
+					    bw);
+	if (status)
+		return status;
+	if (bw == ICE_SCHED_DFLT_BW)
+		return ice_sched_set_node_bw_dflt(pi, cfg_node, rl_type,
+						  layer_num);
+	return ice_sched_set_node_bw(pi, cfg_node, rl_type, bw, layer_num);
+}
+
+/**
+ * ice_sched_set_node_bw_dflt_lmt - set node's BW limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_node_bw_dflt_lmt(struct ice_port_info *pi,
+			       struct ice_sched_node *node,
+			       enum ice_rl_type rl_type)
+{
+	return ice_sched_set_node_bw_lmt(pi, node, rl_type,
+					 ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_sched_validate_srl_node - Check node for SRL applicability
+ * @node: sched node to configure
+ * @sel_layer: selected SRL layer
+ *
+ * This function checks if the SRL can be applied to a selected layer node on
+ * behalf of the requested node (first argument). This function needs to be
+ * called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_validate_srl_node(struct ice_sched_node *node, u8 sel_layer)
+{
+	/* SRL profiles are not available on all layers. Check if the
+	 * SRL profile can be applied to a node above or below the
+	 * requested node. SRL configuration is possible only if the
+	 * selected layer's node has single child.
+	 */
+	if (sel_layer == node->tx_sched_layer ||
+	    ((sel_layer == node->tx_sched_layer + 1) &&
+	    node->num_children == 1) ||
+	    ((sel_layer == node->tx_sched_layer - 1) &&
+	    (node->parent && node->parent->num_children == 1)))
+		return 0;
+
+	return ICE_ERR_CFG;
+}
+
+/**
+ * ice_sched_save_q_bw - save queue node's BW information
+ * @q_ctx: queue context structure
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of queue type node for post replay use.
+ */
+static enum ice_status
+ice_sched_save_q_bw(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type, u32 bw)
+{
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		ice_set_clear_cir_bw(&q_ctx->bw_t_info, bw);
+		break;
+	case ICE_MAX_BW:
+		ice_set_clear_eir_bw(&q_ctx->bw_t_info, bw);
+		break;
+	case ICE_SHARED_BW:
+		ice_set_clear_shared_bw(&q_ctx->bw_t_info, bw);
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_set_q_bw_lmt - sets queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets BW limit of queue scheduling node.
+ */
+static enum ice_status
+ice_sched_set_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		       u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	struct ice_sched_node *node;
+	struct ice_q_ctx *q_ctx;
+
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	mutex_lock(&pi->sched_lock);
+	q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle);
+	if (!q_ctx)
+		goto exit_q_bw_lmt;
+	node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+	if (!node) {
+		ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n");
+		goto exit_q_bw_lmt;
+	}
+
+	/* Return error if it is not a leaf node */
+	if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF)
+		goto exit_q_bw_lmt;
+
+	/* SRL bandwidth layer selection */
+	if (rl_type == ICE_SHARED_BW) {
+		u8 sel_layer; /* selected layer */
+
+		sel_layer = ice_sched_get_rl_prof_layer(pi, rl_type,
+							node->tx_sched_layer);
+		if (sel_layer >= pi->hw->num_tx_sched_layers) {
+			status = ICE_ERR_PARAM;
+			goto exit_q_bw_lmt;
+		}
+		status = ice_sched_validate_srl_node(node, sel_layer);
+		if (status)
+			goto exit_q_bw_lmt;
+	}
+
+	if (bw == ICE_SCHED_DFLT_BW)
+		status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+	else
+		status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+	if (!status)
+		status = ice_sched_save_q_bw(q_ctx, rl_type, bw);
+
+exit_q_bw_lmt:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_cfg_q_bw_lmt - configure queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		 u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+	return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+				      bw);
+}
+
+/**
+ * ice_cfg_q_bw_dflt_lmt - configure queue BW default limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ *
+ * This function configures BW default limit of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 q_handle, enum ice_rl_type rl_type)
+{
+	return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+				      ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_cfg_rl_burst_size - Set burst size value
+ * @hw: pointer to the HW struct
+ * @bytes: burst size in bytes
+ *
+ * This function configures/set the burst size to requested new value. The new
+ * burst size value is used for future rate limit calls. It doesn't change the
+ * existing or previously created RL profiles.
+ */
+enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes)
+{
+	u16 burst_size_to_prog;
+
+	if (bytes < ICE_MIN_BURST_SIZE_ALLOWED ||
+	    bytes > ICE_MAX_BURST_SIZE_ALLOWED)
+		return ICE_ERR_PARAM;
+	if (ice_round_to_num(bytes, 64) <=
+	    ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY) {
+		/* 64 byte granularity case */
+		/* Disable MSB granularity bit */
+		burst_size_to_prog = ICE_64_BYTE_GRANULARITY;
+		/* round number to nearest 64 byte granularity */
+		bytes = ice_round_to_num(bytes, 64);
+		/* The value is in 64 byte chunks */
+		burst_size_to_prog |= (u16)(bytes / 64);
+	} else {
+		/* k bytes granularity case */
+		/* Enable MSB granularity bit */
+		burst_size_to_prog = ICE_KBYTE_GRANULARITY;
+		/* round number to nearest 1024 granularity */
+		bytes = ice_round_to_num(bytes, 1024);
+		/* check rounding doesn't go beyond allowed */
+		if (bytes > ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY)
+			bytes = ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY;
+		/* The value is in k bytes */
+		burst_size_to_prog |= (u16)(bytes / 1024);
+	}
+	hw->max_burst_size = burst_size_to_prog;
+	return 0;
+}
+
+/**
+ * ice_sched_replay_node_prio - re-configure node priority
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @priority: priority value
+ *
+ * This function configures node element's priority value. It
+ * needs to be called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_replay_node_prio(struct ice_hw *hw, struct ice_sched_node *node,
+			   u8 priority)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+	enum ice_status status;
+
+	buf = node->info;
+	data = &buf.data;
+	data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC;
+	data->generic = priority;
+
+	/* Configure element */
+	status = ice_sched_update_elem(hw, node, &buf);
+	return status;
+}
+
+/**
+ * ice_sched_replay_node_bw - replay node(s) BW
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @bw_t_info: BW type information
+ *
+ * This function restores node's BW from bw_t_info. The caller needs
+ * to hold the scheduler lock.
+ */
+static enum ice_status
+ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node,
+			 struct ice_bw_type_info *bw_t_info)
+{
+	struct ice_port_info *pi = hw->port_info;
+	enum ice_status status = ICE_ERR_PARAM;
+	u16 bw_alloc;
+
+	if (!node)
+		return status;
+	if (bitmap_empty(bw_t_info->bw_t_bitmap, ICE_BW_TYPE_CNT))
+		return 0;
+	if (test_bit(ICE_BW_TYPE_PRIO, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_replay_node_prio(hw, node,
+						    bw_t_info->generic);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW,
+						   bw_t_info->cir_bw.bw);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_CIR_WT, bw_t_info->bw_t_bitmap)) {
+		bw_alloc = bw_t_info->cir_bw.bw_alloc;
+		status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MIN_BW,
+						     bw_alloc);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW,
+						   bw_t_info->eir_bw.bw);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_EIR_WT, bw_t_info->bw_t_bitmap)) {
+		bw_alloc = bw_t_info->eir_bw.bw_alloc;
+		status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MAX_BW,
+						     bw_alloc);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap))
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_SHARED_BW,
+						   bw_t_info->shared_bw);
+	return status;
+}
+
+/**
+ * ice_sched_replay_q_bw - replay queue type node BW
+ * @pi: port information structure
+ * @q_ctx: queue context structure
+ *
+ * This function replays queue type node bandwidth. This function needs to be
+ * called with scheduler lock held.
+ */
+enum ice_status
+ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx)
+{
+	struct ice_sched_node *q_node;
+
+	/* Following also checks the presence of node in tree */
+	q_node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+	if (!q_node)
+		return ICE_ERR_PARAM;
+	return ice_sched_replay_node_bw(pi->hw, q_node, &q_ctx->bw_t_info);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index 3902a8ad3025..f0593cfb6521 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -8,6 +8,36 @@
 
 #define ICE_QGRP_LAYER_OFFSET	2
 #define ICE_VSI_LAYER_OFFSET	4
+#define ICE_SCHED_INVAL_LAYER_NUM	0xFF
+/* Burst size is a 12 bits register that is configured while creating the RL
+ * profile(s). MSB is a granularity bit and tells the granularity type
+ * 0 - LSB bits are in 64 bytes granularity
+ * 1 - LSB bits are in 1K bytes granularity
+ */
+#define ICE_64_BYTE_GRANULARITY			0
+#define ICE_KBYTE_GRANULARITY			BIT(11)
+#define ICE_MIN_BURST_SIZE_ALLOWED		64 /* In Bytes */
+#define ICE_MAX_BURST_SIZE_ALLOWED \
+	((BIT(11) - 1) * 1024) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY \
+	((BIT(11) - 1) * 64) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY	ICE_MAX_BURST_SIZE_ALLOWED
+
+#define ICE_RL_PROF_FREQUENCY 446000000
+#define ICE_RL_PROF_ACCURACY_BYTES 128
+#define ICE_RL_PROF_MULTIPLIER 10000
+#define ICE_RL_PROF_TS_MULTIPLIER 32
+#define ICE_RL_PROF_FRACTION 512
+
+/* BW rate limit profile parameters list entry along
+ * with bandwidth maintained per layer in port info
+ */
+struct ice_aqc_rl_profile_info {
+	struct ice_aqc_rl_profile_elem profile;
+	struct list_head list_entry;
+	u32 bw;			/* requested */
+	u16 prof_id_ref;	/* profile ID to node association ref count */
+};
 
 struct ice_sched_agg_vsi_info {
 	struct list_head list_entry;
@@ -48,4 +78,13 @@ enum ice_status
 ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
 		  u8 owner, bool enable);
 enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+enum ice_status
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		 u16 q_handle, enum ice_rl_type rl_type, u32 bw);
+enum ice_status
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 q_handle, enum ice_rl_type rl_type);
+enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
+enum ice_status
+ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx);
 #endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 1acdd43a2edd..77d211ea3aae 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -416,8 +416,7 @@ ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
 		ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx);
 	} else {
 		/* update with new HW VSI num */
-		if (tmp_vsi_ctx->vsi_num != vsi_ctx->vsi_num)
-			tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
+		tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index cb123fbe30be..fa14b9545dab 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -14,11 +14,6 @@
 #define ICE_VSI_INVAL_ID 0xffff
 #define ICE_INVAL_Q_HANDLE 0xFFFF
 
-/* VSI queue context structure */
-struct ice_q_ctx {
-	u16  q_handle;
-};
-
 /* VSI context structure for add/get/update/free operations */
 struct ice_vsi_ctx {
 	u16 vsi_num;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 33dd103035dc..2c212f64d99f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -5,8 +5,13 @@
 
 #include <linux/prefetch.h>
 #include <linux/mm.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
 #include "ice.h"
 #include "ice_dcb_lib.h"
+#include "ice_xsk.h"
 
 #define ICE_RX_HDR_SIZE		256
 
@@ -19,7 +24,10 @@ static void
 ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
 {
 	if (tx_buf->skb) {
-		dev_kfree_skb_any(tx_buf->skb);
+		if (ice_ring_is_xdp(ring))
+			page_frag_free(tx_buf->raw_buf);
+		else
+			dev_kfree_skb_any(tx_buf->skb);
 		if (dma_unmap_len(tx_buf, len))
 			dma_unmap_single(ring->dev,
 					 dma_unmap_addr(tx_buf, dma),
@@ -51,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
 {
 	u16 i;
 
+	if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+		ice_xsk_clean_xdp_ring(tx_ring);
+		goto tx_skip_free;
+	}
+
 	/* ring already cleared, nothing to do */
 	if (!tx_ring->tx_buf)
 		return;
@@ -59,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
 	for (i = 0; i < tx_ring->count; i++)
 		ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
 
+tx_skip_free:
 	memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
 
 	/* Zero out the descriptor ring */
@@ -136,8 +150,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 		total_bytes += tx_buf->bytecount;
 		total_pkts += tx_buf->gso_segs;
 
-		/* free the skb */
-		napi_consume_skb(tx_buf->skb, napi_budget);
+		if (ice_ring_is_xdp(tx_ring))
+			page_frag_free(tx_buf->raw_buf);
+		else
+			/* free the skb */
+			napi_consume_skb(tx_buf->skb, napi_budget);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -188,12 +205,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 
 	i += tx_ring->count;
 	tx_ring->next_to_clean = i;
-	u64_stats_update_begin(&tx_ring->syncp);
-	tx_ring->stats.bytes += total_bytes;
-	tx_ring->stats.pkts += total_pkts;
-	u64_stats_update_end(&tx_ring->syncp);
-	tx_ring->q_vector->tx.total_bytes += total_bytes;
-	tx_ring->q_vector->tx.total_pkts += total_pkts;
+
+	ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
+
+	if (ice_ring_is_xdp(tx_ring))
+		return !!budget;
 
 	netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
 				  total_bytes);
@@ -273,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 	if (!rx_ring->rx_buf)
 		return;
 
+	if (rx_ring->xsk_umem) {
+		ice_xsk_clean_rx_ring(rx_ring);
+		goto rx_skip_free;
+	}
+
 	/* Free all the Rx ring sk_buffs */
 	for (i = 0; i < rx_ring->count; i++) {
 		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -289,10 +310,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 		 */
 		dma_sync_single_range_for_cpu(dev, rx_buf->dma,
 					      rx_buf->page_offset,
-					      ICE_RXBUF_2048, DMA_FROM_DEVICE);
+					      rx_ring->rx_buf_len,
+					      DMA_FROM_DEVICE);
 
 		/* free resources associated with mapping */
-		dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
+		dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
 				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 
@@ -300,6 +322,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 		rx_buf->page_offset = 0;
 	}
 
+rx_skip_free:
 	memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
 
 	/* Zero out the descriptor ring */
@@ -319,6 +342,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 void ice_free_rx_ring(struct ice_ring *rx_ring)
 {
 	ice_clean_rx_ring(rx_ring);
+	if (rx_ring->vsi->type == ICE_VSI_PF)
+		if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+			xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	rx_ring->xdp_prog = NULL;
 	devm_kfree(rx_ring->dev, rx_ring->rx_buf);
 	rx_ring->rx_buf = NULL;
 
@@ -363,6 +390,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
 
 	rx_ring->next_to_use = 0;
 	rx_ring->next_to_clean = 0;
+
+	if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+		WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+	if (rx_ring->vsi->type == ICE_VSI_PF &&
+	    !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+		if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+				     rx_ring->q_index))
+			goto err;
 	return 0;
 
 err:
@@ -372,34 +408,110 @@ err:
 }
 
 /**
- * ice_release_rx_desc - Store the new tail and head values
- * @rx_ring: ring to bump
- * @val: new head index
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
  */
-static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
 {
-	u16 prev_ntu = rx_ring->next_to_use;
+	if (ice_ring_uses_build_skb(rx_ring))
+		return ICE_SKB_PAD;
+	else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+		return XDP_PACKET_HEADROOM;
 
-	rx_ring->next_to_use = val;
+	return 0;
+}
 
-	/* update next to alloc since we have filled the ring */
-	rx_ring->next_to_alloc = val;
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
+	    struct bpf_prog *xdp_prog)
+{
+	int err, result = ICE_XDP_PASS;
+	struct ice_ring *xdp_ring;
+	u32 act;
 
-	/* QRX_TAIL will be updated with any tail value, but hardware ignores
-	 * the lower 3 bits. This makes it so we only bump tail on meaningful
-	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to
-	 * the budget depending on the current traffic load.
-	 */
-	val &= ~0x7;
-	if (prev_ntu != val) {
-		/* Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch. (Only
-		 * applicable for weak-ordered memory model archs,
-		 * such as IA-64).
-		 */
-		wmb();
-		writel(val, rx_ring->tail);
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
+		result = ice_xmit_xdp_buff(xdp, xdp_ring);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fallthrough -- not supported action */
+	case XDP_ABORTED:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		/* fallthrough -- handle aborts by dropping frame */
+	case XDP_DROP:
+		result = ICE_XDP_CONSUMED;
+		break;
 	}
+
+	return result;
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+	     u32 flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	unsigned int queue_index = smp_processor_id();
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_ring *xdp_ring;
+	int drops = 0, i;
+
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
+		return -ENXIO;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	xdp_ring = vsi->xdp_rings[queue_index];
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+		if (err != ICE_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		ice_xdp_ring_update_tail(xdp_ring);
+
+	return n - drops;
 }
 
 /**
@@ -423,28 +535,28 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
 	}
 
 	/* alloc new page for storage */
-	page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+	page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
 	if (unlikely(!page)) {
 		rx_ring->rx_stats.alloc_page_failed++;
 		return false;
 	}
 
 	/* map page for use */
-	dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
 				 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 
 	/* if mapping failed free memory back to system since
 	 * there isn't much point in holding memory we can't use
 	 */
 	if (dma_mapping_error(rx_ring->dev, dma)) {
-		__free_pages(page, 0);
+		__free_pages(page, ice_rx_pg_order(rx_ring));
 		rx_ring->rx_stats.alloc_page_failed++;
 		return false;
 	}
 
 	bi->dma = dma;
 	bi->page = page;
-	bi->page_offset = 0;
+	bi->page_offset = ice_rx_offset(rx_ring);
 	page_ref_add(page, USHRT_MAX - 1);
 	bi->pagecnt_bias = USHRT_MAX;
 
@@ -486,7 +598,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
 		/* sync the buffer for use by the device */
 		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
 						 bi->page_offset,
-						 ICE_RXBUF_2048,
+						 rx_ring->rx_buf_len,
 						 DMA_FROM_DEVICE);
 
 		/* Refresh the desc even if buffer_addrs didn't change
@@ -557,9 +669,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
  */
 static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 {
-#if (PAGE_SIZE >= 8192)
-	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
-#endif
 	unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
 	struct page *page = rx_buf->page;
 
@@ -572,7 +681,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 	if (unlikely((page_count(page) - pagecnt_bias) > 1))
 		return false;
 #else
-	if (rx_buf->page_offset > last_offset)
+#define ICE_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
+	if (rx_buf->page_offset > ICE_LAST_OFFSET)
 		return false;
 #endif /* PAGE_SIZE < 8192) */
 
@@ -590,6 +701,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 
 /**
  * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: buffer containing page to add
  * @skb: sk_buff to place the data into
  * @size: packet length from rx_desc
@@ -599,13 +711,13 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
  * The function will then update the page offset.
  */
 static void
-ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
-		unsigned int size)
+ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		struct sk_buff *skb, unsigned int size)
 {
 #if (PAGE_SIZE >= 8192)
-	unsigned int truesize = SKB_DATA_ALIGN(size);
+	unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring));
 #else
-	unsigned int truesize = ICE_RXBUF_2048;
+	unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
 #endif
 
 	if (!size)
@@ -679,10 +791,64 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
 }
 
 /**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *
+ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+	      struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	struct sk_buff *skb;
+
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points exactly as xdp->data, otherwise we
+	 * likely have a consumer accessing first few bytes of meta
+	 * data, and then actual data.
+	 */
+	prefetch(xdp->data_meta);
+#if L1_CACHE_BYTES < 128
+	prefetch((void *)(xdp->data + L1_CACHE_BYTES));
+#endif
+	/* build an skb around the page buffer */
+	skb = build_skb(xdp->data_hard_start, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* must to record Rx queue, otherwise OS features such as
+	 * symmetric queue won't work
+	 */
+	skb_record_rx_queue(skb, rx_ring->q_index);
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	/* buffer is used by skb, update page_offset */
+	ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+
+	return skb;
+}
+
+/**
  * ice_construct_skb - Allocate skb and populate it
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: Rx buffer to pull data from
- * @size: the length of the packet
+ * @xdp: xdp_buff pointing to the data
  *
  * This function allocates an skb. It then populates it with the page
  * data from the current receive descriptor, taking care to set up the
@@ -690,16 +856,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
  */
 static struct sk_buff *
 ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
-		  unsigned int size)
+		  struct xdp_buff *xdp)
 {
-	void *va = page_address(rx_buf->page) + rx_buf->page_offset;
+	unsigned int size = xdp->data_end - xdp->data;
 	unsigned int headlen;
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
+	prefetch(xdp->data);
 #if L1_CACHE_BYTES < 128
-	prefetch((u8 *)va + L1_CACHE_BYTES);
+	prefetch((void *)(xdp->data + L1_CACHE_BYTES));
 #endif /* L1_CACHE_BYTES */
 
 	/* allocate a skb to store the frags */
@@ -712,10 +878,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	/* Determine available headroom for copy */
 	headlen = size;
 	if (headlen > ICE_RX_HDR_SIZE)
-		headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
+		headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
 
 	/* align pull length to size of long to optimize memcpy performance */
-	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+	memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+							 sizeof(long)));
 
 	/* if we exhaust the linear part then add what is left as a frag */
 	size -= headlen;
@@ -723,7 +890,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 #if (PAGE_SIZE >= 8192)
 		unsigned int truesize = SKB_DATA_ALIGN(size);
 #else
-		unsigned int truesize = ICE_RXBUF_2048;
+		unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
 #endif
 		skb_add_rx_frag(skb, 0, rx_buf->page,
 				rx_buf->page_offset + headlen, size, truesize);
@@ -745,11 +912,18 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: Rx buffer to pull data from
  *
- * This function will  clean up the contents of the rx_buf. It will
- * either recycle the buffer or unmap it and free the associated resources.
+ * This function will update next_to_clean and then clean up the contents
+ * of the rx_buf. It will either recycle the buffer or unmap it and free
+ * the associated resources.
  */
 static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 {
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
 	if (!rx_buf)
 		return;
 
@@ -759,8 +933,9 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 		rx_ring->rx_stats.page_reuse_count++;
 	} else {
 		/* we are not reusing the buffer so unmap it */
-		dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
-				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+		dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
+				     ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+				     ICE_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 	}
 
@@ -770,227 +945,31 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 }
 
 /**
- * ice_cleanup_headers - Correct empty headers
- * @skb: pointer to current skb being fixed
- *
- * Also address the case where we are pulling data in on pages only
- * and as such no data is present in the skb header.
- *
- * In addition if skb is not at least 60 bytes we need to pad it so that
- * it is large enough to qualify as a valid Ethernet frame.
- *
- * Returns true if an error was encountered and skb was freed.
- */
-static bool ice_cleanup_headers(struct sk_buff *skb)
-{
-	/* if eth_skb_pad returns an error the skb was freed */
-	if (eth_skb_pad(skb))
-		return true;
-
-	return false;
-}
-
-/**
- * ice_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
- * @stat_err_bits: value to mask
- *
- * This function does some fast chicanery in order to return the
- * value of the mask which is really only used for boolean tests.
- * The status_error_len doesn't need to be shifted because it begins
- * at offset zero.
- */
-static bool
-ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
-{
-	return !!(rx_desc->wb.status_error0 &
-		  cpu_to_le16(stat_err_bits));
-}
-
-/**
  * ice_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
  * @rx_desc: Rx descriptor for current buffer
  * @skb: Current socket buffer containing buffer in progress
  *
- * This function updates next to clean. If the buffer is an EOP buffer
- * this function exits returning false, otherwise it will place the
- * sk_buff in the next buffer to be chained and return true indicating
- * that this is in fact a non-EOP buffer.
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
  */
 static bool
 ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
 	       struct sk_buff *skb)
 {
-	u32 ntc = rx_ring->next_to_clean + 1;
-
-	/* fetch, update, and store next to clean */
-	ntc = (ntc < rx_ring->count) ? ntc : 0;
-	rx_ring->next_to_clean = ntc;
-
-	prefetch(ICE_RX_DESC(rx_ring, ntc));
-
 	/* if we are the last buffer then there is nothing else to do */
 #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
 	if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
 		return false;
 
 	/* place skb in next buffer to be received */
-	rx_ring->rx_buf[ntc].skb = skb;
+	rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
 	rx_ring->rx_stats.non_eop_descs++;
 
 	return true;
 }
 
 /**
- * ice_ptype_to_htype - get a hash type
- * @ptype: the ptype value from the descriptor
- *
- * Returns a hash type to be used by skb_set_hash
- */
-static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
-{
-	return PKT_HASH_TYPE_NONE;
-}
-
-/**
- * ice_rx_hash - set the hash value in the skb
- * @rx_ring: descriptor ring
- * @rx_desc: specific descriptor
- * @skb: pointer to current skb
- * @rx_ptype: the ptype value from the descriptor
- */
-static void
-ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
-	    struct sk_buff *skb, u8 rx_ptype)
-{
-	struct ice_32b_rx_flex_desc_nic *nic_mdid;
-	u32 hash;
-
-	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
-		return;
-
-	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
-		return;
-
-	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
-	hash = le32_to_cpu(nic_mdid->rss_hash);
-	skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
-}
-
-/**
- * ice_rx_csum - Indicate in skb if checksum is good
- * @ring: the ring we care about
- * @skb: skb currently being received and modified
- * @rx_desc: the receive descriptor
- * @ptype: the packet type decoded by hardware
- *
- * skb->protocol must be set before this function is called
- */
-static void
-ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
-	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
-{
-	struct ice_rx_ptype_decoded decoded;
-	u32 rx_error, rx_status;
-	bool ipv4, ipv6;
-
-	rx_status = le16_to_cpu(rx_desc->wb.status_error0);
-	rx_error = rx_status;
-
-	decoded = ice_decode_rx_desc_ptype(ptype);
-
-	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
-	skb->ip_summed = CHECKSUM_NONE;
-	skb_checksum_none_assert(skb);
-
-	/* check if Rx checksum is enabled */
-	if (!(ring->netdev->features & NETIF_F_RXCSUM))
-		return;
-
-	/* check if HW has decoded the packet and checksum */
-	if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
-		return;
-
-	if (!(decoded.known && decoded.outer_ip))
-		return;
-
-	ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
-	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
-	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
-	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
-
-	if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
-				 BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
-		goto checksum_fail;
-	else if (ipv6 && (rx_status &
-		 (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
-		goto checksum_fail;
-
-	/* check for L4 errors and handle packets that were not able to be
-	 * checksummed due to arrival speed
-	 */
-	if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
-		goto checksum_fail;
-
-	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
-	switch (decoded.inner_prot) {
-	case ICE_RX_PTYPE_INNER_PROT_TCP:
-	case ICE_RX_PTYPE_INNER_PROT_UDP:
-	case ICE_RX_PTYPE_INNER_PROT_SCTP:
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	default:
-		break;
-	}
-	return;
-
-checksum_fail:
-	ring->vsi->back->hw_csum_rx_error++;
-}
-
-/**
- * ice_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: Rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
- * @ptype: the packet type decoded by hardware
- *
- * This function checks the ring, descriptor, and packet information in
- * order to populate the hash, checksum, VLAN, protocol, and
- * other fields within the skb.
- */
-static void
-ice_process_skb_fields(struct ice_ring *rx_ring,
-		       union ice_32b_rx_flex_desc *rx_desc,
-		       struct sk_buff *skb, u8 ptype)
-{
-	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
-
-	/* modifies the skb - consumes the enet header */
-	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
-	ice_rx_csum(rx_ring, skb, rx_desc, ptype);
-}
-
-/**
- * ice_receive_skb - Send a completed packet up the stack
- * @rx_ring: Rx ring in play
- * @skb: packet to send up
- * @vlan_tag: VLAN tag for packet
- *
- * This function sends the completed packet (via. skb) up the stack using
- * gro receive functions (with/without VLAN tag)
- */
-static void
-ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
-{
-	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-	    (vlan_tag & VLAN_VID_MASK))
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-	napi_gro_receive(&rx_ring->q_vector->napi, skb);
-}
-
-/**
  * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @rx_ring: Rx descriptor ring to transact packets on
  * @budget: Total limit on number of packets to process
@@ -1006,8 +985,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
 	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+	unsigned int xdp_res, xdp_xmit = 0;
+	struct bpf_prog *xdp_prog = NULL;
+	struct xdp_buff xdp;
 	bool failure;
 
+	xdp.rxq = &rx_ring->xdp_rxq;
+
 	/* start the loop to process Rx packets bounded by 'budget' */
 	while (likely(total_rx_pkts < (unsigned int)budget)) {
 		union ice_32b_rx_flex_desc *rx_desc;
@@ -1042,10 +1026,57 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		/* retrieve a buffer from the ring */
 		rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
 
+		if (!size) {
+			xdp.data = NULL;
+			xdp.data_end = NULL;
+			xdp.data_hard_start = NULL;
+			xdp.data_meta = NULL;
+			goto construct_skb;
+		}
+
+		xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
+		xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
+		xdp.data_meta = xdp.data;
+		xdp.data_end = xdp.data + size;
+
+		rcu_read_lock();
+		xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+		if (!xdp_prog) {
+			rcu_read_unlock();
+			goto construct_skb;
+		}
+
+		xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
+		rcu_read_unlock();
+		if (!xdp_res)
+			goto construct_skb;
+		if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+			unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+			truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+			truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
+						  size);
+#endif
+			xdp_xmit |= xdp_res;
+			ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+		} else {
+			rx_buf->pagecnt_bias++;
+		}
+		total_rx_bytes += size;
+		total_rx_pkts++;
+
+		cleaned_count++;
+		ice_put_rx_buf(rx_ring, rx_buf);
+		continue;
+construct_skb:
 		if (skb)
-			ice_add_rx_frag(rx_buf, skb, size);
+			ice_add_rx_frag(rx_ring, rx_buf, skb, size);
+		else if (ice_ring_uses_build_skb(rx_ring))
+			skb = ice_build_skb(rx_ring, rx_buf, &xdp);
 		else
-			skb = ice_construct_skb(rx_ring, rx_buf, size);
+			skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
@@ -1072,10 +1103,8 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		if (ice_test_staterr(rx_desc, stat_err_bits))
 			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
 
-		/* correct empty headers and pad skb if needed (to make valid
-		 * ethernet frame
-		 */
-		if (ice_cleanup_headers(skb)) {
+		/* pad the skb if needed, to make a valid ethernet frame */
+		if (eth_skb_pad(skb)) {
 			skb = NULL;
 			continue;
 		}
@@ -1099,13 +1128,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 	/* return up to cleaned_count buffers to hardware */
 	failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
 
-	/* update queue and vector specific stats */
-	u64_stats_update_begin(&rx_ring->syncp);
-	rx_ring->stats.pkts += total_rx_pkts;
-	rx_ring->stats.bytes += total_rx_bytes;
-	u64_stats_update_end(&rx_ring->syncp);
-	rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
-	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+	if (xdp_prog)
+		ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+
+	ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
 
 	/* guarantee a trip back through this routine if there was a failure */
 	return failure ? budget : (int)total_rx_pkts;
@@ -1483,9 +1509,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
-	ice_for_each_ring(ring, q_vector->tx)
-		if (!ice_clean_tx_irq(ring, budget))
+	ice_for_each_ring(ring, q_vector->tx) {
+		bool wd = ring->xsk_umem ?
+			  ice_clean_tx_irq_zc(ring, budget) :
+			  ice_clean_tx_irq(ring, budget);
+
+		if (!wd)
 			clean_complete = false;
+	}
 
 	/* Handle case where we are called by netpoll with a budget of 0 */
 	if (unlikely(budget <= 0))
@@ -1505,7 +1536,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 	ice_for_each_ring(ring, q_vector->rx) {
 		int cleaned;
 
-		cleaned = ice_clean_rx_irq(ring, budget_per_ring);
+		/* A dedicated path for zero-copy allows making a single
+		 * comparison in the irq context instead of many inside the
+		 * ice_clean_rx_irq function and makes the codebase cleaner.
+		 */
+		cleaned = ring->xsk_umem ?
+			  ice_clean_rx_irq_zc(ring, budget_per_ring) :
+			  ice_clean_rx_irq(ring, budget_per_ring);
 		work_done += cleaned;
 		/* if we clean as many as budgeted, we must not be done */
 		if (cleaned >= budget_per_ring)
@@ -1527,17 +1564,6 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 	return min_t(int, work_done, budget - 1);
 }
 
-/* helper function for building cmd/type/offset */
-static __le64
-build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
-{
-	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
-			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
-			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
-			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
-			   (td_tag    << ICE_TXD_QW1_L2TAG1_S));
-}
-
 /**
  * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
  * @tx_ring: the ring to be checked
@@ -1689,9 +1715,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
 		i = 0;
 
 	/* write last descriptor with RS and EOP bits */
-	td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
-	tx_desc->cmd_type_offset_bsz =
-			build_ctob(td_cmd, td_offset, size, td_tag);
+	td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+	tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size,
+						  td_tag);
 
 	/* Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 94a9280193e2..a84cc0e6dd27 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -4,8 +4,12 @@
 #ifndef _ICE_TXRX_H_
 #define _ICE_TXRX_H_
 
+#include "ice_type.h"
+
 #define ICE_DFLT_IRQ_WORK	256
+#define ICE_RXBUF_3072		3072
 #define ICE_RXBUF_2048		2048
+#define ICE_RXBUF_1536		1536
 #define ICE_MAX_CHAINED_RX_BUFS	5
 #define ICE_MAX_BUF_TXD		8
 #define ICE_MIN_TX_LEN		17
@@ -22,6 +26,71 @@
 #define ICE_RX_BUF_WRITE	16	/* Must be power of 2 */
 #define ICE_MAX_TXQ_PER_TXQG	128
 
+/* Attempt to maximize the headroom available for incoming frames. We use a 2K
+ * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
+ * This leaves us with 512 bytes of room.  From that we need to deduct the
+ * space needed for the shared info and the padding needed to IP align the
+ * frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *       up negative.  In these cases we should fall back to the legacy
+ *       receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define ICE_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+
+/**
+ * ice_compute_pad - compute the padding
+ * rx_buf_len: buffer length
+ *
+ * Figure out the size of half page based on given buffer length and
+ * then subtract the skb_shared_info followed by subtraction of the
+ * actual buffer length; this in turn results in the actual space that
+ * is left for padding usage
+ */
+static inline int ice_compute_pad(int rx_buf_len)
+{
+	int half_page_size;
+
+	half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
+}
+
+/**
+ * ice_skb_pad - determine the padding that we can supply
+ *
+ * Figure out the right Rx buffer size and based on that calculate the
+ * padding
+ */
+static inline int ice_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (ICE_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = ICE_RXBUF_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return ice_compute_pad(rx_buf_len);
+}
+
+#define ICE_SKB_PAD ice_skb_pad()
+#else
+#define ICE_2K_TOO_SMALL_WITH_PADDING false
+#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
 /* We are assuming that the cache line is always 64 Bytes here for ice.
  * In order to make sure that is a correct assumption there is a check in probe
  * to print a warning if the read from GLPCI_CNF2 tells us that the cache line
@@ -49,12 +118,24 @@
 #define ICE_TX_FLAGS_VLAN_PR_S	29
 #define ICE_TX_FLAGS_VLAN_S	16
 
+#define ICE_XDP_PASS		0
+#define ICE_XDP_CONSUMED	BIT(0)
+#define ICE_XDP_TX		BIT(1)
+#define ICE_XDP_REDIR		BIT(2)
+
 #define ICE_RX_DMA_ATTR \
 	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
 
+#define ICE_ETH_PKT_HDR_PAD	(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
 struct ice_tx_buf {
 	struct ice_tx_desc *next_to_watch;
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		void *raw_buf; /* used for XDP */
+	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
 	u32 tx_flags;
@@ -76,9 +157,17 @@ struct ice_tx_offload_params {
 struct ice_rx_buf {
 	struct sk_buff *skb;
 	dma_addr_t dma;
-	struct page *page;
-	unsigned int page_offset;
-	u16 pagecnt_bias;
+	union {
+		struct {
+			struct page *page;
+			unsigned int page_offset;
+			u16 pagecnt_bias;
+		};
+		struct {
+			void *addr;
+			u64 handle;
+		};
+	};
 };
 
 struct ice_q_stats {
@@ -198,18 +287,44 @@ struct ice_ring {
 	};
 
 	struct rcu_head rcu;		/* to avoid race on free */
+	struct bpf_prog *xdp_prog;
+	struct xdp_umem *xsk_umem;
+	struct zero_copy_allocator zca;
+	/* CL3 - 3rd cacheline starts here */
+	struct xdp_rxq_info xdp_rxq;
 	/* CLX - the below items are only accessed infrequently and should be
 	 * in their own cache line if possible
 	 */
+#define ICE_TX_FLAGS_RING_XDP		BIT(0)
+#define ICE_RX_FLAGS_RING_BUILD_SKB	BIT(1)
+	u8 flags;
 	dma_addr_t dma;			/* physical address of ring */
 	unsigned int size;		/* length of descriptor ring in bytes */
 	u32 txq_teid;			/* Added Tx queue TEID */
 	u16 rx_buf_len;
-#ifdef CONFIG_DCB
 	u8 dcb_tc;			/* Traffic class of ring */
-#endif /* CONFIG_DCB */
 } ____cacheline_internodealigned_in_smp;
 
+static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)
+{
+	return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
+}
+
+static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring)
+{
+	ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring)
+{
+	ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline bool ice_ring_is_xdp(struct ice_ring *ring)
+{
+	return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
 struct ice_ring_container {
 	/* head of linked-list of rings */
 	struct ice_ring *ring;
@@ -230,6 +345,19 @@ struct ice_ring_container {
 #define ice_for_each_ring(pos, head) \
 	for (pos = (head).ring; pos; pos = pos->next)
 
+static inline unsigned int ice_rx_pg_order(struct ice_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring->rx_buf_len > (PAGE_SIZE / 2))
+		return 1;
+#endif
+	return 0;
+}
+
+#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
+
+union ice_32b_rx_flex_desc;
+
 bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
 netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 void ice_clean_tx_ring(struct ice_ring *tx_ring);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644
index 000000000000..35bbc4ff603c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_txrx_lib.h"
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ */
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+{
+	u16 prev_ntu = rx_ring->next_to_use;
+
+	rx_ring->next_to_use = val;
+
+	/* update next to alloc since we have filled the ring */
+	rx_ring->next_to_alloc = val;
+
+	/* QRX_TAIL will be updated with any tail value, but hardware ignores
+	 * the lower 3 bits. This makes it so we only bump tail on meaningful
+	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+	 * the budget depending on the current traffic load.
+	 */
+	val &= ~0x7;
+	if (prev_ntu != val) {
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch. (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(val, rx_ring->tail);
+	}
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+	return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+	    struct sk_buff *skb, u8 rx_ptype)
+{
+	struct ice_32b_rx_flex_desc_nic *nic_mdid;
+	u32 hash;
+
+	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+		return;
+
+	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+	hash = le32_to_cpu(nic_mdid->rss_hash);
+	skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
+	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+	struct ice_rx_ptype_decoded decoded;
+	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
+
+	rx_status = le16_to_cpu(rx_desc->wb.status_error0);
+	rx_error = rx_status;
+
+	decoded = ice_decode_rx_desc_ptype(ptype);
+
+	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
+	skb->ip_summed = CHECKSUM_NONE;
+	skb_checksum_none_assert(skb);
+
+	/* check if Rx checksum is enabled */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* check if HW has decoded the packet and checksum */
+	if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+		return;
+
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+	if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+				 BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+		goto checksum_fail;
+	else if (ipv6 && (rx_status &
+		 (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+		goto checksum_fail;
+
+	/* check for L4 errors and handle packets that were not able to be
+	 * checksummed due to arrival speed
+	 */
+	if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+		goto checksum_fail;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case ICE_RX_PTYPE_INNER_PROT_TCP:
+	case ICE_RX_PTYPE_INNER_PROT_UDP:
+	case ICE_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	default:
+		break;
+	}
+	return;
+
+checksum_fail:
+	ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u8 ptype)
+{
+	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+	ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    (vlan_tag & VLAN_VID_MASK))
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+	napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ */
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
+{
+	u16 i = xdp_ring->next_to_use;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+
+	if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
+		xdp_ring->tx_stats.tx_busy++;
+		return ICE_XDP_CONSUMED;
+	}
+
+	dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(xdp_ring->dev, dma))
+		return ICE_XDP_CONSUMED;
+
+	tx_buf = &xdp_ring->tx_buf[i];
+	tx_buf->bytecount = size;
+	tx_buf->gso_segs = 1;
+	tx_buf->raw_buf = data;
+
+	/* record length, and DMA address */
+	dma_unmap_len_set(tx_buf, len, size);
+	dma_unmap_addr_set(tx_buf, dma, dma);
+
+	tx_desc = ICE_TX_DESC(xdp_ring, i);
+	tx_desc->buf_addr = cpu_to_le64(dma);
+	tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+						  size, 0);
+
+	/* Make certain all of the status bits have been updated
+	 * before next_to_watch is written.
+	 */
+	smp_wmb();
+
+	i++;
+	if (i == xdp_ring->count)
+		i = 0;
+
+	tx_buf->next_to_watch = tx_desc;
+	xdp_ring->next_to_use = i;
+
+	return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
+{
+	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+	if (unlikely(!xdpf))
+		return ICE_XDP_CONSUMED;
+
+	return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
+{
+	if (xdp_res & ICE_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_res & ICE_XDP_TX) {
+		struct ice_ring *xdp_ring =
+			rx_ring->vsi->xdp_rings[rx_ring->q_index];
+
+		ice_xdp_ring_update_tail(xdp_ring);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644
index 000000000000..ba9164dad9ae
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
+{
+	return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
+}
+
+static inline __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
+			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
+			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			   (td_tag    << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
+{
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.
+	 */
+	wmb();
+	writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val);
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u8 ptype);
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 6667d17a4206..eba8b04b8cbd 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -19,6 +19,17 @@ static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
 	return test_bit(tc, &bitmap);
 }
 
+static inline u64 round_up_64bit(u64 a, u32 b)
+{
+	return div64_long(((a) + (b) / 2), (b));
+}
+
+static inline u32 ice_round_to_num(u32 N, u32 R)
+{
+	return ((((N) % (R)) < ((R) / 2)) ? (((N) / (R)) * (R)) :
+		((((N) + (R) - 1) / (R)) * (R)));
+}
+
 /* Driver always calls main vsi_handle first */
 #define ICE_MAIN_VSI_HANDLE		0
 
@@ -35,6 +46,8 @@ static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
 #define ICE_DBG_PKG		BIT_ULL(16)
 #define ICE_DBG_RES		BIT_ULL(17)
 #define ICE_DBG_AQ_MSG		BIT_ULL(24)
+#define ICE_DBG_AQ_DESC		BIT_ULL(25)
+#define ICE_DBG_AQ_DESC_BUF	BIT_ULL(26)
 #define ICE_DBG_AQ_CMD		BIT_ULL(27)
 #define ICE_DBG_USER		BIT_ULL(31)
 
@@ -272,10 +285,56 @@ enum ice_agg_type {
 	ICE_AGG_TYPE_QG
 };
 
+/* Rate limit types */
+enum ice_rl_type {
+	ICE_UNKNOWN_BW = 0,
+	ICE_MIN_BW,		/* for CIR profile */
+	ICE_MAX_BW,		/* for EIR profile */
+	ICE_SHARED_BW		/* for shared profile */
+};
+
+#define ICE_SCHED_MIN_BW		500		/* in Kbps */
+#define ICE_SCHED_MAX_BW		100000000	/* in Kbps */
+#define ICE_SCHED_DFLT_BW		0xFFFFFFFF	/* unlimited */
 #define ICE_SCHED_DFLT_RL_PROF_ID	0
+#define ICE_SCHED_NO_SHARED_RL_PROF_ID	0xFFFF
 #define ICE_SCHED_DFLT_BW_WT		1
+#define ICE_SCHED_INVAL_PROF_ID		0xFFFF
+#define ICE_SCHED_DFLT_BURST_SIZE	(15 * 1024)	/* in bytes (15k) */
 
-/* VSI type list entry to locate corresponding VSI/ag nodes */
+ /* Data structure for saving BW information */
+enum ice_bw_type {
+	ICE_BW_TYPE_PRIO,
+	ICE_BW_TYPE_CIR,
+	ICE_BW_TYPE_CIR_WT,
+	ICE_BW_TYPE_EIR,
+	ICE_BW_TYPE_EIR_WT,
+	ICE_BW_TYPE_SHARED,
+	ICE_BW_TYPE_CNT		/* This must be last */
+};
+
+struct ice_bw {
+	u32 bw;
+	u16 bw_alloc;
+};
+
+struct ice_bw_type_info {
+	DECLARE_BITMAP(bw_t_bitmap, ICE_BW_TYPE_CNT);
+	u8 generic;
+	struct ice_bw cir_bw;
+	struct ice_bw eir_bw;
+	u32 shared_bw;
+};
+
+/* VSI queue context structure for given TC */
+struct ice_q_ctx {
+	u16  q_handle;
+	u32  q_teid;
+	/* bw_t_info saves queue BW information */
+	struct ice_bw_type_info bw_t_info;
+};
+
+/* VSI type list entry to locate corresponding VSI/aggregator nodes */
 struct ice_sched_vsi_info {
 	struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
 	struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
@@ -364,6 +423,8 @@ struct ice_port_info {
 	struct mutex sched_lock;	/* protect access to TXSched tree */
 	struct ice_sched_node *
 		sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	/* List contain profile ID(s) and other params per layer */
+	struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM];
 	struct ice_dcbx_cfg local_dcbx_cfg;	/* Oper/Local Cfg */
 	/* DCBX info */
 	struct ice_dcbx_cfg remote_dcbx_cfg;	/* Peer Cfg */
@@ -415,6 +476,8 @@ struct ice_hw {
 
 	u8 pf_id;		/* device profile info */
 
+	u16 max_burst_size;	/* driver sets this value */
+
 	/* Tx Scheduler values */
 	u16 num_tx_sched_layers;
 	u16 num_tx_sched_phys_layers;
@@ -555,6 +618,8 @@ struct ice_hw_port_stats {
 };
 
 /* Checksum and Shadow RAM pointers */
+#define ICE_SR_BOOT_CFG_PTR		0x132
+#define ICE_NVM_OEM_VER_OFF		0x02
 #define ICE_SR_NVM_DEV_STARTER_VER	0x18
 #define ICE_SR_NVM_EETRACK_LO		0x2D
 #define ICE_SR_NVM_EETRACK_HI		0x2E
@@ -568,6 +633,7 @@ struct ice_hw_port_stats {
 #define ICE_OEM_VER_BUILD_MASK		(0xffff << ICE_OEM_VER_BUILD_SHIFT)
 #define ICE_OEM_VER_SHIFT		24
 #define ICE_OEM_VER_MASK		(0xff << ICE_OEM_VER_SHIFT)
+#define ICE_SR_PFA_PTR			0x40
 #define ICE_SR_SECTOR_SIZE_IN_WORDS	0x800
 #define ICE_SR_WORDS_IN_1KB		512
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index b45797f39b2f..2ac83ad3d1a6 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 
 /**
@@ -389,7 +390,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
 	 * by the time we get here.
 	 */
 	if (!is_pfr)
-		wr32(hw, VF_MBX_ARQLEN(vf_abs_id), 0);
+		wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0);
 
 	/* In the case of a VFLR, the HW has already reset the VF and we
 	 * just need to clean up, so don't hit the VFRTRIG register.
@@ -1151,6 +1152,25 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 }
 
 /**
+ * ice_is_vf_disabled
+ * @vf: pointer to the VF info
+ *
+ * Returns true if the PF or VF is disabled, false otherwise.
+ */
+static bool ice_is_vf_disabled(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+
+	/* If the PF has been disabled, there is no need resetting VF until
+	 * PF is active again. Similarly, if the VF has been disabled, this
+	 * means something else is resetting the VF, so we shouldn't continue.
+	 * Otherwise, set disable VF state bit for actual reset, and continue.
+	 */
+	return (test_bit(__ICE_VF_DIS, pf->state) ||
+		test_bit(ICE_VF_STATE_DIS, vf->vf_states));
+}
+
+/**
  * ice_reset_vf - Reset a particular VF
  * @vf: pointer to the VF structure
  * @is_vflr: true if VFLR was issued, false if not
@@ -1167,19 +1187,15 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 	u32 reg;
 	int i;
 
-	/* If the PF has been disabled, there is no need resetting VF until
-	 * PF is active again.
-	 */
-	if (test_bit(__ICE_VF_DIS, pf->state))
-		return false;
-
-	/* If the VF has been disabled, this means something else is
-	 * resetting the VF, so we shouldn't continue. Otherwise, set
-	 * disable VF state bit for actual reset, and continue.
-	 */
-	if (test_and_set_bit(ICE_VF_STATE_DIS, vf->vf_states))
-		return false;
+	if (ice_is_vf_disabled(vf)) {
+		dev_dbg(&pf->pdev->dev,
+			"VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
+			 vf->vf_id);
+		return true;
+	}
 
+	/* Set VF disable bit state here, before triggering reset */
+	set_bit(ICE_VF_STATE_DIS, vf->vf_states);
 	ice_trigger_vf_reset(vf, is_vflr, false);
 
 	vsi = pf->vsi[vf->lan_vsi_idx];
@@ -1407,7 +1423,7 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 
 	if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
 		dev_err(dev, "This device is not capable of SR-IOV\n");
-		return -ENODEV;
+		return -EOPNOTSUPP;
 	}
 
 	if (pre_existing_vfs && pre_existing_vfs != num_vfs)
@@ -1495,12 +1511,10 @@ void ice_process_vflr_event(struct ice_pf *pf)
 }
 
 /**
- * ice_vc_dis_vf - Disable a given VF via SW reset
+ * ice_vc_reset_vf - Perform software reset on the VF after informing the AVF
  * @vf: pointer to the VF info
- *
- * Disable the VF through a SW reset
  */
-static void ice_vc_dis_vf(struct ice_vf *vf)
+static void ice_vc_reset_vf(struct ice_vf *vf)
 {
 	ice_vc_notify_vf_reset(vf);
 	ice_reset_vf(vf, false);
@@ -2159,9 +2173,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 
 		vector_id = map->vector_id;
 		vsi_id = map->vsi_id;
-		/* validate msg params */
-		if (!(vector_id < pf->hw.func_caps.common_cap
-		    .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+		/* vector_id is always 0-based for each VF, and can never be
+		 * larger than or equal to the max allowed interrupts per VF
+		 */
+		if (!(vector_id < ICE_MAX_INTR_PER_VF) ||
+		    !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
 		    (!vector_id && (map->rxq_map || map->txq_map))) {
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 			goto error_param;
@@ -2540,7 +2556,7 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
 	} else {
 		/* request is successful, then reset VF */
 		vf->num_req_qs = req_queues;
-		ice_vc_dis_vf(vf);
+		ice_vc_reset_vf(vf);
 		dev_info(&pf->pdev->dev,
 			 "VF %d granted request of %u queues.\n",
 			 vf->vf_id, req_queues);
@@ -3124,6 +3140,23 @@ out:
 }
 
 /**
+ * ice_wait_on_vf_reset
+ * @vf: The VF being resseting
+ *
+ * Poll to make sure a given VF is ready after reset
+ */
+static void ice_wait_on_vf_reset(struct ice_vf *vf)
+{
+	int i;
+
+	for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) {
+		if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+			break;
+		msleep(20);
+	}
+}
+
+/**
  * ice_set_vf_mac
  * @netdev: network interface device structure
  * @vf_id: VF identifier
@@ -3146,6 +3179,15 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	}
 
 	vf = &pf->vf[vf_id];
+	/* Don't set MAC on disabled VF */
+	if (ice_is_vf_disabled(vf))
+		return -EINVAL;
+
+	/* In case VF is in reset mode, wait until it is completed. Depending
+	 * on factors like queue disabling routine, this could take ~250ms
+	 */
+	ice_wait_on_vf_reset(vf);
+
 	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
 		netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
 		return -EBUSY;
@@ -3167,7 +3209,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 		    "MAC on VF %d set to %pM. VF driver will be reinitialized\n",
 		    vf_id, mac);
 
-	ice_vc_dis_vf(vf);
+	ice_vc_reset_vf(vf);
 	return ret;
 }
 
@@ -3193,6 +3235,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
 	}
 
 	vf = &pf->vf[vf_id];
+	/* Don't set Trusted Mode on disabled VF */
+	if (ice_is_vf_disabled(vf))
+		return -EINVAL;
+
+	/* In case VF is in reset mode, wait until it is completed. Depending
+	 * on factors like queue disabling routine, this could take ~250ms
+	 */
+	ice_wait_on_vf_reset(vf);
+
 	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
 		dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
 		return -EBUSY;
@@ -3203,7 +3254,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
 		return 0;
 
 	vf->trusted = trusted;
-	ice_vc_dis_vf(vf);
+	ice_vc_reset_vf(vf);
 	dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
 		 vf_id, trusted ? "" : "un");
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 0d9880c8bba3..2e867ad2e81d 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -38,6 +38,7 @@
 #define ICE_MAX_POLICY_INTR_PER_VF	33
 #define ICE_MIN_INTR_PER_VF		(ICE_MIN_QS_PER_VF + 1)
 #define ICE_DFLT_INTR_PER_VF		(ICE_DFLT_QS_PER_VF + 1)
+#define ICE_MAX_VF_RESET_WAIT		15
 
 /* Specific VF states */
 enum ice_vf_states {
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644
index 000000000000..fcffad0069d6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -0,0 +1,1181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+/**
+ * ice_qp_reset_stats - Resets all stats for rings of given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+	memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
+	       sizeof(vsi->rx_rings[q_idx]->rx_stats));
+	memset(&vsi->tx_rings[q_idx]->stats, 0,
+	       sizeof(vsi->tx_rings[q_idx]->stats));
+	if (ice_is_xdp_ena_vsi(vsi))
+		memset(&vsi->xdp_rings[q_idx]->stats, 0,
+		       sizeof(vsi->xdp_rings[q_idx]->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+	ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+	if (ice_is_xdp_ena_vsi(vsi))
+		ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+	ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+		     bool enable)
+{
+	if (!vsi->netdev || !q_vector)
+		return;
+
+	if (enable)
+		napi_enable(&q_vector->napi);
+	else
+		napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring,
+		 struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int base = vsi->base_vector;
+	u16 reg;
+	u32 val;
+
+	/* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
+	 * here only QINT_RQCTL
+	 */
+	reg = rx_ring->reg_idx;
+	val = rd32(hw, QINT_RQCTL(reg));
+	val &= ~QINT_RQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_RQCTL(reg), val);
+
+	if (q_vector) {
+		u16 v_idx = q_vector->v_idx;
+
+		wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+		ice_flush(hw);
+		synchronize_irq(pf->msix_entries[v_idx + base].vector);
+	}
+}
+
+/**
+ * ice_qvec_cfg_msix - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	u16 reg_idx = q_vector->reg_idx;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_ring *ring;
+
+	ice_cfg_itr(hw, q_vector);
+
+	wr32(hw, GLINT_RATE(reg_idx),
+	     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+	ice_for_each_ring(ring, q_vector->tx)
+		ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx,
+				      q_vector->tx.itr_idx);
+
+	ice_for_each_ring(ring, q_vector->rx)
+		ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx,
+				      q_vector->rx.itr_idx);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+
+	ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_txq_meta txq_meta = { };
+	struct ice_ring *tx_ring, *rx_ring;
+	struct ice_q_vector *q_vector;
+	int timeout = 50;
+	int err;
+
+	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+		return -EINVAL;
+
+	tx_ring = vsi->tx_rings[q_idx];
+	rx_ring = vsi->rx_rings[q_idx];
+	q_vector = rx_ring->q_vector;
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+	netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+	ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+	ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+	if (err)
+		return err;
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+		memset(&txq_meta, 0, sizeof(txq_meta));
+		ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+		err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+					   &txq_meta);
+		if (err)
+			return err;
+	}
+	err = ice_vsi_ctrl_rx_ring(vsi, false, q_idx);
+	if (err)
+		return err;
+
+	ice_qvec_toggle_napi(vsi, q_vector, false);
+	ice_qp_clean_rings(vsi, q_idx);
+	ice_qp_reset_stats(vsi, q_idx);
+
+	return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	struct ice_ring *tx_ring, *rx_ring;
+	struct ice_q_vector *q_vector;
+	int err;
+
+	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+		return -EINVAL;
+
+	qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	tx_ring = vsi->tx_rings[q_idx];
+	rx_ring = vsi->rx_rings[q_idx];
+	q_vector = rx_ring->q_vector;
+
+	err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+	if (err)
+		goto free_buf;
+
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+		memset(qg_buf, 0, sizeof(*qg_buf));
+		qg_buf->num_txqs = 1;
+		err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+		if (err)
+			goto free_buf;
+		ice_set_ring_xdp(xdp_ring);
+		xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+	}
+
+	err = ice_setup_rx_ctx(rx_ring);
+	if (err)
+		goto free_buf;
+
+	ice_qvec_cfg_msix(vsi, q_vector);
+
+	err = ice_vsi_ctrl_rx_ring(vsi, true, q_idx);
+	if (err)
+		goto free_buf;
+
+	clear_bit(__ICE_CFG_BUSY, vsi->state);
+	ice_qvec_toggle_napi(vsi, q_vector, true);
+	ice_qvec_ena_irq(vsi, q_vector);
+
+	netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+	kfree(qg_buf);
+	return err;
+}
+
+/**
+ * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket
+ * @vsi: VSI to allocate the UMEM on
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
+{
+	if (vsi->xsk_umems)
+		return 0;
+
+	vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems),
+				 GFP_KERNEL);
+
+	if (!vsi->xsk_umems) {
+		vsi->num_xsk_umems = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_xsk_add_umem - add a UMEM region for XDP sockets
+ * @vsi: VSI to which the UMEM will be added
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+	int err;
+
+	err = ice_xsk_alloc_umems(vsi);
+	if (err)
+		return err;
+
+	vsi->xsk_umems[qid] = umem;
+	vsi->num_xsk_umems_used++;
+
+	return 0;
+}
+
+/**
+ * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
+ * @vsi: VSI from which the VSI will be removed
+ * @qid: Ring/qid associated with the UMEM
+ */
+static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
+{
+	vsi->xsk_umems[qid] = NULL;
+	vsi->num_xsk_umems_used--;
+
+	if (vsi->num_xsk_umems_used == 0) {
+		kfree(vsi->xsk_umems);
+		vsi->xsk_umems = NULL;
+		vsi->num_xsk_umems = 0;
+	}
+}
+
+/**
+ * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets
+ * @vsi: VSI to map the UMEM region
+ * @umem: UMEM to map
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	unsigned int i;
+
+	dev = &pf->pdev->dev;
+	for (i = 0; i < umem->npgs; i++) {
+		dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0,
+						    PAGE_SIZE,
+						    DMA_BIDIRECTIONAL,
+						    ICE_RX_DMA_ATTR);
+		if (dma_mapping_error(dev, dma)) {
+			dev_dbg(dev,
+				"XSK UMEM DMA mapping error on page num %d", i);
+			goto out_unmap;
+		}
+
+		umem->pages[i].dma = dma;
+	}
+
+	return 0;
+
+out_unmap:
+	for (; i > 0; i--) {
+		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+		umem->pages[i].dma = 0;
+	}
+
+	return -EFAULT;
+}
+
+/**
+ * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets
+ * @vsi: VSI from which the UMEM will be unmapped
+ * @umem: UMEM to unmap
+ */
+static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	unsigned int i;
+
+	dev = &pf->pdev->dev;
+	for (i = 0; i < umem->npgs; i++) {
+		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+
+		umem->pages[i].dma = 0;
+	}
+}
+
+/**
+ * ice_xsk_umem_disable - disable a UMEM region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
+{
+	if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems ||
+	    !vsi->xsk_umems[qid])
+		return -EINVAL;
+
+	ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);
+	ice_xsk_remove_umem(vsi, qid);
+
+	return 0;
+}
+
+/**
+ * ice_xsk_umem_enable - enable a UMEM region
+ * @vsi: Current VSI
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+	struct xdp_umem_fq_reuse *reuseq;
+	int err;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
+	if (qid >= vsi->num_xsk_umems)
+		return -EINVAL;
+
+	if (vsi->xsk_umems && vsi->xsk_umems[qid])
+		return -EBUSY;
+
+	reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
+	if (!reuseq)
+		return -ENOMEM;
+
+	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+	err = ice_xsk_umem_dma_map(vsi, umem);
+	if (err)
+		return err;
+
+	err = ice_xsk_add_umem(vsi, umem, qid);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state
+ * @vsi: Current VSI
+ * @umem: UMEM to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+	bool if_running, umem_present = !!umem;
+	int ret = 0, umem_failure = 0;
+
+	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+	if (if_running) {
+		ret = ice_qp_dis(vsi, qid);
+		if (ret) {
+			netdev_err(vsi->netdev, "ice_qp_dis error = %d", ret);
+			goto xsk_umem_if_up;
+		}
+	}
+
+	umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) :
+				      ice_xsk_umem_disable(vsi, qid);
+
+xsk_umem_if_up:
+	if (if_running) {
+		ret = ice_qp_ena(vsi, qid);
+		if (!ret && umem_present)
+			napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+		else if (ret)
+			netdev_err(vsi->netdev, "ice_qp_ena error = %d", ret);
+	}
+
+	if (umem_failure) {
+		netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d",
+			   umem_present ? "en" : "dis", umem_failure);
+		return umem_failure;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations
+ * @zca: zero-cpoy allocator
+ * @handle: Buffer handle
+ */
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+	struct ice_rx_buf *rx_buf;
+	struct ice_ring *rx_ring;
+	struct xdp_umem *umem;
+	u64 hr, mask;
+	u16 nta;
+
+	rx_ring = container_of(zca, struct ice_ring, zca);
+	umem = rx_ring->xsk_umem;
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	mask = umem->chunk_mask;
+
+	nta = rx_ring->next_to_alloc;
+	rx_buf = &rx_ring->rx_buf[nta];
+
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	handle &= mask;
+
+	rx_buf->dma = xdp_umem_get_dma(umem, handle);
+	rx_buf->dma += hr;
+
+	rx_buf->addr = xdp_umem_get_data(umem, handle);
+	rx_buf->addr += hr;
+
+	rx_buf->handle = (u64)handle + umem->headroom;
+}
+
+/**
+ * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the hot path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	void *addr = rx_buf->addr;
+	u64 handle, hr;
+
+	if (addr) {
+		rx_ring->rx_stats.page_reuse_count++;
+		return true;
+	}
+
+	if (!xsk_umem_peek_addr(umem, &handle)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	rx_buf->dma = xdp_umem_get_dma(umem, handle);
+	rx_buf->dma += hr;
+
+	rx_buf->addr = xdp_umem_get_data(umem, handle);
+	rx_buf->addr += hr;
+
+	rx_buf->handle = handle + umem->headroom;
+
+	xsk_umem_discard_addr(umem);
+	return true;
+}
+
+/**
+ * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the slow path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	u64 handle, headroom;
+
+	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	handle &= umem->chunk_mask;
+	headroom = umem->headroom + XDP_PACKET_HEADROOM;
+
+	rx_buf->dma = xdp_umem_get_dma(umem, handle);
+	rx_buf->dma += headroom;
+
+	rx_buf->addr = xdp_umem_get_data(umem, handle);
+	rx_buf->addr += headroom;
+
+	rx_buf->handle = handle + umem->headroom;
+
+	xsk_umem_discard_addr_rq(umem);
+	return true;
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ * @alloc: the function pointer to call for allocation
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns false if all allocations were successful, true if any fail.
+ */
+static bool
+ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
+		     bool alloc(struct ice_ring *, struct ice_rx_buf *))
+{
+	union ice_32b_rx_flex_desc *rx_desc;
+	u16 ntu = rx_ring->next_to_use;
+	struct ice_rx_buf *rx_buf;
+	bool ret = false;
+
+	if (!count)
+		return false;
+
+	rx_desc = ICE_RX_DESC(rx_ring, ntu);
+	rx_buf = &rx_ring->rx_buf[ntu];
+
+	do {
+		if (!alloc(rx_ring, rx_buf)) {
+			ret = true;
+			break;
+		}
+
+		dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0,
+						 rx_ring->rx_buf_len,
+						 DMA_BIDIRECTIONAL);
+
+		rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma);
+		rx_desc->wb.status_error0 = 0;
+
+		rx_desc++;
+		rx_buf++;
+		ntu++;
+
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = ICE_RX_DESC(rx_ring, 0);
+			rx_buf = rx_ring->rx_buf;
+			ntu = 0;
+		}
+	} while (--count);
+
+	if (rx_ring->next_to_use != ntu)
+		ice_release_rx_desc(rx_ring, ntu);
+
+	return ret;
+}
+
+/**
+ * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count)
+{
+	return ice_alloc_rx_bufs_zc(rx_ring, count,
+				    ice_alloc_buf_fast_zc);
+}
+
+/**
+ * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count)
+{
+	return ice_alloc_rx_bufs_zc(rx_ring, count,
+				    ice_alloc_buf_slow_zc);
+}
+
+/**
+ * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
+ * @rx_ring: Rx ring
+ */
+static void ice_bump_ntc(struct ice_ring *rx_ring)
+{
+	int ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(ICE_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * ice_get_rx_buf_zc - Fetch the current Rx buffer
+ * @rx_ring: Rx ring
+ * @size: size of a buffer
+ *
+ * This function returns the current, received Rx buffer and does
+ * DMA synchronization.
+ *
+ * Returns a pointer to the received Rx buffer.
+ */
+static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size)
+{
+	struct ice_rx_buf *rx_buf;
+
+	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+
+	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0,
+				      size, DMA_BIDIRECTIONAL);
+
+	return rx_buf;
+}
+
+/**
+ * ice_reuse_rx_buf_zc - reuse an Rx buffer
+ * @rx_ring: Rx ring
+ * @old_buf: The buffer to recycle
+ *
+ * This function recycles a finished Rx buffer, and places it on the recycle
+ * queue (next_to_alloc).
+ */
+static void
+ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+	unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
+	u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+	u16 nta = rx_ring->next_to_alloc;
+	struct ice_rx_buf *new_buf;
+
+	new_buf = &rx_ring->rx_buf[nta++];
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	new_buf->dma = old_buf->dma & mask;
+	new_buf->dma += hr;
+
+	new_buf->addr = (void *)((unsigned long)old_buf->addr & mask);
+	new_buf->addr += hr;
+
+	new_buf->handle = old_buf->handle & mask;
+	new_buf->handle += rx_ring->xsk_umem->headroom;
+
+	old_buf->addr = NULL;
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @rx_buf: zero-copy Rx buffer
+ * @xdp: XDP buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		     struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	unsigned int datasize = xdp->data_end - xdp->data;
+	unsigned int datasize_hard = xdp->data_end -
+				     xdp->data_hard_start;
+	struct sk_buff *skb;
+
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+
+	return skb;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
+{
+	int err, result = ICE_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	struct ice_ring *xdp_ring;
+	u32 act;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+	if (!xdp_prog) {
+		rcu_read_unlock();
+		return ICE_XDP_PASS;
+	}
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	xdp->handle += xdp->data - xdp->data_hard_start;
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
+		result = ice_xmit_xdp_buff(xdp, xdp_ring);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fallthrough -- not supported action */
+	case XDP_ABORTED:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		/* fallthrough -- handle aborts by dropping frame */
+	case XDP_DROP:
+		result = ICE_XDP_CONSUMED;
+		break;
+	}
+
+	rcu_read_unlock();
+	return result;
+}
+
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+	unsigned int xdp_xmit = 0;
+	struct xdp_buff xdp;
+	bool failure = 0;
+
+	xdp.rxq = &rx_ring->xdp_rxq;
+
+	while (likely(total_rx_packets < (unsigned int)budget)) {
+		union ice_32b_rx_flex_desc *rx_desc;
+		unsigned int size, xdp_res = 0;
+		struct ice_rx_buf *rx_buf;
+		struct sk_buff *skb;
+		u16 stat_err_bits;
+		u16 vlan_tag = 0;
+		u8 rx_ptype;
+
+		if (cleaned_count >= ICE_RX_BUF_WRITE) {
+			failure |= ice_alloc_rx_bufs_fast_zc(rx_ring,
+							     cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+		if (!ice_test_staterr(rx_desc, stat_err_bits))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we have
+		 * verified the descriptor has been written back.
+		 */
+		dma_rmb();
+
+		size = le16_to_cpu(rx_desc->wb.pkt_len) &
+				   ICE_RX_FLX_DESC_PKT_LEN_M;
+		if (!size)
+			break;
+
+		rx_buf = ice_get_rx_buf_zc(rx_ring, size);
+		if (!rx_buf->addr)
+			break;
+
+		xdp.data = rx_buf->addr;
+		xdp.data_meta = xdp.data;
+		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+		xdp.data_end = xdp.data + size;
+		xdp.handle = rx_buf->handle;
+
+		xdp_res = ice_run_xdp_zc(rx_ring, &xdp);
+		if (xdp_res) {
+			if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+				xdp_xmit |= xdp_res;
+				rx_buf->addr = NULL;
+			} else {
+				ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+			}
+
+			total_rx_bytes += size;
+			total_rx_packets++;
+			cleaned_count++;
+
+			ice_bump_ntc(rx_ring);
+			continue;
+		}
+
+		/* XDP_PASS path */
+		skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp);
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buf_failed++;
+			break;
+		}
+
+		cleaned_count++;
+		ice_bump_ntc(rx_ring);
+
+		if (eth_skb_pad(skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+		if (ice_test_staterr(rx_desc, stat_err_bits))
+			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+				       ICE_RX_FLEX_DESC_PTYPE_M;
+
+		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+		ice_receive_skb(rx_ring, skb, vlan_tag);
+	}
+
+	ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+	ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+	return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: max number of frames to xmit
+ *
+ * Returns true if cleanup/transmission is done.
+ */
+static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
+{
+	struct ice_tx_desc *tx_desc = NULL;
+	bool work_done = true;
+	struct xdp_desc desc;
+	dma_addr_t dma;
+
+	while (likely(budget-- > 0)) {
+		struct ice_tx_buf *tx_buf;
+
+		if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
+			xdp_ring->tx_stats.tx_busy++;
+			work_done = false;
+			break;
+		}
+
+		tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
+
+		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+			break;
+
+		dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+		dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
+					   DMA_BIDIRECTIONAL);
+
+		tx_buf->bytecount = desc.len;
+
+		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD,
+							  0, desc.len, 0);
+
+		xdp_ring->next_to_use++;
+		if (xdp_ring->next_to_use == xdp_ring->count)
+			xdp_ring->next_to_use = 0;
+	}
+
+	if (tx_desc) {
+		ice_xdp_ring_update_tail(xdp_ring);
+		xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+	}
+
+	return budget > 0 && work_done;
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @xdp_ring: XDP Tx ring
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+	xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+	dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if cleanup/tranmission is done.
+ */
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget)
+{
+	int total_packets = 0, total_bytes = 0;
+	s16 ntc = xdp_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	bool xmit_done = true;
+	u32 xsk_frames = 0;
+
+	tx_desc = ICE_TX_DESC(xdp_ring, ntc);
+	tx_buf = &xdp_ring->tx_buf[ntc];
+	ntc -= xdp_ring->count;
+
+	do {
+		if (!(tx_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		total_bytes += tx_buf->bytecount;
+		total_packets++;
+
+		if (tx_buf->raw_buf) {
+			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+			tx_buf->raw_buf = NULL;
+		} else {
+			xsk_frames++;
+		}
+
+		tx_desc->cmd_type_offset_bsz = 0;
+		tx_buf++;
+		tx_desc++;
+		ntc++;
+
+		if (unlikely(!ntc)) {
+			ntc -= xdp_ring->count;
+			tx_buf = xdp_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(xdp_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+	} while (likely(--budget));
+
+	ntc += xdp_ring->count;
+	xdp_ring->next_to_clean = ntc;
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+
+	ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
+	xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
+
+	return budget > 0 && xmit_done;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+	       u32 __always_unused flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_q_vector *q_vector;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_ring *ring;
+
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!ice_is_xdp_ena_vsi(vsi))
+		return -ENXIO;
+
+	if (queue_id >= vsi->num_txq)
+		return -ENXIO;
+
+	if (!vsi->xdp_rings[queue_id]->xsk_umem)
+		return -ENXIO;
+
+	ring = vsi->xdp_rings[queue_id];
+
+	/* The idea here is that if NAPI is running, mark a miss, so
+	 * it will run again. If not, trigger an interrupt and
+	 * schedule the NAPI from interrupt context. If NAPI would be
+	 * scheduled here, the interrupt affinity would not be
+	 * honored.
+	 */
+	q_vector = ring->q_vector;
+	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+		ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+	return 0;
+}
+
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->xsk_umems)
+		return false;
+
+	for (i = 0; i < vsi->num_xsk_umems; i++) {
+		if (vsi->xsk_umems[i])
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
+{
+	u16 i;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+		if (!rx_buf->addr)
+			continue;
+
+		xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle);
+		rx_buf->addr = NULL;
+	}
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
+{
+	u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+		if (tx_buf->raw_buf)
+			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+		else
+			xsk_frames++;
+
+		tx_buf->raw_buf = NULL;
+
+		ntc++;
+		if (ntc >= xdp_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644
index 000000000000..3479e1de98fe
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+#include "ice.h"
+
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
+#else
+static inline int
+ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
+		   struct xdp_umem __always_unused *umem,
+		   u16 __always_unused qid)
+{
+	return -ENOTSUPP;
+}
+
+static inline void
+ice_zca_free(struct zero_copy_allocator __always_unused *zca,
+	     unsigned long __always_unused handle)
+{
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
+		    int __always_unused budget)
+{
+	return 0;
+}
+
+static inline bool
+ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
+		    int __always_unused budget)
+{
+	return false;
+}
+
+static inline bool
+ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
+			  u16 __always_unused count)
+{
+	return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+	return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+	       u32 __always_unused queue_id, u32 __always_unused flags)
+{
+	return -ENOTSUPP;
+}
+
+#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
+#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
index 6ad775b1a4c5..63ec253ac788 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.h
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
@@ -127,6 +127,7 @@ struct e1000_adv_tx_context_desc {
 };
 
 #define E1000_ADVTXD_MACLEN_SHIFT    9  /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000  /* L4 Packet TYPE of UDP */
 #define E1000_ADVTXD_TUCMD_IPV4    0x00000400  /* IP Packet Type: 1=IPv4 */
 #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800  /* L4 Packet TYPE of TCP */
 #define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ed7e667d7eb2..98346eb064d5 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2518,6 +2518,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev,
 	if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN))
 		return features & ~(NETIF_F_HW_CSUM |
 				    NETIF_F_SCTP_CRC |
+				    NETIF_F_GSO_UDP_L4 |
 				    NETIF_F_HW_VLAN_CTAG_TX |
 				    NETIF_F_TSO |
 				    NETIF_F_TSO6);
@@ -2526,6 +2527,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev,
 	if (unlikely(network_hdr_len >  IGB_MAX_NETWORK_HDR_LEN))
 		return features & ~(NETIF_F_HW_CSUM |
 				    NETIF_F_SCTP_CRC |
+				    NETIF_F_GSO_UDP_L4 |
 				    NETIF_F_TSO |
 				    NETIF_F_TSO6);
 
@@ -3122,7 +3124,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			    NETIF_F_HW_CSUM;
 
 	if (hw->mac.type >= e1000_82576)
-		netdev->features |= NETIF_F_SCTP_CRC;
+		netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;
 
 	if (hw->mac.type >= e1000_i350)
 		netdev->features |= NETIF_F_HW_TC;
@@ -5696,6 +5698,7 @@ static int igb_tso(struct igb_ring *tx_ring,
 	} ip;
 	union {
 		struct tcphdr *tcp;
+		struct udphdr *udp;
 		unsigned char *hdr;
 	} l4;
 	u32 paylen, l4_offset;
@@ -5715,7 +5718,8 @@ static int igb_tso(struct igb_ring *tx_ring,
 	l4.hdr = skb_checksum_start(skb);
 
 	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
-	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+	type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
+		      E1000_ADVTXD_TUCMD_L4T_UDP : E1000_ADVTXD_TUCMD_L4T_TCP;
 
 	/* initialize outer IP header fields */
 	if (ip.v4->version == 4) {
@@ -5743,12 +5747,19 @@ static int igb_tso(struct igb_ring *tx_ring,
 	/* determine offset of inner transport header */
 	l4_offset = l4.hdr - skb->data;
 
-	/* compute length of segmentation header */
-	*hdr_len = (l4.tcp->doff * 4) + l4_offset;
-
 	/* remove payload length from inner checksum */
 	paylen = skb->len - l4_offset;
-	csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
+	if (type_tucmd & E1000_ADVTXD_TUCMD_L4T_TCP) {
+		/* compute length of segmentation header */
+		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+		csum_replace_by_diff(&l4.tcp->check,
+			(__force __wsum)htonl(paylen));
+	} else {
+		/* compute length of segmentation header */
+		*hdr_len = sizeof(*l4.udp) + l4_offset;
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+	}
 
 	/* update gso size and bytecount with header size */
 	first->gso_segs = skb_shinfo(skb)->gso_segs;
@@ -6225,7 +6236,6 @@ static void igb_get_stats64(struct net_device *netdev,
 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
-	struct pci_dev *pdev = adapter->pdev;
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 
 	/* adjust max frame to be at least the size of a standard frame */
@@ -6241,8 +6251,8 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
 	if (netif_running(netdev))
 		igb_down(adapter);
 
-	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
-		 netdev->mtu, new_mtu);
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
 
 	if (netif_running(netdev))
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 0f2b68f4bb0f..6003dc3ff5fd 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -2437,8 +2437,8 @@ static int igbvf_change_mtu(struct net_device *netdev, int new_mtu)
 		adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN +
 					 ETH_FCS_LEN;
 
-	dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
-		 netdev->mtu, new_mtu);
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
 
 	if (netif_running(netdev))
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 7e16345d836e..0868677d43ed 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -411,7 +411,6 @@ struct igc_adapter {
 	u32 tx_hwtstamp_timeouts;
 	u32 tx_hwtstamp_skipped;
 	u32 rx_hwtstamp_cleared;
-	u32 *shadow_vfta;
 
 	u32 rss_queues;
 	u32 rss_indir_tbl_init;
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index f3f2325fe567..f3788f0b95b4 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -282,7 +282,10 @@
 #define IGC_RCTL_BAM		0x00008000 /* broadcast enable */
 
 /* Receive Descriptor bit definitions */
-#define IGC_RXD_STAT_EOP	0x02    /* End of Packet */
+#define IGC_RXD_STAT_EOP	0x02	/* End of Packet */
+#define IGC_RXD_STAT_IXSM	0x04	/* Ignore checksum */
+#define IGC_RXD_STAT_UDPCS	0x10	/* UDP xsum calculated */
+#define IGC_RXD_STAT_TCPCS	0x20	/* TCP xsum calculated */
 
 #define IGC_RXDEXT_STATERR_CE		0x01000000
 #define IGC_RXDEXT_STATERR_SE		0x02000000
@@ -402,4 +405,7 @@
 #define IGC_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet Type of TCP */
 #define IGC_ADVTXD_TUCMD_L4T_SCTP	0x00001000 /* L4 packet TYPE of SCTP */
 
+/* Maximum size of the MTA register table in all supported adapters */
+#define MAX_MTA_REG			128
+
 #endif /* _IGC_DEFINES_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index abb2d72911ff..20f710645746 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -91,6 +91,7 @@ struct igc_mac_info {
 	u16 mta_reg_count;
 	u16 uta_reg_count;
 
+	u32 mta_shadow[MAX_MTA_REG];
 	u16 rar_entry_count;
 
 	u8 forced_speed_duplex;
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index 5eeb4c8caf4a..12aa6b5fcb5d 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -784,3 +784,107 @@ bool igc_enable_mng_pass_thru(struct igc_hw *hw)
 out:
 	return ret_val;
 }
+
+/**
+ *  igc_hash_mc_addr - Generate a multicast hash value
+ *  @hw: pointer to the HW structure
+ *  @mc_addr: pointer to a multicast address
+ *
+ *  Generates a multicast address hash value which is used to determine
+ *  the multicast filter table array address and new table value.  See
+ *  igc_mta_set()
+ **/
+static u32 igc_hash_mc_addr(struct igc_hw *hw, u8 *mc_addr)
+{
+	u32 hash_value, hash_mask;
+	u8 bit_shift = 0;
+
+	/* Register count multiplied by bits per register */
+	hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+
+	/* For a mc_filter_type of 0, bit_shift is the number of left-shifts
+	 * where 0xFF would still fall within the hash mask.
+	 */
+	while (hash_mask >> bit_shift != 0xFF)
+		bit_shift++;
+
+	/* The portion of the address that is used for the hash table
+	 * is determined by the mc_filter_type setting.
+	 * The algorithm is such that there is a total of 8 bits of shifting.
+	 * The bit_shift for a mc_filter_type of 0 represents the number of
+	 * left-shifts where the MSB of mc_addr[5] would still fall within
+	 * the hash_mask.  Case 0 does this exactly.  Since there are a total
+	 * of 8 bits of shifting, then mc_addr[4] will shift right the
+	 * remaining number of bits. Thus 8 - bit_shift.  The rest of the
+	 * cases are a variation of this algorithm...essentially raising the
+	 * number of bits to shift mc_addr[5] left, while still keeping the
+	 * 8-bit shifting total.
+	 *
+	 * For example, given the following Destination MAC Address and an
+	 * MTA register count of 128 (thus a 4096-bit vector and 0xFFF mask),
+	 * we can see that the bit_shift for case 0 is 4.  These are the hash
+	 * values resulting from each mc_filter_type...
+	 * [0] [1] [2] [3] [4] [5]
+	 * 01  AA  00  12  34  56
+	 * LSB                 MSB
+	 *
+	 * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563
+	 * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6
+	 * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163
+	 * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634
+	 */
+	switch (hw->mac.mc_filter_type) {
+	default:
+	case 0:
+		break;
+	case 1:
+		bit_shift += 1;
+		break;
+	case 2:
+		bit_shift += 2;
+		break;
+	case 3:
+		bit_shift += 4;
+		break;
+	}
+
+	hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
+				  (((u16)mc_addr[5]) << bit_shift)));
+
+	return hash_value;
+}
+
+/**
+ *  igc_update_mc_addr_list - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *
+ *  Updates entire Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void igc_update_mc_addr_list(struct igc_hw *hw,
+			     u8 *mc_addr_list, u32 mc_addr_count)
+{
+	u32 hash_value, hash_bit, hash_reg;
+	int i;
+
+	/* clear mta_shadow */
+	memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+	/* update mta_shadow from mc_addr_list */
+	for (i = 0; (u32)i < mc_addr_count; i++) {
+		hash_value = igc_hash_mc_addr(hw, mc_addr_list);
+
+		hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+		hash_bit = hash_value & 0x1F;
+
+		hw->mac.mta_shadow[hash_reg] |= BIT(hash_bit);
+		mc_addr_list += ETH_ALEN;
+	}
+
+	/* replace the entire MTA table */
+	for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+		array_wr32(IGC_MTA, i, hw->mac.mta_shadow[i]);
+	wrfl();
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h
index 782bc995badc..832cccec87cd 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.h
+++ b/drivers/net/ethernet/intel/igc/igc_mac.h
@@ -29,6 +29,8 @@ s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed,
 				    u16 *duplex);
 
 bool igc_enable_mng_pass_thru(struct igc_hw *hw);
+void igc_update_mc_addr_list(struct igc_hw *hw,
+			     u8 *mc_addr_list, u32 mc_addr_count);
 
 enum igc_mng_mode {
 	igc_mng_mode_none = 0,
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 24888676f69b..9700527dd797 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -795,6 +795,44 @@ static int igc_set_mac(struct net_device *netdev, void *p)
 	return 0;
 }
 
+/**
+ *  igc_write_mc_addr_list - write multicast addresses to MTA
+ *  @netdev: network interface device structure
+ *
+ *  Writes multicast address list to the MTA hash table.
+ *  Returns: -ENOMEM on failure
+ *           0 on no addresses written
+ *           X on writing X addresses to MTA
+ **/
+static int igc_write_mc_addr_list(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	struct netdev_hw_addr *ha;
+	u8  *mta_list;
+	int i;
+
+	if (netdev_mc_empty(netdev)) {
+		/* nothing to program, so clear mc list */
+		igc_update_mc_addr_list(hw, NULL, 0);
+		return 0;
+	}
+
+	mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
+	if (!mta_list)
+		return -ENOMEM;
+
+	/* The shared function expects a packed array of only addresses. */
+	i = 0;
+	netdev_for_each_mc_addr(ha, netdev)
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+
+	igc_update_mc_addr_list(hw, mta_list, i);
+	kfree(mta_list);
+
+	return netdev_mc_count(netdev);
+}
+
 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
 			    struct igc_tx_buffer *first,
 			    u32 vlan_macip_lens, u32 type_tucmd,
@@ -1163,6 +1201,46 @@ static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
 	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
 }
 
+static void igc_rx_checksum(struct igc_ring *ring,
+			    union igc_adv_rx_desc *rx_desc,
+			    struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* Ignore Checksum bit is set */
+	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
+		return;
+
+	/* Rx checksum disabled via ethtool */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* TCP/UDP checksum error bit is set */
+	if (igc_test_staterr(rx_desc,
+			     IGC_RXDEXT_STATERR_TCPE |
+			     IGC_RXDEXT_STATERR_IPE)) {
+		/* work around errata with sctp packets where the TCPE aka
+		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
+		 * packets (aka let the stack check the crc32c)
+		 */
+		if (!(skb->len == 60 &&
+		      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
+			u64_stats_update_begin(&ring->rx_syncp);
+			ring->rx_stats.csum_err++;
+			u64_stats_update_end(&ring->rx_syncp);
+		}
+		/* let the stack verify checksum errors */
+		return;
+	}
+	/* It must be a TCP or UDP packet with a valid checksum */
+	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
+				      IGC_RXD_STAT_UDPCS))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	dev_dbg(ring->dev, "cksum success: bits %08X\n",
+		le32_to_cpu(rx_desc->wb.upper.status_error));
+}
+
 static inline void igc_rx_hash(struct igc_ring *ring,
 			       union igc_adv_rx_desc *rx_desc,
 			       struct sk_buff *skb)
@@ -1189,6 +1267,8 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring,
 {
 	igc_rx_hash(rx_ring, rx_desc, skb);
 
+	igc_rx_checksum(rx_ring, rx_desc, skb);
+
 	skb_record_rx_queue(skb, rx_ring->queue_index);
 
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
@@ -2192,7 +2272,6 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 	struct igc_adapter *adapter = netdev_priv(netdev);
-	struct pci_dev *pdev = adapter->pdev;
 
 	/* adjust max frame to be at least the size of a standard frame */
 	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
@@ -2207,8 +2286,8 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
 	if (netif_running(netdev))
 		igc_down(adapter);
 
-	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
-		 netdev->mtu, new_mtu);
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
 
 	if (netif_running(netdev))
@@ -2518,6 +2597,110 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter,
 					IGC_MAC_STATE_QUEUE_STEERING | flags);
 }
 
+/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
+ * 'flags' is used to indicate what kind of match is made, match is by
+ * default for the destination address, if matching by source address
+ * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igc_add_mac_filter(struct igc_adapter *adapter,
+			      const u8 *addr, const u8 queue)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int rar_entries = hw->mac.rar_entry_count;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	/* Search for the first empty entry in the MAC table.
+	 * Do not touch entries at the end of the table reserved for the VF MAC
+	 * addresses.
+	 */
+	for (i = 0; i < rar_entries; i++) {
+		if (!igc_mac_entry_can_be_used(&adapter->mac_table[i],
+					       addr, 0))
+			continue;
+
+		ether_addr_copy(adapter->mac_table[i].addr, addr);
+		adapter->mac_table[i].queue = queue;
+		adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE;
+
+		igc_rar_set_index(adapter, i);
+		return i;
+	}
+
+	return -ENOSPC;
+}
+
+/* Remove a MAC filter for 'addr' directing matching traffic to
+ * 'queue', 'flags' is used to indicate what kind of match need to be
+ * removed, match is by default for the destination address, if
+ * matching by source address is to be removed the flag
+ * IGC_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igc_del_mac_filter(struct igc_adapter *adapter,
+			      const u8 *addr, const u8 queue)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int rar_entries = hw->mac.rar_entry_count;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	/* Search for matching entry in the MAC table based on given address
+	 * and queue. Do not touch entries at the end of the table reserved
+	 * for the VF MAC addresses.
+	 */
+	for (i = 0; i < rar_entries; i++) {
+		if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
+			continue;
+		if (adapter->mac_table[i].state != 0)
+			continue;
+		if (adapter->mac_table[i].queue != queue)
+			continue;
+		if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
+			continue;
+
+		/* When a filter for the default address is "deleted",
+		 * we return it to its initial configuration
+		 */
+		if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) {
+			adapter->mac_table[i].state =
+				IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
+			adapter->mac_table[i].queue = 0;
+		} else {
+			adapter->mac_table[i].state = 0;
+			adapter->mac_table[i].queue = 0;
+			memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+		}
+
+		igc_rar_set_index(adapter, i);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	int ret;
+
+	ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues);
+
+	return min_t(int, ret, 0);
+}
+
+static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	igc_del_mac_filter(adapter, addr, adapter->num_rx_queues);
+
+	return 0;
+}
+
 /**
  * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
  * @netdev: network interface device structure
@@ -2529,6 +2712,44 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter,
  */
 static void igc_set_rx_mode(struct net_device *netdev)
 {
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
+	int count;
+
+	/* Check for Promiscuous and All Multicast modes */
+	if (netdev->flags & IFF_PROMISC) {
+		rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
+	} else {
+		if (netdev->flags & IFF_ALLMULTI) {
+			rctl |= IGC_RCTL_MPE;
+		} else {
+			/* Write addresses to the MTA, if the attempt fails
+			 * then we should just turn on promiscuous mode so
+			 * that we can at least receive multicast traffic
+			 */
+			count = igc_write_mc_addr_list(netdev);
+			if (count < 0)
+				rctl |= IGC_RCTL_MPE;
+		}
+	}
+
+	/* Write addresses to available RAR registers, if there is not
+	 * sufficient space to store all the addresses then enable
+	 * unicast promiscuous mode
+	 */
+	if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
+		rctl |= IGC_RCTL_UPE;
+
+	/* update state of unicast and multicast */
+	rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
+	wr32(IGC_RCTL, rctl);
+
+#if (PAGE_SIZE < 8192)
+	if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
+		rlpml = IGC_MAX_FRAME_BUILD_SKB;
+#endif
+	wr32(IGC_RLPML, rlpml);
 }
 
 /**
@@ -3982,6 +4203,7 @@ static const struct net_device_ops igc_netdev_ops = {
 	.ndo_open		= igc_open,
 	.ndo_stop		= igc_close,
 	.ndo_start_xmit		= igc_xmit_frame,
+	.ndo_set_rx_mode	= igc_set_rx_mode,
 	.ndo_set_mac_address	= igc_set_mac,
 	.ndo_change_mtu		= igc_change_mtu,
 	.ndo_get_stats		= igc_get_stats,
@@ -4211,7 +4433,9 @@ static int igc_probe(struct pci_dev *pdev,
 		goto err_sw_init;
 
 	/* Add supported features to the features list*/
+	netdev->features |= NETIF_F_RXCSUM;
 	netdev->features |= NETIF_F_HW_CSUM;
+	netdev->features |= NETIF_F_SCTP_CRC;
 
 	/* setup the private structure */
 	err = igc_sw_init(adapter);
@@ -4349,7 +4573,6 @@ static void igc_remove(struct pci_dev *pdev)
 	pci_release_mem_regions(pdev);
 
 	kfree(adapter->mac_table);
-	kfree(adapter->shadow_vfta);
 	free_netdev(netdev);
 
 	pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index cc3196ae5aea..fd9f5d41b594 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -832,9 +832,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
 				int xdp_count, int xdp_idx,
 				int rxr_count, int rxr_idx)
 {
+	int node = dev_to_node(&adapter->pdev->dev);
 	struct ixgbe_q_vector *q_vector;
 	struct ixgbe_ring *ring;
-	int node = NUMA_NO_NODE;
 	int cpu = -1;
 	int ring_count;
 	u8 tcs = adapter->hw_tcs;
@@ -845,10 +845,8 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
 	if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
 		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
 		if (rss_i > 1 && adapter->atr_sample_rate) {
-			if (cpu_online(v_idx)) {
-				cpu = v_idx;
-				node = cpu_to_node(cpu);
-			}
+			cpu = cpumask_local_spread(v_idx, node);
+			node = cpu_to_node(cpu);
 		}
 	}
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 91b3780ddb04..25c097cd8100 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6725,7 +6725,8 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
 	    (new_mtu > ETH_DATA_LEN))
 		e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n");
 
-	e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
 
 	/* must set new MTU before calling down or up */
 	netdev->mtu = new_mtu;
@@ -7945,6 +7946,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 	} ip;
 	union {
 		struct tcphdr *tcp;
+		struct udphdr *udp;
 		unsigned char *hdr;
 	} l4;
 	u32 paylen, l4_offset;
@@ -7968,7 +7970,8 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 	l4.hdr = skb_checksum_start(skb);
 
 	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
-	type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP;
+	type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
+		      IXGBE_ADVTXD_TUCMD_L4T_UDP : IXGBE_ADVTXD_TUCMD_L4T_TCP;
 
 	/* initialize outer IP header fields */
 	if (ip.v4->version == 4) {
@@ -7998,12 +8001,20 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 	/* determine offset of inner transport header */
 	l4_offset = l4.hdr - skb->data;
 
-	/* compute length of segmentation header */
-	*hdr_len = (l4.tcp->doff * 4) + l4_offset;
-
 	/* remove payload length from inner checksum */
 	paylen = skb->len - l4_offset;
-	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+
+	if (type_tucmd & IXGBE_ADVTXD_TUCMD_L4T_TCP) {
+		/* compute length of segmentation header */
+		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+		csum_replace_by_diff(&l4.tcp->check,
+				     (__force __wsum)htonl(paylen));
+	} else {
+		/* compute length of segmentation header */
+		*hdr_len = sizeof(*l4.udp) + l4_offset;
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+	}
 
 	/* update gso size and bytecount with header size */
 	first->gso_segs = skb_shinfo(skb)->gso_segs;
@@ -8639,7 +8650,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 	    adapter->ptp_clock) {
-		if (!test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS,
+		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
+		    !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS,
 					   &adapter->state)) {
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 			tx_flags |= IXGBE_TX_FLAGS_TSTAMP;
@@ -10189,6 +10201,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
 	if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN))
 		return features & ~(NETIF_F_HW_CSUM |
 				    NETIF_F_SCTP_CRC |
+				    NETIF_F_GSO_UDP_L4 |
 				    NETIF_F_HW_VLAN_CTAG_TX |
 				    NETIF_F_TSO |
 				    NETIF_F_TSO6);
@@ -10197,6 +10210,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
 	if (unlikely(network_hdr_len >  IXGBE_MAX_NETWORK_HDR_LEN))
 		return features & ~(NETIF_F_HW_CSUM |
 				    NETIF_F_SCTP_CRC |
+				    NETIF_F_GSO_UDP_L4 |
 				    NETIF_F_TSO |
 				    NETIF_F_TSO6);
 
@@ -10906,7 +10920,7 @@ skip_sriov:
 			    IXGBE_GSO_PARTIAL_FEATURES;
 
 	if (hw->mac.type >= ixgbe_mac_82599EB)
-		netdev->features |= NETIF_F_SCTP_CRC;
+		netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;
 
 #ifdef CONFIG_IXGBE_IPSEC
 #define IXGBE_ESP_FEATURES	(NETIF_F_HW_ESP | \
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index fb942167ee54..3d5caea096fb 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -61,6 +61,7 @@ config MVNETA
 	depends on ARCH_MVEBU || COMPILE_TEST
 	select MVMDIO
 	select PHYLINK
+	select PAGE_POOL
 	---help---
 	  This driver supports the network interface units in the
 	  Marvell ARMADA XP, ARMADA 370, ARMADA 38x and
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 82ea55ae5053..d5b644131cff 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2959,15 +2959,16 @@ static void set_params(struct mv643xx_eth_private *mp,
 static int get_phy_mode(struct mv643xx_eth_private *mp)
 {
 	struct device *dev = mp->dev->dev.parent;
-	int iface = -1;
+	phy_interface_t iface;
+	int err;
 
 	if (dev->of_node)
-		iface = of_get_phy_mode(dev->of_node);
+		err = of_get_phy_mode(dev->of_node, &iface);
 
 	/* Historical default if unspecified. We could also read/write
 	 * the interface state in the PSC1
 	 */
-	if (iface < 0)
+	if (!dev->of_node || err)
 		iface = PHY_INTERFACE_MODE_GMII;
 	return iface;
 }
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index e49820675c8c..274ac39c0f0f 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -37,6 +37,8 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/tso.h>
+#include <net/page_pool.h>
+#include <linux/bpf_trace.h>
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
@@ -322,6 +324,13 @@
 	      ETH_HLEN + ETH_FCS_LEN,			     \
 	      cache_line_size())
 
+#define MVNETA_SKB_HEADROOM	(max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
+				 NET_IP_ALIGN)
+#define MVNETA_SKB_PAD	(SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \
+			 MVNETA_SKB_HEADROOM))
+#define MVNETA_SKB_SIZE(len)	(SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD)
+#define MVNETA_MAX_RX_BUF_SIZE	(PAGE_SIZE - MVNETA_SKB_PAD)
+
 #define IS_TSO_HEADER(txq, addr) \
 	((addr >= txq->tso_hdrs_phys) && \
 	 (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE))
@@ -346,6 +355,11 @@ struct mvneta_statistic {
 #define T_REG_64	64
 #define T_SW		1
 
+#define MVNETA_XDP_PASS		BIT(0)
+#define MVNETA_XDP_DROPPED	BIT(1)
+#define MVNETA_XDP_TX		BIT(2)
+#define MVNETA_XDP_REDIR	BIT(3)
+
 static const struct mvneta_statistic mvneta_statistics[] = {
 	{ 0x3000, T_REG_64, "good_octets_received", },
 	{ 0x3010, T_REG_32, "good_frames_received", },
@@ -425,6 +439,8 @@ struct mvneta_port {
 	u32 cause_rx_tx;
 	struct napi_struct napi;
 
+	struct bpf_prog *xdp_prog;
+
 	/* Core clock */
 	struct clk *clk;
 	/* AXI clock */
@@ -545,6 +561,20 @@ struct mvneta_rx_desc {
 };
 #endif
 
+enum mvneta_tx_buf_type {
+	MVNETA_TYPE_SKB,
+	MVNETA_TYPE_XDP_TX,
+	MVNETA_TYPE_XDP_NDO,
+};
+
+struct mvneta_tx_buf {
+	enum mvneta_tx_buf_type type;
+	union {
+		struct xdp_frame *xdpf;
+		struct sk_buff *skb;
+	};
+};
+
 struct mvneta_tx_queue {
 	/* Number of this TX queue, in the range 0-7 */
 	u8 id;
@@ -560,8 +590,8 @@ struct mvneta_tx_queue {
 	int tx_stop_threshold;
 	int tx_wake_threshold;
 
-	/* Array of transmitted skb */
-	struct sk_buff **tx_skb;
+	/* Array of transmitted buffers */
+	struct mvneta_tx_buf *buf;
 
 	/* Index of last TX DMA descriptor that was inserted */
 	int txq_put_index;
@@ -603,6 +633,10 @@ struct mvneta_rx_queue {
 	u32 pkts_coal;
 	u32 time_coal;
 
+	/* page_pool */
+	struct page_pool *page_pool;
+	struct xdp_rxq_info xdp_rxq;
+
 	/* Virtual address of the RX buffer */
 	void  **buf_virt_addr;
 
@@ -641,7 +675,6 @@ static int txq_number = 8;
 static int rxq_def;
 
 static int rx_copybreak __read_mostly = 256;
-static int rx_header_size __read_mostly = 128;
 
 /* HW BM need that each port be identify by a unique ID */
 static int global_port_id;
@@ -1761,24 +1794,25 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 	int i;
 
 	for (i = 0; i < num; i++) {
+		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];
 		struct mvneta_tx_desc *tx_desc = txq->descs +
 			txq->txq_get_index;
-		struct sk_buff *skb = txq->tx_skb[txq->txq_get_index];
-
-		if (skb) {
-			bytes_compl += skb->len;
-			pkts_compl++;
-		}
 
 		mvneta_txq_inc_get(txq);
 
-		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
+		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr) &&
+		    buf->type != MVNETA_TYPE_XDP_TX)
 			dma_unmap_single(pp->dev->dev.parent,
 					 tx_desc->buf_phys_addr,
 					 tx_desc->data_size, DMA_TO_DEVICE);
-		if (!skb)
-			continue;
-		dev_kfree_skb_any(skb);
+		if (buf->type == MVNETA_TYPE_SKB && buf->skb) {
+			bytes_compl += buf->skb->len;
+			pkts_compl++;
+			dev_kfree_skb_any(buf->skb);
+		} else if (buf->type == MVNETA_TYPE_XDP_TX ||
+			   buf->type == MVNETA_TYPE_XDP_NDO) {
+			xdp_return_frame(buf->xdpf);
+		}
 	}
 
 	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
@@ -1812,23 +1846,21 @@ static int mvneta_rx_refill(struct mvneta_port *pp,
 			    struct mvneta_rx_queue *rxq,
 			    gfp_t gfp_mask)
 {
+	enum dma_data_direction dma_dir;
 	dma_addr_t phys_addr;
 	struct page *page;
 
-	page = __dev_alloc_page(gfp_mask);
+	page = page_pool_alloc_pages(rxq->page_pool,
+				     gfp_mask | __GFP_NOWARN);
 	if (!page)
 		return -ENOMEM;
 
-	/* map page for use */
-	phys_addr = dma_map_page(pp->dev->dev.parent, page, 0, PAGE_SIZE,
-				 DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) {
-		__free_page(page);
-		return -ENOMEM;
-	}
-
-	phys_addr += pp->rx_offset_correction;
+	phys_addr = page_pool_get_dma_addr(page) + pp->rx_offset_correction;
+	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+	dma_sync_single_for_device(pp->dev->dev.parent, phys_addr,
+				   MVNETA_MAX_RX_BUF_SIZE, dma_dir);
 	mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq);
+
 	return 0;
 }
 
@@ -1894,10 +1926,29 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
 		if (!data || !(rx_desc->buf_phys_addr))
 			continue;
 
-		dma_unmap_page(pp->dev->dev.parent, rx_desc->buf_phys_addr,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
-		__free_page(data);
+		page_pool_put_page(rxq->page_pool, data, false);
+	}
+	if (xdp_rxq_info_is_reg(&rxq->xdp_rxq))
+		xdp_rxq_info_unreg(&rxq->xdp_rxq);
+	page_pool_destroy(rxq->page_pool);
+	rxq->page_pool = NULL;
+}
+
+static void
+mvneta_update_stats(struct mvneta_port *pp, u32 pkts,
+		    u32 len, bool tx)
+{
+	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+	u64_stats_update_begin(&stats->syncp);
+	if (tx) {
+		stats->tx_packets += pkts;
+		stats->tx_bytes += len;
+	} else {
+		stats->rx_packets += pkts;
+		stats->rx_bytes += len;
 	}
+	u64_stats_update_end(&stats->syncp);
 }
 
 static inline
@@ -1925,43 +1976,292 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
 	return i;
 }
 
+static int
+mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
+			struct xdp_frame *xdpf, bool dma_map)
+{
+	struct mvneta_tx_desc *tx_desc;
+	struct mvneta_tx_buf *buf;
+	dma_addr_t dma_addr;
+
+	if (txq->count >= txq->tx_stop_threshold)
+		return MVNETA_XDP_DROPPED;
+
+	tx_desc = mvneta_txq_next_desc_get(txq);
+
+	buf = &txq->buf[txq->txq_put_index];
+	if (dma_map) {
+		/* ndo_xdp_xmit */
+		dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
+					  xdpf->len, DMA_TO_DEVICE);
+		if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
+			mvneta_txq_desc_put(txq);
+			return MVNETA_XDP_DROPPED;
+		}
+		buf->type = MVNETA_TYPE_XDP_NDO;
+	} else {
+		struct page *page = virt_to_page(xdpf->data);
+
+		dma_addr = page_pool_get_dma_addr(page) +
+			   sizeof(*xdpf) + xdpf->headroom;
+		dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
+					   xdpf->len, DMA_BIDIRECTIONAL);
+		buf->type = MVNETA_TYPE_XDP_TX;
+	}
+	buf->xdpf = xdpf;
+
+	tx_desc->command = MVNETA_TXD_FLZ_DESC;
+	tx_desc->buf_phys_addr = dma_addr;
+	tx_desc->data_size = xdpf->len;
+
+	mvneta_update_stats(pp, 1, xdpf->len, true);
+	mvneta_txq_inc_put(txq);
+	txq->pending++;
+	txq->count++;
+
+	return MVNETA_XDP_TX;
+}
+
+static int
+mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
+{
+	struct mvneta_tx_queue *txq;
+	struct netdev_queue *nq;
+	struct xdp_frame *xdpf;
+	int cpu;
+	u32 ret;
+
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
+		return MVNETA_XDP_DROPPED;
+
+	cpu = smp_processor_id();
+	txq = &pp->txqs[cpu % txq_number];
+	nq = netdev_get_tx_queue(pp->dev, txq->id);
+
+	__netif_tx_lock(nq, cpu);
+	ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
+	if (ret == MVNETA_XDP_TX)
+		mvneta_txq_pend_desc_add(pp, txq, 0);
+	__netif_tx_unlock(nq);
+
+	return ret;
+}
+
+static int
+mvneta_xdp_xmit(struct net_device *dev, int num_frame,
+		struct xdp_frame **frames, u32 flags)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+	int cpu = smp_processor_id();
+	struct mvneta_tx_queue *txq;
+	struct netdev_queue *nq;
+	int i, drops = 0;
+	u32 ret;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	txq = &pp->txqs[cpu % txq_number];
+	nq = netdev_get_tx_queue(pp->dev, txq->id);
+
+	__netif_tx_lock(nq, cpu);
+	for (i = 0; i < num_frame; i++) {
+		ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
+		if (ret != MVNETA_XDP_TX) {
+			xdp_return_frame_rx_napi(frames[i]);
+			drops++;
+		}
+	}
+
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		mvneta_txq_pend_desc_add(pp, txq, 0);
+	__netif_tx_unlock(nq);
+
+	return num_frame - drops;
+}
+
+static int
+mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+	       struct bpf_prog *prog, struct xdp_buff *xdp)
+{
+	u32 ret, act = bpf_prog_run_xdp(prog, xdp);
+
+	switch (act) {
+	case XDP_PASS:
+		ret = MVNETA_XDP_PASS;
+		break;
+	case XDP_REDIRECT: {
+		int err;
+
+		err = xdp_do_redirect(pp->dev, xdp, prog);
+		if (err) {
+			ret = MVNETA_XDP_DROPPED;
+			xdp_return_buff(xdp);
+		} else {
+			ret = MVNETA_XDP_REDIR;
+		}
+		break;
+	}
+	case XDP_TX:
+		ret = mvneta_xdp_xmit_back(pp, xdp);
+		if (ret != MVNETA_XDP_TX)
+			xdp_return_buff(xdp);
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(pp->dev, prog, act);
+		/* fall through */
+	case XDP_DROP:
+		page_pool_recycle_direct(rxq->page_pool,
+					 virt_to_head_page(xdp->data));
+		ret = MVNETA_XDP_DROPPED;
+		break;
+	}
+
+	return ret;
+}
+
+static int
+mvneta_swbm_rx_frame(struct mvneta_port *pp,
+		     struct mvneta_rx_desc *rx_desc,
+		     struct mvneta_rx_queue *rxq,
+		     struct xdp_buff *xdp,
+		     struct bpf_prog *xdp_prog,
+		     struct page *page, u32 *xdp_ret)
+{
+	unsigned char *data = page_address(page);
+	int data_len = -MVNETA_MH_SIZE, len;
+	struct net_device *dev = pp->dev;
+	enum dma_data_direction dma_dir;
+
+	if (MVNETA_SKB_SIZE(rx_desc->data_size) > PAGE_SIZE) {
+		len = MVNETA_MAX_RX_BUF_SIZE;
+		data_len += len;
+	} else {
+		len = rx_desc->data_size;
+		data_len += len - ETH_FCS_LEN;
+	}
+
+	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+	dma_sync_single_for_cpu(dev->dev.parent,
+				rx_desc->buf_phys_addr,
+				len, dma_dir);
+
+	/* Prefetch header */
+	prefetch(data);
+
+	xdp->data_hard_start = data;
+	xdp->data = data + MVNETA_SKB_HEADROOM + MVNETA_MH_SIZE;
+	xdp->data_end = xdp->data + data_len;
+	xdp_set_data_meta_invalid(xdp);
+
+	if (xdp_prog) {
+		u32 ret;
+
+		ret = mvneta_run_xdp(pp, rxq, xdp_prog, xdp);
+		if (ret != MVNETA_XDP_PASS) {
+			mvneta_update_stats(pp, 1,
+					    xdp->data_end - xdp->data,
+					    false);
+			rx_desc->buf_phys_addr = 0;
+			*xdp_ret |= ret;
+			return ret;
+		}
+	}
+
+	rxq->skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
+	if (unlikely(!rxq->skb)) {
+		netdev_err(dev,
+			   "Can't allocate skb on queue %d\n",
+			   rxq->id);
+		dev->stats.rx_dropped++;
+		rxq->skb_alloc_err++;
+		return -ENOMEM;
+	}
+	page_pool_release_page(rxq->page_pool, page);
+
+	skb_reserve(rxq->skb,
+		    xdp->data - xdp->data_hard_start);
+	skb_put(rxq->skb, xdp->data_end - xdp->data);
+	mvneta_rx_csum(pp, rx_desc->status, rxq->skb);
+
+	rxq->left_size = rx_desc->data_size - len;
+	rx_desc->buf_phys_addr = 0;
+
+	return 0;
+}
+
+static void
+mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
+			    struct mvneta_rx_desc *rx_desc,
+			    struct mvneta_rx_queue *rxq,
+			    struct page *page)
+{
+	struct net_device *dev = pp->dev;
+	enum dma_data_direction dma_dir;
+	int data_len, len;
+
+	if (rxq->left_size > MVNETA_MAX_RX_BUF_SIZE) {
+		len = MVNETA_MAX_RX_BUF_SIZE;
+		data_len = len;
+	} else {
+		len = rxq->left_size;
+		data_len = len - ETH_FCS_LEN;
+	}
+	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
+	dma_sync_single_for_cpu(dev->dev.parent,
+				rx_desc->buf_phys_addr,
+				len, dma_dir);
+	if (data_len > 0) {
+		/* refill descriptor with new buffer later */
+		skb_add_rx_frag(rxq->skb,
+				skb_shinfo(rxq->skb)->nr_frags,
+				page, MVNETA_SKB_HEADROOM, data_len,
+				PAGE_SIZE);
+	}
+	page_pool_release_page(rxq->page_pool, page);
+	rx_desc->buf_phys_addr = 0;
+	rxq->left_size -= len;
+}
+
 /* Main rx processing when using software buffer management */
 static int mvneta_rx_swbm(struct napi_struct *napi,
 			  struct mvneta_port *pp, int budget,
 			  struct mvneta_rx_queue *rxq)
 {
+	int rcvd_pkts = 0, rcvd_bytes = 0, rx_proc = 0;
 	struct net_device *dev = pp->dev;
-	int rx_todo, rx_proc;
-	int refill = 0;
-	u32 rcvd_pkts = 0;
-	u32 rcvd_bytes = 0;
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp_buf;
+	int rx_todo, refill;
+	u32 xdp_ret = 0;
 
 	/* Get number of received packets */
 	rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
-	rx_proc = 0;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(pp->xdp_prog);
+	xdp_buf.rxq = &rxq->xdp_rxq;
 
 	/* Fairness NAPI loop */
-	while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
+	while (rx_proc < budget && rx_proc < rx_todo) {
 		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
-		unsigned char *data;
-		struct page *page;
-		dma_addr_t phys_addr;
 		u32 rx_status, index;
-		int rx_bytes, skb_size, copy_size;
-		int frag_num, frag_size, frag_offset;
+		struct page *page;
 
 		index = rx_desc - rxq->descs;
 		page = (struct page *)rxq->buf_virt_addr[index];
-		data = page_address(page);
-		/* Prefetch header */
-		prefetch(data);
 
-		phys_addr = rx_desc->buf_phys_addr;
 		rx_status = rx_desc->status;
 		rx_proc++;
 		rxq->refill_num++;
 
 		if (rx_status & MVNETA_RXD_FIRST_DESC) {
+			int err;
+
 			/* Check errors only for FIRST descriptor */
 			if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
 				mvneta_rx_error(pp, rx_desc);
@@ -1969,85 +2269,18 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 				/* leave the descriptor untouched */
 				continue;
 			}
-			rx_bytes = rx_desc->data_size -
-				   (ETH_FCS_LEN + MVNETA_MH_SIZE);
 
-			/* Allocate small skb for each new packet */
-			skb_size = max(rx_copybreak, rx_header_size);
-			rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
-			if (unlikely(!rxq->skb)) {
-				netdev_err(dev,
-					   "Can't allocate skb on queue %d\n",
-					   rxq->id);
-				dev->stats.rx_dropped++;
-				rxq->skb_alloc_err++;
+			err = mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf,
+						   xdp_prog, page, &xdp_ret);
+			if (err)
 				continue;
-			}
-			copy_size = min(skb_size, rx_bytes);
-
-			/* Copy data from buffer to SKB, skip Marvell header */
-			memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
-			       copy_size);
-			skb_put(rxq->skb, copy_size);
-			rxq->left_size = rx_bytes - copy_size;
-
-			mvneta_rx_csum(pp, rx_status, rxq->skb);
-			if (rxq->left_size == 0) {
-				int size = copy_size + MVNETA_MH_SIZE;
-
-				dma_sync_single_range_for_cpu(dev->dev.parent,
-							      phys_addr, 0,
-							      size,
-							      DMA_FROM_DEVICE);
-
-				/* leave the descriptor and buffer untouched */
-			} else {
-				/* refill descriptor with new buffer later */
-				rx_desc->buf_phys_addr = 0;
-
-				frag_num = 0;
-				frag_offset = copy_size + MVNETA_MH_SIZE;
-				frag_size = min(rxq->left_size,
-						(int)(PAGE_SIZE - frag_offset));
-				skb_add_rx_frag(rxq->skb, frag_num, page,
-						frag_offset, frag_size,
-						PAGE_SIZE);
-				dma_unmap_page(dev->dev.parent, phys_addr,
-					       PAGE_SIZE, DMA_FROM_DEVICE);
-				rxq->left_size -= frag_size;
-			}
 		} else {
-			/* Middle or Last descriptor */
 			if (unlikely(!rxq->skb)) {
 				pr_debug("no skb for rx_status 0x%x\n",
 					 rx_status);
 				continue;
 			}
-			if (!rxq->left_size) {
-				/* last descriptor has only FCS */
-				/* and can be discarded */
-				dma_sync_single_range_for_cpu(dev->dev.parent,
-							      phys_addr, 0,
-							      ETH_FCS_LEN,
-							      DMA_FROM_DEVICE);
-				/* leave the descriptor and buffer untouched */
-			} else {
-				/* refill descriptor with new buffer later */
-				rx_desc->buf_phys_addr = 0;
-
-				frag_num = skb_shinfo(rxq->skb)->nr_frags;
-				frag_offset = 0;
-				frag_size = min(rxq->left_size,
-						(int)(PAGE_SIZE - frag_offset));
-				skb_add_rx_frag(rxq->skb, frag_num, page,
-						frag_offset, frag_size,
-						PAGE_SIZE);
-
-				dma_unmap_page(dev->dev.parent, phys_addr,
-					       PAGE_SIZE, DMA_FROM_DEVICE);
-
-				rxq->left_size -= frag_size;
-			}
+			mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, page);
 		} /* Middle or Last descriptor */
 
 		if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2072,17 +2305,14 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 
 		/* clean uncomplete skb pointer in queue */
 		rxq->skb = NULL;
-		rxq->left_size = 0;
 	}
+	rcu_read_unlock();
 
-	if (rcvd_pkts) {
-		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+	if (xdp_ret & MVNETA_XDP_REDIR)
+		xdp_do_flush_map();
 
-		u64_stats_update_begin(&stats->syncp);
-		stats->rx_packets += rcvd_pkts;
-		stats->rx_bytes   += rcvd_bytes;
-		u64_stats_update_end(&stats->syncp);
-	}
+	if (rcvd_pkts)
+		mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);
 
 	/* return some buffers to hardware queue, one at a time is too slow */
 	refill = mvneta_rx_refill_queue(pp, rxq);
@@ -2206,14 +2436,8 @@ err_drop_frame:
 		napi_gro_receive(napi, skb);
 	}
 
-	if (rcvd_pkts) {
-		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
-
-		u64_stats_update_begin(&stats->syncp);
-		stats->rx_packets += rcvd_pkts;
-		stats->rx_bytes   += rcvd_bytes;
-		u64_stats_update_end(&stats->syncp);
-	}
+	if (rcvd_pkts)
+		mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);
 
 	/* Update rxq management counters */
 	mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
@@ -2225,16 +2449,19 @@ static inline void
 mvneta_tso_put_hdr(struct sk_buff *skb,
 		   struct mvneta_port *pp, struct mvneta_tx_queue *txq)
 {
-	struct mvneta_tx_desc *tx_desc;
 	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+	struct mvneta_tx_desc *tx_desc;
 
-	txq->tx_skb[txq->txq_put_index] = NULL;
 	tx_desc = mvneta_txq_next_desc_get(txq);
 	tx_desc->data_size = hdr_len;
 	tx_desc->command = mvneta_skb_tx_csum(pp, skb);
 	tx_desc->command |= MVNETA_TXD_F_DESC;
 	tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
 				 txq->txq_put_index * TSO_HEADER_SIZE;
+	buf->type = MVNETA_TYPE_SKB;
+	buf->skb = NULL;
+
 	mvneta_txq_inc_put(txq);
 }
 
@@ -2243,6 +2470,7 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
 		    struct sk_buff *skb, char *data, int size,
 		    bool last_tcp, bool is_last)
 {
+	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
 	struct mvneta_tx_desc *tx_desc;
 
 	tx_desc = mvneta_txq_next_desc_get(txq);
@@ -2256,7 +2484,8 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
 	}
 
 	tx_desc->command = 0;
-	txq->tx_skb[txq->txq_put_index] = NULL;
+	buf->type = MVNETA_TYPE_SKB;
+	buf->skb = NULL;
 
 	if (last_tcp) {
 		/* last descriptor in the TCP packet */
@@ -2264,7 +2493,7 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
 
 		/* last descriptor in SKB */
 		if (is_last)
-			txq->tx_skb[txq->txq_put_index] = skb;
+			buf->skb = skb;
 	}
 	mvneta_txq_inc_put(txq);
 	return 0;
@@ -2349,6 +2578,7 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 	int i, nr_frags = skb_shinfo(skb)->nr_frags;
 
 	for (i = 0; i < nr_frags; i++) {
+		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		void *addr = skb_frag_address(frag);
 
@@ -2368,12 +2598,13 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
 		if (i == nr_frags - 1) {
 			/* Last descriptor */
 			tx_desc->command = MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
-			txq->tx_skb[txq->txq_put_index] = skb;
+			buf->skb = skb;
 		} else {
 			/* Descriptor in the middle: Not First, Not Last */
 			tx_desc->command = 0;
-			txq->tx_skb[txq->txq_put_index] = NULL;
+			buf->skb = NULL;
 		}
+		buf->type = MVNETA_TYPE_SKB;
 		mvneta_txq_inc_put(txq);
 	}
 
@@ -2401,6 +2632,7 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 	struct mvneta_port *pp = netdev_priv(dev);
 	u16 txq_id = skb_get_queue_mapping(skb);
 	struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
+	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
 	struct mvneta_tx_desc *tx_desc;
 	int len = skb->len;
 	int frags = 0;
@@ -2433,16 +2665,17 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 		goto out;
 	}
 
+	buf->type = MVNETA_TYPE_SKB;
 	if (frags == 1) {
 		/* First and Last descriptor */
 		tx_cmd |= MVNETA_TXD_FLZ_DESC;
 		tx_desc->command = tx_cmd;
-		txq->tx_skb[txq->txq_put_index] = skb;
+		buf->skb = skb;
 		mvneta_txq_inc_put(txq);
 	} else {
 		/* First but not Last */
 		tx_cmd |= MVNETA_TXD_F_DESC;
-		txq->tx_skb[txq->txq_put_index] = NULL;
+		buf->skb = NULL;
 		mvneta_txq_inc_put(txq);
 		tx_desc->command = tx_cmd;
 		/* Continue with other skb fragments */
@@ -2459,7 +2692,6 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
 
 out:
 	if (frags > 0) {
-		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 
 		netdev_tx_sent_queue(nq, len);
@@ -2474,10 +2706,7 @@ out:
 		else
 			txq->pending += frags;
 
-		u64_stats_update_begin(&stats->syncp);
-		stats->tx_packets++;
-		stats->tx_bytes  += len;
-		u64_stats_update_end(&stats->syncp);
+		mvneta_update_stats(pp, 1, len, true);
 	} else {
 		dev->stats.tx_dropped++;
 		dev_kfree_skb_any(skb);
@@ -2830,11 +3059,55 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 	return rx_done;
 }
 
+static int mvneta_create_page_pool(struct mvneta_port *pp,
+				   struct mvneta_rx_queue *rxq, int size)
+{
+	struct bpf_prog *xdp_prog = READ_ONCE(pp->xdp_prog);
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.flags = PP_FLAG_DMA_MAP,
+		.pool_size = size,
+		.nid = cpu_to_node(0),
+		.dev = pp->dev->dev.parent,
+		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+	};
+	int err;
+
+	rxq->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rxq->page_pool)) {
+		err = PTR_ERR(rxq->page_pool);
+		rxq->page_pool = NULL;
+		return err;
+	}
+
+	err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id);
+	if (err < 0)
+		goto err_free_pp;
+
+	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
+					 rxq->page_pool);
+	if (err)
+		goto err_unregister_rxq;
+
+	return 0;
+
+err_unregister_rxq:
+	xdp_rxq_info_unreg(&rxq->xdp_rxq);
+err_free_pp:
+	page_pool_destroy(rxq->page_pool);
+	rxq->page_pool = NULL;
+	return err;
+}
+
 /* Handle rxq fill: allocates rxq skbs; called when initializing a port */
 static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 			   int num)
 {
-	int i;
+	int i, err;
+
+	err = mvneta_create_page_pool(pp, rxq, num);
+	if (err < 0)
+		return err;
 
 	for (i = 0; i < num; i++) {
 		memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
@@ -2908,7 +3181,7 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp,
 		/* Set Offset */
 		mvneta_rxq_offset_set(pp, rxq, 0);
 		mvneta_rxq_buf_size_set(pp, rxq, PAGE_SIZE < SZ_64K ?
-					PAGE_SIZE :
+					MVNETA_MAX_RX_BUF_SIZE :
 					MVNETA_RX_BUF_SIZE(pp->pkt_size));
 		mvneta_rxq_bm_disable(pp, rxq);
 		mvneta_rxq_fill(pp, rxq, rxq->size);
@@ -2989,9 +3262,8 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
 
 	txq->last_desc = txq->size - 1;
 
-	txq->tx_skb = kmalloc_array(txq->size, sizeof(*txq->tx_skb),
-				    GFP_KERNEL);
-	if (!txq->tx_skb) {
+	txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL);
+	if (!txq->buf) {
 		dma_free_coherent(pp->dev->dev.parent,
 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
 				  txq->descs, txq->descs_phys);
@@ -3003,7 +3275,7 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
 					   txq->size * TSO_HEADER_SIZE,
 					   &txq->tso_hdrs_phys, GFP_KERNEL);
 	if (!txq->tso_hdrs) {
-		kfree(txq->tx_skb);
+		kfree(txq->buf);
 		dma_free_coherent(pp->dev->dev.parent,
 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
 				  txq->descs, txq->descs_phys);
@@ -3056,7 +3328,7 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
 {
 	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
 
-	kfree(txq->tx_skb);
+	kfree(txq->buf);
 
 	if (txq->tso_hdrs)
 		dma_free_coherent(pp->dev->dev.parent,
@@ -3263,6 +3535,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
 		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
 	}
 
+	if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
+		netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
+		return -EINVAL;
+	}
+
 	dev->mtu = mtu;
 
 	if (!netif_running(dev)) {
@@ -3932,6 +4209,47 @@ static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return phylink_mii_ioctl(pp->phylink, ifr, cmd);
 }
 
+static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
+			    struct netlink_ext_ack *extack)
+{
+	bool need_update, running = netif_running(dev);
+	struct mvneta_port *pp = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+
+	if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
+		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
+		return -EOPNOTSUPP;
+	}
+
+	need_update = !!pp->xdp_prog != !!prog;
+	if (running && need_update)
+		mvneta_stop(dev);
+
+	old_prog = xchg(&pp->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (running && need_update)
+		return mvneta_open(dev);
+
+	return 0;
+}
+
+static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return mvneta_xdp_setup(dev, xdp->prog, xdp->extack);
+	case XDP_QUERY_PROG:
+		xdp->prog_id = pp->xdp_prog ? pp->xdp_prog->aux->id : 0;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 /* Ethtool methods */
 
 /* Set link ksettings (phy address, speed) for ethtools */
@@ -4328,6 +4646,8 @@ static const struct net_device_ops mvneta_netdev_ops = {
 	.ndo_fix_features    = mvneta_fix_features,
 	.ndo_get_stats64     = mvneta_get_stats64,
 	.ndo_do_ioctl        = mvneta_ioctl,
+	.ndo_bpf	     = mvneta_xdp,
+	.ndo_xdp_xmit        = mvneta_xdp_xmit,
 };
 
 static const struct ethtool_ops mvneta_eth_tool_ops = {
@@ -4477,9 +4797,9 @@ static int mvneta_probe(struct platform_device *pdev)
 	struct phy *comphy;
 	const char *dt_mac_addr;
 	char hw_mac_addr[ETH_ALEN];
+	phy_interface_t phy_mode;
 	const char *mac_from;
 	int tx_csum_limit;
-	int phy_mode;
 	int err;
 	int cpu;
 
@@ -4492,10 +4812,9 @@ static int mvneta_probe(struct platform_device *pdev)
 	if (dev->irq == 0)
 		return -EINVAL;
 
-	phy_mode = of_get_phy_mode(dn);
-	if (phy_mode < 0) {
+	err = of_get_phy_mode(dn, &phy_mode);
+	if (err) {
 		dev_err(&pdev->dev, "incorrect phy-mode\n");
-		err = -EINVAL;
 		goto err_free_irq;
 	}
 
@@ -4618,7 +4937,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	pp->id = global_port_id++;
-	pp->rx_offset_correction = 0; /* not relevant for SW BM */
+	pp->rx_offset_correction = MVNETA_SKB_HEADROOM;
 
 	/* Obtain access to BM resources if enabled and already initialized */
 	bm_node = of_parse_phandle(dn, "buffer-manager", 0);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 111b3b8239e1..17e24c1e1c2b 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -2863,7 +2863,7 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status,
 	skb->ip_summed = CHECKSUM_NONE;
 }
 
-/* Reuse skb if possible, or allocate a new skb and add it to BM pool */
+/* Allocate a new skb and add it to BM pool */
 static int mvpp2_rx_refill(struct mvpp2_port *port,
 			   struct mvpp2_bm_pool *bm_pool, int pool)
 {
@@ -2871,7 +2871,6 @@ static int mvpp2_rx_refill(struct mvpp2_port *port,
 	phys_addr_t phys_addr;
 	void *buf;
 
-	/* No recycle or too many buffers are in use, so allocate a new skb */
 	buf = mvpp2_buf_alloc(port, bm_pool, &dma_addr, &phys_addr,
 			      GFP_ATOMIC);
 	if (!buf)
@@ -2957,14 +2956,13 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 		 * by the hardware, and the information about the buffer is
 		 * comprised by the RX descriptor.
 		 */
-		if (rx_status & MVPP2_RXD_ERR_SUMMARY) {
-err_drop_frame:
-			dev->stats.rx_errors++;
-			mvpp2_rx_error(port, rx_desc);
-			/* Return the buffer to the pool */
-			mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
-			continue;
-		}
+		if (rx_status & MVPP2_RXD_ERR_SUMMARY)
+			goto err_drop_frame;
+
+		dma_sync_single_for_cpu(dev->dev.parent, dma_addr,
+					rx_bytes + MVPP2_MH_SIZE,
+					DMA_FROM_DEVICE);
+		prefetch(data);
 
 		if (bm_pool->frag_size > PAGE_SIZE)
 			frag_size = 0;
@@ -2983,8 +2981,9 @@ err_drop_frame:
 			goto err_drop_frame;
 		}
 
-		dma_unmap_single(dev->dev.parent, dma_addr,
-				 bm_pool->buf_size, DMA_FROM_DEVICE);
+		dma_unmap_single_attrs(dev->dev.parent, dma_addr,
+				       bm_pool->buf_size, DMA_FROM_DEVICE,
+				       DMA_ATTR_SKIP_CPU_SYNC);
 
 		rcvd_pkts++;
 		rcvd_bytes += rx_bytes;
@@ -2995,6 +2994,13 @@ err_drop_frame:
 		mvpp2_rx_csum(port, rx_status, skb);
 
 		napi_gro_receive(napi, skb);
+		continue;
+
+err_drop_frame:
+		dev->stats.rx_errors++;
+		mvpp2_rx_error(port, rx_desc);
+		/* Return the buffer to the pool */
+		mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 	}
 
 	if (rcvd_pkts) {
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 51b77c2de400..3fb7ee3d4d13 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1489,8 +1489,10 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 			goto err_netdev;
 		}
 		of_property_read_u32(np, "reg", &pep->phy_addr);
-		pep->phy_intf = of_get_phy_mode(pdev->dev.of_node);
 		of_node_put(np);
+		err = of_get_phy_mode(pdev->dev.of_node, &pep->phy_intf);
+		if (err && err != -ENODEV)
+			goto err_netdev;
 	}
 
 	/* Hardware supports only 3 ports */
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_path.c b/drivers/net/ethernet/mediatek/mtk_eth_path.c
index ef11cf3d1ccc..0fe97155dd8f 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_path.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_path.c
@@ -57,7 +57,7 @@ static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
 	default:
 		updated = false;
 		break;
-	};
+	}
 
 	if (updated) {
 		val = mtk_r32(eth, MTK_MAC_MISC);
@@ -143,7 +143,7 @@ static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, int path)
 	default:
 		updated = false;
 		break;
-	};
+	}
 
 	if (updated)
 		regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
@@ -174,7 +174,7 @@ static int set_mux_gmac12_to_gephy_sgmii(struct mtk_eth *eth, int path)
 		break;
 	default:
 		updated = false;
-	};
+	}
 
 	if (updated)
 		regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 703adb96429e..385a4ab9ec99 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2758,9 +2758,10 @@ static const struct net_device_ops mtk_netdev_ops = {
 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 {
 	const __be32 *_id = of_get_property(np, "reg", NULL);
+	phy_interface_t phy_mode;
 	struct phylink *phylink;
-	int phy_mode, id, err;
 	struct mtk_mac *mac;
+	int id, err;
 
 	if (!_id) {
 		dev_err(eth->dev, "missing mac id\n");
@@ -2805,10 +2806,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	/* phylink create */
-	phy_mode = of_get_phy_mode(np);
-	if (phy_mode < 0) {
+	err = of_get_phy_mode(np, &phy_mode);
+	if (err) {
 		dev_err(eth->dev, "incorrect phy-mode\n");
-		err = -EINVAL;
 		goto free_netdev;
 	}
 
diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c
index 4db27dfc7ec1..32d83421226a 100644
--- a/drivers/net/ethernet/mediatek/mtk_sgmii.c
+++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c
@@ -93,7 +93,7 @@ int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id,
 	case SPEED_1000:
 		val |= SGMII_SPEED_1000;
 		break;
-	};
+	}
 
 	if (state->duplex == DUPLEX_FULL)
 		val |= SGMII_DUPLEX_FULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 69bb6bb06e76..5716c3d2bb86 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3934,13 +3934,17 @@ static void mlx4_restart_one_down(struct pci_dev *pdev);
 static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
 			       struct devlink *devlink);
 
-static int mlx4_devlink_reload_down(struct devlink *devlink,
+static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change,
 				    struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
 	struct mlx4_dev *dev = &priv->dev;
 	struct mlx4_dev_persistent *persist = dev->persist;
 
+	if (netns_change) {
+		NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported");
+		return -EOPNOTSUPP;
+	}
 	if (persist->num_vfs)
 		mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
 	mlx4_restart_one_down(persist->pdev);
@@ -4010,6 +4014,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_params_unregister;
 
 	devlink_params_publish(devlink);
+	devlink_reload_enable(devlink);
 	pci_save_state(pdev);
 	return 0;
 
@@ -4121,6 +4126,8 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	struct devlink *devlink = priv_to_devlink(priv);
 	int active_vfs = 0;
 
+	devlink_reload_disable(devlink);
+
 	if (mlx4_is_slave(dev))
 		persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5708fcc079ca..a6f390fdb971 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -70,7 +70,7 @@ mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/t
 
 mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
 					steering/dr_matcher.o steering/dr_rule.o \
-					steering/dr_icm_pool.o steering/dr_crc32.o \
+					steering/dr_icm_pool.o \
 					steering/dr_ste.o steering/dr_send.o \
 					steering/dr_cmd.o steering/dr_fw.o \
 					steering/dr_action.o steering/fs_dr.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index ea934cd02448..34cba97f7bf4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -866,7 +866,7 @@ static void cmd_work_handler(struct work_struct *work)
 	if (!ent->page_queue) {
 		alloc_ret = alloc_ent(cmd);
 		if (alloc_ret < 0) {
-			mlx5_core_err(dev, "failed to allocate command entry\n");
+			mlx5_core_err_rl(dev, "failed to allocate command entry\n");
 			if (ent->callback) {
 				ent->callback(-EAGAIN, ent->context);
 				mlx5_free_cmd_msg(dev, ent->out);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index 633b117eb13e..7b672ada63a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -175,7 +175,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
  *	@port_buffer: <output> port receive buffer configuration
  *	@change: <output>
  *
- *	Update buffer configuration based on pfc configuraiton and
+ *	Update buffer configuration based on pfc configuration and
  *	priority to buffer mapping.
  *	Buffer's lossy bit is changed to:
  *		lossless if there is at least one PFC enabled priority
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index b860569d4247..6c72b592315b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -222,7 +222,8 @@ static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 }
 
 static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
-				     void *context)
+				     void *context,
+				     struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_err_ctx *err_ctx = context;
@@ -301,7 +302,8 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
 }
 
 static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
-				      struct devlink_fmsg *fmsg)
+				      struct devlink_fmsg *fmsg,
+				      struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_params *params = &priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index bfed558637c2..b468549e96ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -135,7 +135,8 @@ static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 }
 
 static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
-				     void *context)
+				     void *context,
+				     struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_err_ctx *err_ctx = context;
@@ -205,7 +206,8 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
 }
 
 static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
-				      struct devlink_fmsg *fmsg)
+				      struct devlink_fmsg *fmsg,
+				      struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fda0b37075e8..bb970b2ebf8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2241,13 +2241,14 @@ out_err:
 
 struct mlx5_fields {
 	u8  field;
-	u8  size;
+	u8  field_bsize;
+	u32 field_mask;
 	u32 offset;
 	u32 match_offset;
 };
 
-#define OFFLOAD(fw_field, size, field, off, match_field) \
-		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
+#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
+		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
 		 offsetof(struct pedit_headers, field) + (off), \
 		 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
 
@@ -2265,18 +2266,18 @@ struct mlx5_fields {
 })
 
 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
-			 void *matchmaskp, int size)
+			 void *matchmaskp, u8 bsize)
 {
 	bool same = false;
 
-	switch (size) {
-	case sizeof(u8):
+	switch (bsize) {
+	case 8:
 		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
 		break;
-	case sizeof(u16):
+	case 16:
 		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
 		break;
-	case sizeof(u32):
+	case 32:
 		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
 		break;
 	}
@@ -2285,41 +2286,43 @@ static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
 }
 
 static struct mlx5_fields fields[] = {
-	OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16),
-	OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0, dmac_15_0),
-	OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16),
-	OFFLOAD(SMAC_15_0,  2, eth.h_source[4], 0, smac_15_0),
-	OFFLOAD(ETHERTYPE,  2, eth.h_proto, 0, ethertype),
-	OFFLOAD(FIRST_VID,  2, vlan.h_vlan_TCI, 0, first_vid),
-
-	OFFLOAD(IP_TTL, 1, ip4.ttl,   0, ttl_hoplimit),
-	OFFLOAD(SIPV4,  4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
-	OFFLOAD(DIPV4,  4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-
-	OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0,
+	OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
+	OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
+	OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
+	OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
+	OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
+	OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
+
+	OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
+	OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
+	OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+	OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+
+	OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
 		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
-	OFFLOAD(SIPV6_95_64,  4, ip6.saddr.s6_addr32[1], 0,
+	OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
 		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
-	OFFLOAD(SIPV6_63_32,  4, ip6.saddr.s6_addr32[2], 0,
+	OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
 		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
-	OFFLOAD(SIPV6_31_0,   4, ip6.saddr.s6_addr32[3], 0,
+	OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
 		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
-	OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0,
+	OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
-	OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0,
+	OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
-	OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0,
+	OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
-	OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0,
+	OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
 		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
-	OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit),
+	OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
 
-	OFFLOAD(TCP_SPORT, 2, tcp.source,  0, tcp_sport),
-	OFFLOAD(TCP_DPORT, 2, tcp.dest,    0, tcp_dport),
-	OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags),
+	OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
+	OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
+	/* in linux iphdr tcp_flags is 8 bits long */
+	OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
 
-	OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport),
-	OFFLOAD(UDP_DPORT, 2, udp.dest,   0, udp_dport),
+	OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
+	OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
 };
 
 /* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
@@ -2332,19 +2335,17 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
 				struct netlink_ext_ack *extack)
 {
 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
-	void *headers_c = get_match_headers_criteria(*action_flags,
-						     &parse_attr->spec);
-	void *headers_v = get_match_headers_value(*action_flags,
-						  &parse_attr->spec);
 	int i, action_size, nactions, max_actions, first, last, next_z;
-	void *s_masks_p, *a_masks_p, *vals_p;
+	void *headers_c, *headers_v, *action, *vals_p;
+	u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
 	struct mlx5_fields *f;
-	u8 cmd, field_bsize;
-	u32 s_mask, a_mask;
 	unsigned long mask;
 	__be32 mask_be32;
 	__be16 mask_be16;
-	void *action;
+	u8 cmd;
+
+	headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
+	headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
 
 	set_masks = &hdrs[0].masks;
 	add_masks = &hdrs[1].masks;
@@ -2369,8 +2370,8 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
 		s_masks_p = (void *)set_masks + f->offset;
 		a_masks_p = (void *)add_masks + f->offset;
 
-		memcpy(&s_mask, s_masks_p, f->size);
-		memcpy(&a_mask, a_masks_p, f->size);
+		s_mask = *s_masks_p & f->field_mask;
+		a_mask = *a_masks_p & f->field_mask;
 
 		if (!s_mask && !a_mask) /* nothing to offload here */
 			continue;
@@ -2399,38 +2400,34 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
 			vals_p = (void *)set_vals + f->offset;
 			/* don't rewrite if we have a match on the same value */
 			if (cmp_val_mask(vals_p, s_masks_p, match_val,
-					 match_mask, f->size))
+					 match_mask, f->field_bsize))
 				skip = true;
 			/* clear to denote we consumed this field */
-			memset(s_masks_p, 0, f->size);
+			*s_masks_p &= ~f->field_mask;
 		} else {
-			u32 zero = 0;
-
 			cmd  = MLX5_ACTION_TYPE_ADD;
 			mask = a_mask;
 			vals_p = (void *)add_vals + f->offset;
 			/* add 0 is no change */
-			if (!memcmp(vals_p, &zero, f->size))
+			if ((*(u32 *)vals_p & f->field_mask) == 0)
 				skip = true;
 			/* clear to denote we consumed this field */
-			memset(a_masks_p, 0, f->size);
+			*a_masks_p &= ~f->field_mask;
 		}
 		if (skip)
 			continue;
 
-		field_bsize = f->size * BITS_PER_BYTE;
-
-		if (field_bsize == 32) {
+		if (f->field_bsize == 32) {
 			mask_be32 = *(__be32 *)&mask;
 			mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
-		} else if (field_bsize == 16) {
+		} else if (f->field_bsize == 16) {
 			mask_be16 = *(__be16 *)&mask;
 			mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
 		}
 
-		first = find_first_bit(&mask, field_bsize);
-		next_z = find_next_zero_bit(&mask, field_bsize, first);
-		last  = find_last_bit(&mask, field_bsize);
+		first = find_first_bit(&mask, f->field_bsize);
+		next_z = find_next_zero_bit(&mask, f->field_bsize, first);
+		last  = find_last_bit(&mask, f->field_bsize);
 		if (first < next_z && next_z < last) {
 			NL_SET_ERR_MSG_MOD(extack,
 					   "rewrite of few sub-fields isn't supported");
@@ -2443,16 +2440,22 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
 		MLX5_SET(set_action_in, action, field, f->field);
 
 		if (cmd == MLX5_ACTION_TYPE_SET) {
-			MLX5_SET(set_action_in, action, offset, first);
+			int start;
+
+			/* if field is bit sized it can start not from first bit */
+			start = find_first_bit((unsigned long *)&f->field_mask,
+					       f->field_bsize);
+
+			MLX5_SET(set_action_in, action, offset, first - start);
 			/* length is num of bits to be written, zero means length of 32 */
 			MLX5_SET(set_action_in, action, length, (last - first + 1));
 		}
 
-		if (field_bsize == 32)
+		if (f->field_bsize == 32)
 			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
-		else if (field_bsize == 16)
+		else if (f->field_bsize == 16)
 			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
-		else if (field_bsize == 8)
+		else if (f->field_bsize == 8)
 			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
 
 		action += action_size;
@@ -3443,6 +3446,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 		attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	}
 
+	if (!(attr->action &
+	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+		NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action");
+		return -EOPNOTSUPP;
+	}
+
 	if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "current firmware doesn't support split rule for port mirroring");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 67dc4f0921b6..66951ff975f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -461,8 +461,14 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
 			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
 					      &sq->state)) {
+				struct mlx5e_tx_wqe_info *wi;
+				u16 ci;
+
+				ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+				wi = &sq->db.wqe_info[ci];
 				mlx5e_dump_error_cqe(sq,
 						     (struct mlx5_err_cqe *)cqe);
+				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
 				queue_work(cq->channel->priv->wq,
 					   &sq->recover_work);
 			}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 30aae76b6a1d..89a2806eceb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1831,6 +1831,15 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
 	flush_workqueue(esw->work_queue);
 }
 
+static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *vport;
+	int i;
+
+	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+		memset(&vport->info, 0, sizeof(vport->info));
+}
+
 /* Public E-Switch API */
 #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
 
@@ -1923,7 +1932,7 @@ abort:
 	return err;
 }
 
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
 {
 	int old_mode;
 
@@ -1952,6 +1961,8 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
 	}
+	if (clear_vf)
+		mlx5_eswitch_clear_vf_vports_info(esw);
 }
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 6bd6f5895244..804a7ed2b969 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -270,7 +270,7 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
 int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode);
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf);
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 			       u16 vport, u8 mac[ETH_ALEN]);
 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@@ -603,7 +603,7 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
 static inline int  mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; }
-static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
 static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
 static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
 static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9004a07e457a..6d781c2ed103 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1369,7 +1369,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
 		return -EINVAL;
 	}
 
-	mlx5_eswitch_disable(esw);
+	mlx5_eswitch_disable(esw, false);
 	mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
 	err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
 	if (err) {
@@ -2195,7 +2195,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
 {
 	int err, err1;
 
-	mlx5_eswitch_disable(esw);
+	mlx5_eswitch_disable(esw, false);
 	err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
 	if (err) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
index eb8b0fe0b4e1..11621d265d7e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -35,11 +35,11 @@
 
 #include <linux/mlx5/driver.h>
 
-enum mlx5_fpga_device_id {
-	MLX5_FPGA_DEVICE_UNKNOWN = 0,
-	MLX5_FPGA_DEVICE_KU040 = 1,
-	MLX5_FPGA_DEVICE_KU060 = 2,
-	MLX5_FPGA_DEVICE_KU060_2 = 3,
+enum mlx5_fpga_id {
+	MLX5_FPGA_NEWTON = 0,
+	MLX5_FPGA_EDISON = 1,
+	MLX5_FPGA_MORSE = 2,
+	MLX5_FPGA_MORSEQ = 3,
 };
 
 enum mlx5_fpga_image {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index d046d1ec2a86..2ce4241459ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -81,19 +81,28 @@ static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
 	}
 }
 
-static const char *mlx5_fpga_device_name(u32 device)
+static const char *mlx5_fpga_name(u32 fpga_id)
 {
-	switch (device) {
-	case MLX5_FPGA_DEVICE_KU040:
-		return "ku040";
-	case MLX5_FPGA_DEVICE_KU060:
-		return "ku060";
-	case MLX5_FPGA_DEVICE_KU060_2:
-		return "ku060_2";
-	case MLX5_FPGA_DEVICE_UNKNOWN:
-	default:
-		return "unknown";
+	static char ret[32];
+
+	switch (fpga_id) {
+	case MLX5_FPGA_NEWTON:
+		return "Newton";
+	case MLX5_FPGA_EDISON:
+		return "Edison";
+	case MLX5_FPGA_MORSE:
+		return "Morse";
+	case MLX5_FPGA_MORSEQ:
+		return "MorseQ";
 	}
+
+	snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
+	return ret;
+}
+
+static int mlx5_is_fpga_lookaside(u32 fpga_id)
+{
+	return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON;
 }
 
 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
@@ -110,8 +119,12 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
 	fdev->last_admin_image = query.admin_image;
 	fdev->last_oper_image = query.oper_image;
 
-	mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
-		      query.status, query.admin_image, query.oper_image);
+	mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
+		       query.status, query.admin_image, query.oper_image);
+
+	/* for FPGA lookaside projects FPGA load status is not important */
+	if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
+		return 0;
 
 	if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
 		mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
@@ -167,25 +180,30 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	unsigned int max_num_qps;
 	unsigned long flags;
-	u32 fpga_device_id;
+	u32 fpga_id;
 	int err;
 
 	if (!fdev)
 		return 0;
 
-	err = mlx5_fpga_device_load_check(fdev);
+	err = mlx5_fpga_caps(fdev->mdev);
 	if (err)
 		goto out;
 
-	err = mlx5_fpga_caps(fdev->mdev);
+	err = mlx5_fpga_device_load_check(fdev);
 	if (err)
 		goto out;
 
-	fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device);
-	mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n",
-		       mlx5_fpga_device_name(fpga_device_id),
-		       fpga_device_id,
+	fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
+	mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
+
+	/* No QPs if FPGA does not participate in net processing */
+	if (mlx5_is_fpga_lookaside(fpga_id))
+		goto out;
+
+	mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
 		       mlx5_fpga_image_name(fdev->last_oper_image),
+		       fdev->last_oper_image,
 		       MLX5_CAP_FPGA(fdev->mdev, image_version),
 		       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
@@ -264,6 +282,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
 	if (!fdev)
 		return;
 
+	if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
+		return;
+
 	spin_lock_irqsave(&fdev->state_lock, flags);
 	if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
 		spin_unlock_irqrestore(&fdev->state_lock, flags);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 3bbb49354829..0246f5cdd355 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -531,9 +531,16 @@ static void del_hw_fte(struct fs_node *node)
 	}
 }
 
+static void del_sw_fte_rcu(struct rcu_head *head)
+{
+	struct fs_fte *fte = container_of(head, struct fs_fte, rcu);
+	struct mlx5_flow_steering *steering = get_steering(&fte->node);
+
+	kmem_cache_free(steering->ftes_cache, fte);
+}
+
 static void del_sw_fte(struct fs_node *node)
 {
-	struct mlx5_flow_steering *steering = get_steering(node);
 	struct mlx5_flow_group *fg;
 	struct fs_fte *fte;
 	int err;
@@ -546,7 +553,8 @@ static void del_sw_fte(struct fs_node *node)
 				     rhash_fte);
 	WARN_ON(err);
 	ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
-	kmem_cache_free(steering->ftes_cache, fte);
+
+	call_rcu(&fte->rcu, del_sw_fte_rcu);
 }
 
 static void del_hw_flow_group(struct fs_node *node)
@@ -1623,22 +1631,47 @@ static u64 matched_fgs_get_version(struct list_head *match_head)
 }
 
 static struct fs_fte *
-lookup_fte_locked(struct mlx5_flow_group *g,
-		  const u32 *match_value,
-		  bool take_write)
+lookup_fte_for_write_locked(struct mlx5_flow_group *g, const u32 *match_value)
 {
 	struct fs_fte *fte_tmp;
 
-	if (take_write)
-		nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
-	else
-		nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
-	fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
-					 rhash_fte);
+	nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+
+	fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, rhash_fte);
+	if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+		fte_tmp = NULL;
+		goto out;
+	}
+
+	if (!fte_tmp->node.active) {
+		tree_put_node(&fte_tmp->node, false);
+		fte_tmp = NULL;
+		goto out;
+	}
+	nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+
+out:
+	up_write_ref_node(&g->node, false);
+	return fte_tmp;
+}
+
+static struct fs_fte *
+lookup_fte_for_read_locked(struct mlx5_flow_group *g, const u32 *match_value)
+{
+	struct fs_fte *fte_tmp;
+
+	if (!tree_get_node(&g->node))
+		return NULL;
+
+	rcu_read_lock();
+	fte_tmp = rhashtable_lookup(&g->ftes_hash, match_value, rhash_fte);
 	if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+		rcu_read_unlock();
 		fte_tmp = NULL;
 		goto out;
 	}
+	rcu_read_unlock();
+
 	if (!fte_tmp->node.active) {
 		tree_put_node(&fte_tmp->node, false);
 		fte_tmp = NULL;
@@ -1646,14 +1679,21 @@ lookup_fte_locked(struct mlx5_flow_group *g,
 	}
 
 	nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+
 out:
-	if (take_write)
-		up_write_ref_node(&g->node, false);
-	else
-		up_read_ref_node(&g->node);
+	tree_put_node(&g->node, false);
 	return fte_tmp;
 }
 
+static struct fs_fte *
+lookup_fte_locked(struct mlx5_flow_group *g, const u32 *match_value, bool write)
+{
+	if (write)
+		return lookup_fte_for_write_locked(g, match_value);
+	else
+		return lookup_fte_for_read_locked(g, match_value);
+}
+
 static struct mlx5_flow_handle *
 try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		       struct list_head *match_head,
@@ -1814,6 +1854,13 @@ search_again_locked:
 		return rule;
 	}
 
+	fte = alloc_fte(ft, spec, flow_act);
+	if (IS_ERR(fte)) {
+		up_write_ref_node(&ft->node, false);
+		err = PTR_ERR(fte);
+		goto err_alloc_fte;
+	}
+
 	nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
 	up_write_ref_node(&ft->node, false);
 
@@ -1821,17 +1868,9 @@ search_again_locked:
 	if (err)
 		goto err_release_fg;
 
-	fte = alloc_fte(ft, spec, flow_act);
-	if (IS_ERR(fte)) {
-		err = PTR_ERR(fte);
-		goto err_release_fg;
-	}
-
 	err = insert_fte(g, fte);
-	if (err) {
-		kmem_cache_free(steering->ftes_cache, fte);
+	if (err)
 		goto err_release_fg;
-	}
 
 	nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
 	up_write_ref_node(&g->node, false);
@@ -1843,6 +1882,8 @@ search_again_locked:
 
 err_release_fg:
 	up_write_ref_node(&g->node, false);
+	kmem_cache_free(steering->ftes_cache, fte);
+err_alloc_fte:
 	tree_put_node(&g->node, false);
 	return ERR_PTR(err);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 00717eba2256..f278298b0f6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -202,6 +202,7 @@ struct fs_fte {
 	enum fs_fte_status		status;
 	struct mlx5_fc			*counter;
 	struct rhash_head		hash;
+	struct rcu_head	rcu;
 	int				modify_mask;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index c07f3154437c..e718170a80c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -390,7 +390,8 @@ static void print_health_info(struct mlx5_core_dev *dev)
 
 static int
 mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
-			  struct devlink_fmsg *fmsg)
+			  struct devlink_fmsg *fmsg,
+			  struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 	struct mlx5_core_health *health = &dev->priv.health;
@@ -491,7 +492,8 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
 
 static int
 mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
-		      struct devlink_fmsg *fmsg, void *priv_ctx)
+		      struct devlink_fmsg *fmsg, void *priv_ctx,
+		      struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 	int err;
@@ -545,7 +547,8 @@ static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
 
 static int
 mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
-			       void *priv_ctx)
+			       void *priv_ctx,
+			       struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 
@@ -555,7 +558,8 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
 #define MLX5_CR_DUMP_CHUNK_SIZE 256
 static int
 mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
-			    struct devlink_fmsg *fmsg, void *priv_ctx)
+			    struct devlink_fmsg *fmsg, void *priv_ctx,
+			    struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
 	u32 crdump_size = dev->priv.health.crdump_size;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index c5ef2ff26465..fc0d9583475d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -145,34 +145,35 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 {
 	*port1 = 1;
 	*port2 = 2;
-	if (!tracker->netdev_state[0].tx_enabled ||
-	    !tracker->netdev_state[0].link_up) {
+	if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled ||
+	    !tracker->netdev_state[MLX5_LAG_P1].link_up) {
 		*port1 = 2;
 		return;
 	}
 
-	if (!tracker->netdev_state[1].tx_enabled ||
-	    !tracker->netdev_state[1].link_up)
+	if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled ||
+	    !tracker->netdev_state[MLX5_LAG_P2].link_up)
 		*port2 = 1;
 }
 
 void mlx5_modify_lag(struct mlx5_lag *ldev,
 		     struct lag_tracker *tracker)
 {
-	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	u8 v2p_port1, v2p_port2;
 	int err;
 
 	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
 				       &v2p_port2);
 
-	if (v2p_port1 != ldev->v2p_map[0] ||
-	    v2p_port2 != ldev->v2p_map[1]) {
-		ldev->v2p_map[0] = v2p_port1;
-		ldev->v2p_map[1] = v2p_port2;
+	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
+	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
+		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
+		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
 
 		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
-			       ldev->v2p_map[0], ldev->v2p_map[1]);
+			       ldev->v2p_map[MLX5_LAG_P1],
+			       ldev->v2p_map[MLX5_LAG_P2]);
 
 		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
 		if (err)
@@ -185,16 +186,17 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 static int mlx5_create_lag(struct mlx5_lag *ldev,
 			   struct lag_tracker *tracker)
 {
-	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	int err;
 
-	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
-				       &ldev->v2p_map[1]);
+	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
+				       &ldev->v2p_map[MLX5_LAG_P2]);
 
 	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
-		       ldev->v2p_map[0], ldev->v2p_map[1]);
+		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
 
-	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
+	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
+				  ldev->v2p_map[MLX5_LAG_P2]);
 	if (err)
 		mlx5_core_err(dev0,
 			      "Failed to create LAG (%d)\n",
@@ -207,7 +209,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      u8 flags)
 {
 	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
-	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	int err;
 
 	err = mlx5_create_lag(ldev, tracker);
@@ -229,7 +231,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 
 static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 {
-	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
 	bool roce_lag = __mlx5_lag_is_roce(ldev);
 	int err;
 
@@ -252,14 +254,15 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 
 static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 {
-	if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
 		return false;
 
 #ifdef CONFIG_MLX5_ESWITCH
-	return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
+				   ldev->pf[MLX5_LAG_P2].dev);
 #else
-	return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) &&
-		!mlx5_sriov_is_enabled(ldev->pf[1].dev));
+	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
+		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
 #endif
 }
 
@@ -285,8 +288,8 @@ static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
 {
-	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
-	struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
 	struct lag_tracker tracker;
 	bool do_bond, roce_lag;
 	int err;
@@ -692,10 +695,11 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
 		goto unlock;
 
 	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
-		ndev = ldev->tracker.netdev_state[0].tx_enabled ?
-		       ldev->pf[0].netdev : ldev->pf[1].netdev;
+		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
+		       ldev->pf[MLX5_LAG_P1].netdev :
+		       ldev->pf[MLX5_LAG_P2].netdev;
 	} else {
-		ndev = ldev->pf[0].netdev;
+		ndev = ldev->pf[MLX5_LAG_P1].netdev;
 	}
 	if (ndev)
 		dev_hold(ndev);
@@ -717,7 +721,8 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		return true;
 
 	ldev = mlx5_lag_dev_get(dev);
-	if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
+	if (!ldev || !__mlx5_lag_is_roce(ldev) ||
+	    ldev->pf[MLX5_LAG_P1].dev == dev)
 		return true;
 
 	/* If bonded, we do not add an IB device for PF1. */
@@ -746,11 +751,11 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 	ldev = mlx5_lag_dev_get(dev);
 	if (ldev && __mlx5_lag_is_roce(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
-		mdev[0] = ldev->pf[0].dev;
-		mdev[1] = ldev->pf[1].dev;
+		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
+		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
 	} else {
 		num_ports = 1;
-		mdev[0] = dev;
+		mdev[MLX5_LAG_P1] = dev;
 	}
 
 	for (i = 0; i < num_ports; ++i) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index 1dea0b1c9826..f1068aac6406 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -8,6 +8,11 @@
 #include "lag_mp.h"
 
 enum {
+	MLX5_LAG_P1,
+	MLX5_LAG_P2,
+};
+
+enum {
 	MLX5_LAG_FLAG_ROCE   = 1 << 0,
 	MLX5_LAG_FLAG_SRIOV  = 1 << 1,
 	MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 5d20d615663e..b70afa310ad2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -11,10 +11,11 @@
 
 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
 {
-	if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
 		return false;
 
-	return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
+					 ldev->pf[MLX5_LAG_P2].dev);
 }
 
 static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
@@ -43,7 +44,8 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
  *     2 - set affinity to port 2.
  *
  **/
-static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
+static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
+				       enum mlx5_lag_port_affinity port)
 {
 	struct lag_tracker tracker;
 
@@ -51,37 +53,37 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
 		return;
 
 	switch (port) {
-	case 0:
-		tracker.netdev_state[0].tx_enabled = true;
-		tracker.netdev_state[1].tx_enabled = true;
-		tracker.netdev_state[0].link_up = true;
-		tracker.netdev_state[1].link_up = true;
+	case MLX5_LAG_NORMAL_AFFINITY:
+		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
+		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
+		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
+		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
 		break;
-	case 1:
-		tracker.netdev_state[0].tx_enabled = true;
-		tracker.netdev_state[0].link_up = true;
-		tracker.netdev_state[1].tx_enabled = false;
-		tracker.netdev_state[1].link_up = false;
+	case MLX5_LAG_P1_AFFINITY:
+		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
+		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
+		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
+		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
 		break;
-	case 2:
-		tracker.netdev_state[0].tx_enabled = false;
-		tracker.netdev_state[0].link_up = false;
-		tracker.netdev_state[1].tx_enabled = true;
-		tracker.netdev_state[1].link_up = true;
+	case MLX5_LAG_P2_AFFINITY:
+		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
+		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
+		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
+		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
 		break;
 	default:
-		mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
-			       port);
+		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+			       "Invalid affinity port %d", port);
 		return;
 	}
 
-	if (tracker.netdev_state[0].tx_enabled)
-		mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
+	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
+		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
 					 MLX5_DEV_EVENT_PORT_AFFINITY,
 					 (void *)0);
 
-	if (tracker.netdev_state[1].tx_enabled)
-		mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
+	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
+		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
 					 MLX5_DEV_EVENT_PORT_AFFINITY,
 					 (void *)0);
 
@@ -141,11 +143,12 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
 	/* Verify next hops are ports of the same hca */
 	fib_nh0 = fib_info_nh(fi, 0);
 	fib_nh1 = fib_info_nh(fi, 1);
-	if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev &&
-	      fib_nh1->fib_nh_dev == ldev->pf[1].netdev) &&
-	    !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev &&
-	      fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) {
-		mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
+	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
+	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
+	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
+	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
+		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+			       "Multipath offload require two ports of the same HCA\n");
 		return;
 	}
 
@@ -157,7 +160,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
 		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
 	}
 
-	mlx5_lag_set_port_affinity(ldev, 0);
+	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
 	mp->mfi = fi;
 }
 
@@ -182,7 +185,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
 		}
 	} else if (event == FIB_EVENT_NH_ADD &&
 		   fib_info_num_path(fi) == 2) {
-		mlx5_lag_set_port_affinity(ldev, 0);
+		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
 	}
 }
 
@@ -248,9 +251,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
 	struct net_device *fib_dev;
 	struct fib_info *fi;
 
-	if (!net_eq(info->net, &init_net))
-		return NOTIFY_DONE;
-
 	if (info->family != AF_INET)
 		return NOTIFY_DONE;
 
@@ -270,8 +270,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
 			return notifier_from_errno(-EINVAL);
 		}
 		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
-		if (fib_dev != ldev->pf[0].netdev &&
-		    fib_dev != ldev->pf[1].netdev) {
+		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
+		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
 			return NOTIFY_DONE;
 		}
 		fib_work = mlx5_lag_init_fib_work(ldev, event);
@@ -311,8 +311,8 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
 		return 0;
 
 	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
-	err = register_fib_notifier(&mp->fib_nb,
-				    mlx5_lag_fib_event_flush);
+	err = register_fib_notifier(&init_net, &mp->fib_nb,
+				    mlx5_lag_fib_event_flush, NULL);
 	if (err)
 		mp->fib_nb.notifier_call = NULL;
 
@@ -326,6 +326,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
 	if (!mp->fib_nb.notifier_call)
 		return;
 
-	unregister_fib_notifier(&mp->fib_nb);
+	unregister_fib_notifier(&init_net, &mp->fib_nb);
 	mp->fib_nb.notifier_call = NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
index 6d14b1100be9..79be89e9c7a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
@@ -7,6 +7,12 @@
 #include "lag.h"
 #include "mlx5_core.h"
 
+enum mlx5_lag_port_affinity {
+	MLX5_LAG_NORMAL_AFFINITY,
+	MLX5_LAG_P1_AFFINITY,
+	MLX5_LAG_P2_AFFINITY,
+};
+
 struct lag_mp {
 	struct notifier_block     fib_nb;
 	struct fib_info           *mfi; /* used in tracking fib events */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index e47dd7c1b909..c9a091d3226c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1228,8 +1228,6 @@ function_teardown:
 
 static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
 {
-	int err = 0;
-
 	if (cleanup) {
 		mlx5_unregister_device(dev);
 		mlx5_drain_health_wq(dev);
@@ -1257,7 +1255,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
 	mlx5_function_teardown(dev, cleanup);
 out:
 	mutex_unlock(&dev->intf_state_mutex);
-	return err;
+	return 0;
 }
 
 static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 61fcfd8b39b4..f641f1336402 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -108,7 +108,7 @@ enable_vfs_hca:
 	return 0;
 }
 
-static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
+static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev, bool clear_vf)
 {
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 	int num_vfs = pci_num_vf(dev->pdev);
@@ -127,7 +127,7 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
 	}
 
 	if (MLX5_ESWITCH_MANAGER(dev))
-		mlx5_eswitch_disable(dev->priv.eswitch);
+		mlx5_eswitch_disable(dev->priv.eswitch, clear_vf);
 
 	if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
 		mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
@@ -147,7 +147,7 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
 	err = pci_enable_sriov(pdev, num_vfs);
 	if (err) {
 		mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
-		mlx5_device_disable_sriov(dev);
+		mlx5_device_disable_sriov(dev, true);
 	}
 	return err;
 }
@@ -157,7 +157,7 @@ static void mlx5_sriov_disable(struct pci_dev *pdev)
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 
 	pci_disable_sriov(pdev);
-	mlx5_device_disable_sriov(dev);
+	mlx5_device_disable_sriov(dev, true);
 }
 
 int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
@@ -192,7 +192,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
 	if (!mlx5_core_is_pf(dev))
 		return;
 
-	mlx5_device_disable_sriov(dev);
+	mlx5_device_disable_sriov(dev, false);
 }
 
 static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
deleted file mode 100644
index 9e2eccbb1eb8..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
+++ /dev/null
@@ -1,98 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-/* Copyright (c) 2011-2015 Stephan Brumme. All rights reserved.
- * Slicing-by-16 contributed by Bulat Ziganshin
- *
- * This software is provided 'as-is', without any express or implied warranty.
- * In no event will the author be held liable for any damages arising from the
- * of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- *    claim that you wrote the original software.
- * 2. If you use this software in a product, an acknowledgment in the product
- *    documentation would be appreciated but is not required.
- * 3. Altered source versions must be plainly marked as such, and must not be
- *    misrepresented as being the original software.
- *
- * Taken from http://create.stephan-brumme.com/crc32/ and adapted.
- */
-
-#include "dr_types.h"
-
-#define DR_STE_CRC_POLY 0xEDB88320L
-
-static u32 dr_ste_crc_tab32[8][256];
-
-static void dr_crc32_calc_lookup_entry(u32 (*tbl)[256], u8 i, u8 j)
-{
-	tbl[i][j] = (tbl[i - 1][j] >> 8) ^ tbl[0][tbl[i - 1][j] & 0xff];
-}
-
-void mlx5dr_crc32_init_table(void)
-{
-	u32 crc, i, j;
-
-	for (i = 0; i < 256; i++) {
-		crc = i;
-		for (j = 0; j < 8; j++) {
-			if (crc & 0x00000001L)
-				crc = (crc >> 1) ^ DR_STE_CRC_POLY;
-			else
-				crc = crc >> 1;
-		}
-		dr_ste_crc_tab32[0][i] = crc;
-	}
-
-	/* Init CRC lookup tables according to crc_slice_8 algorithm */
-	for (i = 0; i < 256; i++) {
-		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 1, i);
-		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 2, i);
-		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 3, i);
-		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 4, i);
-		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 5, i);
-		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 6, i);
-		dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 7, i);
-	}
-}
-
-/* Compute CRC32 (Slicing-by-8 algorithm) */
-u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length)
-{
-	const u32 *curr = (const u32 *)input_data;
-	const u8 *curr_char;
-	u32 crc = 0, one, two;
-
-	if (!input_data)
-		return 0;
-
-	/* Process eight bytes at once (Slicing-by-8) */
-	while (length >= 8) {
-		one = *curr++ ^ crc;
-		two = *curr++;
-
-		crc = dr_ste_crc_tab32[0][(two >> 24) & 0xff]
-			^ dr_ste_crc_tab32[1][(two >> 16) & 0xff]
-			^ dr_ste_crc_tab32[2][(two >> 8) & 0xff]
-			^ dr_ste_crc_tab32[3][two & 0xff]
-			^ dr_ste_crc_tab32[4][(one >> 24) & 0xff]
-			^ dr_ste_crc_tab32[5][(one >> 16) & 0xff]
-			^ dr_ste_crc_tab32[6][(one >> 8) & 0xff]
-			^ dr_ste_crc_tab32[7][one & 0xff];
-
-		length -= 8;
-	}
-
-	curr_char = (const u8 *)curr;
-	/* Remaining 1 to 7 bytes (standard algorithm) */
-	while (length-- != 0)
-		crc = (crc >> 8) ^ dr_ste_crc_tab32[0][(crc & 0xff)
-			^ *curr_char++];
-
-	return ((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
-		((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 5b24732b18c0..a9da961d4d2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -326,9 +326,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
 		goto uninit_resourses;
 	}
 
-	/* Init CRC table for htbl CRC calculation */
-	mlx5dr_crc32_init_table();
-
 	return dmn;
 
 uninit_resourses:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 67dea7698fc9..5db947df8763 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -146,17 +146,15 @@ dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_domain *dmn)
 
 int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
 				   struct mlx5dr_matcher_rx_tx *nic_matcher,
-				   bool ipv6)
+				   enum mlx5dr_ipv outer_ipv,
+				   enum mlx5dr_ipv inner_ipv)
 {
-	if (ipv6) {
-		nic_matcher->ste_builder = nic_matcher->ste_builder6;
-		nic_matcher->num_of_builders = nic_matcher->num_of_builders6;
-	} else {
-		nic_matcher->ste_builder = nic_matcher->ste_builder4;
-		nic_matcher->num_of_builders = nic_matcher->num_of_builders4;
-	}
+	nic_matcher->ste_builder =
+		nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
+	nic_matcher->num_of_builders =
+		nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv];
 
-	if (!nic_matcher->num_of_builders) {
+	if (!nic_matcher->ste_builder) {
 		mlx5dr_dbg(matcher->tbl->dmn,
 			   "Rule not supported on this matcher due to IP related fields\n");
 		return -EINVAL;
@@ -167,26 +165,19 @@ int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
 
 static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 				       struct mlx5dr_matcher_rx_tx *nic_matcher,
-				       bool ipv6)
+				       enum mlx5dr_ipv outer_ipv,
+				       enum mlx5dr_ipv inner_ipv)
 {
 	struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
 	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
 	struct mlx5dr_match_param mask = {};
 	struct mlx5dr_match_misc3 *misc3;
 	struct mlx5dr_ste_build *sb;
-	u8 *num_of_builders;
 	bool inner, rx;
 	int idx = 0;
 	int ret, i;
 
-	if (ipv6) {
-		sb = nic_matcher->ste_builder6;
-		num_of_builders = &nic_matcher->num_of_builders6;
-	} else {
-		sb = nic_matcher->ste_builder4;
-		num_of_builders = &nic_matcher->num_of_builders4;
-	}
-
+	sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
 	rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
 
 	/* Create a temporary mask to track and clear used mask fields */
@@ -249,7 +240,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 		if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer))
 			mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
 
-		if (ipv6) {
+		if (outer_ipv == DR_RULE_IPV6) {
 			if (dr_mask_is_dst_addr_set(&mask.outer))
 				mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
 								 inner, rx);
@@ -325,7 +316,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 		if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner))
 			mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
 
-		if (ipv6) {
+		if (inner_ipv == DR_RULE_IPV6) {
 			if (dr_mask_is_dst_addr_set(&mask.inner))
 				mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
 								 inner, rx);
@@ -373,7 +364,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 		}
 	}
 
-	*num_of_builders = idx;
+	nic_matcher->ste_builder = sb;
+	nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv] = idx;
 
 	return 0;
 }
@@ -524,24 +516,33 @@ static void dr_matcher_uninit(struct mlx5dr_matcher *matcher)
 	}
 }
 
-static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
-			       struct mlx5dr_matcher_rx_tx *nic_matcher)
+static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher,
+					   struct mlx5dr_matcher_rx_tx *nic_matcher)
 {
 	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
-	int ret, ret_v4, ret_v6;
 
-	ret_v4 = dr_matcher_set_ste_builders(matcher, nic_matcher, false);
-	ret_v6 = dr_matcher_set_ste_builders(matcher, nic_matcher, true);
+	dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV4);
+	dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV6);
+	dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV4);
+	dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6);
 
-	if (ret_v4 && ret_v6) {
+	if (!nic_matcher->ste_builder) {
 		mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n");
 		return -EINVAL;
 	}
 
-	if (!ret_v4)
-		nic_matcher->ste_builder = nic_matcher->ste_builder4;
-	else
-		nic_matcher->ste_builder = nic_matcher->ste_builder6;
+	return 0;
+}
+
+static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
+			       struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	int ret;
+
+	ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher);
+	if (ret)
+		return ret;
 
 	nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
 						      DR_CHUNK_SIZE_1,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 5dcb8baf491a..8560460d97fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -954,12 +954,12 @@ static int dr_rule_destroy_rule(struct mlx5dr_rule *rule)
 	return 0;
 }
 
-static bool dr_rule_is_ipv6(struct mlx5dr_match_param *param)
+static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec)
 {
-	return (param->outer.ip_version == 6 ||
-		param->inner.ip_version == 6 ||
-		param->outer.ethertype == ETH_P_IPV6 ||
-		param->inner.ethertype == ETH_P_IPV6);
+	if (spec->ip_version == 6 || spec->ethertype == ETH_P_IPV6)
+		return DR_RULE_IPV6;
+
+	return DR_RULE_IPV4;
 }
 
 static bool dr_rule_skip(enum mlx5dr_domain_type domain,
@@ -1023,7 +1023,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
 
 	ret = mlx5dr_matcher_select_builders(matcher,
 					     nic_matcher,
-					     dr_rule_is_ipv6(param));
+					     dr_rule_get_ipv(&param->outer),
+					     dr_rule_get_ipv(&param->inner));
 	if (ret)
 		goto out_err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 4efe1b0be4a8..7e9d6cfc356f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019 Mellanox Technologies. */
 
 #include <linux/types.h>
+#include <linux/crc32.h>
 #include "dr_types.h"
 
 #define DR_STE_CRC_POLY 0xEDB88320L
@@ -107,6 +108,13 @@ struct dr_hw_ste_format {
 	u8 mask[DR_STE_SIZE_MASK];
 };
 
+static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
+{
+	u32 crc = crc32(0, input_data, length);
+
+	return htonl(crc);
+}
+
 u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
 {
 	struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
@@ -128,7 +136,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
 		bit = bit >> 1;
 	}
 
-	crc32 = mlx5dr_crc32_slice8_calc(masked, DR_STE_SIZE_TAG);
+	crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG);
 	index = crc32 & (htbl->chunk->num_of_entries - 1);
 
 	return index;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 1cb3769d4e3c..c1f45a60ee6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -106,6 +106,12 @@ enum mlx5dr_action_type {
 	DR_ACTION_TYP_MAX,
 };
 
+enum mlx5dr_ipv {
+	DR_RULE_IPV4,
+	DR_RULE_IPV6,
+	DR_RULE_IPV_MAX,
+};
+
 struct mlx5dr_icm_pool;
 struct mlx5dr_icm_chunk;
 struct mlx5dr_icm_bucket;
@@ -679,11 +685,11 @@ struct mlx5dr_matcher_rx_tx {
 	struct mlx5dr_ste_htbl *s_htbl;
 	struct mlx5dr_ste_htbl *e_anchor;
 	struct mlx5dr_ste_build *ste_builder;
-	struct mlx5dr_ste_build ste_builder4[DR_RULE_MAX_STES];
-	struct mlx5dr_ste_build ste_builder6[DR_RULE_MAX_STES];
+	struct mlx5dr_ste_build ste_builder_arr[DR_RULE_IPV_MAX]
+					       [DR_RULE_IPV_MAX]
+					       [DR_RULE_MAX_STES];
 	u8 num_of_builders;
-	u8 num_of_builders4;
-	u8 num_of_builders6;
+	u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX];
 	u64 default_icm_addr;
 	struct mlx5dr_table_rx_tx *nic_tbl;
 };
@@ -812,7 +818,8 @@ mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps)
 
 int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
 				   struct mlx5dr_matcher_rx_tx *nic_matcher,
-				   bool ipv6);
+				   enum mlx5dr_ipv outer_ipv,
+				   enum mlx5dr_ipv inner_ipv);
 
 static inline u32
 mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size)
@@ -962,9 +969,6 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
 			   struct mlx5dr_match_param *set_param,
 			   struct mlx5dr_match_parameters *mask);
 
-void mlx5dr_crc32_init_table(void);
-u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length);
-
 struct mlx5dr_qp {
 	struct mlx5_core_dev *mdev;
 	struct mlx5_wq_qp wq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index dd2315ce4441..f2a0e72285ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -34,26 +34,6 @@
 #include "wq.h"
 #include "mlx5_core.h"
 
-u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
-{
-	return (u32)wq->fbc.sz_m1 + 1;
-}
-
-u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
-{
-	return wq->fbc.sz_m1 + 1;
-}
-
-u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq)
-{
-	return wq->fbc.log_stride;
-}
-
-u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
-{
-	return (u32)wq->fbc.sz_m1 + 1;
-}
-
 static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
 {
 	return ((u32)1 << log_sz) << log_stride;
@@ -96,6 +76,24 @@ err_db_free:
 	return err;
 }
 
+void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides)
+{
+	size_t len;
+	void *wqe;
+
+	if (!net_ratelimit())
+		return;
+
+	nstrides = max_t(u8, nstrides, 1);
+
+	len = nstrides << wq->fbc.log_stride;
+	wqe = mlx5_wq_cyc_get_wqe(wq, ix);
+
+	pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %ld\n",
+		mlx5_wq_cyc_get_size(wq), wq->cur_sz, ix, len);
+	print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false);
+}
+
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 55791f71a778..d9a94bc223c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -79,7 +79,7 @@ struct mlx5_wq_ll {
 int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl);
-u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
+void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
@@ -88,16 +88,18 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
 		     struct mlx5_wq_ctrl *wq_ctrl);
-u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
-u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *wqc, struct mlx5_wq_ll *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl);
-u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
 
+static inline u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
+{
+	return (u32)wq->fbc.sz_m1 + 1;
+}
+
 static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq)
 {
 	return wq->cur_sz == wq->sz;
@@ -168,6 +170,16 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
 	return !equal && !smaller;
 }
 
+static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
+{
+	return wq->fbc.sz_m1 + 1;
+}
+
+static inline u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq)
+{
+	return wq->fbc.log_stride;
+}
+
 static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr)
 {
 	return ctr & wq->fbc.sz_m1;
@@ -224,6 +236,11 @@ static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
 	return cqe;
 }
 
+static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
+{
+	return (u32)wq->fbc.sz_m1 + 1;
+}
+
 static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
 {
 	return wq->cur_sz == wq->fbc.sz_m1;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 4421ab22182f..da436a6aad2f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -127,6 +127,16 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core)
 }
 EXPORT_SYMBOL(mlxsw_core_res_query_enabled);
 
+bool
+mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
+					  const struct mlxsw_fw_rev *req_rev)
+{
+	return rev->minor > req_rev->minor ||
+	       (rev->minor == req_rev->minor &&
+		rev->subminor >= req_rev->subminor);
+}
+EXPORT_SYMBOL(mlxsw_core_fw_rev_minor_subminor_validate);
+
 struct mlxsw_rx_listener_item {
 	struct list_head list;
 	struct mlxsw_rx_listener rxl;
@@ -985,6 +995,7 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 
 static int
 mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink,
+					  bool netns_change,
 					  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -1005,7 +1016,7 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink,
 	return mlxsw_core_bus_device_register(mlxsw_core->bus_info,
 					      mlxsw_core->bus,
 					      mlxsw_core->bus_priv, true,
-					      devlink);
+					      devlink, extack);
 }
 
 static int mlxsw_devlink_flash_update(struct devlink *devlink,
@@ -1098,7 +1109,8 @@ static int
 __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 				 const struct mlxsw_bus *mlxsw_bus,
 				 void *bus_priv, bool reload,
-				 struct devlink *devlink)
+				 struct devlink *devlink,
+				 struct netlink_ext_ack *extack)
 {
 	const char *device_kind = mlxsw_bus_info->device_kind;
 	struct mlxsw_core *mlxsw_core;
@@ -1172,7 +1184,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	}
 
 	if (mlxsw_driver->init) {
-		err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+		err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack);
 		if (err)
 			goto err_driver_init;
 	}
@@ -1186,8 +1198,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	if (err)
 		goto err_thermal_init;
 
-	if (mlxsw_driver->params_register)
+	if (mlxsw_driver->params_register) {
 		devlink_params_publish(devlink);
+		devlink_reload_enable(devlink);
+	}
 
 	return 0;
 
@@ -1223,14 +1237,16 @@ err_devlink_alloc:
 int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 				   const struct mlxsw_bus *mlxsw_bus,
 				   void *bus_priv, bool reload,
-				   struct devlink *devlink)
+				   struct devlink *devlink,
+				   struct netlink_ext_ack *extack)
 {
 	bool called_again = false;
 	int err;
 
 again:
 	err = __mlxsw_core_bus_device_register(mlxsw_bus_info, mlxsw_bus,
-					       bus_priv, reload, devlink);
+					       bus_priv, reload,
+					       devlink, extack);
 	/* -EAGAIN is returned in case the FW was updated. FW needs
 	 * a reset, so lets try to call __mlxsw_core_bus_device_register()
 	 * again.
@@ -1249,6 +1265,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_core);
 
+	if (!reload)
+		devlink_reload_disable(devlink);
 	if (devlink_is_reload_failed(devlink)) {
 		if (!reload)
 			/* Only the parts that were not de-initialized in the
@@ -2003,6 +2021,35 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get);
 
+int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module)
+{
+	enum mlxsw_reg_pmtm_module_type module_type;
+	char pmtm_pl[MLXSW_REG_PMTM_LEN];
+	int err;
+
+	mlxsw_reg_pmtm_pack(pmtm_pl, module);
+	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl);
+	if (err)
+		return err;
+	mlxsw_reg_pmtm_unpack(pmtm_pl, &module_type);
+
+	/* Here we need to get the module width according to the module type. */
+
+	switch (module_type) {
+	case MLXSW_REG_PMTM_MODULE_TYPE_BP_4X: /* fall through */
+	case MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP:
+		return 4;
+	case MLXSW_REG_PMTM_MODULE_TYPE_BP_2X:
+		return 2;
+	case MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP: /* fall through */
+	case MLXSW_REG_PMTM_MODULE_TYPE_BP_1X:
+		return 1;
+	default:
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(mlxsw_core_module_max_width);
+
 static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
 				    const char *buf, size_t size)
 {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 5d7d2ab6d155..0d18bee6d140 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
+#include <linux/net_namespace.h>
 #include <net/devlink.h>
 
 #include "trap.h"
@@ -23,6 +24,7 @@ struct mlxsw_core_port;
 struct mlxsw_driver;
 struct mlxsw_bus;
 struct mlxsw_bus_info;
+struct mlxsw_fw_rev;
 
 unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core);
 
@@ -30,13 +32,18 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);
 
 bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core);
 
+bool
+mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
+					  const struct mlxsw_fw_rev *req_rev);
+
 int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver);
 void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver);
 
 int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 				   const struct mlxsw_bus *mlxsw_bus,
 				   void *bus_priv, bool reload,
-				   struct devlink *devlink);
+				   struct devlink *devlink,
+				   struct netlink_ext_ack *extack);
 void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, bool reload);
 
 struct mlxsw_tx_info {
@@ -193,6 +200,7 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
 struct devlink_port *
 mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
 				 u8 local_port);
+int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module);
 
 int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
 bool mlxsw_core_schedule_work(struct work_struct *work);
@@ -252,7 +260,8 @@ struct mlxsw_driver {
 	const char *kind;
 	size_t priv_size;
 	int (*init)(struct mlxsw_core *mlxsw_core,
-		    const struct mlxsw_bus_info *mlxsw_bus_info);
+		    const struct mlxsw_bus_info *mlxsw_bus_info,
+		    struct netlink_ext_ack *extack);
 	void (*fini)(struct mlxsw_core *mlxsw_core);
 	int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core);
 	int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port,
@@ -350,6 +359,11 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
 #define MLXSW_CORE_RES_GET(mlxsw_core, short_res_id)			\
 	mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id)
 
+static inline struct net *mlxsw_core_net(struct mlxsw_core *mlxsw_core)
+{
+	return devlink_net(priv_to_devlink(mlxsw_core));
+}
+
 #define MLXSW_BUS_F_TXRX	BIT(0)
 #define MLXSW_BUS_F_RESET	BIT(1)
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index d2c7ce67c300..08215fed193d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -50,6 +50,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
 	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
 	char mcia_pl[MLXSW_REG_MCIA_LEN];
 	u16 i2c_addr;
+	u8 page = 0;
 	int status;
 	int err;
 
@@ -62,11 +63,21 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
 
 	i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW;
 	if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) {
-		i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH;
-		offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH;
+		page = MLXSW_REG_MCIA_PAGE_GET(offset);
+		offset -= MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH * page;
+		/* When reading upper pages 1, 2 and 3 the offset starts at
+		 * 128. Please refer to "QSFP+ Memory Map" figure in SFF-8436
+		 * specification for graphical depiction.
+		 * MCIA register accepts buffer size <= 48. Page of size 128
+		 * should be read by chunks of size 48, 48, 32. Align the size
+		 * of the last chunk to avoid reading after the end of the
+		 * page.
+		 */
+		if (offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH)
+			size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset;
 	}
 
-	mlxsw_reg_mcia_pack(mcia_pl, module, 0, 0, offset, size, i2c_addr);
+	mlxsw_reg_mcia_pack(mcia_pl, module, 0, page, offset, size, i2c_addr);
 
 	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl);
 	if (err)
@@ -168,7 +179,7 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
 	switch (module_id) {
 	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP:
 		modinfo->type       = ETH_MODULE_SFF_8436;
-		modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
 		break;
 	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */
 	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28:
@@ -176,10 +187,10 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module,
 		    module_rev_id >=
 		    MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) {
 			modinfo->type       = ETH_MODULE_SFF_8636;
-			modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN;
 		} else {
 			modinfo->type       = ETH_MODULE_SFF_8436;
-			modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
 		}
 		break;
 	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 5b00726c4346..9bf8da5f6daf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -41,7 +41,7 @@ struct mlxsw_hwmon {
 	struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
 	unsigned int attrs_count;
 	u8 sensor_count;
-	u8 module_sensor_count;
+	u8 module_sensor_max;
 };
 
 static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
@@ -56,7 +56,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
 	int err;
 
 	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
-					   mlxsw_hwmon->module_sensor_count);
+					   mlxsw_hwmon->module_sensor_max);
 	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
 	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
@@ -79,7 +79,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
 	int err;
 
 	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
-					   mlxsw_hwmon->module_sensor_count);
+					   mlxsw_hwmon->module_sensor_max);
 	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
 	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
@@ -109,7 +109,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
 		return -EINVAL;
 
 	index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
-					   mlxsw_hwmon->module_sensor_count);
+					   mlxsw_hwmon->module_sensor_max);
 	mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true);
 	err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err) {
@@ -336,7 +336,7 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev,
 			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
 	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
 	int index = mlwsw_hwmon_attr->type_index -
-		    mlxsw_hwmon->module_sensor_count + 1;
+		    mlxsw_hwmon->module_sensor_max + 1;
 
 	return sprintf(buf, "gearbox %03u\n", index);
 }
@@ -528,51 +528,45 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon)
 
 static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
 {
-	unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core);
-	char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0};
-	int i, index;
-	u8 width;
-	int err;
+	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+	u8 module_sensor_max;
+	int i, err;
 
 	if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core))
 		return 0;
 
+	mlxsw_reg_mgpir_pack(mgpir_pl);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
+			       &module_sensor_max);
+
 	/* Add extra attributes for module temperature. Sensor index is
 	 * assigned to sensor_count value, while all indexed before
 	 * sensor_count are already utilized by the sensors connected through
 	 * mtmp register by mlxsw_hwmon_temp_init().
 	 */
-	index = mlxsw_hwmon->sensor_count;
-	for (i = 1; i < module_count; i++) {
-		mlxsw_reg_pmlp_pack(pmlp_pl, i);
-		err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp),
-				      pmlp_pl);
-		if (err) {
-			dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n",
-				i);
-			return err;
-		}
-		width = mlxsw_reg_pmlp_width_get(pmlp_pl);
-		if (!width)
-			continue;
+	mlxsw_hwmon->module_sensor_max = mlxsw_hwmon->sensor_count +
+					 module_sensor_max;
+	for (i = mlxsw_hwmon->sensor_count;
+	     i < mlxsw_hwmon->module_sensor_max; i++) {
 		mlxsw_hwmon_attr_add(mlxsw_hwmon,
-				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index,
-				     index);
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, i, i);
 		mlxsw_hwmon_attr_add(mlxsw_hwmon,
 				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT,
-				     index, index);
+				     i, i);
 		mlxsw_hwmon_attr_add(mlxsw_hwmon,
-				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
-				     index, index);
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, i,
+				     i);
 		mlxsw_hwmon_attr_add(mlxsw_hwmon,
 				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
-				     index, index);
+				     i, i);
 		mlxsw_hwmon_attr_add(mlxsw_hwmon,
 				     MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
-				     index, index);
-		index++;
+				     i, i);
 	}
-	mlxsw_hwmon->module_sensor_count = index;
 
 	return 0;
 }
@@ -590,14 +584,14 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon)
 	if (err)
 		return err;
 
-	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL);
+	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL, NULL);
 	if (!gbox_num)
 		return 0;
 
-	index = mlxsw_hwmon->module_sensor_count;
-	max_index = mlxsw_hwmon->module_sensor_count + gbox_num;
+	index = mlxsw_hwmon->module_sensor_max;
+	max_index = mlxsw_hwmon->module_sensor_max + gbox_num;
 	while (index < max_index) {
-		sensor_index = index % mlxsw_hwmon->module_sensor_count +
+		sensor_index = index % mlxsw_hwmon->module_sensor_max +
 			       MLXSW_REG_MTMP_GBOX_INDEX_MIN;
 		mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true);
 		err = mlxsw_reg_write(mlxsw_hwmon->core,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 35a1dc89c28a..c721b171bd8d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -112,6 +112,7 @@ struct mlxsw_thermal {
 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
 	enum thermal_device_mode mode;
 	struct mlxsw_thermal_module *tz_module_arr;
+	u8 tz_module_num;
 	struct mlxsw_thermal_module *tz_gearbox_arr;
 	u8 tz_gearbox_num;
 	unsigned int tz_highest_score;
@@ -775,23 +776,10 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
 
 static int
 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
-			  struct mlxsw_thermal *thermal, u8 local_port)
+			  struct mlxsw_thermal *thermal, u8 module)
 {
 	struct mlxsw_thermal_module *module_tz;
-	char pmlp_pl[MLXSW_REG_PMLP_LEN];
-	u8 width, module;
-	int err;
-
-	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
-	err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl);
-	if (err)
-		return err;
 
-	width = mlxsw_reg_pmlp_width_get(pmlp_pl);
-	if (!width)
-		return 0;
-
-	module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
 	module_tz = &thermal->tz_module_arr[module];
 	/* Skip if parent is already set (case of port split). */
 	if (module_tz->parent)
@@ -819,26 +807,34 @@ static int
 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
 			   struct mlxsw_thermal *thermal)
 {
-	unsigned int module_count = mlxsw_core_max_ports(core);
 	struct mlxsw_thermal_module *module_tz;
+	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
 	int i, err;
 
 	if (!mlxsw_core_res_query_enabled(core))
 		return 0;
 
-	thermal->tz_module_arr = kcalloc(module_count,
+	mlxsw_reg_mgpir_pack(mgpir_pl);
+	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
+			       &thermal->tz_module_num);
+
+	thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
 					 sizeof(*thermal->tz_module_arr),
 					 GFP_KERNEL);
 	if (!thermal->tz_module_arr)
 		return -ENOMEM;
 
-	for (i = 1; i < module_count; i++) {
+	for (i = 0; i < thermal->tz_module_num; i++) {
 		err = mlxsw_thermal_module_init(dev, core, thermal, i);
 		if (err)
 			goto err_unreg_tz_module_arr;
 	}
 
-	for (i = 0; i < module_count - 1; i++) {
+	for (i = 0; i < thermal->tz_module_num; i++) {
 		module_tz = &thermal->tz_module_arr[i];
 		if (!module_tz->parent)
 			continue;
@@ -850,7 +846,7 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
 	return 0;
 
 err_unreg_tz_module_arr:
-	for (i = module_count - 1; i >= 0; i--)
+	for (i = thermal->tz_module_num - 1; i >= 0; i--)
 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
 	kfree(thermal->tz_module_arr);
 	return err;
@@ -859,13 +855,12 @@ err_unreg_tz_module_arr:
 static void
 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
 {
-	unsigned int module_count = mlxsw_core_max_ports(thermal->core);
 	int i;
 
 	if (!mlxsw_core_res_query_enabled(thermal->core))
 		return;
 
-	for (i = module_count - 1; i >= 0; i--)
+	for (i = thermal->tz_module_num - 1; i >= 0; i--)
 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
 	kfree(thermal->tz_module_arr);
 }
@@ -913,7 +908,8 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
 	if (err)
 		return err;
 
-	mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL);
+	mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL,
+			       NULL);
 	if (!thermal->tz_gearbox_num)
 		return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 95f408d0e103..34566eb62c47 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -640,7 +640,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
 
 	err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
 					     &mlxsw_i2c_bus, mlxsw_i2c, false,
-					     NULL);
+					     NULL, NULL);
 	if (err) {
 		dev_err(&client->dev, "Fail to register core bus\n");
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index 471b0ca6d69a..2b543911ae00 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -16,6 +16,14 @@
 
 static const char mlxsw_m_driver_name[] = "mlxsw_minimal";
 
+#define MLXSW_M_FWREV_MINOR	2000
+#define MLXSW_M_FWREV_SUBMINOR	1886
+
+static const struct mlxsw_fw_rev mlxsw_m_fw_rev = {
+	.minor = MLXSW_M_FWREV_MINOR,
+	.subminor = MLXSW_M_FWREV_SUBMINOR,
+};
+
 struct mlxsw_m_port;
 
 struct mlxsw_m {
@@ -172,6 +180,7 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module)
 	}
 
 	SET_NETDEV_DEV(dev, mlxsw_m->bus_info->dev);
+	dev_net_set(dev, mlxsw_core_net(mlxsw_m->core));
 	mlxsw_m_port = netdev_priv(dev);
 	mlxsw_m_port->dev = dev;
 	mlxsw_m_port->mlxsw_m = mlxsw_m;
@@ -325,8 +334,27 @@ static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
 	kfree(mlxsw_m->ports);
 }
 
+static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m)
+{
+	const struct mlxsw_fw_rev *rev = &mlxsw_m->bus_info->fw_rev;
+
+	/* Validate driver and FW are compatible.
+	 * Do not check major version, since it defines chip type, while
+	 * driver is supposed to support any type.
+	 */
+	if (mlxsw_core_fw_rev_minor_subminor_validate(rev, &mlxsw_m_fw_rev))
+		return 0;
+
+	dev_err(mlxsw_m->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n",
+		rev->major, rev->minor, rev->subminor, rev->major,
+		mlxsw_m_fw_rev.minor, mlxsw_m_fw_rev.subminor);
+
+	return -EINVAL;
+}
+
 static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
-			const struct mlxsw_bus_info *mlxsw_bus_info)
+			const struct mlxsw_bus_info *mlxsw_bus_info,
+			struct netlink_ext_ack *extack)
 {
 	struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
 	int err;
@@ -334,6 +362,10 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_m->core = mlxsw_core;
 	mlxsw_m->bus_info = mlxsw_bus_info;
 
+	err = mlxsw_m_fw_rev_validate(mlxsw_m);
+	if (err)
+		return err;
+
 	err = mlxsw_m_base_mac_get(mlxsw_m);
 	if (err) {
 		dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 615455a21567..914c33e46fb4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -284,15 +284,18 @@ static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q,
 static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 			      struct mlxsw_pci_queue *q)
 {
+	int tclass;
 	int i;
 	int err;
 
 	q->producer_counter = 0;
 	q->consumer_counter = 0;
+	tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC :
+						      MLXSW_PCI_SDQ_CTL_TC;
 
 	/* Set CQ of same number of this SDQ. */
 	mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num);
-	mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 3);
+	mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass);
 	mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
 	for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
 		dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
@@ -963,6 +966,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
 	eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox);
 
 	if (num_sdqs + num_rdqs > num_cqs ||
+	    num_sdqs < MLXSW_PCI_SDQS_MIN ||
 	    num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) {
 		dev_err(&pdev->dev, "Unsupported number of queues\n");
 		return -EINVAL;
@@ -1520,7 +1524,15 @@ static struct mlxsw_pci_queue *
 mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci,
 		   const struct mlxsw_tx_info *tx_info)
 {
-	u8 sdqn = tx_info->local_port % mlxsw_pci_sdq_count(mlxsw_pci);
+	u8 ctl_sdq_count = mlxsw_pci_sdq_count(mlxsw_pci) - 1;
+	u8 sdqn;
+
+	if (tx_info->is_emad) {
+		sdqn = MLXSW_PCI_SDQ_EMAD_INDEX;
+	} else {
+		BUILD_BUG_ON(MLXSW_PCI_SDQ_EMAD_INDEX != 0);
+		sdqn = 1 + (tx_info->local_port % ctl_sdq_count);
+	}
 
 	return mlxsw_pci_sdq_get(mlxsw_pci, sdqn);
 }
@@ -1790,7 +1802,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
 					     &mlxsw_pci_bus, mlxsw_pci, false,
-					     NULL);
+					     NULL, NULL);
 	if (err) {
 		dev_err(&pdev->dev, "cannot register bus device\n");
 		goto err_bus_device_register;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index e57e42e2d2b2..e0d7d2d9a0c8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -27,7 +27,7 @@
 
 #define MLXSW_PCI_SW_RESET			0xF0010
 #define MLXSW_PCI_SW_RESET_RST_BIT		BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS	20000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS	900000
 #define MLXSW_PCI_SW_RESET_WAIT_MSECS		100
 #define MLXSW_PCI_FW_READY			0xA1844
 #define MLXSW_PCI_FW_READY_MASK			0xFFFF
@@ -51,6 +51,11 @@
 #define MLXSW_PCI_EQ_ASYNC_NUM	0
 #define MLXSW_PCI_EQ_COMP_NUM	1
 
+#define MLXSW_PCI_SDQS_MIN	2 /* EMAD and control traffic */
+#define MLXSW_PCI_SDQ_EMAD_INDEX	0
+#define MLXSW_PCI_SDQ_EMAD_TC	0
+#define MLXSW_PCI_SDQ_CTL_TC	3
+
 #define MLXSW_PCI_AQ_PAGES	8
 #define MLXSW_PCI_AQ_SIZE	(MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
 #define MLXSW_PCI_WQE_SIZE	32 /* 32 bytes per element */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h
index a33eeef0b00c..741fd2989d12 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/port.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/port.h
@@ -24,8 +24,6 @@
 
 #define MLXSW_PORT_DONT_CARE		0xFF
 
-#define MLXSW_PORT_MODULE_MAX_WIDTH	4
-
 enum mlxsw_port_admin_status {
 	MLXSW_PORT_ADMIN_STATUS_UP = 1,
 	MLXSW_PORT_ADMIN_STATUS_DOWN = 2,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 5494cf93f34c..5294a1622643 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3969,6 +3969,7 @@ MLXSW_ITEM32(reg, pmlp, local_port, 0x00, 16, 8);
  * 1 - Lane 0 is used.
  * 2 - Lanes 0 and 1 are used.
  * 4 - Lanes 0, 1, 2 and 3 are used.
+ * 8 - Lanes 0-7 are used.
  * Access: RW
  */
 MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8);
@@ -3983,14 +3984,14 @@ MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false);
  * Tx Lane. When rxtx field is cleared, this field is used for Rx as well.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 4, 0x04, 0x00, false);
 
 /* reg_pmlp_rx_lane
  * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is
  * equal to Tx lane.
  * Access: RW
  */
-MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false);
+MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 4, 0x04, 0x00, false);
 
 static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port)
 {
@@ -4111,6 +4112,7 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
 #define MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4		BIT(9)
 #define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2		BIT(10)
 #define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4		BIT(12)
+#define MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8				BIT(15)
 
 /* reg_ptys_ext_eth_proto_cap
  * Extended Ethernet port supported speeds and protocols.
@@ -5373,6 +5375,55 @@ static inline void mlxsw_reg_pplr_pack(char *payload, u8 local_port,
 				 MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL : 0);
 }
 
+/* PMTM - Port Module Type Mapping Register
+ * ----------------------------------------
+ * The PMTM allows query or configuration of module types.
+ */
+#define MLXSW_REG_PMTM_ID 0x5067
+#define MLXSW_REG_PMTM_LEN 0x10
+
+MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN);
+
+/* reg_pmtm_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8);
+
+enum mlxsw_reg_pmtm_module_type {
+	/* Backplane with 4 lanes */
+	MLXSW_REG_PMTM_MODULE_TYPE_BP_4X,
+	/* QSFP */
+	MLXSW_REG_PMTM_MODULE_TYPE_BP_QSFP,
+	/* SFP */
+	MLXSW_REG_PMTM_MODULE_TYPE_BP_SFP,
+	/* Backplane with single lane */
+	MLXSW_REG_PMTM_MODULE_TYPE_BP_1X = 4,
+	/* Backplane with two lane */
+	MLXSW_REG_PMTM_MODULE_TYPE_BP_2X = 8,
+	/* Chip2Chip */
+	MLXSW_REG_PMTM_MODULE_TYPE_C2C = 10,
+};
+
+/* reg_pmtm_module_type
+ * Module type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 4);
+
+static inline void mlxsw_reg_pmtm_pack(char *payload, u8 module)
+{
+	MLXSW_REG_ZERO(pmtm, payload);
+	mlxsw_reg_pmtm_module_set(payload, module);
+}
+
+static inline void
+mlxsw_reg_pmtm_unpack(char *payload,
+		      enum mlxsw_reg_pmtm_module_type *module_type)
+{
+	*module_type = mlxsw_reg_pmtm_module_type_get(payload);
+}
+
 /* HTGT - Host Trap Group Table
  * ----------------------------
  * Configures the properties for forwarding to CPU.
@@ -5429,6 +5480,7 @@ enum mlxsw_reg_htgt_trap_group {
 enum mlxsw_reg_htgt_discard_trap_group {
 	MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = MLXSW_REG_HTGT_TRAP_GROUP_MAX,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS,
 };
 
 /* reg_htgt_trap_group
@@ -8411,6 +8463,7 @@ MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16);
 MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16);
 
 #define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH	256
+#define MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH	128
 #define MLXSW_REG_MCIA_EEPROM_SIZE		48
 #define MLXSW_REG_MCIA_I2C_ADDR_LOW		0x50
 #define MLXSW_REG_MCIA_I2C_ADDR_HIGH		0x51
@@ -8446,6 +8499,14 @@ enum mlxsw_reg_mcia_eeprom_module_info {
  */
 MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
 
+/* This is used to access the optional upper pages (1-3) in the QSFP+
+ * memory map. Page 1 is available on offset 256 through 383, page 2 -
+ * on offset 384 through 511, page 3 - on offset 512 through 639.
+ */
+#define MLXSW_REG_MCIA_PAGE_GET(off) (((off) - \
+				MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) / \
+				MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH + 1)
+
 static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock,
 				       u8 page_number, u16 device_addr,
 				       u8 size, u8 i2c_device_addr)
@@ -8670,7 +8731,7 @@ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
  * properties.
  */
 #define MLXSW_REG_MPAR_ID 0x901B
-#define MLXSW_REG_MPAR_LEN 0x08
+#define MLXSW_REG_MPAR_LEN 0x0C
 
 MLXSW_REG_DEFINE(mpar, MLXSW_REG_MPAR_ID, MLXSW_REG_MPAR_LEN);
 
@@ -9531,6 +9592,12 @@ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
  */
 MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
 
+/* num_of_modules
+ * Number of modules.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_modules, 0x04, 0, 8);
+
 static inline void mlxsw_reg_mgpir_pack(char *payload)
 {
 	MLXSW_REG_ZERO(mgpir, payload);
@@ -9539,7 +9606,7 @@ static inline void mlxsw_reg_mgpir_pack(char *payload)
 static inline void
 mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
 		       enum mlxsw_reg_mgpir_device_type *device_type,
-		       u8 *devices_per_flash)
+		       u8 *devices_per_flash, u8 *num_of_modules)
 {
 	if (num_of_devices)
 		*num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
@@ -9548,6 +9615,8 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
 	if (devices_per_flash)
 		*devices_per_flash =
 				mlxsw_reg_mgpir_devices_per_flash_get(payload);
+	if (num_of_modules)
+		*num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload);
 }
 
 /* TNGCR - Tunneling NVE General Configuration Register
@@ -10526,6 +10595,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(pbmc),
 	MLXSW_REG(pspa),
 	MLXSW_REG(pplr),
+	MLXSW_REG(pmtm),
 	MLXSW_REG(htgt),
 	MLXSW_REG(hpkt),
 	MLXSW_REG(rgcr),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 33a9fc9ef6a4..6534184cb942 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -26,7 +26,8 @@ enum mlxsw_res_id {
 	MLXSW_RES_ID_MAX_LAG_MEMBERS,
 	MLXSW_RES_ID_LOCAL_PORTS_IN_1X,
 	MLXSW_RES_ID_LOCAL_PORTS_IN_2X,
-	MLXSW_RES_ID_MAX_BUFFER_SIZE,
+	MLXSW_RES_ID_LOCAL_PORTS_IN_4X,
+	MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER,
 	MLXSW_RES_ID_CELL_SIZE,
 	MLXSW_RES_ID_MAX_HEADROOM_SIZE,
 	MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS,
@@ -82,7 +83,8 @@ static u16 mlxsw_res_ids[] = {
 	[MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521,
 	[MLXSW_RES_ID_LOCAL_PORTS_IN_1X] = 0x2610,
 	[MLXSW_RES_ID_LOCAL_PORTS_IN_2X] = 0x2611,
-	[MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802,	/* Bytes */
+	[MLXSW_RES_ID_LOCAL_PORTS_IN_4X] = 0x2612,
+	[MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER] = 0x2805,	/* Bytes */
 	[MLXSW_RES_ID_CELL_SIZE] = 0x2803,	/* Bytes */
 	[MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811,	/* Bytes */
 	[MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index dcf9562bce8a..471478eb1d86 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -22,6 +22,7 @@
 #include <linux/inetdevice.h>
 #include <linux/netlink.h>
 #include <linux/jhash.h>
+#include <linux/log2.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
@@ -48,7 +49,7 @@
 
 #define MLXSW_SP1_FWREV_MAJOR 13
 #define MLXSW_SP1_FWREV_MINOR 2000
-#define MLXSW_SP1_FWREV_SUBMINOR 1886
+#define MLXSW_SP1_FWREV_SUBMINOR 2308
 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702
 
 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
@@ -63,6 +64,21 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
 	"." __stringify(MLXSW_SP1_FWREV_MINOR) \
 	"." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2"
 
+#define MLXSW_SP2_FWREV_MAJOR 29
+#define MLXSW_SP2_FWREV_MINOR 2000
+#define MLXSW_SP2_FWREV_SUBMINOR 2308
+
+static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = {
+	.major = MLXSW_SP2_FWREV_MAJOR,
+	.minor = MLXSW_SP2_FWREV_MINOR,
+	.subminor = MLXSW_SP2_FWREV_SUBMINOR,
+};
+
+#define MLXSW_SP2_FW_FILENAME \
+	"mellanox/mlxsw_spectrum2-" __stringify(MLXSW_SP2_FWREV_MAJOR) \
+	"." __stringify(MLXSW_SP2_FWREV_MINOR) \
+	"." __stringify(MLXSW_SP2_FWREV_SUBMINOR) ".mfa2"
+
 static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum";
 static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2";
 static const char mlxsw_sp3_driver_name[] = "mlxsw_spectrum3";
@@ -409,9 +425,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
 	}
 	if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
 	    MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) &&
-	    (rev->minor > req_rev->minor ||
-	     (rev->minor == req_rev->minor &&
-	      rev->subminor >= req_rev->subminor)))
+	    mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev))
 		return 0;
 
 	dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
@@ -735,35 +749,69 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port)
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl);
 }
 
-static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
-					 u8 local_port, u8 *p_module,
-					 u8 *p_width, u8 *p_lane)
+static int
+mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+			      struct mlxsw_sp_port_mapping *port_mapping)
 {
 	char pmlp_pl[MLXSW_REG_PMLP_LEN];
+	bool separate_rxtx;
+	u8 module;
+	u8 width;
 	int err;
+	int i;
 
 	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl);
 	if (err)
 		return err;
-	*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
-	*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
-	*p_lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0);
+	module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+	width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+	separate_rxtx = mlxsw_reg_pmlp_rxtx_get(pmlp_pl);
+
+	if (width && !is_power_of_2(width)) {
+		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: width value is not power of 2\n",
+			local_port);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < width; i++) {
+		if (mlxsw_reg_pmlp_module_get(pmlp_pl, i) != module) {
+			dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: contains multiple modules\n",
+				local_port);
+			return -EINVAL;
+		}
+		if (separate_rxtx &&
+		    mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, i) !=
+		    mlxsw_reg_pmlp_rx_lane_get(pmlp_pl, i)) {
+			dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: TX and RX lane numbers are different\n",
+				local_port);
+			return -EINVAL;
+		}
+		if (mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, i) != i) {
+			dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unsupported module config: TX and RX lane numbers are not sequential\n",
+				local_port);
+			return -EINVAL;
+		}
+	}
+
+	port_mapping->module = module;
+	port_mapping->width = width;
+	port_mapping->lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0);
 	return 0;
 }
 
-static int mlxsw_sp_port_module_map(struct mlxsw_sp_port *mlxsw_sp_port,
-				    u8 module, u8 width, u8 lane)
+static int mlxsw_sp_port_module_map(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+	struct mlxsw_sp_port_mapping *port_mapping = &mlxsw_sp_port->mapping;
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char pmlp_pl[MLXSW_REG_PMLP_LEN];
 	int i;
 
 	mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port);
-	mlxsw_reg_pmlp_width_set(pmlp_pl, width);
-	for (i = 0; i < width; i++) {
-		mlxsw_reg_pmlp_module_set(pmlp_pl, i, module);
-		mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, lane + i);  /* Rx & Tx */
+	mlxsw_reg_pmlp_width_set(pmlp_pl, port_mapping->width);
+	for (i = 0; i < port_mapping->width; i++) {
+		mlxsw_reg_pmlp_module_set(pmlp_pl, i, port_mapping->module);
+		mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, port_mapping->lane + i); /* Rx & Tx */
 	}
 
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl);
@@ -2914,9 +2962,22 @@ mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = {
 #define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \
 	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4)
 
+static const enum ethtool_link_mode_bit_indices
+mlxsw_sp2_mask_ethtool_400gaui_8[] = {
+	ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT,
+	ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT,
+	ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT,
+	ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT,
+	ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT,
+};
+
+#define MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN \
+	ARRAY_SIZE(mlxsw_sp2_mask_ethtool_400gaui_8)
+
 #define MLXSW_SP_PORT_MASK_WIDTH_1X	BIT(0)
 #define MLXSW_SP_PORT_MASK_WIDTH_2X	BIT(1)
 #define MLXSW_SP_PORT_MASK_WIDTH_4X	BIT(2)
+#define MLXSW_SP_PORT_MASK_WIDTH_8X	BIT(3)
 
 static u8 mlxsw_sp_port_mask_width_get(u8 width)
 {
@@ -2927,6 +2988,8 @@ static u8 mlxsw_sp_port_mask_width_get(u8 width)
 		return MLXSW_SP_PORT_MASK_WIDTH_2X;
 	case 4:
 		return MLXSW_SP_PORT_MASK_WIDTH_4X;
+	case 8:
+		return MLXSW_SP_PORT_MASK_WIDTH_8X;
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -2948,7 +3011,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN,
 		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
 				  MLXSW_SP_PORT_MASK_WIDTH_2X |
-				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+				  MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_100,
 	},
 	{
@@ -2957,7 +3021,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN,
 		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
 				  MLXSW_SP_PORT_MASK_WIDTH_2X |
-				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+				  MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_1000,
 	},
 	{
@@ -2966,7 +3031,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN,
 		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
 				  MLXSW_SP_PORT_MASK_WIDTH_2X |
-				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+				  MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_2500,
 	},
 	{
@@ -2975,7 +3041,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN,
 		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
 				  MLXSW_SP_PORT_MASK_WIDTH_2X |
-				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+				  MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_5000,
 	},
 	{
@@ -2984,14 +3051,16 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN,
 		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
 				  MLXSW_SP_PORT_MASK_WIDTH_2X |
-				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+				  MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_10000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G,
 		.mask_ethtool	= mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g,
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN,
-		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_40000,
 	},
 	{
@@ -3000,7 +3069,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN,
 		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_1X |
 				  MLXSW_SP_PORT_MASK_WIDTH_2X |
-				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+				  MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_25000,
 	},
 	{
@@ -3008,7 +3078,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.mask_ethtool	= mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2,
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN,
 		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_2X |
-				  MLXSW_SP_PORT_MASK_WIDTH_4X,
+				  MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_50000,
 	},
 	{
@@ -3022,7 +3093,8 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4,
 		.mask_ethtool	= mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4,
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN,
-		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_100000,
 	},
 	{
@@ -3036,9 +3108,17 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = {
 		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4,
 		.mask_ethtool	= mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4,
 		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN,
-		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_4X |
+				  MLXSW_SP_PORT_MASK_WIDTH_8X,
 		.speed		= SPEED_200000,
 	},
+	{
+		.mask		= MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8,
+		.mask_ethtool	= mlxsw_sp2_mask_ethtool_400gaui_8,
+		.m_ethtool_len	= MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN,
+		.mask_width	= MLXSW_SP_PORT_MASK_WIDTH_8X,
+		.speed		= SPEED_400000,
+	},
 };
 
 #define MLXSW_SP2_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp2_port_link_mode)
@@ -3435,7 +3515,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
 };
 
 static int
-mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
+mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	const struct mlxsw_sp_port_type_speed_ops *ops;
@@ -3451,7 +3531,7 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
 				   &base_speed);
 	if (err)
 		return err;
-	upper_speed = base_speed * width;
+	upper_speed = base_speed * mlxsw_sp_port->mapping.width;
 
 	eth_proto_admin = ops->to_ptys_upper_speed(mlxsw_sp, upper_speed);
 	ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port,
@@ -3612,15 +3692,18 @@ static int mlxsw_sp_port_tc_mc_mode_set(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				bool split, u8 module, u8 width, u8 lane)
+				u8 split_base_local_port,
+				struct mlxsw_sp_port_mapping *port_mapping)
 {
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+	bool split = !!split_base_local_port;
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct net_device *dev;
 	int err;
 
 	err = mlxsw_core_port_init(mlxsw_sp->core, local_port,
-				   module + 1, split, lane / width,
+				   port_mapping->module + 1, split,
+				   port_mapping->lane / port_mapping->width,
 				   mlxsw_sp->base_mac,
 				   sizeof(mlxsw_sp->base_mac));
 	if (err) {
@@ -3635,15 +3718,15 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		goto err_alloc_etherdev;
 	}
 	SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev);
+	dev_net_set(dev, mlxsw_sp_net(mlxsw_sp));
 	mlxsw_sp_port = netdev_priv(dev);
 	mlxsw_sp_port->dev = dev;
 	mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
 	mlxsw_sp_port->local_port = local_port;
 	mlxsw_sp_port->pvid = MLXSW_SP_DEFAULT_VID;
 	mlxsw_sp_port->split = split;
-	mlxsw_sp_port->mapping.module = module;
-	mlxsw_sp_port->mapping.width = width;
-	mlxsw_sp_port->mapping.lane = lane;
+	mlxsw_sp_port->split_base_local_port = split_base_local_port;
+	mlxsw_sp_port->mapping = *port_mapping;
 	mlxsw_sp_port->link.autoneg = 1;
 	INIT_LIST_HEAD(&mlxsw_sp_port->vlans_list);
 	INIT_LIST_HEAD(&mlxsw_sp_port->mall_tc_list);
@@ -3668,7 +3751,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	dev->netdev_ops = &mlxsw_sp_port_netdev_ops;
 	dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops;
 
-	err = mlxsw_sp_port_module_map(mlxsw_sp_port, module, width, lane);
+	err = mlxsw_sp_port_module_map(mlxsw_sp_port);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to map module\n",
 			mlxsw_sp_port->local_port);
@@ -3710,7 +3793,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		goto err_port_system_port_mapping_set;
 	}
 
-	err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width);
+	err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n",
 			mlxsw_sp_port->local_port);
@@ -3933,14 +4016,13 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp)
 		if (mlxsw_sp_port_created(mlxsw_sp, i))
 			mlxsw_sp_port_remove(mlxsw_sp, i);
 	mlxsw_sp_cpu_port_remove(mlxsw_sp);
-	kfree(mlxsw_sp->port_to_module);
 	kfree(mlxsw_sp->ports);
 }
 
 static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 {
 	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
-	u8 module, width, lane;
+	struct mlxsw_sp_port_mapping *port_mapping;
 	size_t alloc_size;
 	int i;
 	int err;
@@ -3950,66 +4032,100 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 	if (!mlxsw_sp->ports)
 		return -ENOMEM;
 
-	mlxsw_sp->port_to_module = kmalloc_array(max_ports, sizeof(int),
-						 GFP_KERNEL);
-	if (!mlxsw_sp->port_to_module) {
-		err = -ENOMEM;
-		goto err_port_to_module_alloc;
-	}
-
 	err = mlxsw_sp_cpu_port_create(mlxsw_sp);
 	if (err)
 		goto err_cpu_port_create;
 
 	for (i = 1; i < max_ports; i++) {
-		/* Mark as invalid */
-		mlxsw_sp->port_to_module[i] = -1;
-
-		err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module,
-						    &width, &lane);
-		if (err)
-			goto err_port_module_info_get;
-		if (!width)
+		port_mapping = mlxsw_sp->port_mapping[i];
+		if (!port_mapping)
 			continue;
-		mlxsw_sp->port_to_module[i] = module;
-		err = mlxsw_sp_port_create(mlxsw_sp, i, false,
-					   module, width, lane);
+		err = mlxsw_sp_port_create(mlxsw_sp, i, 0, port_mapping);
 		if (err)
 			goto err_port_create;
 	}
 	return 0;
 
 err_port_create:
-err_port_module_info_get:
 	for (i--; i >= 1; i--)
 		if (mlxsw_sp_port_created(mlxsw_sp, i))
 			mlxsw_sp_port_remove(mlxsw_sp, i);
 	mlxsw_sp_cpu_port_remove(mlxsw_sp);
 err_cpu_port_create:
-	kfree(mlxsw_sp->port_to_module);
-err_port_to_module_alloc:
 	kfree(mlxsw_sp->ports);
 	return err;
 }
 
-static u8 mlxsw_sp_cluster_base_port_get(u8 local_port)
+static int mlxsw_sp_port_module_info_init(struct mlxsw_sp *mlxsw_sp)
 {
-	u8 offset = (local_port - 1) % MLXSW_SP_PORTS_PER_CLUSTER_MAX;
+	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+	struct mlxsw_sp_port_mapping port_mapping;
+	int i;
+	int err;
+
+	mlxsw_sp->port_mapping = kcalloc(max_ports,
+					 sizeof(struct mlxsw_sp_port_mapping *),
+					 GFP_KERNEL);
+	if (!mlxsw_sp->port_mapping)
+		return -ENOMEM;
+
+	for (i = 1; i < max_ports; i++) {
+		err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &port_mapping);
+		if (err)
+			goto err_port_module_info_get;
+		if (!port_mapping.width)
+			continue;
+
+		mlxsw_sp->port_mapping[i] = kmemdup(&port_mapping,
+						    sizeof(port_mapping),
+						    GFP_KERNEL);
+		if (!mlxsw_sp->port_mapping[i]) {
+			err = -ENOMEM;
+			goto err_port_module_info_dup;
+		}
+	}
+	return 0;
+
+err_port_module_info_get:
+err_port_module_info_dup:
+	for (i--; i >= 1; i--)
+		kfree(mlxsw_sp->port_mapping[i]);
+	kfree(mlxsw_sp->port_mapping);
+	return err;
+}
+
+static void mlxsw_sp_port_module_info_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	int i;
+
+	for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++)
+		kfree(mlxsw_sp->port_mapping[i]);
+	kfree(mlxsw_sp->port_mapping);
+}
+
+static u8 mlxsw_sp_cluster_base_port_get(u8 local_port, unsigned int max_width)
+{
+	u8 offset = (local_port - 1) % max_width;
 
 	return local_port - offset;
 }
 
-static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
-				      u8 module, unsigned int count, u8 offset)
+static int
+mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
+			   struct mlxsw_sp_port_mapping *port_mapping,
+			   unsigned int count, u8 offset)
 {
-	u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count;
+	struct mlxsw_sp_port_mapping split_port_mapping;
 	int err, i;
 
+	split_port_mapping = *port_mapping;
+	split_port_mapping.width /= count;
 	for (i = 0; i < count; i++) {
 		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * offset,
-					   true, module, width, i * width);
+					   base_port, &split_port_mapping);
 		if (err)
 			goto err_port_create;
+		split_port_mapping.lane += split_port_mapping.width;
 	}
 
 	return 0;
@@ -4022,45 +4138,55 @@ err_port_create:
 }
 
 static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp,
-					 u8 base_port, unsigned int count)
+					 u8 base_port,
+					 unsigned int count, u8 offset)
 {
-	u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH;
+	struct mlxsw_sp_port_mapping *port_mapping;
 	int i;
 
-	/* Split by four means we need to re-create two ports, otherwise
-	 * only one.
-	 */
-	count = count / 2;
-
-	for (i = 0; i < count; i++) {
-		local_port = base_port + i * 2;
-		if (mlxsw_sp->port_to_module[local_port] < 0)
+	/* Go over original unsplit ports in the gap and recreate them. */
+	for (i = 0; i < count * offset; i++) {
+		port_mapping = mlxsw_sp->port_mapping[base_port + i];
+		if (!port_mapping)
 			continue;
-		module = mlxsw_sp->port_to_module[local_port];
-
-		mlxsw_sp_port_create(mlxsw_sp, local_port, false, module,
-				     width, 0);
+		mlxsw_sp_port_create(mlxsw_sp, base_port + i, 0, port_mapping);
 	}
 }
 
+static int mlxsw_sp_local_ports_offset(struct mlxsw_core *mlxsw_core,
+				       unsigned int count,
+				       unsigned int max_width)
+{
+	enum mlxsw_res_id local_ports_in_x_res_id;
+	int split_width = max_width / count;
+
+	if (split_width == 1)
+		local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_1X;
+	else if (split_width == 2)
+		local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_2X;
+	else if (split_width == 4)
+		local_ports_in_x_res_id = MLXSW_RES_ID_LOCAL_PORTS_IN_4X;
+	else
+		return -EINVAL;
+
+	if (!mlxsw_core_res_valid(mlxsw_core, local_ports_in_x_res_id))
+		return -EINVAL;
+	return mlxsw_core_res_get(mlxsw_core, local_ports_in_x_res_id);
+}
+
 static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 			       unsigned int count,
 			       struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
-	u8 local_ports_in_1x, local_ports_in_2x, offset;
+	struct mlxsw_sp_port_mapping port_mapping;
 	struct mlxsw_sp_port *mlxsw_sp_port;
-	u8 module, cur_width, base_port;
+	int max_width;
+	u8 base_port;
+	int offset;
 	int i;
 	int err;
 
-	if (!MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_1X) ||
-	    !MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_2X))
-		return -EIO;
-
-	local_ports_in_1x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_1X);
-	local_ports_in_2x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_2X);
-
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n",
@@ -4069,47 +4195,70 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 		return -EINVAL;
 	}
 
-	module = mlxsw_sp_port->mapping.module;
-	cur_width = mlxsw_sp_port->mapping.width;
+	/* Split ports cannot be split. */
+	if (mlxsw_sp_port->split) {
+		netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n");
+		NL_SET_ERR_MSG_MOD(extack, "Port cannot be split further");
+		return -EINVAL;
+	}
+
+	max_width = mlxsw_core_module_max_width(mlxsw_core,
+						mlxsw_sp_port->mapping.module);
+	if (max_width < 0) {
+		netdev_err(mlxsw_sp_port->dev, "Cannot get max width of port module\n");
+		NL_SET_ERR_MSG_MOD(extack, "Cannot get max width of port module");
+		return max_width;
+	}
 
-	if (count != 2 && count != 4) {
-		netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n");
-		NL_SET_ERR_MSG_MOD(extack, "Port can only be split into 2 or 4 ports");
+	/* Split port with non-max and 1 module width cannot be split. */
+	if (mlxsw_sp_port->mapping.width != max_width || max_width == 1) {
+		netdev_err(mlxsw_sp_port->dev, "Port cannot be split\n");
+		NL_SET_ERR_MSG_MOD(extack, "Port cannot be split");
 		return -EINVAL;
 	}
 
-	if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) {
-		netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n");
-		NL_SET_ERR_MSG_MOD(extack, "Port cannot be split further");
+	if (count == 1 || !is_power_of_2(count) || count > max_width) {
+		netdev_err(mlxsw_sp_port->dev, "Invalid split count\n");
+		NL_SET_ERR_MSG_MOD(extack, "Invalid split count");
 		return -EINVAL;
 	}
 
-	/* Make sure we have enough slave (even) ports for the split. */
-	if (count == 2) {
-		offset = local_ports_in_2x;
-		base_port = local_port;
-		if (mlxsw_sp->ports[base_port + local_ports_in_2x]) {
-			netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n");
-			NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration");
-			return -EINVAL;
-		}
-	} else {
-		offset = local_ports_in_1x;
-		base_port = mlxsw_sp_cluster_base_port_get(local_port);
-		if (mlxsw_sp->ports[base_port + 1] ||
-		    mlxsw_sp->ports[base_port + 3]) {
+	offset = mlxsw_sp_local_ports_offset(mlxsw_core, count, max_width);
+	if (offset < 0) {
+		netdev_err(mlxsw_sp_port->dev, "Cannot obtain local port offset\n");
+		NL_SET_ERR_MSG_MOD(extack, "Cannot obtain local port offset");
+		return -EINVAL;
+	}
+
+	/* Only in case max split is being done, the local port and
+	 * base port may differ.
+	 */
+	base_port = count == max_width ?
+		    mlxsw_sp_cluster_base_port_get(local_port, max_width) :
+		    local_port;
+
+	for (i = 0; i < count * offset; i++) {
+		/* Expect base port to exist and also the one in the middle in
+		 * case of maximal split count.
+		 */
+		if (i == 0 || (count == max_width && i == count / 2))
+			continue;
+
+		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i)) {
 			netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n");
 			NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration");
 			return -EINVAL;
 		}
 	}
 
+	port_mapping = mlxsw_sp_port->mapping;
+
 	for (i = 0; i < count; i++)
 		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset))
 			mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset);
 
-	err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count,
-					 offset);
+	err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, &port_mapping,
+					 count, offset);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n");
 		goto err_port_split_create;
@@ -4118,7 +4267,7 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 	return 0;
 
 err_port_split_create:
-	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
+	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count, offset);
 	return err;
 }
 
@@ -4126,19 +4275,13 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port,
 				 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
-	u8 local_ports_in_1x, local_ports_in_2x, offset;
 	struct mlxsw_sp_port *mlxsw_sp_port;
-	u8 cur_width, base_port;
 	unsigned int count;
+	int max_width;
+	u8 base_port;
+	int offset;
 	int i;
 
-	if (!MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_1X) ||
-	    !MLXSW_CORE_RES_VALID(mlxsw_core, LOCAL_PORTS_IN_2X))
-		return -EIO;
-
-	local_ports_in_1x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_1X);
-	local_ports_in_2x = MLXSW_CORE_RES_GET(mlxsw_core, LOCAL_PORTS_IN_2X);
-
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n",
@@ -4153,25 +4296,30 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port,
 		return -EINVAL;
 	}
 
-	cur_width = mlxsw_sp_port->mapping.width;
-	count = cur_width == 1 ? 4 : 2;
+	max_width = mlxsw_core_module_max_width(mlxsw_core,
+						mlxsw_sp_port->mapping.module);
+	if (max_width < 0) {
+		netdev_err(mlxsw_sp_port->dev, "Cannot get max width of port module\n");
+		NL_SET_ERR_MSG_MOD(extack, "Cannot get max width of port module");
+		return max_width;
+	}
 
-	if (count == 2)
-		offset = local_ports_in_2x;
-	else
-		offset = local_ports_in_1x;
+	count = max_width / mlxsw_sp_port->mapping.width;
 
-	base_port = mlxsw_sp_cluster_base_port_get(local_port);
+	offset = mlxsw_sp_local_ports_offset(mlxsw_core, count, max_width);
+	if (WARN_ON(offset < 0)) {
+		netdev_err(mlxsw_sp_port->dev, "Cannot obtain local port offset\n");
+		NL_SET_ERR_MSG_MOD(extack, "Cannot obtain local port offset");
+		return -EINVAL;
+	}
 
-	/* Determine which ports to remove. */
-	if (count == 2 && local_port >= base_port + 2)
-		base_port = base_port + 2;
+	base_port = mlxsw_sp_port->split_base_local_port;
 
 	for (i = 0; i < count; i++)
 		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i * offset))
 			mlxsw_sp_port_remove(mlxsw_sp, base_port + i * offset);
 
-	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
+	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count, offset);
 
 	return 0;
 }
@@ -4364,8 +4512,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
 			     false),
 	/* L3 traps */
-	MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false),
 	MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
@@ -4392,8 +4538,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
 	MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
-	MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false),
-	MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
@@ -4408,7 +4552,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	/* Multicast Router Traps */
 	MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false),
 	MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
-	MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
 	MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
 	MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
 	/* NVE traps */
@@ -4738,7 +4881,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 				    unsigned long event, void *ptr);
 
 static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
-			 const struct mlxsw_bus_info *mlxsw_bus_info)
+			 const struct mlxsw_bus_info *mlxsw_bus_info,
+			 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	int err;
@@ -4831,7 +4975,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_acl_init;
 	}
 
-	err = mlxsw_sp_router_init(mlxsw_sp);
+	err = mlxsw_sp_router_init(mlxsw_sp, extack);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
 		goto err_router_init;
@@ -4864,7 +5008,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 	 * respin.
 	 */
 	mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
-	err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+	err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
+					      &mlxsw_sp->netdevice_nb);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n");
 		goto err_netdev_notifier;
@@ -4876,6 +5021,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_dpipe_init;
 	}
 
+	err = mlxsw_sp_port_module_info_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to init port module info\n");
+		goto err_port_module_info_init;
+	}
+
 	err = mlxsw_sp_ports_create(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
@@ -4885,9 +5036,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 	return 0;
 
 err_ports_create:
+	mlxsw_sp_port_module_info_fini(mlxsw_sp);
+err_port_module_info_init:
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
 err_dpipe_init:
-	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
+					  &mlxsw_sp->netdevice_nb);
 err_netdev_notifier:
 	if (mlxsw_sp->clock)
 		mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
@@ -4924,7 +5078,8 @@ err_fids_init:
 }
 
 static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
-			  const struct mlxsw_bus_info *mlxsw_bus_info)
+			  const struct mlxsw_bus_info *mlxsw_bus_info,
+			  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
@@ -4944,14 +5099,17 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->listeners = mlxsw_sp1_listener;
 	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
 
-	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
+	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
 }
 
 static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
-			  const struct mlxsw_bus_info *mlxsw_bus_info)
+			  const struct mlxsw_bus_info *mlxsw_bus_info,
+			  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+	mlxsw_sp->req_rev = &mlxsw_sp2_fw_rev;
+	mlxsw_sp->fw_filename = MLXSW_SP2_FW_FILENAME;
 	mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
 	mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
 	mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
@@ -4964,7 +5122,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
 	mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
 
-	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
+	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
 }
 
 static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
@@ -4972,8 +5130,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
 	mlxsw_sp_ports_remove(mlxsw_sp);
+	mlxsw_sp_port_module_info_fini(mlxsw_sp);
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
-	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
+					  &mlxsw_sp->netdevice_nb);
 	if (mlxsw_sp->clock) {
 		mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
 		mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
@@ -5165,14 +5325,61 @@ static int mlxsw_sp2_resources_kvd_register(struct mlxsw_core *mlxsw_core)
 					 &kvd_size_params);
 }
 
+static int mlxsw_sp_resources_span_register(struct mlxsw_core *mlxsw_core)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_core);
+	struct devlink_resource_size_params span_size_params;
+	u32 max_span;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SPAN))
+		return -EIO;
+
+	max_span = MLXSW_CORE_RES_GET(mlxsw_core, MAX_SPAN);
+	devlink_resource_size_params_init(&span_size_params, max_span, max_span,
+					  1, DEVLINK_RESOURCE_UNIT_ENTRY);
+
+	return devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_SPAN,
+					 max_span, MLXSW_SP_RESOURCE_SPAN,
+					 DEVLINK_RESOURCE_ID_PARENT_TOP,
+					 &span_size_params);
+}
+
 static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
 {
-	return mlxsw_sp1_resources_kvd_register(mlxsw_core);
+	int err;
+
+	err = mlxsw_sp1_resources_kvd_register(mlxsw_core);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_resources_span_register(mlxsw_core);
+	if (err)
+		goto err_resources_span_register;
+
+	return 0;
+
+err_resources_span_register:
+	devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL);
+	return err;
 }
 
 static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
 {
-	return mlxsw_sp2_resources_kvd_register(mlxsw_core);
+	int err;
+
+	err = mlxsw_sp2_resources_kvd_register(mlxsw_core);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_resources_span_register(mlxsw_core);
+	if (err)
+		goto err_resources_span_register;
+
+	return 0;
+
+err_resources_span_register:
+	devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL);
+	return err;
 }
 
 static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
@@ -6565,3 +6772,4 @@ MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table);
 MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table);
 MODULE_DEVICE_TABLE(pci, mlxsw_sp3_pci_id_table);
 MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);
+MODULE_FIRMWARE(MLXSW_SP2_FW_FILENAME);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index b2a0028b1694..347bec9d1ecf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -14,6 +14,7 @@
 #include <linux/dcbnl.h>
 #include <linux/in6.h>
 #include <linux/notifier.h>
+#include <linux/net_namespace.h>
 #include <net/psample.h>
 #include <net/pkt_cls.h>
 #include <net/red.h>
@@ -31,8 +32,6 @@
 
 #define MLXSW_SP_MID_MAX 7000
 
-#define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4
-
 #define MLXSW_SP_PORT_BASE_SPEED_25G 25000 /* Mb/s */
 #define MLXSW_SP_PORT_BASE_SPEED_50G 50000 /* Mb/s */
 
@@ -47,6 +46,8 @@
 #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
 #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
 
+#define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents"
+
 enum mlxsw_sp_resource_id {
 	MLXSW_SP_RESOURCE_KVD = 1,
 	MLXSW_SP_RESOURCE_KVD_LINEAR,
@@ -55,6 +56,7 @@ enum mlxsw_sp_resource_id {
 	MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
 	MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
 	MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+	MLXSW_SP_RESOURCE_SPAN,
 };
 
 struct mlxsw_sp_port;
@@ -139,6 +141,12 @@ struct mlxsw_sp_port_type_speed_ops;
 struct mlxsw_sp_ptp_state;
 struct mlxsw_sp_ptp_ops;
 
+struct mlxsw_sp_port_mapping {
+	u8 module;
+	u8 width;
+	u8 lane;
+};
+
 struct mlxsw_sp {
 	struct mlxsw_sp_port **ports;
 	struct mlxsw_core *core;
@@ -146,7 +154,7 @@ struct mlxsw_sp {
 	unsigned char base_mac[ETH_ALEN];
 	const unsigned char *mac_mask;
 	struct mlxsw_sp_upper *lags;
-	int *port_to_module;
+	struct mlxsw_sp_port_mapping **port_mapping;
 	struct mlxsw_sp_sb *sb;
 	struct mlxsw_sp_bridge *bridge;
 	struct mlxsw_sp_router *router;
@@ -255,11 +263,11 @@ struct mlxsw_sp_port {
 		struct ieee_pfc *pfc;
 		enum mlxsw_reg_qpts_trust_state trust_state;
 	} dcb;
-	struct {
-		u8 module;
-		u8 width;
-		u8 lane;
-	} mapping;
+	struct mlxsw_sp_port_mapping mapping; /* mapping is constant during the
+					       * mlxsw_sp_port lifetime, however
+					       * the same localport can have
+					       * different mapping.
+					       */
 	/* TC handles */
 	struct list_head mall_tc_list;
 	struct {
@@ -283,6 +291,7 @@ struct mlxsw_sp_port {
 		u16 egr_types;
 		struct mlxsw_sp_ptp_port_stats stats;
 	} ptp;
+	u8 split_base_local_port;
 };
 
 struct mlxsw_sp_port_type_speed_ops {
@@ -524,7 +533,8 @@ union mlxsw_sp_l3addr {
 	struct in6_addr addr6;
 };
 
-int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
+			 struct netlink_ext_ack *extack);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
 					 unsigned long event, void *ptr);
@@ -982,4 +992,9 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core,
 int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core,
 			     const struct devlink_trap_group *group);
 
+static inline struct net *mlxsw_sp_net(struct mlxsw_sp *mlxsw_sp)
+{
+	return mlxsw_core_net(mlxsw_sp->core);
+}
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index b9eeae37a4dc..968f0902e4fe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -35,6 +35,7 @@ struct mlxsw_sp_sb_cm {
 };
 
 #define MLXSW_SP_SB_INFI -1U
+#define MLXSW_SP_SB_REST -2U
 
 struct mlxsw_sp_sb_pm {
 	u32 min_buff;
@@ -421,19 +422,16 @@ static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp)
 		.freeze_size = _freeze_size,				\
 	}
 
-#define MLXSW_SP1_SB_PR_INGRESS_SIZE	12440000
-#define MLXSW_SP1_SB_PR_EGRESS_SIZE	13232000
 #define MLXSW_SP1_SB_PR_CPU_SIZE	(256 * 1000)
 
 /* Order according to mlxsw_sp1_sb_pool_dess */
 static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = {
-	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-		       MLXSW_SP1_SB_PR_INGRESS_SIZE),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
-	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
-			   MLXSW_SP1_SB_PR_EGRESS_SIZE, true, false),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST,
+			   true, false),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
@@ -445,19 +443,16 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = {
 			   MLXSW_SP1_SB_PR_CPU_SIZE, true, false),
 };
 
-#define MLXSW_SP2_SB_PR_INGRESS_SIZE	35297568
-#define MLXSW_SP2_SB_PR_EGRESS_SIZE	35297568
 #define MLXSW_SP2_SB_PR_CPU_SIZE	(256 * 1000)
 
 /* Order according to mlxsw_sp2_sb_pool_dess */
 static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = {
-	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
-		       MLXSW_SP2_SB_PR_INGRESS_SIZE),
+	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
-	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC,
-			   MLXSW_SP2_SB_PR_EGRESS_SIZE, true, false),
+	MLXSW_SP_SB_PR_EXT(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_REST,
+			   true, false),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
 	MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
@@ -471,11 +466,33 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = {
 
 static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
 				const struct mlxsw_sp_sb_pr *prs,
+				const struct mlxsw_sp_sb_pool_des *pool_dess,
 				size_t prs_len)
 {
+	/* Round down, unlike mlxsw_sp_bytes_cells(). */
+	u32 sb_cells = div_u64(mlxsw_sp->sb->sb_size, mlxsw_sp->sb->cell_size);
+	u32 rest_cells[2] = {sb_cells, sb_cells};
 	int i;
 	int err;
 
+	/* Calculate how much space to give to the "REST" pools in either
+	 * direction.
+	 */
+	for (i = 0; i < prs_len; i++) {
+		enum mlxsw_reg_sbxx_dir dir = pool_dess[i].dir;
+		u32 size = prs[i].size;
+		u32 size_cells;
+
+		if (size == MLXSW_SP_SB_INFI || size == MLXSW_SP_SB_REST)
+			continue;
+
+		size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size);
+		if (WARN_ON_ONCE(size_cells > rest_cells[dir]))
+			continue;
+
+		rest_cells[dir] -= size_cells;
+	}
+
 	for (i = 0; i < prs_len; i++) {
 		u32 size = prs[i].size;
 		u32 size_cells;
@@ -483,6 +500,10 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
 		if (size == MLXSW_SP_SB_INFI) {
 			err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
 						   0, true);
+		} else if (size == MLXSW_SP_SB_REST) {
+			size_cells = rest_cells[pool_dess[i].dir];
+			err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
+						   size_cells, false);
 		} else {
 			size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size);
 			err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
@@ -904,7 +925,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE))
 		return -EIO;
 
-	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE))
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, GUARANTEED_SHARED_BUFFER))
 		return -EIO;
 
 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_HEADROOM_SIZE))
@@ -915,7 +936,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
 		return -ENOMEM;
 	mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE);
 	mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
-						   MAX_BUFFER_SIZE);
+						   GUARANTEED_SHARED_BUFFER);
 	max_headroom_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
 					       MAX_HEADROOM_SIZE);
 	/* Round down, because this limit must not be overstepped. */
@@ -926,6 +947,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		goto err_sb_ports_init;
 	err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp->sb_vals->prs,
+				   mlxsw_sp->sb_vals->pool_dess,
 				   mlxsw_sp->sb_vals->pool_count);
 	if (err)
 		goto err_sb_prs_init;
@@ -1013,7 +1035,8 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
 	mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
 	pr = &mlxsw_sp->sb_vals->prs[pool_index];
 
-	if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) {
+	if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core,
+				      GUARANTEED_SHARED_BUFFER)) {
 		NL_SET_ERR_MSG_MOD(extack, "Exceeded shared buffer size");
 		return -EINVAL;
 	}
@@ -1021,12 +1044,12 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
 	if (pr->freeze_mode && pr->mode != mode) {
 		NL_SET_ERR_MSG_MOD(extack, "Changing this pool's threshold type is forbidden");
 		return -EINVAL;
-	};
+	}
 
 	if (pr->freeze_size && pr->size != size) {
 		NL_SET_ERR_MSG_MOD(extack, "Changing this pool's size is forbidden");
 		return -EINVAL;
-	};
+	}
 
 	return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode,
 				    pool_size, false);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
index 17f334b46c40..2153bcc4b585 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
@@ -870,7 +870,7 @@ void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
 		    mlxsw_sp_fid_vni(fid, &vni)))
 		goto out;
 
-	nve_dev = dev_get_by_index(&init_net, nve_ifindex);
+	nve_dev = dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex);
 	if (!nve_dev)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index bdf53cf350f6..68cc6737d45c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -305,7 +305,8 @@ mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
 			p->max);
 		return -EINVAL;
 	}
-	if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) {
+	if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core,
+					GUARANTEED_SHARED_BUFFER)) {
 		dev_err(mlxsw_sp->bus_info->dev,
 			"spectrum: RED: max value %u is too big\n", p->max);
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index a330b369e899..1aa436054490 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -16,6 +16,7 @@
 #include <linux/if_macvlan.h>
 #include <linux/refcount.h>
 #include <linux/jhash.h>
+#include <linux/net_namespace.h>
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
@@ -76,6 +77,7 @@ struct mlxsw_sp_router {
 	struct notifier_block inet6addr_nb;
 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
+	u32 adj_discard_index;
 };
 
 struct mlxsw_sp_rif {
@@ -366,6 +368,7 @@ enum mlxsw_sp_fib_entry_type {
 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
 	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
+	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,
 
 	/* This is a special case of local delivery, where a packet should be
 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
@@ -2551,14 +2554,14 @@ static int mlxsw_sp_router_schedule_work(struct net *net,
 	struct mlxsw_sp_netevent_work *net_work;
 	struct mlxsw_sp_router *router;
 
-	if (!net_eq(net, &init_net))
+	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
+	if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
 		return NOTIFY_DONE;
 
 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
 	if (!net_work)
 		return NOTIFY_BAD;
 
-	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
 	INIT_WORK(&net_work->work, cb);
 	net_work->mlxsw_sp = router->mlxsw_sp;
 	mlxsw_core_schedule_work(&net_work->work);
@@ -4195,15 +4198,31 @@ mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
 	}
 }
 
+static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
+{
+	u32 adj_discard_index = mlxsw_sp->router->adj_discard_index;
+	enum mlxsw_reg_ratr_trap_action trap_action;
+	char ratr_pl[MLXSW_REG_RATR_LEN];
+
+	trap_action = MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS;
+	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
+			    MLXSW_REG_RATR_TYPE_ETHERNET, adj_discard_index,
+			    rif_index);
+	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
 					struct mlxsw_sp_fib_entry *fib_entry,
 					enum mlxsw_reg_ralue_op op)
 {
+	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
 	char ralue_pl[MLXSW_REG_RALUE_LEN];
 	enum mlxsw_reg_ralue_trap_action trap_action;
 	u16 trap_id = 0;
 	u32 adjacency_index = 0;
 	u16 ecmp_size = 0;
+	int err;
 
 	/* In case the nexthop group adjacency index is valid, use it
 	 * with provided ECMP size. Otherwise, setup trap and pass
@@ -4213,6 +4232,15 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
 		adjacency_index = fib_entry->nh_group->adj_index;
 		ecmp_size = fib_entry->nh_group->ecmp_size;
+	} else if (!nh_group->adj_index_valid && nh_group->count &&
+		   nh_group->nh_rif) {
+		err = mlxsw_sp_adj_discard_write(mlxsw_sp,
+						 nh_group->nh_rif->rif_index);
+		if (err)
+			return err;
+		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
+		adjacency_index = mlxsw_sp->router->adj_discard_index;
+		ecmp_size = 1;
 	} else {
 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
@@ -4273,6 +4301,23 @@ static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int
+mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_fib_entry *fib_entry,
+				  enum mlxsw_reg_ralue_op op)
+{
+	enum mlxsw_reg_ralue_trap_action trap_action;
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
+	u16 trap_id;
+
+	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
+	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
+
+	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int
 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_fib_entry *fib_entry,
 				 enum mlxsw_reg_ralue_op op)
@@ -4313,6 +4358,9 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
 		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
+	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
+		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
+							 op);
 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
 							fib_entry, op);
@@ -4390,7 +4438,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 		 * can do so with a lower priority than packets directed
 		 * at the host, so use action type local instead of trap.
 		 */
-		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
 		return 0;
 	case RTN_UNICAST:
 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
@@ -5350,7 +5398,7 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 	else if (rt->fib6_type == RTN_BLACKHOLE)
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
 	else if (rt->fib6_flags & RTF_REJECT)
-		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
 	else
@@ -6019,12 +6067,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
 		fib_info_put(fib_work->fen_info.fi);
 		break;
-	case FIB_EVENT_RULE_ADD:
-		/* if we get here, a rule was added that we do not support.
-		 * just do the fib_abort
-		 */
-		mlxsw_sp_router_fib_abort(mlxsw_sp);
-		break;
 	case FIB_EVENT_NH_ADD: /* fall through */
 	case FIB_EVENT_NH_DEL:
 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
@@ -6065,12 +6107,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
 					 fib_work->fib6_work.nrt6);
 		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
 		break;
-	case FIB_EVENT_RULE_ADD:
-		/* if we get here, a rule was added that we do not support.
-		 * just do the fib_abort
-		 */
-		mlxsw_sp_router_fib_abort(mlxsw_sp);
-		break;
 	}
 	rtnl_unlock();
 	kfree(fib_work);
@@ -6112,12 +6148,6 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
 					      &fib_work->ven_info);
 		dev_put(fib_work->ven_info.dev);
 		break;
-	case FIB_EVENT_RULE_ADD:
-		/* if we get here, a rule was added that we do not support.
-		 * just do the fib_abort
-		 */
-		mlxsw_sp_router_fib_abort(mlxsw_sp);
-		break;
 	}
 	rtnl_unlock();
 	kfree(fib_work);
@@ -6213,7 +6243,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
 	rule = fr_info->rule;
 
 	/* Rule only affects locally generated traffic */
-	if (rule->iifindex == info->net->loopback_dev->ifindex)
+	if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
 		return 0;
 
 	switch (info->family) {
@@ -6250,8 +6280,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 	struct mlxsw_sp_router *router;
 	int err;
 
-	if (!net_eq(info->net, &init_net) ||
-	    (info->family != AF_INET && info->family != AF_INET6 &&
+	if ((info->family != AF_INET && info->family != AF_INET6 &&
 	     info->family != RTNL_FAMILY_IPMR &&
 	     info->family != RTNL_FAMILY_IP6MR))
 		return NOTIFY_DONE;
@@ -6263,9 +6292,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 	case FIB_EVENT_RULE_DEL:
 		err = mlxsw_sp_router_fib_rule_event(event, info,
 						     router->mlxsw_sp);
-		if (!err || info->extack)
-			return notifier_from_errno(err);
-		break;
+		return notifier_from_errno(err);
 	case FIB_EVENT_ENTRY_ADD:
 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
 	case FIB_EVENT_ENTRY_APPEND:  /* fall through */
@@ -7974,9 +8001,10 @@ static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
 }
 
-static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
+static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
 {
-	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
+	struct net *net = mlxsw_sp_net(mlxsw_sp);
+	bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy;
 
 	mlxsw_sp_mp_hash_header_set(recr2_pl,
 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
@@ -7991,9 +8019,9 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
 }
 
-static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
+static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
 {
-	bool only_l3 = !ip6_multipath_hash_policy(&init_net);
+	bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp));
 
 	mlxsw_sp_mp_hash_header_set(recr2_pl,
 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
@@ -8021,8 +8049,8 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
 
 	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
 	mlxsw_reg_recr2_pack(recr2_pl, seed);
-	mlxsw_sp_mp4_hash_init(recr2_pl);
-	mlxsw_sp_mp6_hash_init(recr2_pl);
+	mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl);
+	mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl);
 
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
 }
@@ -8053,7 +8081,8 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
 
 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
-	bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
+	struct net *net = mlxsw_sp_net(mlxsw_sp);
+	bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
 	u64 max_rifs;
 	int err;
@@ -8079,7 +8108,8 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
 }
 
-int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
+			 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_router *router;
 	int err;
@@ -8140,6 +8170,11 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		goto err_neigh_init;
 
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+				  &router->adj_discard_index);
+	if (err)
+		goto err_adj_discard_index_alloc;
+
 	mlxsw_sp->router->netevent_nb.notifier_call =
 		mlxsw_sp_router_netevent_event;
 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
@@ -8155,8 +8190,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 		goto err_dscp_init;
 
 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
-	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
-				    mlxsw_sp_router_fib_dump_flush);
+	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
+				    &mlxsw_sp->router->fib_nb,
+				    mlxsw_sp_router_fib_dump_flush, extack);
 	if (err)
 		goto err_register_fib_notifier;
 
@@ -8167,6 +8203,9 @@ err_dscp_init:
 err_mp_hash_init:
 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
 err_register_netevent_notifier:
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+			   router->adj_discard_index);
+err_adj_discard_index_alloc:
 	mlxsw_sp_neigh_fini(mlxsw_sp);
 err_neigh_init:
 	mlxsw_sp_vrs_fini(mlxsw_sp);
@@ -8195,8 +8234,11 @@ err_register_inetaddr_notifier:
 
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 {
-	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
+	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
+				&mlxsw_sp->router->fib_nb);
 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+			   mlxsw_sp->router->adj_discard_index);
 	mlxsw_sp_neigh_fini(mlxsw_sp);
 	mlxsw_sp_vrs_fini(mlxsw_sp);
 	mlxsw_sp_mr_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 560a60e522f9..200d324e6d99 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -14,8 +14,23 @@
 #include "spectrum_span.h"
 #include "spectrum_switchdev.h"
 
+static u64 mlxsw_sp_span_occ_get(void *priv)
+{
+	const struct mlxsw_sp *mlxsw_sp = priv;
+	u64 occ = 0;
+	int i;
+
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+		if (mlxsw_sp->span.entries[i].ref_count)
+			occ++;
+	}
+
+	return occ;
+}
+
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 {
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
 	int i;
 
 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
@@ -36,13 +51,19 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 		curr->id = i;
 	}
 
+	devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_SPAN,
+					  mlxsw_sp_span_occ_get, mlxsw_sp);
+
 	return 0;
 }
 
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
 {
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
 	int i;
 
+	devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_SPAN);
+
 	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
 		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 5ecb45118400..a3af171c6358 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2591,7 +2591,7 @@ __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		return err;
 
-	dev = __dev_get_by_index(&init_net, nve_ifindex);
+	dev = __dev_get_by_index(mlxsw_sp_net(mlxsw_sp), nve_ifindex);
 	if (!dev)
 		return -EINVAL;
 	*nve_dev = dev;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 7c03b661ae7e..e0d7c49ffae0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -13,16 +13,27 @@
 
 static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
 				      void *priv);
+static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port,
+					   void *trap_ctx);
 
 #define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
 	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				      \
 			     DEVLINK_TRAP_GROUP_GENERIC(_group_id),	      \
 			     MLXSW_SP_TRAP_METADATA)
 
+#define MLXSW_SP_TRAP_EXCEPTION(_id, _group_id)		      \
+	DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id,			      \
+			     DEVLINK_TRAP_GROUP_GENERIC(_group_id),	      \
+			     MLXSW_SP_TRAP_METADATA)
+
 #define MLXSW_SP_RXL_DISCARD(_id, _group_id)				      \
 	MLXSW_RXL(mlxsw_sp_rx_drop_listener, DISCARD_##_id, SET_FW_DEFAULT,   \
 		  false, SP_##_group_id, DISCARD)
 
+#define MLXSW_SP_RXL_EXCEPTION(_id, _group_id, _action)			      \
+	MLXSW_RXL(mlxsw_sp_rx_exception_listener, _id,			      \
+		   _action, false, SP_##_group_id, DISCARD)
+
 static struct devlink_trap mlxsw_sp_traps_arr[] = {
 	MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS),
 	MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
@@ -30,6 +41,23 @@ static struct devlink_trap mlxsw_sp_traps_arr[] = {
 	MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
 	MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
 	MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
+	MLXSW_SP_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(NON_IP_PACKET, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(UC_DIP_MC_DMAC, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(DIP_LB, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(SIP_MC, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(SIP_LB, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(CORRUPTED_IP_HDR, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(IPV4_SIP_BC, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_RESERVED_SCOPE, L3_DROPS),
+	MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DROPS),
+	MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_DROPS),
+	MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
+	MLXSW_SP_TRAP_EXCEPTION(RPF, L3_DROPS),
+	MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_DROPS),
+	MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS),
+	MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS, L3_DROPS),
+	MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS, L3_DROPS),
 };
 
 static struct mlxsw_listener mlxsw_sp_listeners_arr[] = {
@@ -40,6 +68,28 @@ static struct mlxsw_listener mlxsw_sp_listeners_arr[] = {
 	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS),
 	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, L2_DISCARDS),
 	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ROUTER2, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_ROUTER_NON_IP_PACKET, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_ROUTER_UC_DIP_MC_DMAC, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_ROUTER_DIP_LB, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_MC, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_LB, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_ROUTER_CORRUPTED_IP_HDR, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE, L3_DISCARDS),
+	MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DISCARDS),
+	MLXSW_SP_RXL_EXCEPTION(MTUERROR, ROUTER_EXP, TRAP_TO_CPU),
+	MLXSW_SP_RXL_EXCEPTION(TTLERROR, ROUTER_EXP, TRAP_TO_CPU),
+	MLXSW_SP_RXL_EXCEPTION(RPF, RPF, TRAP_TO_CPU),
+	MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, REMOTE_ROUTE, TRAP_TO_CPU),
+	MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, HOST_MISS, TRAP_TO_CPU),
+	MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, HOST_MISS, TRAP_TO_CPU),
+	MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, REMOTE_ROUTE,
+			       TRAP_EXCEPTION_TO_CPU),
+	MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, ROUTER_EXP,
+			       TRAP_EXCEPTION_TO_CPU),
+	MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, ROUTER_EXP,
+			       TRAP_EXCEPTION_TO_CPU),
 };
 
 /* Mapping between hardware trap and devlink trap. Multiple hardware traps can
@@ -54,6 +104,25 @@ static u16 mlxsw_sp_listener_devlink_map[] = {
 	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
 	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
 	DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_ROUTE,
+	DEVLINK_TRAP_GENERIC_ID_NON_IP_PACKET,
+	DEVLINK_TRAP_GENERIC_ID_UC_DIP_MC_DMAC,
+	DEVLINK_TRAP_GENERIC_ID_DIP_LB,
+	DEVLINK_TRAP_GENERIC_ID_SIP_MC,
+	DEVLINK_TRAP_GENERIC_ID_SIP_LB,
+	DEVLINK_TRAP_GENERIC_ID_CORRUPTED_IP_HDR,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_SIP_BC,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_RESERVED_SCOPE,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE,
+	DEVLINK_TRAP_GENERIC_ID_MTU_ERROR,
+	DEVLINK_TRAP_GENERIC_ID_TTL_ERROR,
+	DEVLINK_TRAP_GENERIC_ID_RPF,
+	DEVLINK_TRAP_GENERIC_ID_REJECT_ROUTE,
+	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
+	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
+	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_LPM_UNICAST_MISS,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_LPM_UNICAST_MISS,
 };
 
 static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
@@ -104,6 +173,30 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
 	consume_skb(skb);
 }
 
+static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port,
+					   void *trap_ctx)
+{
+	struct devlink_port *in_devlink_port;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp;
+	struct devlink *devlink;
+
+	mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+
+	if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port))
+		return;
+
+	devlink = priv_to_devlink(mlxsw_sp->core);
+	in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
+							   local_port);
+	skb_push(skb, ETH_HLEN);
+	devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port);
+	skb_pull(skb, ETH_HLEN);
+	skb->offload_fwd_mark = 1;
+	netif_receive_skb(skb);
+}
+
 int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
@@ -211,6 +304,7 @@ mlxsw_sp_trap_group_policer_init(struct mlxsw_sp *mlxsw_sp,
 	u32 rate;
 
 	switch (group->id) {
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS:/* fall through */
 	case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS:
 		policer_id = MLXSW_SP_DISCARD_POLICER_ID;
 		ir_units = MLXSW_REG_QPCR_IR_UNITS_M;
@@ -242,6 +336,12 @@ __mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp,
 		priority = 0;
 		tc = 1;
 		break;
+	case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS:
+		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS;
+		policer_id = MLXSW_SP_DISCARD_POLICER_ID;
+		priority = 0;
+		tc = 1;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
index 0d9356b3f65d..4ff1e623aa76 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
@@ -446,7 +446,8 @@ static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
 }
 
 static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core,
-			  const struct mlxsw_bus_info *mlxsw_bus_info)
+			  const struct mlxsw_bus_info *mlxsw_bus_info,
+			  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
 	int err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 1c14c051ee52..de6cb22f68b1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -992,6 +992,7 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
 	if (!dev)
 		return -ENOMEM;
 	SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev);
+	dev_net_set(dev, mlxsw_core_net(mlxsw_sx->core));
 	mlxsw_sx_port = netdev_priv(dev);
 	mlxsw_sx_port->dev = dev;
 	mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
@@ -1563,7 +1564,8 @@ static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
 }
 
 static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
-			 const struct mlxsw_bus_info *mlxsw_bus_info)
+			 const struct mlxsw_bus_info *mlxsw_bus_info,
+			 struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
 	int err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 7618f084cae9..0c1c142bb6b0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -49,6 +49,7 @@ enum {
 	MLXSW_TRAP_ID_IPV6_DHCP = 0x69,
 	MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
+	MLXSW_TRAP_ID_RTR_INGRESS1 = 0x71,
 	MLXSW_TRAP_ID_IPV6_PIM = 0x79,
 	MLXSW_TRAP_ID_IPV6_VRRP = 0x7A,
 	MLXSW_TRAP_ID_IPV4_BGP = 0x88,
@@ -66,6 +67,8 @@ enum {
 	MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
+	MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130,
+	MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131,
 	MLXSW_TRAP_ID_DISCARD_ING_PACKET_SMAC_MC = 0x140,
 	MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VTAG_ALLOW = 0x148,
 	MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VLAN = 0x149,
@@ -73,6 +76,18 @@ enum {
 	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_UC = 0x150,
 	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_MC_NULL = 0x151,
 	MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_LB = 0x152,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_NON_IP_PACKET = 0x160,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_UC_DIP_MC_DMAC = 0x161,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LB = 0x162,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_MC = 0x163,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LB = 0x165,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_CORRUPTED_IP_HDR = 0x167,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A,
+	MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B,
+	MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B,
+	MLXSW_TRAP_ID_DISCARD_ROUTER_LPM6 = 0x17C,
+	MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_RESERVED_SCOPE = 0x1B0,
+	MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE = 0x1B1,
 	MLXSW_TRAP_ID_ACL0 = 0x1C0,
 	/* Multicast trap used for routes with trap action */
 	MLXSW_TRAP_ID_ACL1 = 0x1C1,
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index aac115136720..723724bdc139 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -364,12 +364,12 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 
 	for_each_available_child_of_node(ports, portnp) {
 		struct device_node *phy_node;
+		phy_interface_t phy_mode;
 		struct phy_device *phy;
 		struct resource *res;
 		struct phy *serdes;
 		void __iomem *regs;
 		char res_name[8];
-		int phy_mode;
 		u32 port;
 
 		if (of_property_read_u32(portnp, "reg", &port))
@@ -398,11 +398,11 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 			goto out_put_ports;
 		}
 
-		phy_mode = of_get_phy_mode(portnp);
-		if (phy_mode < 0)
-			ocelot->ports[port]->phy_mode = PHY_INTERFACE_MODE_NA;
-		else
-			ocelot->ports[port]->phy_mode = phy_mode;
+		err = of_get_phy_mode(portnp, &phy_mode);
+		if (err && err != -ENODEV)
+			goto out_put_ports;
+
+		ocelot->ports[port]->phy_mode = phy_mode;
 
 		switch (ocelot->ports[port]->phy_mode) {
 		case PHY_INTERFACE_MODE_NA:
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 5afcb3c4c2ef..c80bb83c8ac9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -3952,7 +3952,7 @@ static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
 {
 	struct nfp_insn_meta *meta1, *meta2;
-	const s32 exp_mask[] = {
+	static const s32 exp_mask[] = {
 		[BPF_B] = 0x000000ffU,
 		[BPF_H] = 0x0000ffffU,
 		[BPF_W] = 0xffffffffU,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 61aabffc8888..bcdcd6de7dea 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -872,7 +872,8 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
 
 		/* jump forward, a TX may have gotten lost, need to sync TX */
 		if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4)
-			tls_offload_tx_resync_request(nskb->sk);
+			tls_offload_tx_resync_request(nskb->sk, seq,
+						      ntls->next_seq);
 
 		*nr_frags = 0;
 		return nskb;
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 2761f3a3ae50..49c7987c2abd 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1346,10 +1346,9 @@ static int nixge_probe(struct platform_device *pdev)
 		}
 	}
 
-	priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-	if ((int)priv->phy_mode < 0) {
+	err = of_get_phy_mode(pdev->dev.of_node, &priv->phy_mode);
+	if (err) {
 		netdev_err(ndev, "not find \"phy-mode\" property\n");
-		err = -EINVAL;
 		goto unregister_mdio;
 	}
 
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 05d2b478c99b..6b54cb3b681d 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -2225,6 +2225,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct nv_skb_map *prev_tx_ctx;
 	struct nv_skb_map *tmp_tx_ctx = NULL, *start_tx_ctx = NULL;
 	unsigned long flags;
+	netdev_tx_t ret = NETDEV_TX_OK;
 
 	/* add fragments to entries count */
 	for (i = 0; i < fragments; i++) {
@@ -2240,7 +2241,12 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_stop_queue(dev);
 		np->tx_stop = 1;
 		spin_unlock_irqrestore(&np->lock, flags);
-		return NETDEV_TX_BUSY;
+
+		/* When normal packets and/or xmit_more packets fill up
+		 * tx_desc, it is necessary to trigger NIC tx reg.
+		 */
+		ret = NETDEV_TX_BUSY;
+		goto txkick;
 	}
 	spin_unlock_irqrestore(&np->lock, flags);
 
@@ -2259,7 +2265,10 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			u64_stats_update_begin(&np->swstats_tx_syncp);
 			nv_txrx_stats_inc(stat_tx_dropped);
 			u64_stats_update_end(&np->swstats_tx_syncp);
-			return NETDEV_TX_OK;
+
+			ret = NETDEV_TX_OK;
+
+			goto dma_error;
 		}
 		np->put_tx_ctx->dma_len = bcnt;
 		np->put_tx_ctx->dma_single = 1;
@@ -2305,7 +2314,10 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 				u64_stats_update_begin(&np->swstats_tx_syncp);
 				nv_txrx_stats_inc(stat_tx_dropped);
 				u64_stats_update_end(&np->swstats_tx_syncp);
-				return NETDEV_TX_OK;
+
+				ret = NETDEV_TX_OK;
+
+				goto dma_error;
 			}
 
 			np->put_tx_ctx->dma_len = bcnt;
@@ -2357,8 +2369,15 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	spin_unlock_irqrestore(&np->lock, flags);
 
-	writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
-	return NETDEV_TX_OK;
+txkick:
+	if (netif_queue_stopped(dev) || !netdev_xmit_more()) {
+		u32 txrxctl_kick;
+dma_error:
+		txrxctl_kick = NVREG_TXRXCTL_KICK | np->txrxctl_bits;
+		writel(txrxctl_kick, get_hwbase(dev) + NvRegTxRxControl);
+	}
+
+	return ret;
 }
 
 static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
@@ -2381,6 +2400,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 	struct nv_skb_map *start_tx_ctx = NULL;
 	struct nv_skb_map *tmp_tx_ctx = NULL;
 	unsigned long flags;
+	netdev_tx_t ret = NETDEV_TX_OK;
 
 	/* add fragments to entries count */
 	for (i = 0; i < fragments; i++) {
@@ -2396,7 +2416,13 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 		netif_stop_queue(dev);
 		np->tx_stop = 1;
 		spin_unlock_irqrestore(&np->lock, flags);
-		return NETDEV_TX_BUSY;
+
+		/* When normal packets and/or xmit_more packets fill up
+		 * tx_desc, it is necessary to trigger NIC tx reg.
+		 */
+		ret = NETDEV_TX_BUSY;
+
+		goto txkick;
 	}
 	spin_unlock_irqrestore(&np->lock, flags);
 
@@ -2416,7 +2442,10 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 			u64_stats_update_begin(&np->swstats_tx_syncp);
 			nv_txrx_stats_inc(stat_tx_dropped);
 			u64_stats_update_end(&np->swstats_tx_syncp);
-			return NETDEV_TX_OK;
+
+			ret = NETDEV_TX_OK;
+
+			goto dma_error;
 		}
 		np->put_tx_ctx->dma_len = bcnt;
 		np->put_tx_ctx->dma_single = 1;
@@ -2463,7 +2492,10 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 				u64_stats_update_begin(&np->swstats_tx_syncp);
 				nv_txrx_stats_inc(stat_tx_dropped);
 				u64_stats_update_end(&np->swstats_tx_syncp);
-				return NETDEV_TX_OK;
+
+				ret = NETDEV_TX_OK;
+
+				goto dma_error;
 			}
 			np->put_tx_ctx->dma_len = bcnt;
 			np->put_tx_ctx->dma_single = 0;
@@ -2542,8 +2574,15 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 
 	spin_unlock_irqrestore(&np->lock, flags);
 
-	writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
-	return NETDEV_TX_OK;
+txkick:
+	if (netif_queue_stopped(dev) || !netdev_xmit_more()) {
+		u32 txrxctl_kick;
+dma_error:
+		txrxctl_kick = NVREG_TXRXCTL_KICK | np->txrxctl_bits;
+		writel(txrxctl_kick, get_hwbase(dev) + NvRegTxRxControl);
+	}
+
+	return ret;
 }
 
 static inline void nv_tx_flip_ownership(struct net_device *dev)
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 544012a67221..ebb81d6d4ca1 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -15,6 +15,7 @@
 #include <linux/etherdevice.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
@@ -391,6 +392,7 @@ struct rx_status_t {
 struct netdata_local {
 	struct platform_device	*pdev;
 	struct net_device	*ndev;
+	struct device_node	*phy_node;
 	spinlock_t		lock;
 	void __iomem		*net_base;
 	u32			msg_enable;
@@ -749,22 +751,26 @@ static void lpc_handle_link_change(struct net_device *ndev)
 static int lpc_mii_probe(struct net_device *ndev)
 {
 	struct netdata_local *pldat = netdev_priv(ndev);
-	struct phy_device *phydev = phy_find_first(pldat->mii_bus);
-
-	if (!phydev) {
-		netdev_err(ndev, "no PHY found\n");
-		return -ENODEV;
-	}
+	struct phy_device *phydev;
 
 	/* Attach to the PHY */
 	if (lpc_phy_interface_mode(&pldat->pdev->dev) == PHY_INTERFACE_MODE_MII)
 		netdev_info(ndev, "using MII interface\n");
 	else
 		netdev_info(ndev, "using RMII interface\n");
+
+	if (pldat->phy_node)
+		phydev =  of_phy_find_device(pldat->phy_node);
+	else
+		phydev = phy_find_first(pldat->mii_bus);
+	if (!phydev) {
+		netdev_err(ndev, "no PHY found\n");
+		return -ENODEV;
+	}
+
 	phydev = phy_connect(ndev, phydev_name(phydev),
 			     &lpc_handle_link_change,
 			     lpc_phy_interface_mode(&pldat->pdev->dev));
-
 	if (IS_ERR(phydev)) {
 		netdev_err(ndev, "Could not attach to PHY\n");
 		return PTR_ERR(phydev);
@@ -783,6 +789,7 @@ static int lpc_mii_probe(struct net_device *ndev)
 
 static int lpc_mii_init(struct netdata_local *pldat)
 {
+	struct device_node *node;
 	int err = -ENXIO;
 
 	pldat->mii_bus = mdiobus_alloc();
@@ -812,7 +819,10 @@ static int lpc_mii_init(struct netdata_local *pldat)
 
 	platform_set_drvdata(pldat->pdev, pldat->mii_bus);
 
-	if (mdiobus_register(pldat->mii_bus))
+	node = of_get_child_by_name(pldat->pdev->dev.of_node, "mdio");
+	err = of_mdiobus_register(pldat->mii_bus, node);
+	of_node_put(node);
+	if (err)
 		goto err_out_unregister_bus;
 
 	if (lpc_mii_probe(pldat->ndev) != 0)
@@ -1345,6 +1355,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
 	netdev_dbg(ndev, "DMA buffer V address :0x%p\n",
 			pldat->dma_buff_base_v);
 
+	pldat->phy_node = of_parse_phandle(np, "phy-handle", 0);
+
 	/* Get MAC address from current HW setting (POR state is all zeros) */
 	__lpc_get_mac(pldat, ndev->dev_addr);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 7a7060677f15..98e102af7756 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -12,7 +12,7 @@ struct ionic_lif;
 
 #define IONIC_DRV_NAME		"ionic"
 #define IONIC_DRV_DESCRIPTION	"Pensando Ethernet NIC Driver"
-#define IONIC_DRV_VERSION	"0.15.0-k"
+#define IONIC_DRV_VERSION	"0.18.0-k"
 
 #define PCI_VENDOR_ID_PENSANDO			0x1dd8
 
@@ -46,6 +46,8 @@ struct ionic {
 	DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
 	struct work_struct nb_work;
 	struct notifier_block nb;
+	struct timer_list watchdog_timer;
+	int watchdog_period;
 };
 
 struct ionic_admin_ctx {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index d168a6435322..5f9d2ec70446 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -11,6 +11,16 @@
 #include "ionic_dev.h"
 #include "ionic_lif.h"
 
+static void ionic_watchdog_cb(struct timer_list *t)
+{
+	struct ionic *ionic = from_timer(ionic, t, watchdog_timer);
+
+	mod_timer(&ionic->watchdog_timer,
+		  round_jiffies(jiffies + ionic->watchdog_period));
+
+	ionic_heartbeat_check(ionic);
+}
+
 void ionic_init_devinfo(struct ionic *ionic)
 {
 	struct ionic_dev *idev = &ionic->idev;
@@ -72,6 +82,11 @@ int ionic_dev_setup(struct ionic *ionic)
 		return -EFAULT;
 	}
 
+	timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
+	ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
+	mod_timer(&ionic->watchdog_timer,
+		  round_jiffies(jiffies + ionic->watchdog_period));
+
 	idev->db_pages = bar->vaddr;
 	idev->phy_db_pages = bar->bus_addr;
 
@@ -80,10 +95,53 @@ int ionic_dev_setup(struct ionic *ionic)
 
 void ionic_dev_teardown(struct ionic *ionic)
 {
-	/* place holder */
+	del_timer_sync(&ionic->watchdog_timer);
 }
 
 /* Devcmd Interface */
+int ionic_heartbeat_check(struct ionic *ionic)
+{
+	struct ionic_dev *idev = &ionic->idev;
+	unsigned long hb_time;
+	u32 fw_status;
+	u32 hb;
+
+	/* wait a little more than one second before testing again */
+	hb_time = jiffies;
+	if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period)))
+		return 0;
+
+	/* firmware is useful only if fw_status is non-zero */
+	fw_status = ioread32(&idev->dev_info_regs->fw_status);
+	if (!fw_status)
+		return -ENXIO;
+
+	/* early FW has no heartbeat, else FW will return non-zero */
+	hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
+	if (!hb)
+		return 0;
+
+	/* are we stalled? */
+	if (hb == idev->last_hb) {
+		/* only complain once for each stall seen */
+		if (idev->last_hb_time != 1) {
+			dev_info(ionic->dev, "FW heartbeat stalled at %d\n",
+				 idev->last_hb);
+			idev->last_hb_time = 1;
+		}
+
+		return -ENXIO;
+	}
+
+	if (idev->last_hb_time == 1)
+		dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb);
+
+	idev->last_hb = hb;
+	idev->last_hb_time = hb_time;
+
+	return 0;
+}
+
 u8 ionic_dev_cmd_status(struct ionic_dev *idev)
 {
 	return ioread8(&idev->dev_cmd_regs->comp.comp.status);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 9610aeb7d5f4..4665c5dc5324 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -16,6 +16,7 @@
 #define IONIC_MIN_TXRX_DESC		16
 #define IONIC_DEF_TXRX_DESC		4096
 #define IONIC_LIFS_MAX			1024
+#define IONIC_WATCHDOG_SECS		5
 #define IONIC_ITR_COAL_USEC_DEFAULT	64
 
 #define IONIC_DEV_CMD_REG_VERSION	1
@@ -123,6 +124,9 @@ struct ionic_dev {
 	union ionic_dev_info_regs __iomem *dev_info_regs;
 	union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
 
+	unsigned long last_hb_time;
+	u32 last_hb;
+
 	u64 __iomem *db_pages;
 	dma_addr_t phy_db_pages;
 
@@ -151,12 +155,19 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
 			      struct ionic_desc_info *desc_info,
 			      struct ionic_cq_info *cq_info, void *cb_arg);
 
+struct ionic_page_info {
+	struct page *page;
+	dma_addr_t dma_addr;
+};
+
 struct ionic_desc_info {
 	void *desc;
 	void *sg_desc;
 	struct ionic_desc_info *next;
 	unsigned int index;
 	unsigned int left;
+	unsigned int npages;
+	struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
 	ionic_desc_cb cb;
 	void *cb_arg;
 };
@@ -295,5 +306,6 @@ void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
 void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
 void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
 		     unsigned int stop_index);
+int ionic_heartbeat_check(struct ionic *ionic);
 
 #endif /* _IONIC_DEV_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
index af1647afa4e8..6fb27dcc5787 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
@@ -19,31 +19,30 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 
 	err = devlink_info_driver_name_put(req, IONIC_DRV_NAME);
 	if (err)
-		goto info_out;
+		return err;
 
 	err = devlink_info_version_running_put(req,
 					       DEVLINK_INFO_VERSION_GENERIC_FW,
 					       idev->dev_info.fw_version);
 	if (err)
-		goto info_out;
+		return err;
 
 	snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_type);
 	err = devlink_info_version_fixed_put(req,
 					     DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
 					     buf);
 	if (err)
-		goto info_out;
+		return err;
 
 	snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_rev);
 	err = devlink_info_version_fixed_put(req,
 					     DEVLINK_INFO_VERSION_GENERIC_ASIC_REV,
 					     buf);
 	if (err)
-		goto info_out;
+		return err;
 
 	err = devlink_info_serial_number_put(req, idev->dev_info.serial_num);
 
-info_out:
 	return err;
 }
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 7d10265f782a..f778fff034f5 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -254,12 +254,9 @@ static int ionic_set_link_ksettings(struct net_device *netdev,
 	struct ionic_lif *lif = netdev_priv(netdev);
 	struct ionic *ionic = lif->ionic;
 	struct ionic_dev *idev;
-	u32 req_rs, req_fc;
-	u8 fec_type;
 	int err = 0;
 
 	idev = &lif->ionic->idev;
-	fec_type = IONIC_PORT_FEC_TYPE_NONE;
 
 	/* set autoneg */
 	if (ks->base.autoneg != idev->port_info->config.an_enable) {
@@ -281,29 +278,6 @@ static int ionic_set_link_ksettings(struct net_device *netdev,
 			return err;
 	}
 
-	/* set FEC */
-	req_rs = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_RS);
-	req_fc = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_BASER);
-	if (req_rs && req_fc) {
-		netdev_info(netdev, "Only select one FEC mode at a time\n");
-		return -EINVAL;
-	} else if (req_fc) {
-		fec_type = IONIC_PORT_FEC_TYPE_FC;
-	} else if (req_rs) {
-		fec_type = IONIC_PORT_FEC_TYPE_RS;
-	} else if (!(req_rs | req_fc)) {
-		fec_type = IONIC_PORT_FEC_TYPE_NONE;
-	}
-
-	if (fec_type != idev->port_info->config.fec_type) {
-		mutex_lock(&ionic->dev_cmd_lock);
-		ionic_dev_cmd_port_fec(idev, fec_type);
-		err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
-		mutex_unlock(&ionic->dev_cmd_lock);
-		if (err)
-			return err;
-	}
-
 	return 0;
 }
 
@@ -353,6 +327,70 @@ static int ionic_set_pauseparam(struct net_device *netdev,
 	return 0;
 }
 
+static int ionic_get_fecparam(struct net_device *netdev,
+			      struct ethtool_fecparam *fec)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	switch (lif->ionic->idev.port_info->config.fec_type) {
+	case IONIC_PORT_FEC_TYPE_NONE:
+		fec->active_fec = ETHTOOL_FEC_OFF;
+		break;
+	case IONIC_PORT_FEC_TYPE_RS:
+		fec->active_fec = ETHTOOL_FEC_RS;
+		break;
+	case IONIC_PORT_FEC_TYPE_FC:
+		fec->active_fec = ETHTOOL_FEC_BASER;
+		break;
+	}
+
+	fec->fec = ETHTOOL_FEC_OFF | ETHTOOL_FEC_RS | ETHTOOL_FEC_BASER;
+
+	return 0;
+}
+
+static int ionic_set_fecparam(struct net_device *netdev,
+			      struct ethtool_fecparam *fec)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	u8 fec_type;
+	int ret = 0;
+
+	if (lif->ionic->idev.port_info->config.an_enable) {
+		netdev_err(netdev, "FEC request not allowed while autoneg is enabled\n");
+		return -EINVAL;
+	}
+
+	switch (fec->fec) {
+	case ETHTOOL_FEC_NONE:
+		fec_type = IONIC_PORT_FEC_TYPE_NONE;
+		break;
+	case ETHTOOL_FEC_OFF:
+		fec_type = IONIC_PORT_FEC_TYPE_NONE;
+		break;
+	case ETHTOOL_FEC_RS:
+		fec_type = IONIC_PORT_FEC_TYPE_RS;
+		break;
+	case ETHTOOL_FEC_BASER:
+		fec_type = IONIC_PORT_FEC_TYPE_FC;
+		break;
+	case ETHTOOL_FEC_AUTO:
+	default:
+		netdev_err(netdev, "FEC request 0x%04x not supported\n",
+			   fec->fec);
+		return -EINVAL;
+	}
+
+	if (fec_type != lif->ionic->idev.port_info->config.fec_type) {
+		mutex_lock(&lif->ionic->dev_cmd_lock);
+		ionic_dev_cmd_port_fec(&lif->ionic->idev, fec_type);
+		ret = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT);
+		mutex_unlock(&lif->ionic->dev_cmd_lock);
+	}
+
+	return ret;
+}
+
 static int ionic_get_coalesce(struct net_device *netdev,
 			      struct ethtool_coalesce *coalesce)
 {
@@ -372,7 +410,6 @@ static int ionic_set_coalesce(struct net_device *netdev,
 	struct ionic_identity *ident;
 	struct ionic_qcq *qcq;
 	unsigned int i;
-	u32 usecs;
 	u32 coal;
 
 	if (coalesce->rx_max_coalesced_frames ||
@@ -410,26 +447,27 @@ static int ionic_set_coalesce(struct net_device *netdev,
 		return -EINVAL;
 	}
 
+	/* Convert the usec request to a HW useable value.  If they asked
+	 * for non-zero and it resolved to zero, bump it up
+	 */
 	coal = ionic_coal_usec_to_hw(lif->ionic, coalesce->rx_coalesce_usecs);
-
-	if (coal > IONIC_INTR_CTRL_COAL_MAX)
-		return -ERANGE;
-
-	/* If they asked for non-zero and it resolved to zero, bump it up */
 	if (!coal && coalesce->rx_coalesce_usecs)
 		coal = 1;
 
-	/* Convert it back to get device resolution */
-	usecs = ionic_coal_hw_to_usec(lif->ionic, coal);
+	if (coal > IONIC_INTR_CTRL_COAL_MAX)
+		return -ERANGE;
 
-	if (usecs != lif->rx_coalesce_usecs) {
-		lif->rx_coalesce_usecs = usecs;
+	/* Save the new value */
+	lif->rx_coalesce_usecs = coalesce->rx_coalesce_usecs;
+	if (coal != lif->rx_coalesce_hw) {
+		lif->rx_coalesce_hw = coal;
 
 		if (test_bit(IONIC_LIF_UP, lif->state)) {
 			for (i = 0; i < lif->nxqs; i++) {
 				qcq = lif->rxqcqs[i].qcq;
 				ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
-						     qcq->intr.index, coal);
+						     qcq->intr.index,
+						     lif->rx_coalesce_hw);
 			}
 		}
 	}
@@ -453,6 +491,7 @@ static int ionic_set_ringparam(struct net_device *netdev,
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
 	bool running;
+	int err;
 
 	if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
 		netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n");
@@ -470,8 +509,9 @@ static int ionic_set_ringparam(struct net_device *netdev,
 	    ring->rx_pending == lif->nrxq_descs)
 		return 0;
 
-	if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
-		return -EBUSY;
+	err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET);
+	if (err)
+		return err;
 
 	running = test_bit(IONIC_LIF_UP, lif->state);
 	if (running)
@@ -504,6 +544,7 @@ static int ionic_set_channels(struct net_device *netdev,
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
 	bool running;
+	int err;
 
 	if (!ch->combined_count || ch->other_count ||
 	    ch->rx_count || ch->tx_count)
@@ -512,8 +553,9 @@ static int ionic_set_channels(struct net_device *netdev,
 	if (ch->combined_count == lif->nxqs)
 		return 0;
 
-	if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
-		return -EBUSY;
+	err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET);
+	if (err)
+		return err;
 
 	running = test_bit(IONIC_LIF_UP, lif->state);
 	if (running)
@@ -747,6 +789,7 @@ static const struct ethtool_ops ionic_ethtool_ops = {
 	.get_regs		= ionic_get_regs,
 	.get_link		= ethtool_op_get_link,
 	.get_link_ksettings	= ionic_get_link_ksettings,
+	.set_link_ksettings	= ionic_set_link_ksettings,
 	.get_coalesce		= ionic_get_coalesce,
 	.set_coalesce		= ionic_set_coalesce,
 	.get_ringparam		= ionic_get_ringparam,
@@ -769,7 +812,8 @@ static const struct ethtool_ops ionic_ethtool_ops = {
 	.get_module_eeprom	= ionic_get_module_eeprom,
 	.get_pauseparam		= ionic_get_pauseparam,
 	.set_pauseparam		= ionic_set_pauseparam,
-	.set_link_ksettings	= ionic_set_link_ksettings,
+	.get_fecparam		= ionic_get_fecparam,
+	.set_fecparam		= ionic_set_fecparam,
 	.nway_reset		= ionic_nway_reset,
 };
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 5bfdda19f64d..dbdb7c5ae8f1 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -111,7 +111,7 @@ struct ionic_admin_cmd {
 };
 
 /**
- * struct admin_comp - General admin command completion format
+ * struct ionic_admin_comp - General admin command completion format
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *              is the completion.
@@ -134,7 +134,7 @@ static inline u8 color_match(u8 color, u8 done_color)
 }
 
 /**
- * struct nop_cmd - NOP command
+ * struct ionic_nop_cmd - NOP command
  * @opcode: opcode
  */
 struct ionic_nop_cmd {
@@ -143,7 +143,7 @@ struct ionic_nop_cmd {
 };
 
 /**
- * struct nop_comp - NOP command completion
+ * struct ionic_nop_comp - NOP command completion
  * @status: The status of the command (enum status_code)
  */
 struct ionic_nop_comp {
@@ -152,7 +152,7 @@ struct ionic_nop_comp {
 };
 
 /**
- * struct dev_init_cmd - Device init command
+ * struct ionic_dev_init_cmd - Device init command
  * @opcode:    opcode
  * @type:      device type
  */
@@ -172,7 +172,7 @@ struct ionic_dev_init_comp {
 };
 
 /**
- * struct dev_reset_cmd - Device reset command
+ * struct ionic_dev_reset_cmd - Device reset command
  * @opcode: opcode
  */
 struct ionic_dev_reset_cmd {
@@ -192,7 +192,7 @@ struct ionic_dev_reset_comp {
 #define IONIC_IDENTITY_VERSION_1	1
 
 /**
- * struct dev_identify_cmd - Driver/device identify command
+ * struct ionic_dev_identify_cmd - Driver/device identify command
  * @opcode:  opcode
  * @ver:     Highest version of identify supported by driver
  */
@@ -284,7 +284,7 @@ enum ionic_lif_type {
 };
 
 /**
- * struct lif_identify_cmd - lif identify command
+ * struct ionic_lif_identify_cmd - lif identify command
  * @opcode:  opcode
  * @type:    lif type (enum lif_type)
  * @ver:     version of identify returned by device
@@ -297,7 +297,7 @@ struct ionic_lif_identify_cmd {
 };
 
 /**
- * struct lif_identify_comp - lif identify command completion
+ * struct ionic_lif_identify_comp - lif identify command completion
  * @status:  status of the command (enum status_code)
  * @ver:     version of identify returned by device
  */
@@ -325,7 +325,7 @@ enum ionic_logical_qtype {
 };
 
 /**
- * struct lif_logical_qtype - Descriptor of logical to hardware queue type.
+ * struct ionic_lif_logical_qtype - Descriptor of logical to hardware queue type.
  * @qtype:          Hardware Queue Type.
  * @qid_count:      Number of Queue IDs of the logical type.
  * @qid_base:       Minimum Queue ID of the logical type.
@@ -349,7 +349,7 @@ enum ionic_lif_state {
  * @name:           lif name
  * @mtu:            mtu
  * @mac:            station mac address
- * @features:       features (enum eth_hw_features)
+ * @features:       features (enum ionic_eth_hw_features)
  * @queue_count:    queue counts per queue-type
  */
 union ionic_lif_config {
@@ -367,7 +367,7 @@ union ionic_lif_config {
 };
 
 /**
- * struct lif_identity - lif identity information (type-specific)
+ * struct ionic_lif_identity - lif identity information (type-specific)
  *
  * @capabilities    LIF capabilities
  *
@@ -441,11 +441,11 @@ union ionic_lif_identity {
 };
 
 /**
- * struct lif_init_cmd - LIF init command
+ * struct ionic_lif_init_cmd - LIF init command
  * @opcode:       opcode
  * @type:         LIF type (enum lif_type)
  * @index:        LIF index
- * @info_pa:      destination address for lif info (struct lif_info)
+ * @info_pa:      destination address for lif info (struct ionic_lif_info)
  */
 struct ionic_lif_init_cmd {
 	u8     opcode;
@@ -457,7 +457,7 @@ struct ionic_lif_init_cmd {
 };
 
 /**
- * struct lif_init_comp - LIF init command completion
+ * struct ionic_lif_init_comp - LIF init command completion
  * @status: The status of the command (enum status_code)
  */
 struct ionic_lif_init_comp {
@@ -468,7 +468,7 @@ struct ionic_lif_init_comp {
 };
 
 /**
- * struct q_init_cmd - Queue init command
+ * struct ionic_q_init_cmd - Queue init command
  * @opcode:       opcode
  * @type:         Logical queue type
  * @ver:          Queue version (defines opcode/descriptor scope)
@@ -525,7 +525,7 @@ struct ionic_q_init_cmd {
 };
 
 /**
- * struct q_init_comp - Queue init command completion
+ * struct ionic_q_init_comp - Queue init command completion
  * @status:     The status of the command (enum status_code)
  * @ver:        Queue version (defines opcode/descriptor scope)
  * @comp_index: The index in the descriptor ring for which this
@@ -556,7 +556,7 @@ enum ionic_txq_desc_opcode {
 };
 
 /**
- * struct txq_desc - Ethernet Tx queue descriptor format
+ * struct ionic_txq_desc - Ethernet Tx queue descriptor format
  * @opcode:       Tx operation, see TXQ_DESC_OPCODE_*:
  *
  *                   IONIC_TXQ_DESC_OPCODE_CSUM_NONE:
@@ -735,7 +735,7 @@ static inline void decode_txq_desc_cmd(u64 cmd, u8 *opcode, u8 *flags,
 #define IONIC_RX_MAX_SG_ELEMS	8
 
 /**
- * struct txq_sg_desc - Transmit scatter-gather (SG) list
+ * struct ionic_txq_sg_desc - Transmit scatter-gather (SG) list
  * @addr:      DMA address of SG element data buffer
  * @len:       Length of SG element data buffer, in bytes
  */
@@ -748,7 +748,7 @@ struct ionic_txq_sg_desc {
 };
 
 /**
- * struct txq_comp - Ethernet transmit queue completion descriptor
+ * struct ionic_txq_comp - Ethernet transmit queue completion descriptor
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *                 is the completion.
@@ -768,7 +768,7 @@ enum ionic_rxq_desc_opcode {
 };
 
 /**
- * struct rxq_desc - Ethernet Rx queue descriptor format
+ * struct ionic_rxq_desc - Ethernet Rx queue descriptor format
  * @opcode:       Rx operation, see RXQ_DESC_OPCODE_*:
  *
  *                   RXQ_DESC_OPCODE_SIMPLE:
@@ -789,7 +789,7 @@ struct ionic_rxq_desc {
 };
 
 /**
- * struct rxq_sg_desc - Receive scatter-gather (SG) list
+ * struct ionic_rxq_sg_desc - Receive scatter-gather (SG) list
  * @addr:      DMA address of SG element data buffer
  * @len:       Length of SG element data buffer, in bytes
  */
@@ -802,7 +802,7 @@ struct ionic_rxq_sg_desc {
 };
 
 /**
- * struct rxq_comp - Ethernet receive queue completion descriptor
+ * struct ionic_rxq_comp - Ethernet receive queue completion descriptor
  * @status:       The status of the command (enum status_code)
  * @num_sg_elems: Number of SG elements used by this descriptor
  * @comp_index:   The index in the descriptor ring for which this
@@ -896,7 +896,7 @@ enum ionic_eth_hw_features {
 };
 
 /**
- * struct q_control_cmd - Queue control command
+ * struct ionic_q_control_cmd - Queue control command
  * @opcode:     opcode
  * @type:       Queue type
  * @lif_index:  LIF index
@@ -1033,8 +1033,8 @@ enum ionic_port_loopback_mode {
 
 /**
  * Transceiver Status information
- * @state:    Transceiver status (enum xcvr_state)
- * @phy:      Physical connection type (enum phy_type)
+ * @state:    Transceiver status (enum ionic_xcvr_state)
+ * @phy:      Physical connection type (enum ionic_phy_type)
  * @pid:      Transceiver link mode (enum pid)
  * @sprom:    Transceiver sprom contents
  */
@@ -1051,9 +1051,9 @@ struct ionic_xcvr_status {
  * @mtu:                mtu
  * @state:              port admin state (enum port_admin_state)
  * @an_enable:          autoneg enable
- * @fec_type:           fec type (enum port_fec_type)
- * @pause_type:         pause type (enum port_pause_type)
- * @loopback_mode:      loopback mode (enum port_loopback_mode)
+ * @fec_type:           fec type (enum ionic_port_fec_type)
+ * @pause_type:         pause type (enum ionic_port_pause_type)
+ * @loopback_mode:      loopback mode (enum ionic_port_loopback_mode)
  */
 union ionic_port_config {
 	struct {
@@ -1080,7 +1080,7 @@ union ionic_port_config {
 
 /**
  * Port Status information
- * @status:             link status (enum port_oper_status)
+ * @status:             link status (enum ionic_port_oper_status)
  * @id:                 port id
  * @speed:              link speed (in Mbps)
  * @xcvr:               tranceiver status
@@ -1094,7 +1094,7 @@ struct ionic_port_status {
 };
 
 /**
- * struct port_identify_cmd - Port identify command
+ * struct ionic_port_identify_cmd - Port identify command
  * @opcode:     opcode
  * @index:      port index
  * @ver:        Highest version of identify supported by driver
@@ -1107,7 +1107,7 @@ struct ionic_port_identify_cmd {
 };
 
 /**
- * struct port_identify_comp - Port identify command completion
+ * struct ionic_port_identify_comp - Port identify command completion
  * @status: The status of the command (enum status_code)
  * @ver:    Version of identify returned by device
  */
@@ -1118,10 +1118,10 @@ struct ionic_port_identify_comp {
 };
 
 /**
- * struct port_init_cmd - Port initialization command
+ * struct ionic_port_init_cmd - Port initialization command
  * @opcode:     opcode
  * @index:      port index
- * @info_pa:    destination address for port info (struct port_info)
+ * @info_pa:    destination address for port info (struct ionic_port_info)
  */
 struct ionic_port_init_cmd {
 	u8     opcode;
@@ -1132,7 +1132,7 @@ struct ionic_port_init_cmd {
 };
 
 /**
- * struct port_init_comp - Port initialization command completion
+ * struct ionic_port_init_comp - Port initialization command completion
  * @status: The status of the command (enum status_code)
  */
 struct ionic_port_init_comp {
@@ -1141,7 +1141,7 @@ struct ionic_port_init_comp {
 };
 
 /**
- * struct port_reset_cmd - Port reset command
+ * struct ionic_port_reset_cmd - Port reset command
  * @opcode:     opcode
  * @index:      port index
  */
@@ -1152,7 +1152,7 @@ struct ionic_port_reset_cmd {
 };
 
 /**
- * struct port_reset_comp - Port reset command completion
+ * struct ionic_port_reset_comp - Port reset command completion
  * @status: The status of the command (enum status_code)
  */
 struct ionic_port_reset_comp {
@@ -1183,7 +1183,7 @@ enum ionic_port_attr {
 };
 
 /**
- * struct port_setattr_cmd - Set port attributes on the NIC
+ * struct ionic_port_setattr_cmd - Set port attributes on the NIC
  * @opcode:     Opcode
  * @index:      port index
  * @attr:       Attribute type (enum ionic_port_attr)
@@ -1207,7 +1207,7 @@ struct ionic_port_setattr_cmd {
 };
 
 /**
- * struct port_setattr_comp - Port set attr command completion
+ * struct ionic_port_setattr_comp - Port set attr command completion
  * @status:     The status of the command (enum status_code)
  * @color:      Color bit
  */
@@ -1218,7 +1218,7 @@ struct ionic_port_setattr_comp {
 };
 
 /**
- * struct port_getattr_cmd - Get port attributes from the NIC
+ * struct ionic_port_getattr_cmd - Get port attributes from the NIC
  * @opcode:     Opcode
  * @index:      port index
  * @attr:       Attribute type (enum ionic_port_attr)
@@ -1231,7 +1231,7 @@ struct ionic_port_getattr_cmd {
 };
 
 /**
- * struct port_getattr_comp - Port get attr command completion
+ * struct ionic_port_getattr_comp - Port get attr command completion
  * @status:     The status of the command (enum status_code)
  * @color:      Color bit
  */
@@ -1252,10 +1252,10 @@ struct ionic_port_getattr_comp {
 };
 
 /**
- * struct lif_status - Lif status register
+ * struct ionic_lif_status - Lif status register
  * @eid:             most recent NotifyQ event id
  * @port_num:        port the lif is connected to
- * @link_status:     port status (enum port_oper_status)
+ * @link_status:     port status (enum ionic_port_oper_status)
  * @link_speed:      speed of link in Mbps
  * @link_down_count: number of times link status changes
  */
@@ -1270,7 +1270,7 @@ struct ionic_lif_status {
 };
 
 /**
- * struct lif_reset_cmd - LIF reset command
+ * struct ionic_lif_reset_cmd - LIF reset command
  * @opcode:    opcode
  * @index:     LIF index
  */
@@ -1290,7 +1290,7 @@ enum ionic_dev_state {
 };
 
 /**
- * enum dev_attr - List of device attributes
+ * enum ionic_dev_attr - List of device attributes
  */
 enum ionic_dev_attr {
 	IONIC_DEV_ATTR_STATE    = 0,
@@ -1299,10 +1299,10 @@ enum ionic_dev_attr {
 };
 
 /**
- * struct dev_setattr_cmd - Set Device attributes on the NIC
+ * struct ionic_dev_setattr_cmd - Set Device attributes on the NIC
  * @opcode:     Opcode
- * @attr:       Attribute type (enum dev_attr)
- * @state:      Device state (enum dev_state)
+ * @attr:       Attribute type (enum ionic_dev_attr)
+ * @state:      Device state (enum ionic_dev_state)
  * @name:       The bus info, e.g. PCI slot-device-function, 0 terminated
  * @features:   Device features
  */
@@ -1319,7 +1319,7 @@ struct ionic_dev_setattr_cmd {
 };
 
 /**
- * struct dev_setattr_comp - Device set attr command completion
+ * struct ionic_dev_setattr_comp - Device set attr command completion
  * @status:     The status of the command (enum status_code)
  * @features:   Device features
  * @color:      Color bit
@@ -1335,9 +1335,9 @@ struct ionic_dev_setattr_comp {
 };
 
 /**
- * struct dev_getattr_cmd - Get Device attributes from the NIC
+ * struct ionic_dev_getattr_cmd - Get Device attributes from the NIC
  * @opcode:     opcode
- * @attr:       Attribute type (enum dev_attr)
+ * @attr:       Attribute type (enum ionic_dev_attr)
  */
 struct ionic_dev_getattr_cmd {
 	u8     opcode;
@@ -1346,7 +1346,7 @@ struct ionic_dev_getattr_cmd {
 };
 
 /**
- * struct dev_setattr_comp - Device set attr command completion
+ * struct ionic_dev_setattr_comp - Device set attr command completion
  * @status:     The status of the command (enum status_code)
  * @features:   Device features
  * @color:      Color bit
@@ -1376,7 +1376,7 @@ enum ionic_rss_hash_types {
 };
 
 /**
- * enum lif_attr - List of LIF attributes
+ * enum ionic_lif_attr - List of LIF attributes
  */
 enum ionic_lif_attr {
 	IONIC_LIF_ATTR_STATE        = 0,
@@ -1389,15 +1389,15 @@ enum ionic_lif_attr {
 };
 
 /**
- * struct lif_setattr_cmd - Set LIF attributes on the NIC
+ * struct ionic_lif_setattr_cmd - Set LIF attributes on the NIC
  * @opcode:     Opcode
- * @type:       Attribute type (enum lif_attr)
+ * @type:       Attribute type (enum ionic_lif_attr)
  * @index:      LIF index
  * @state:      lif state (enum lif_state)
  * @name:       The netdev name string, 0 terminated
  * @mtu:        Mtu
  * @mac:        Station mac
- * @features:   Features (enum eth_hw_features)
+ * @features:   Features (enum ionic_eth_hw_features)
  * @rss:        RSS properties
  *              @types:     The hash types to enable (see rss_hash_types).
  *              @key:       The hash secret key.
@@ -1426,11 +1426,11 @@ struct ionic_lif_setattr_cmd {
 };
 
 /**
- * struct lif_setattr_comp - LIF set attr command completion
+ * struct ionic_lif_setattr_comp - LIF set attr command completion
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *              is the completion.
- * @features:   features (enum eth_hw_features)
+ * @features:   features (enum ionic_eth_hw_features)
  * @color:      Color bit
  */
 struct ionic_lif_setattr_comp {
@@ -1445,9 +1445,9 @@ struct ionic_lif_setattr_comp {
 };
 
 /**
- * struct lif_getattr_cmd - Get LIF attributes from the NIC
+ * struct ionic_lif_getattr_cmd - Get LIF attributes from the NIC
  * @opcode:     Opcode
- * @attr:       Attribute type (enum lif_attr)
+ * @attr:       Attribute type (enum ionic_lif_attr)
  * @index:      LIF index
  */
 struct ionic_lif_getattr_cmd {
@@ -1458,7 +1458,7 @@ struct ionic_lif_getattr_cmd {
 };
 
 /**
- * struct lif_getattr_comp - LIF get attr command completion
+ * struct ionic_lif_getattr_comp - LIF get attr command completion
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *              is the completion.
@@ -1466,7 +1466,7 @@ struct ionic_lif_getattr_cmd {
  * @name:       The netdev name string, 0 terminated
  * @mtu:        Mtu
  * @mac:        Station mac
- * @features:   Features (enum eth_hw_features)
+ * @features:   Features (enum ionic_eth_hw_features)
  * @color:      Color bit
  */
 struct ionic_lif_getattr_comp {
@@ -1492,7 +1492,7 @@ enum ionic_rx_mode {
 };
 
 /**
- * struct rx_mode_set_cmd - Set LIF's Rx mode command
+ * struct ionic_rx_mode_set_cmd - Set LIF's Rx mode command
  * @opcode:     opcode
  * @lif_index:  LIF index
  * @rx_mode:    Rx mode flags:
@@ -1519,7 +1519,7 @@ enum ionic_rx_filter_match_type {
 };
 
 /**
- * struct rx_filter_add_cmd - Add LIF Rx filter command
+ * struct ionic_rx_filter_add_cmd - Add LIF Rx filter command
  * @opcode:     opcode
  * @qtype:      Queue type
  * @lif_index:  LIF index
@@ -1550,7 +1550,7 @@ struct ionic_rx_filter_add_cmd {
 };
 
 /**
- * struct rx_filter_add_comp - Add LIF Rx filter command completion
+ * struct ionic_rx_filter_add_comp - Add LIF Rx filter command completion
  * @status:     The status of the command (enum status_code)
  * @comp_index: The index in the descriptor ring for which this
  *              is the completion.
@@ -1567,7 +1567,7 @@ struct ionic_rx_filter_add_comp {
 };
 
 /**
- * struct rx_filter_del_cmd - Delete LIF Rx filter command
+ * struct ionic_rx_filter_del_cmd - Delete LIF Rx filter command
  * @opcode:     opcode
  * @lif_index:  LIF index
  * @filter_id:  Filter ID
@@ -1583,7 +1583,7 @@ struct ionic_rx_filter_del_cmd {
 typedef struct ionic_admin_comp ionic_rx_filter_del_comp;
 
 /**
- * struct qos_identify_cmd - QoS identify command
+ * struct ionic_qos_identify_cmd - QoS identify command
  * @opcode:    opcode
  * @ver:     Highest version of identify supported by driver
  *
@@ -1595,7 +1595,7 @@ struct ionic_qos_identify_cmd {
 };
 
 /**
- * struct qos_identify_comp - QoS identify command completion
+ * struct ionic_qos_identify_comp - QoS identify command completion
  * @status: The status of the command (enum status_code)
  * @ver:    Version of identify returned by device
  */
@@ -1610,7 +1610,7 @@ struct ionic_qos_identify_comp {
 #define IONIC_QOS_DSCP_MAX_VALUES	64
 
 /**
- * enum qos_class
+ * enum ionic_qos_class
  */
 enum ionic_qos_class {
 	IONIC_QOS_CLASS_DEFAULT		= 0,
@@ -1623,7 +1623,7 @@ enum ionic_qos_class {
 };
 
 /**
- * enum qos_class_type - Traffic classification criteria
+ * enum ionic_qos_class_type - Traffic classification criteria
  */
 enum ionic_qos_class_type {
 	IONIC_QOS_CLASS_TYPE_NONE	= 0,
@@ -1632,7 +1632,7 @@ enum ionic_qos_class_type {
 };
 
 /**
- * enum qos_sched_type - Qos class scheduling type
+ * enum ionic_qos_sched_type - Qos class scheduling type
  */
 enum ionic_qos_sched_type {
 	IONIC_QOS_SCHED_TYPE_STRICT	= 0,	/* Strict priority */
@@ -1640,15 +1640,15 @@ enum ionic_qos_sched_type {
 };
 
 /**
- * union qos_config - Qos configuration structure
+ * union ionic_qos_config - Qos configuration structure
  * @flags:		Configuration flags
  *	IONIC_QOS_CONFIG_F_ENABLE		enable
  *	IONIC_QOS_CONFIG_F_DROP			drop/nodrop
  *	IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP		enable dot1q pcp rewrite
  *	IONIC_QOS_CONFIG_F_RW_IP_DSCP		enable ip dscp rewrite
- * @sched_type:		Qos class scheduling type (enum qos_sched_type)
- * @class_type:		Qos class type (enum qos_class_type)
- * @pause_type:		Qos pause type (enum qos_pause_type)
+ * @sched_type:		Qos class scheduling type (enum ionic_qos_sched_type)
+ * @class_type:		Qos class type (enum ionic_qos_class_type)
+ * @pause_type:		Qos pause type (enum ionic_qos_pause_type)
  * @name:		Qos class name
  * @mtu:		MTU of the class
  * @pfc_dot1q_pcp:	Pcp value for pause frames (valid iff F_NODROP)
@@ -1697,7 +1697,7 @@ union ionic_qos_config {
 };
 
 /**
- * union qos_identity - QoS identity structure
+ * union ionic_qos_identity - QoS identity structure
  * @version:	Version of the identify structure
  * @type:	QoS system type
  * @nclasses:	Number of usable QoS classes
@@ -1730,7 +1730,7 @@ struct ionic_qos_init_cmd {
 typedef struct ionic_admin_comp ionic_qos_init_comp;
 
 /**
- * struct qos_reset_cmd - Qos config reset command
+ * struct ionic_qos_reset_cmd - Qos config reset command
  * @opcode:	Opcode
  */
 struct ionic_qos_reset_cmd {
@@ -1742,7 +1742,7 @@ struct ionic_qos_reset_cmd {
 typedef struct ionic_admin_comp ionic_qos_reset_comp;
 
 /**
- * struct fw_download_cmd - Firmware download command
+ * struct ionic_fw_download_cmd - Firmware download command
  * @opcode:	opcode
  * @addr:	dma address of the firmware buffer
  * @offset:	offset of the firmware buffer within the full image
@@ -1765,9 +1765,9 @@ enum ionic_fw_control_oper {
 };
 
 /**
- * struct fw_control_cmd - Firmware control command
+ * struct ionic_fw_control_cmd - Firmware control command
  * @opcode:    opcode
- * @oper:      firmware control operation (enum fw_control_oper)
+ * @oper:      firmware control operation (enum ionic_fw_control_oper)
  * @slot:      slot to activate
  */
 struct ionic_fw_control_cmd {
@@ -1779,7 +1779,7 @@ struct ionic_fw_control_cmd {
 };
 
 /**
- * struct fw_control_comp - Firmware control copletion
+ * struct ionic_fw_control_comp - Firmware control copletion
  * @opcode:    opcode
  * @slot:      slot where the firmware was installed
  */
@@ -1797,13 +1797,13 @@ struct ionic_fw_control_comp {
  ******************************************************************/
 
 /**
- * struct rdma_reset_cmd - Reset RDMA LIF cmd
+ * struct ionic_rdma_reset_cmd - Reset RDMA LIF cmd
  * @opcode:        opcode
  * @lif_index:     lif index
  *
  * There is no rdma specific dev command completion struct.  Completion uses
- * the common struct admin_comp.  Only the status is indicated.  Nonzero status
- * means the LIF does not support rdma.
+ * the common struct ionic_admin_comp.  Only the status is indicated.
+ * Nonzero status means the LIF does not support rdma.
  **/
 struct ionic_rdma_reset_cmd {
 	u8     opcode;
@@ -1813,7 +1813,7 @@ struct ionic_rdma_reset_cmd {
 };
 
 /**
- * struct rdma_queue_cmd - Create RDMA Queue command
+ * struct ionic_rdma_queue_cmd - Create RDMA Queue command
  * @opcode:        opcode, 52, 53
  * @lif_index      lif index
  * @qid_ver:       (qid | (rdma version << 24))
@@ -1839,7 +1839,7 @@ struct ionic_rdma_reset_cmd {
  * memory registration.
  *
  * There is no rdma specific dev command completion struct.  Completion uses
- * the common struct admin_comp.  Only the status is indicated.
+ * the common struct ionic_admin_comp.  Only the status is indicated.
  **/
 struct ionic_rdma_queue_cmd {
 	u8     opcode;
@@ -1860,7 +1860,7 @@ struct ionic_rdma_queue_cmd {
  ******************************************************************/
 
 /**
- * struct notifyq_event
+ * struct ionic_notifyq_event
  * @eid:   event number
  * @ecode: event code
  * @data:  unspecified data about the event
@@ -1875,7 +1875,7 @@ struct ionic_notifyq_event {
 };
 
 /**
- * struct link_change_event
+ * struct ionic_link_change_event
  * @eid:		event number
  * @ecode:		event code = EVENT_OPCODE_LINK_CHANGE
  * @link_status:	link up or down, with error bits (enum port_status)
@@ -1892,7 +1892,7 @@ struct ionic_link_change_event {
 };
 
 /**
- * struct reset_event
+ * struct ionic_reset_event
  * @eid:		event number
  * @ecode:		event code = EVENT_OPCODE_RESET
  * @reset_code:		reset type
@@ -1910,7 +1910,7 @@ struct ionic_reset_event {
 };
 
 /**
- * struct heartbeat_event
+ * struct ionic_heartbeat_event
  * @eid:	event number
  * @ecode:	event code = EVENT_OPCODE_HEARTBEAT
  *
@@ -1923,7 +1923,7 @@ struct ionic_heartbeat_event {
 };
 
 /**
- * struct log_event
+ * struct ionic_log_event
  * @eid:	event number
  * @ecode:	event code = EVENT_OPCODE_LOG
  * @data:	log data
@@ -1937,7 +1937,7 @@ struct ionic_log_event {
 };
 
 /**
- * struct port_stats
+ * struct ionic_port_stats
  */
 struct ionic_port_stats {
 	__le64 frames_rx_ok;
@@ -2067,7 +2067,7 @@ struct ionic_mgmt_port_stats {
 };
 
 /**
- * struct port_identity - port identity structure
+ * struct ionic_port_identity - port identity structure
  * @version:        identity structure version
  * @type:           type of port (enum port_type)
  * @num_lanes:      number of lanes for the port
@@ -2099,7 +2099,7 @@ union ionic_port_identity {
 };
 
 /**
- * struct port_info - port info structure
+ * struct ionic_port_info - port info structure
  * @port_status:     port status
  * @port_stats:      port stats
  */
@@ -2110,7 +2110,7 @@ struct ionic_port_info {
 };
 
 /**
- * struct lif_stats
+ * struct ionic_lif_stats
  */
 struct ionic_lif_stats {
 	/* RX */
@@ -2264,7 +2264,7 @@ struct ionic_lif_stats {
 };
 
 /**
- * struct lif_info - lif info structure
+ * struct ionic_lif_info - lif info structure
  */
 struct ionic_lif_info {
 	union ionic_lif_config config;
@@ -2357,7 +2357,7 @@ union ionic_dev_info_regs {
 };
 
 /**
- * union dev_cmd_regs - Device command register format (read-write)
+ * union ionic_dev_cmd_regs - Device command register format (read-write)
  * @doorbell:        Device Cmd Doorbell, write-only.
  *                   Write a 1 to signal device to process cmd,
  *                   poll done for completion.
@@ -2379,7 +2379,7 @@ union ionic_dev_cmd_regs {
 };
 
 /**
- * union dev_regs - Device register format in for bar 0 page 0
+ * union ionic_dev_regs - Device register format in for bar 0 page 0
  * @info:            Device info registers
  * @devcmd:          Device command registers
  */
@@ -2433,7 +2433,7 @@ union ionic_adminq_comp {
 #define IONIC_ASIC_TYPE_CAPRI			0
 
 /**
- * struct doorbell - Doorbell register layout
+ * struct ionic_doorbell - Doorbell register layout
  * @p_index: Producer index
  * @ring:    Selects the specific ring of the queue to update.
  *           Type-specific meaning:
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 20faa8d24c9f..60fd14df49d7 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -244,6 +244,21 @@ static int ionic_qcq_disable(struct ionic_qcq *qcq)
 	return ionic_adminq_post_wait(lif, &ctx);
 }
 
+static void ionic_lif_quiesce(struct ionic_lif *lif)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.lif_setattr = {
+			.opcode = IONIC_CMD_LIF_SETATTR,
+			.attr = IONIC_LIF_ATTR_STATE,
+			.index = lif->index,
+			.state = IONIC_LIF_DISABLE
+		},
+	};
+
+	ionic_adminq_post_wait(lif, &ctx);
+}
+
 static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq)
 {
 	struct ionic_dev *idev = &lif->ionic->idev;
@@ -609,12 +624,14 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 			.lif_index = cpu_to_le16(lif->index),
 			.type = q->type,
 			.index = cpu_to_le32(q->index),
-			.flags = cpu_to_le16(IONIC_QINIT_F_IRQ),
+			.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
+					     IONIC_QINIT_F_SG),
 			.intr_index = cpu_to_le16(cq->bound_intr->index),
 			.pid = cpu_to_le16(q->pid),
 			.ring_size = ilog2(q->num_descs),
 			.ring_base = cpu_to_le64(q->base_pa),
 			.cq_ring_base = cpu_to_le64(cq->base_pa),
+			.sg_ring_base = cpu_to_le64(q->sg_base_pa),
 		},
 	};
 	int err;
@@ -1432,7 +1449,6 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
 	unsigned int flags;
 	unsigned int i;
 	int err = 0;
-	u32 coal;
 
 	flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG;
 	for (i = 0; i < lif->nxqs; i++) {
@@ -1448,21 +1464,22 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
 		lif->txqcqs[i].qcq->stats = lif->txqcqs[i].stats;
 	}
 
-	flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_INTR;
-	coal = ionic_coal_usec_to_hw(lif->ionic, lif->rx_coalesce_usecs);
+	flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG | IONIC_QCQ_F_INTR;
 	for (i = 0; i < lif->nxqs; i++) {
 		err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
 				      lif->nrxq_descs,
 				      sizeof(struct ionic_rxq_desc),
 				      sizeof(struct ionic_rxq_comp),
-				      0, lif->kern_pid, &lif->rxqcqs[i].qcq);
+				      sizeof(struct ionic_rxq_sg_desc),
+				      lif->kern_pid, &lif->rxqcqs[i].qcq);
 		if (err)
 			goto err_out;
 
 		lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats;
 
 		ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
-				     lif->rxqcqs[i].qcq->intr.index, coal);
+				     lif->rxqcqs[i].qcq->intr.index,
+				     lif->rx_coalesce_hw);
 		ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq,
 					  lif->txqcqs[i].qcq);
 	}
@@ -1592,6 +1609,7 @@ int ionic_stop(struct net_device *netdev)
 	netif_tx_disable(netdev);
 
 	ionic_txrx_disable(lif);
+	ionic_lif_quiesce(lif);
 	ionic_txrx_deinit(lif);
 	ionic_txrx_free(lif);
 
@@ -1621,8 +1639,9 @@ int ionic_reset_queues(struct ionic_lif *lif)
 	/* Put off the next watchdog timeout */
 	netif_trans_update(lif->netdev);
 
-	if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
-		return -EBUSY;
+	err = ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET);
+	if (err)
+		return err;
 
 	running = netif_running(lif->netdev);
 	if (running)
@@ -1641,7 +1660,6 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
 	struct net_device *netdev;
 	struct ionic_lif *lif;
 	int tbl_sz;
-	u32 coal;
 	int err;
 
 	netdev = alloc_etherdev_mqs(sizeof(*lif),
@@ -1672,8 +1690,9 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
 	lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
 
 	/* Convert the default coalesce value to actual hw resolution */
-	coal = ionic_coal_usec_to_hw(lif->ionic, IONIC_ITR_COAL_USEC_DEFAULT);
-	lif->rx_coalesce_usecs = ionic_coal_hw_to_usec(lif->ionic, coal);
+	lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT;
+	lif->rx_coalesce_hw = ionic_coal_usec_to_hw(lif->ionic,
+						    lif->rx_coalesce_usecs);
 
 	snprintf(lif->name, sizeof(lif->name), "lif%u", index);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 6a95b42a8d8c..a55fd1f8c31b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -175,7 +175,9 @@ struct ionic_lif {
 	unsigned long *dbid_inuse;
 	unsigned int dbid_count;
 	struct dentry *dentry;
-	u32 rx_coalesce_usecs;
+	u32 rx_coalesce_usecs;		/* what the user asked for */
+	u32 rx_coalesce_hw;		/* what the hw is using */
+
 	u32 flags;
 	struct work_struct tx_timeout_work;
 };
@@ -187,15 +189,10 @@ struct ionic_lif {
 #define lif_to_txq(lif, i)	(&lif_to_txqcq((lif), i)->q)
 #define lif_to_rxq(lif, i)	(&lif_to_txqcq((lif), i)->q)
 
+/* return 0 if successfully set the bit, else non-zero */
 static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname)
 {
-	unsigned long tlimit = jiffies + HZ;
-
-	while (test_and_set_bit(bitname, lif->state) &&
-	       time_before(jiffies, tlimit))
-		usleep_range(100, 200);
-
-	return test_bit(bitname, lif->state);
+	return wait_on_bit_lock(lif->state, bitname, TASK_INTERRUPTIBLE);
 }
 
 static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index aab311413412..3590ea7fd88a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -247,6 +247,10 @@ static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
 		goto err_out;
 	}
 
+	err = ionic_heartbeat_check(lif->ionic);
+	if (err)
+		goto err_out;
+
 	memcpy(adminq->head->desc, &ctx->cmd, sizeof(ctx->cmd));
 
 	dev_dbg(&lif->netdev->dev, "post admin queue command:\n");
@@ -307,6 +311,14 @@ int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb,
 	return work_done;
 }
 
+static void ionic_dev_cmd_clean(struct ionic *ionic)
+{
+	union ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
+
+	iowrite32(0, &regs->doorbell);
+	memset_io(&regs->cmd, 0, sizeof(regs->cmd));
+}
+
 int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
 {
 	struct ionic_dev *idev = &ionic->idev;
@@ -316,6 +328,7 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
 	int opcode;
 	int done;
 	int err;
+	int hb;
 
 	WARN_ON(in_interrupt());
 
@@ -330,7 +343,8 @@ try_again:
 		if (done)
 			break;
 		msleep(20);
-	} while (!done && time_before(jiffies, max_wait));
+		hb = ionic_heartbeat_check(ionic);
+	} while (!done && !hb && time_before(jiffies, max_wait));
 	duration = jiffies - start_time;
 
 	opcode = idev->dev_cmd_regs->cmd.cmd.opcode;
@@ -338,7 +352,15 @@ try_again:
 		ionic_opcode_to_str(opcode), opcode,
 		done, duration / HZ, duration);
 
+	if (!done && hb) {
+		ionic_dev_cmd_clean(ionic);
+		dev_warn(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
+			 ionic_opcode_to_str(opcode), opcode);
+		return -ENXIO;
+	}
+
 	if (!done && !time_before(jiffies, max_wait)) {
+		ionic_dev_cmd_clean(ionic);
 		dev_warn(ionic->dev, "DEVCMD %s (%d) timeout after %ld secs\n",
 			 ionic_opcode_to_str(opcode), opcode, max_seconds);
 		return -ETIMEDOUT;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index ab6663d94f42..97e79949b359 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -34,52 +34,110 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
 	return netdev_get_tx_queue(q->lif->netdev, q->index);
 }
 
-static void ionic_rx_recycle(struct ionic_queue *q, struct ionic_desc_info *desc_info,
-			     struct sk_buff *skb)
+static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
+					  unsigned int len, bool frags)
 {
-	struct ionic_rxq_desc *old = desc_info->desc;
-	struct ionic_rxq_desc *new = q->head->desc;
+	struct ionic_lif *lif = q->lif;
+	struct ionic_rx_stats *stats;
+	struct net_device *netdev;
+	struct sk_buff *skb;
+
+	netdev = lif->netdev;
+	stats = q_to_rx_stats(q);
+
+	if (frags)
+		skb = napi_get_frags(&q_to_qcq(q)->napi);
+	else
+		skb = netdev_alloc_skb_ip_align(netdev, len);
 
-	new->addr = old->addr;
-	new->len = old->len;
+	if (unlikely(!skb)) {
+		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+				     netdev->name, q->name);
+		stats->alloc_err++;
+		return NULL;
+	}
 
-	ionic_rxq_post(q, true, ionic_rx_clean, skb);
+	return skb;
 }
 
-static bool ionic_rx_copybreak(struct ionic_queue *q, struct ionic_desc_info *desc_info,
-			       struct ionic_cq_info *cq_info, struct sk_buff **skb)
+static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
+				      struct ionic_desc_info *desc_info,
+				      struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
-	struct ionic_rxq_desc *desc = desc_info->desc;
-	struct net_device *netdev = q->lif->netdev;
 	struct device *dev = q->lif->ionic->dev;
-	struct sk_buff *new_skb;
-	u16 clen, dlen;
-
-	clen = le16_to_cpu(comp->len);
-	dlen = le16_to_cpu(desc->len);
-	if (clen > q->lif->rx_copybreak) {
-		dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr),
-				 dlen, DMA_FROM_DEVICE);
-		return false;
-	}
+	struct ionic_page_info *page_info;
+	struct sk_buff *skb;
+	unsigned int i;
+	u16 frag_len;
+	u16 len;
 
-	new_skb = netdev_alloc_skb_ip_align(netdev, clen);
-	if (!new_skb) {
-		dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr),
-				 dlen, DMA_FROM_DEVICE);
-		return false;
-	}
+	page_info = &desc_info->pages[0];
+	len = le16_to_cpu(comp->len);
 
-	dma_sync_single_for_cpu(dev, (dma_addr_t)le64_to_cpu(desc->addr),
-				clen, DMA_FROM_DEVICE);
+	prefetch(page_address(page_info->page) + NET_IP_ALIGN);
 
-	memcpy(new_skb->data, (*skb)->data, clen);
+	skb = ionic_rx_skb_alloc(q, len, true);
+	if (unlikely(!skb))
+		return NULL;
 
-	ionic_rx_recycle(q, desc_info, *skb);
-	*skb = new_skb;
+	i = comp->num_sg_elems + 1;
+	do {
+		if (unlikely(!page_info->page)) {
+			struct napi_struct *napi = &q_to_qcq(q)->napi;
 
-	return true;
+			napi->skb = NULL;
+			dev_kfree_skb(skb);
+			return NULL;
+		}
+
+		frag_len = min(len, (u16)PAGE_SIZE);
+		len -= frag_len;
+
+		dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr),
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				page_info->page, 0, frag_len, PAGE_SIZE);
+		page_info->page = NULL;
+		page_info++;
+		i--;
+	} while (i > 0);
+
+	return skb;
+}
+
+static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
+					  struct ionic_desc_info *desc_info,
+					  struct ionic_cq_info *cq_info)
+{
+	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct device *dev = q->lif->ionic->dev;
+	struct ionic_page_info *page_info;
+	struct sk_buff *skb;
+	u16 len;
+
+	page_info = &desc_info->pages[0];
+	len = le16_to_cpu(comp->len);
+
+	skb = ionic_rx_skb_alloc(q, len, false);
+	if (unlikely(!skb))
+		return NULL;
+
+	if (unlikely(!page_info->page)) {
+		dev_kfree_skb(skb);
+		return NULL;
+	}
+
+	dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr),
+				len, DMA_FROM_DEVICE);
+	skb_copy_to_linear_data(skb, page_address(page_info->page), len);
+	dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr),
+				   len, DMA_FROM_DEVICE);
+
+	skb_put(skb, len);
+	skb->protocol = eth_type_trans(skb, q->lif->netdev);
+
+	return skb;
 }
 
 static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
@@ -87,35 +145,34 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct ionic_qcq *qcq = q_to_qcq(q);
-	struct sk_buff *skb = cb_arg;
 	struct ionic_rx_stats *stats;
 	struct net_device *netdev;
+	struct sk_buff *skb;
 
 	stats = q_to_rx_stats(q);
 	netdev = q->lif->netdev;
 
-	if (comp->status) {
-		ionic_rx_recycle(q, desc_info, skb);
+	if (comp->status)
 		return;
-	}
 
-	if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) {
-		/* no packet processing while resetting */
-		ionic_rx_recycle(q, desc_info, skb);
+	/* no packet processing while resetting */
+	if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state)))
 		return;
-	}
 
 	stats->pkts++;
 	stats->bytes += le16_to_cpu(comp->len);
 
-	ionic_rx_copybreak(q, desc_info, cq_info, &skb);
+	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
+		skb = ionic_rx_copybreak(q, desc_info, cq_info);
+	else
+		skb = ionic_rx_frags(q, desc_info, cq_info);
 
-	skb_put(skb, le16_to_cpu(comp->len));
-	skb->protocol = eth_type_trans(skb, netdev);
+	if (unlikely(!skb))
+		return;
 
 	skb_record_rx_queue(skb, q->index);
 
-	if (netdev->features & NETIF_F_RXHASH) {
+	if (likely(netdev->features & NETIF_F_RXHASH)) {
 		switch (comp->pkt_type_color & IONIC_RXQ_COMP_PKT_TYPE_MASK) {
 		case IONIC_PKT_TYPE_IPV4:
 		case IONIC_PKT_TYPE_IPV6:
@@ -132,7 +189,7 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i
 		}
 	}
 
-	if (netdev->features & NETIF_F_RXCSUM) {
+	if (likely(netdev->features & NETIF_F_RXCSUM)) {
 		if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
 			skb->ip_summed = CHECKSUM_COMPLETE;
 			skb->csum = (__wsum)le16_to_cpu(comp->csum);
@@ -142,18 +199,21 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i
 		stats->csum_none++;
 	}
 
-	if ((comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_TCP_BAD) ||
-	    (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_UDP_BAD) ||
-	    (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_IP_BAD))
+	if (unlikely((comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_TCP_BAD) ||
+		     (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_UDP_BAD) ||
+		     (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_IP_BAD)))
 		stats->csum_error++;
 
-	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+	if (likely(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
 		if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_VLAN)
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 					       le16_to_cpu(comp->vlan_tci));
 	}
 
-	napi_gro_receive(&qcq->napi, skb);
+	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
+		napi_gro_receive(&qcq->napi, skb);
+	else
+		napi_gro_frags(&qcq->napi);
 }
 
 static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
@@ -213,66 +273,125 @@ void ionic_rx_flush(struct ionic_cq *cq)
 				   work_done, IONIC_INTR_CRED_RESET_COALESCE);
 }
 
-static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, unsigned int len,
-					  dma_addr_t *dma_addr)
+static struct page *ionic_rx_page_alloc(struct ionic_queue *q,
+					dma_addr_t *dma_addr)
 {
 	struct ionic_lif *lif = q->lif;
 	struct ionic_rx_stats *stats;
 	struct net_device *netdev;
-	struct sk_buff *skb;
 	struct device *dev;
+	struct page *page;
 
 	netdev = lif->netdev;
 	dev = lif->ionic->dev;
 	stats = q_to_rx_stats(q);
-	skb = netdev_alloc_skb_ip_align(netdev, len);
-	if (!skb) {
-		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
-				     netdev->name, q->name);
+	page = alloc_page(GFP_ATOMIC);
+	if (unlikely(!page)) {
+		net_err_ratelimited("%s: Page alloc failed on %s!\n",
+				    netdev->name, q->name);
 		stats->alloc_err++;
 		return NULL;
 	}
 
-	*dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, *dma_addr)) {
-		dev_kfree_skb(skb);
-		net_warn_ratelimited("%s: DMA single map failed on %s!\n",
-				     netdev->name, q->name);
+	*dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(dev, *dma_addr))) {
+		__free_page(page);
+		net_err_ratelimited("%s: DMA single map failed on %s!\n",
+				    netdev->name, q->name);
 		stats->dma_map_err++;
 		return NULL;
 	}
 
-	return skb;
+	return page;
+}
+
+static void ionic_rx_page_free(struct ionic_queue *q, struct page *page,
+			       dma_addr_t dma_addr)
+{
+	struct ionic_lif *lif = q->lif;
+	struct net_device *netdev;
+	struct device *dev;
+
+	netdev = lif->netdev;
+	dev = lif->ionic->dev;
+
+	if (unlikely(!page)) {
+		net_err_ratelimited("%s: Trying to free unallocated buffer on %s!\n",
+				    netdev->name, q->name);
+		return;
+	}
+
+	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	__free_page(page);
 }
 
-#define IONIC_RX_RING_DOORBELL_STRIDE		((1 << 2) - 1)
+#define IONIC_RX_RING_DOORBELL_STRIDE		((1 << 5) - 1)
+#define IONIC_RX_RING_HEAD_BUF_SZ		2048
 
 void ionic_rx_fill(struct ionic_queue *q)
 {
 	struct net_device *netdev = q->lif->netdev;
+	struct ionic_desc_info *desc_info;
+	struct ionic_page_info *page_info;
+	struct ionic_rxq_sg_desc *sg_desc;
+	struct ionic_rxq_sg_elem *sg_elem;
 	struct ionic_rxq_desc *desc;
-	struct sk_buff *skb;
-	dma_addr_t dma_addr;
+	unsigned int nfrags;
 	bool ring_doorbell;
+	unsigned int i, j;
 	unsigned int len;
-	unsigned int i;
 
 	len = netdev->mtu + ETH_HLEN;
+	nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
 
 	for (i = ionic_q_space_avail(q); i; i--) {
-		skb = ionic_rx_skb_alloc(q, len, &dma_addr);
-		if (!skb)
-			return;
+		desc_info = q->head;
+		desc = desc_info->desc;
+		sg_desc = desc_info->sg_desc;
+		page_info = &desc_info->pages[0];
+
+		if (page_info->page) { /* recycle the buffer */
+			ring_doorbell = ((q->head->index + 1) &
+					IONIC_RX_RING_DOORBELL_STRIDE) == 0;
+			ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, NULL);
+			continue;
+		}
 
-		desc = q->head->desc;
-		desc->addr = cpu_to_le64(dma_addr);
-		desc->len = cpu_to_le16(len);
-		desc->opcode = IONIC_RXQ_DESC_OPCODE_SIMPLE;
+		/* fill main descriptor - pages[0] */
+		desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
+					      IONIC_RXQ_DESC_OPCODE_SIMPLE;
+		desc_info->npages = nfrags;
+		page_info->page = ionic_rx_page_alloc(q, &page_info->dma_addr);
+		if (unlikely(!page_info->page)) {
+			desc->addr = 0;
+			desc->len = 0;
+			return;
+		}
+		desc->addr = cpu_to_le64(page_info->dma_addr);
+		desc->len = cpu_to_le16(PAGE_SIZE);
+		page_info++;
+
+		/* fill sg descriptors - pages[1..n] */
+		for (j = 0; j < nfrags - 1; j++) {
+			if (page_info->page) /* recycle the sg buffer */
+				continue;
+
+			sg_elem = &sg_desc->elems[j];
+			page_info->page = ionic_rx_page_alloc(q, &page_info->dma_addr);
+			if (unlikely(!page_info->page)) {
+				sg_elem->addr = 0;
+				sg_elem->len = 0;
+				return;
+			}
+			sg_elem->addr = cpu_to_le64(page_info->dma_addr);
+			sg_elem->len = cpu_to_le16(PAGE_SIZE);
+			page_info++;
+		}
 
 		ring_doorbell = ((q->head->index + 1) &
 				IONIC_RX_RING_DOORBELL_STRIDE) == 0;
-
-		ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, skb);
+		ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, NULL);
 	}
 }
 
@@ -283,15 +402,24 @@ static void ionic_rx_fill_cb(void *arg)
 
 void ionic_rx_empty(struct ionic_queue *q)
 {
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_desc_info *cur;
 	struct ionic_rxq_desc *desc;
+	unsigned int i;
 
 	for (cur = q->tail; cur != q->head; cur = cur->next) {
 		desc = cur->desc;
-		dma_unmap_single(dev, le64_to_cpu(desc->addr),
-				 le16_to_cpu(desc->len), DMA_FROM_DEVICE);
-		dev_kfree_skb(cur->cb_arg);
+		desc->addr = 0;
+		desc->len = 0;
+
+		for (i = 0; i < cur->npages; i++) {
+			if (likely(cur->pages[i].page)) {
+				ionic_rx_page_free(q, cur->pages[i].page,
+						   cur->pages[i].dma_addr);
+				cur->pages[i].page = NULL;
+				cur->pages[i].dma_addr = 0;
+			}
+		}
+
 		cur->cb_arg = NULL;
 	}
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index d473b522afc5..9ad568d93ae6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -37,14 +37,14 @@
 #include <linux/slab.h>
 #include "qed.h"
 
-/* Fields of IGU PF CONFIGRATION REGISTER */
+/* Fields of IGU PF CONFIGURATION REGISTER */
 #define IGU_PF_CONF_FUNC_EN       (0x1 << 0)    /* function enable        */
 #define IGU_PF_CONF_MSI_MSIX_EN   (0x1 << 1)    /* MSI/MSIX enable        */
 #define IGU_PF_CONF_INT_LINE_EN   (0x1 << 2)    /* INT enable             */
 #define IGU_PF_CONF_ATTN_BIT_EN   (0x1 << 3)    /* attention enable       */
 #define IGU_PF_CONF_SINGLE_ISR_EN (0x1 << 4)    /* single ISR mode enable */
 #define IGU_PF_CONF_SIMD_MODE     (0x1 << 5)    /* simd all ones mode     */
-/* Fields of IGU VF CONFIGRATION REGISTER */
+/* Fields of IGU VF CONFIGURATION REGISTER */
 #define IGU_VF_CONF_FUNC_EN        (0x1 << 0)	/* function enable        */
 #define IGU_VF_CONF_MSI_MSIX_EN    (0x1 << 1)	/* MSI/MSIX enable        */
 #define IGU_VF_CONF_SINGLE_ISR_EN  (0x1 << 4)	/* single ISR mode enable */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index 9a8fd79611f2..368e88565783 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -305,7 +305,7 @@ void qed_iov_bulletin_set_udp_ports(struct qed_hwfn *p_hwfn,
 
 /**
  * @brief Read sriov related information and allocated resources
- *  reads from configuraiton space, shmem, etc.
+ *  reads from configuration space, shmem, etc.
  *
  * @param p_hwfn
  *
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index 9a6a9a008714..d6cfe4ffbaf3 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -1298,7 +1298,7 @@ void qede_config_rx_mode(struct net_device *ndev)
 	rx_mode.type = QED_FILTER_TYPE_RX_MODE;
 
 	/* Remove all previous unicast secondary macs and multicast macs
-	 * (configrue / leave the primary mac)
+	 * (configure / leave the primary mac)
 	 */
 	rc = qede_set_ucast_rx_mac(edev, QED_FILTER_XCAST_TYPE_REPLACE,
 				   edev->ndev->dev_addr);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index c84ab052ef26..98f92268cbaa 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -213,9 +213,9 @@ static int emac_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct emac_adapter *adpt = netdev_priv(netdev);
 
-	netif_info(adpt, hw, adpt->netdev,
-		   "changing MTU from %d to %d\n", netdev->mtu,
-		   new_mtu);
+	netif_dbg(adpt, hw, adpt->netdev,
+		  "changing MTU from %d to %d\n", netdev->mtu,
+		  new_mtu);
 	netdev->mtu = new_mtu;
 
 	if (netif_running(netdev))
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index c4e961ea44d5..8a7b22301296 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -741,12 +741,6 @@ static void rtl_unlock_config_regs(struct rtl8169_private *tp)
 	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 }
 
-static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
-{
-	pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
-					   PCI_EXP_DEVCTL_READRQ, force);
-}
-
 static bool rtl_is_8125(struct rtl8169_private *tp)
 {
 	return tp->mac_version >= RTL_GIGA_MAC_VER_60;
@@ -1571,7 +1565,7 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev,
 
 	if (dev->mtu > JUMBO_1K &&
 	    tp->mac_version > RTL_GIGA_MAC_VER_06)
-		features &= ~NETIF_F_IP_CSUM;
+		features &= ~(NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO);
 
 	return features;
 }
@@ -4039,14 +4033,12 @@ static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 }
 
 static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -4064,7 +4056,6 @@ static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, 0x3f);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
@@ -4072,32 +4063,15 @@ static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, 0x0c);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-}
-
-static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
-{
-	rtl_tx_performance_tweak(tp,
-		PCI_EXP_DEVCTL_READRQ_512B | PCI_EXP_DEVCTL_NOSNOOP_EN);
-}
-
-static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
-{
-	rtl_tx_performance_tweak(tp,
-		PCI_EXP_DEVCTL_READRQ_4096B | PCI_EXP_DEVCTL_NOSNOOP_EN);
 }
 
 static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-	r8168b_0_hw_jumbo_enable(tp);
-
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0));
 }
 
 static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-	r8168b_0_hw_jumbo_disable(tp);
-
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
@@ -4105,9 +4079,6 @@ static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
 {
 	rtl_unlock_config_regs(tp);
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_11:
-		r8168b_0_hw_jumbo_enable(tp);
-		break;
 	case RTL_GIGA_MAC_VER_12:
 	case RTL_GIGA_MAC_VER_17:
 		r8168b_1_hw_jumbo_enable(tp);
@@ -4131,9 +4102,6 @@ static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
 {
 	rtl_unlock_config_regs(tp);
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_11:
-		r8168b_0_hw_jumbo_disable(tp);
-		break;
 	case RTL_GIGA_MAC_VER_12:
 	case RTL_GIGA_MAC_VER_17:
 		r8168b_1_hw_jumbo_disable(tp);
@@ -4454,18 +4422,11 @@ static void rtl8168g_set_pause_thresholds(struct rtl8169_private *tp,
 	rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, high);
 }
 
-static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
+static void rtl_hw_start_8168b(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 }
 
-static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
-{
-	rtl_hw_start_8168bb(tp);
-
-	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
-}
-
 static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
@@ -4557,19 +4518,6 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 	rtl_set_def_aspm_entry_latency(tp);
 
 	rtl_disable_clock_request(tp);
-
-	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-}
-
-static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
-{
-	rtl_set_def_aspm_entry_latency(tp);
-
-	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
-	rtl_disable_clock_request(tp);
 }
 
 static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
@@ -4583,8 +4531,6 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	rtl_ephy_init(tp, e_info_8168d_4);
 
 	rtl_enable_clock_request(tp);
@@ -4659,8 +4605,6 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
 	rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
@@ -4723,8 +4667,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	rtl_reset_packet_filter(tp);
 	rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f);
 
@@ -4775,8 +4717,7 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
 	rtl_hw_start_8168g(tp);
 
 	/* disable aspm and clock request before access ephy */
-	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
-	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+	rtl_hw_aspm_clkreq_enable(tp, false);
 	rtl_ephy_init(tp, e_info_8168g_2);
 }
 
@@ -4961,8 +4902,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	rtl_reset_packet_filter(tp);
 
 	rtl_eri_set_bits(tp, 0xdc, ERIAR_MASK_1111, BIT(4));
@@ -5020,8 +4959,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	rtl_reset_packet_filter(tp);
 
 	rtl_eri_set_bits(tp, 0xd4, ERIAR_MASK_1111, 0x1f80);
@@ -5124,8 +5061,6 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
 	RTL_W8(tp, DBG_REG, FIX_NAK_1);
 
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	RTL_W8(tp, Config1,
 	       LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
@@ -5141,8 +5076,6 @@ static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
 {
 	rtl_set_def_aspm_entry_latency(tp);
 
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 }
@@ -5203,8 +5136,6 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
 	rtl_ephy_init(tp, e_info_8402);
 
-	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
 	rtl_set_fifo_size(tp, 0x00, 0x00, 0x02, 0x06);
 	rtl_reset_packet_filter(tp);
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
@@ -5358,13 +5289,13 @@ static void rtl_hw_config(struct rtl8169_private *tp)
 		[RTL_GIGA_MAC_VER_08] = rtl_hw_start_8102e_3,
 		[RTL_GIGA_MAC_VER_09] = rtl_hw_start_8102e_2,
 		[RTL_GIGA_MAC_VER_10] = NULL,
-		[RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168bb,
-		[RTL_GIGA_MAC_VER_12] = rtl_hw_start_8168bef,
+		[RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168b,
+		[RTL_GIGA_MAC_VER_12] = rtl_hw_start_8168b,
 		[RTL_GIGA_MAC_VER_13] = NULL,
 		[RTL_GIGA_MAC_VER_14] = NULL,
 		[RTL_GIGA_MAC_VER_15] = NULL,
 		[RTL_GIGA_MAC_VER_16] = NULL,
-		[RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168bef,
+		[RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168b,
 		[RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1,
 		[RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1,
 		[RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2,
@@ -5378,7 +5309,7 @@ static void rtl_hw_config(struct rtl8169_private *tp)
 		[RTL_GIGA_MAC_VER_28] = rtl_hw_start_8168d_4,
 		[RTL_GIGA_MAC_VER_29] = rtl_hw_start_8105e_1,
 		[RTL_GIGA_MAC_VER_30] = rtl_hw_start_8105e_2,
-		[RTL_GIGA_MAC_VER_31] = rtl_hw_start_8168dp,
+		[RTL_GIGA_MAC_VER_31] = rtl_hw_start_8168d,
 		[RTL_GIGA_MAC_VER_32] = rtl_hw_start_8168e_1,
 		[RTL_GIGA_MAC_VER_33] = rtl_hw_start_8168e_1,
 		[RTL_GIGA_MAC_VER_34] = rtl_hw_start_8168e_2,
@@ -5420,11 +5351,6 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168(struct rtl8169_private *tp)
 {
-	if (tp->mac_version == RTL_GIGA_MAC_VER_13 ||
-	    tp->mac_version == RTL_GIGA_MAC_VER_16)
-		pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
-					 PCI_EXP_DEVCTL_NOSNOOP_EN);
-
 	if (rtl_is_8168evl_up(tp))
 		RTL_W8(tp, MaxTxPacketSize, EarlySize);
 	else
@@ -5573,18 +5499,15 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
 
 		data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
 		if (!data) {
-			rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
-			goto err_out;
+			rtl8169_rx_clear(tp);
+			return -ENOMEM;
 		}
 		tp->Rx_databuff[i] = data;
 	}
 
 	rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1);
-	return 0;
 
-err_out:
-	rtl8169_rx_clear(tp);
-	return -ENOMEM;
+	return 0;
 }
 
 static int rtl8169_init_ring(struct rtl8169_private *tp)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index de9aa8c47f1c..5ea14b5fbed8 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2046,7 +2046,9 @@ static int ravb_probe(struct platform_device *pdev)
 	spin_lock_init(&priv->lock);
 	INIT_WORK(&priv->work, ravb_tx_timeout_work);
 
-	priv->phy_interface = of_get_phy_mode(np);
+	error = of_get_phy_mode(np, &priv->phy_interface);
+	if (error && error != -ENODEV)
+		goto out_release;
 
 	priv->no_avb_link = of_property_read_bool(np, "renesas,no-ether-link");
 	priv->avb_link_active_low =
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 7ba35a0bdb29..e19b49c4013e 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3183,6 +3183,7 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev)
 {
 	struct device_node *np = dev->of_node;
 	struct sh_eth_plat_data *pdata;
+	phy_interface_t interface;
 	const char *mac_addr;
 	int ret;
 
@@ -3190,10 +3191,10 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev)
 	if (!pdata)
 		return NULL;
 
-	ret = of_get_phy_mode(np);
-	if (ret < 0)
+	ret = of_get_phy_mode(np, &interface);
+	if (ret)
 		return NULL;
-	pdata->phy_interface = ret;
+	pdata->phy_interface = interface;
 
 	mac_addr = of_get_mac_address(np);
 	if (!IS_ERR(mac_addr))
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 786b158bd305..bc4f951315da 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2189,9 +2189,6 @@ static int rocker_router_fib_event(struct notifier_block *nb,
 	struct rocker_fib_event_work *fib_work;
 	struct fib_notifier_info *info = ptr;
 
-	if (!net_eq(info->net, &init_net))
-		return NOTIFY_DONE;
-
 	if (info->family != AF_INET)
 		return NOTIFY_DONE;
 
@@ -2994,7 +2991,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * the device, so no need to pass a callback.
 	 */
 	rocker->fib_nb.notifier_call = rocker_router_fib_event;
-	err = register_fib_notifier(&rocker->fib_nb, NULL);
+	err = register_fib_notifier(&init_net, &rocker->fib_nb, NULL, NULL);
 	if (err)
 		goto err_register_fib_notifier;
 
@@ -3021,7 +3018,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 err_register_switchdev_blocking_notifier:
 	unregister_switchdev_notifier(&rocker_switchdev_notifier);
 err_register_switchdev_notifier:
-	unregister_fib_notifier(&rocker->fib_nb);
+	unregister_fib_notifier(&init_net, &rocker->fib_nb);
 err_register_fib_notifier:
 	rocker_remove_ports(rocker);
 err_probe_ports:
@@ -3057,7 +3054,7 @@ static void rocker_remove(struct pci_dev *pdev)
 	unregister_switchdev_blocking_notifier(nb);
 
 	unregister_switchdev_notifier(&rocker_switchdev_notifier);
-	unregister_fib_notifier(&rocker->fib_nb);
+	unregister_fib_notifier(&init_net, &rocker->fib_nb);
 	rocker_remove_ports(rocker);
 	rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
 	destroy_workqueue(rocker->rocker_owq);
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index 2412c87561e0..33f79402850d 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
@@ -30,12 +30,15 @@ static int sxgbe_probe_config_dt(struct platform_device *pdev,
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct sxgbe_dma_cfg *dma_cfg;
+	int err;
 
 	if (!np)
 		return -ENODEV;
 
 	*mac = of_get_mac_address(np);
-	plat->interface = of_get_phy_mode(np);
+	err = of_get_phy_mode(np, &plat->interface);
+	if (err && err != -ENODEV)
+		return err;
 
 	plat->bus_id = of_alias_get_id(np, "ethernet");
 	if (plat->bus_id < 0)
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 0ec13f520e90..ad68eb0cb8fd 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -946,8 +946,10 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx)
 		/* Extra channels, even those with TXQs (PTP), do not require
 		 * PIO resources.
 		 */
-		if (!channel->type->want_pio)
+		if (!channel->type->want_pio ||
+		    channel->channel >= efx->xdp_channel_offset)
 			continue;
+
 		efx_for_each_channel_tx_queue(tx_queue, channel) {
 			/* We assign the PIO buffers to queues in
 			 * reverse order to allow for the following
@@ -1296,8 +1298,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
 	int rc;
 
 	channel_vis = max(efx->n_channels,
-			  (efx->n_tx_channels + efx->n_extra_tx_channels) *
-			  EFX_TXQ_TYPES);
+			  ((efx->n_tx_channels + efx->n_extra_tx_channels) *
+			   EFX_TXQ_TYPES) +
+			   efx->n_xdp_channels * efx->xdp_tx_per_channel);
 
 #ifdef EFX_USE_PIO
 	/* Try to allocate PIO buffers if wanted and if the full
@@ -2434,11 +2437,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 	/* TSOv2 is a limited resource that can only be configured on a limited
 	 * number of queues. TSO without checksum offload is not really a thing,
 	 * so we only enable it for those queues.
-	 * TSOv2 cannot be used with Hardware timestamping.
+	 * TSOv2 cannot be used with Hardware timestamping, and is never needed
+	 * for XDP tx.
 	 */
 	if (csum_offload && (nic_data->datapath_caps2 &
 			(1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
-	    !tx_queue->timestamping) {
+	    !tx_queue->timestamping && !tx_queue->xdp_tx) {
 		tso_v2 = true;
 		netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
 				channel->channel);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 2fef7402233e..0fa9972027db 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -226,6 +226,10 @@ static void efx_fini_napi_channel(struct efx_channel *channel);
 static void efx_fini_struct(struct efx_nic *efx);
 static void efx_start_all(struct efx_nic *efx);
 static void efx_stop_all(struct efx_nic *efx);
+static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
+static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
+static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
+			u32 flags);
 
 #define EFX_ASSERT_RESET_SERIALISED(efx)		\
 	do {						\
@@ -340,6 +344,8 @@ static int efx_poll(struct napi_struct *napi, int budget)
 
 	spent = efx_process_channel(channel, budget);
 
+	xdp_do_flush_map();
+
 	if (spent < budget) {
 		if (efx_channel_has_rx_queue(channel) &&
 		    efx->irq_rx_adaptive &&
@@ -579,9 +585,14 @@ efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
 	int number;
 
 	number = channel->channel;
-	if (efx->tx_channel_offset == 0) {
+
+	if (number >= efx->xdp_channel_offset &&
+	    !WARN_ON_ONCE(!efx->n_xdp_channels)) {
+		type = "-xdp";
+		number -= efx->xdp_channel_offset;
+	} else if (efx->tx_channel_offset == 0) {
 		type = "";
-	} else if (channel->channel < efx->tx_channel_offset) {
+	} else if (number < efx->tx_channel_offset) {
 		type = "-rx";
 	} else {
 		type = "-tx";
@@ -651,7 +662,7 @@ static void efx_start_datapath(struct efx_nic *efx)
 	efx->rx_dma_len = (efx->rx_prefix_size +
 			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
 			   efx->type->rx_buffer_padding);
-	rx_buf_len = (sizeof(struct efx_rx_page_state) +
+	rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM +
 		      efx->rx_ip_align + efx->rx_dma_len);
 	if (rx_buf_len <= PAGE_SIZE) {
 		efx->rx_scatter = efx->type->always_rx_scatter;
@@ -774,6 +785,7 @@ static void efx_stop_datapath(struct efx_nic *efx)
 		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
 			efx_fini_tx_queue(tx_queue);
 	}
+	efx->xdp_rxq_info_failed = false;
 }
 
 static void efx_remove_channel(struct efx_channel *channel)
@@ -798,6 +810,8 @@ static void efx_remove_channels(struct efx_nic *efx)
 
 	efx_for_each_channel(channel, efx)
 		efx_remove_channel(channel);
+
+	kfree(efx->xdp_tx_queues);
 }
 
 int
@@ -1435,6 +1449,101 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
 	return count;
 }
 
+static int efx_allocate_msix_channels(struct efx_nic *efx,
+				      unsigned int max_channels,
+				      unsigned int extra_channels,
+				      unsigned int parallelism)
+{
+	unsigned int n_channels = parallelism;
+	int vec_count;
+	int n_xdp_tx;
+	int n_xdp_ev;
+
+	if (efx_separate_tx_channels)
+		n_channels *= 2;
+	n_channels += extra_channels;
+
+	/* To allow XDP transmit to happen from arbitrary NAPI contexts
+	 * we allocate a TX queue per CPU. We share event queues across
+	 * multiple tx queues, assuming tx and ev queues are both
+	 * maximum size.
+	 */
+
+	n_xdp_tx = num_possible_cpus();
+	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
+
+	/* Check resources.
+	 * We need a channel per event queue, plus a VI per tx queue.
+	 * This may be more pessimistic than it needs to be.
+	 */
+	if (n_channels + n_xdp_ev > max_channels) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
+			  n_xdp_ev, n_channels, max_channels);
+		efx->n_xdp_channels = 0;
+		efx->xdp_tx_per_channel = 0;
+		efx->xdp_tx_queue_count = 0;
+	} else {
+		efx->n_xdp_channels = n_xdp_ev;
+		efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
+		efx->xdp_tx_queue_count = n_xdp_tx;
+		n_channels += n_xdp_ev;
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Allocating %d TX and %d event queues for XDP\n",
+			  n_xdp_tx, n_xdp_ev);
+	}
+
+	n_channels = min(n_channels, max_channels);
+
+	vec_count = pci_msix_vec_count(efx->pci_dev);
+	if (vec_count < 0)
+		return vec_count;
+	if (vec_count < n_channels) {
+		netif_err(efx, drv, efx->net_dev,
+			  "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
+			  vec_count, n_channels);
+		netif_err(efx, drv, efx->net_dev,
+			  "WARNING: Performance may be reduced.\n");
+		n_channels = vec_count;
+	}
+
+	efx->n_channels = n_channels;
+
+	/* Do not create the PTP TX queue(s) if PTP uses the MC directly. */
+	if (extra_channels && !efx_ptp_use_mac_tx_timestamps(efx))
+		n_channels--;
+
+	/* Ignore XDP tx channels when creating rx channels. */
+	n_channels -= efx->n_xdp_channels;
+
+	if (efx_separate_tx_channels) {
+		efx->n_tx_channels =
+			min(max(n_channels / 2, 1U),
+			    efx->max_tx_channels);
+		efx->tx_channel_offset =
+			n_channels - efx->n_tx_channels;
+		efx->n_rx_channels =
+			max(n_channels -
+			    efx->n_tx_channels, 1U);
+	} else {
+		efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
+		efx->tx_channel_offset = 0;
+		efx->n_rx_channels = n_channels;
+	}
+
+	if (efx->n_xdp_channels)
+		efx->xdp_channel_offset = efx->tx_channel_offset +
+					  efx->n_tx_channels;
+	else
+		efx->xdp_channel_offset = efx->n_channels;
+
+	netif_dbg(efx, drv, efx->net_dev,
+		  "Allocating %u RX channels\n",
+		  efx->n_rx_channels);
+
+	return efx->n_channels;
+}
+
 /* Probe the number and type of interrupts we are able to obtain, and
  * the resulting numbers of channels and RX queues.
  */
@@ -1449,19 +1558,19 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 			++extra_channels;
 
 	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+		unsigned int parallelism = efx_wanted_parallelism(efx);
 		struct msix_entry xentries[EFX_MAX_CHANNELS];
 		unsigned int n_channels;
 
-		n_channels = efx_wanted_parallelism(efx);
-		if (efx_separate_tx_channels)
-			n_channels *= 2;
-		n_channels += extra_channels;
-		n_channels = min(n_channels, efx->max_channels);
-
-		for (i = 0; i < n_channels; i++)
-			xentries[i].entry = i;
-		rc = pci_enable_msix_range(efx->pci_dev,
-					   xentries, 1, n_channels);
+		rc = efx_allocate_msix_channels(efx, efx->max_channels,
+						extra_channels, parallelism);
+		if (rc >= 0) {
+			n_channels = rc;
+			for (i = 0; i < n_channels; i++)
+				xentries[i].entry = i;
+			rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
+						   n_channels);
+		}
 		if (rc < 0) {
 			/* Fall back to single channel MSI */
 			netif_err(efx, drv, efx->net_dev,
@@ -1480,21 +1589,6 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 		}
 
 		if (rc > 0) {
-			efx->n_channels = n_channels;
-			if (n_channels > extra_channels)
-				n_channels -= extra_channels;
-			if (efx_separate_tx_channels) {
-				efx->n_tx_channels = min(max(n_channels / 2,
-							     1U),
-							 efx->max_tx_channels);
-				efx->n_rx_channels = max(n_channels -
-							 efx->n_tx_channels,
-							 1U);
-			} else {
-				efx->n_tx_channels = min(n_channels,
-							 efx->max_tx_channels);
-				efx->n_rx_channels = n_channels;
-			}
 			for (i = 0; i < efx->n_channels; i++)
 				efx_get_channel(efx, i)->irq =
 					xentries[i].vector;
@@ -1506,6 +1600,8 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 		efx->n_channels = 1;
 		efx->n_rx_channels = 1;
 		efx->n_tx_channels = 1;
+		efx->n_xdp_channels = 0;
+		efx->xdp_channel_offset = efx->n_channels;
 		rc = pci_enable_msi(efx->pci_dev);
 		if (rc == 0) {
 			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
@@ -1524,12 +1620,14 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 		efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
 		efx->n_rx_channels = 1;
 		efx->n_tx_channels = 1;
+		efx->n_xdp_channels = 0;
+		efx->xdp_channel_offset = efx->n_channels;
 		efx->legacy_irq = efx->pci_dev->irq;
 	}
 
-	/* Assign extra channels if possible */
+	/* Assign extra channels if possible, before XDP channels */
 	efx->n_extra_tx_channels = 0;
-	j = efx->n_channels;
+	j = efx->xdp_channel_offset;
 	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
 		if (!efx->extra_channel_type[i])
 			continue;
@@ -1724,29 +1822,50 @@ static void efx_remove_interrupts(struct efx_nic *efx)
 	efx->legacy_irq = 0;
 }
 
-static void efx_set_channels(struct efx_nic *efx)
+static int efx_set_channels(struct efx_nic *efx)
 {
 	struct efx_channel *channel;
 	struct efx_tx_queue *tx_queue;
+	int xdp_queue_number;
 
 	efx->tx_channel_offset =
 		efx_separate_tx_channels ?
 		efx->n_channels - efx->n_tx_channels : 0;
 
+	if (efx->xdp_tx_queue_count) {
+		EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
+
+		/* Allocate array for XDP TX queue lookup. */
+		efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
+					     sizeof(*efx->xdp_tx_queues),
+					     GFP_KERNEL);
+		if (!efx->xdp_tx_queues)
+			return -ENOMEM;
+	}
+
 	/* We need to mark which channels really have RX and TX
 	 * queues, and adjust the TX queue numbers if we have separate
 	 * RX-only and TX-only channels.
 	 */
+	xdp_queue_number = 0;
 	efx_for_each_channel(channel, efx) {
 		if (channel->channel < efx->n_rx_channels)
 			channel->rx_queue.core_index = channel->channel;
 		else
 			channel->rx_queue.core_index = -1;
 
-		efx_for_each_channel_tx_queue(tx_queue, channel)
+		efx_for_each_channel_tx_queue(tx_queue, channel) {
 			tx_queue->queue -= (efx->tx_channel_offset *
 					    EFX_TXQ_TYPES);
+
+			if (efx_channel_is_xdp_tx(channel) &&
+			    xdp_queue_number < efx->xdp_tx_queue_count) {
+				efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
+				xdp_queue_number++;
+			}
+		}
 	}
+	return 0;
 }
 
 static int efx_probe_nic(struct efx_nic *efx)
@@ -1776,7 +1895,9 @@ static int efx_probe_nic(struct efx_nic *efx)
 		if (rc)
 			goto fail1;
 
-		efx_set_channels(efx);
+		rc = efx_set_channels(efx);
+		if (rc)
+			goto fail1;
 
 		/* dimension_resources can fail with EAGAIN */
 		rc = efx->type->dimension_resources(efx);
@@ -2022,6 +2143,10 @@ static void efx_stop_all(struct efx_nic *efx)
 
 static void efx_remove_all(struct efx_nic *efx)
 {
+	rtnl_lock();
+	efx_xdp_setup_prog(efx, NULL);
+	rtnl_unlock();
+
 	efx_remove_channels(efx);
 	efx_remove_filters(efx);
 #ifdef CONFIG_SFC_SRIOV
@@ -2082,6 +2207,8 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
 			channel->irq_moderation_us = rx_usecs;
 		else if (efx_channel_has_tx_queues(channel))
 			channel->irq_moderation_us = tx_usecs;
+		else if (efx_channel_is_xdp_tx(channel))
+			channel->irq_moderation_us = tx_usecs;
 	}
 
 	return 0;
@@ -2277,6 +2404,17 @@ static void efx_watchdog(struct net_device *net_dev)
 	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
 }
 
+static unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
+{
+	/* The maximum MTU that we can fit in a single page, allowing for
+	 * framing, overhead and XDP headroom.
+	 */
+	int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
+		       efx->rx_prefix_size + efx->type->rx_buffer_padding +
+		       efx->rx_ip_align + XDP_PACKET_HEADROOM;
+
+	return PAGE_SIZE - overhead;
+}
 
 /* Context: process, rtnl_lock() held. */
 static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
@@ -2288,6 +2426,14 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
 	if (rc)
 		return rc;
 
+	if (rtnl_dereference(efx->xdp_prog) &&
+	    new_mtu > efx_xdp_max_mtu(efx)) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Requested MTU of %d too big for XDP (max: %d)\n",
+			  new_mtu, efx_xdp_max_mtu(efx));
+		return -EINVAL;
+	}
+
 	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 
 	efx_device_detach_sync(efx);
@@ -2489,8 +2635,65 @@ static const struct net_device_ops efx_netdev_ops = {
 #endif
 	.ndo_udp_tunnel_add	= efx_udp_tunnel_add,
 	.ndo_udp_tunnel_del	= efx_udp_tunnel_del,
+	.ndo_xdp_xmit		= efx_xdp_xmit,
+	.ndo_bpf		= efx_xdp
 };
 
+static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog)
+{
+	struct bpf_prog *old_prog;
+
+	if (efx->xdp_rxq_info_failed) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Unable to bind XDP program due to previous failure of rxq_info\n");
+		return -EINVAL;
+	}
+
+	if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Unable to configure XDP with MTU of %d (max: %d)\n",
+			  efx->net_dev->mtu, efx_xdp_max_mtu(efx));
+		return -EINVAL;
+	}
+
+	old_prog = rtnl_dereference(efx->xdp_prog);
+	rcu_assign_pointer(efx->xdp_prog, prog);
+	/* Release the reference that was originally passed by the caller. */
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	return 0;
+}
+
+/* Context: process, rtnl_lock() held. */
+static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct efx_nic *efx = netdev_priv(dev);
+	struct bpf_prog *xdp_prog;
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return efx_xdp_setup_prog(efx, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp_prog = rtnl_dereference(efx->xdp_prog);
+		xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
+			u32 flags)
+{
+	struct efx_nic *efx = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return -EINVAL;
+
+	return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH);
+}
+
 static void efx_update_name(struct efx_nic *efx)
 {
 	strcpy(efx->name, efx->net_dev->name);
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 04fed7c06618..45c7ae4114ec 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -322,4 +322,7 @@ static inline bool efx_rwsem_assert_write_locked(struct rw_semaphore *sem)
 	return true;
 }
 
+int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
+		       bool flush);
+
 #endif /* EFX_EFX_H */
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 86b965875540..8db593fb9699 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -83,6 +83,10 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
 	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
 	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
 	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
+	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops),
+	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops),
+	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx),
+	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect),
 };
 
 #define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc)
@@ -399,6 +403,19 @@ static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings)
 			}
 		}
 	}
+	if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
+		unsigned short xdp;
+
+		for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
+			n_stats++;
+			if (strings) {
+				snprintf(strings, ETH_GSTRING_LEN,
+					 "tx-xdp-cpu-%hu.tx_packets", xdp);
+				strings += ETH_GSTRING_LEN;
+			}
+		}
+	}
+
 	return n_stats;
 }
 
@@ -509,6 +526,14 @@ static void efx_ethtool_get_stats(struct net_device *net_dev,
 			data++;
 		}
 	}
+	if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
+		int xdp;
+
+		for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
+			data[0] = efx->xdp_tx_queues[xdp]->tx_packets;
+			data++;
+		}
+	}
 
 	efx_ptp_update_stats(efx, data);
 }
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 284a1b047ac2..04e49eac7327 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -27,6 +27,7 @@
 #include <linux/i2c.h>
 #include <linux/mtd/mtd.h>
 #include <net/busy_poll.h>
+#include <net/xdp.h>
 
 #include "enum.h"
 #include "bitfield.h"
@@ -136,7 +137,8 @@ struct efx_special_buffer {
  * struct efx_tx_buffer - buffer state for a TX descriptor
  * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
  *	freed when descriptor completes
- * @option: When @flags & %EFX_TX_BUF_OPTION, a NIC-specific option descriptor.
+ * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data
+ *	member is the associated buffer to drop a page reference on.
  * @dma_addr: DMA address of the fragment.
  * @flags: Flags for allocation and DMA mapping type
  * @len: Length of this fragment.
@@ -146,7 +148,10 @@ struct efx_special_buffer {
  * Only valid if @unmap_len != 0.
  */
 struct efx_tx_buffer {
-	const struct sk_buff *skb;
+	union {
+		const struct sk_buff *skb;
+		struct xdp_frame *xdpf;
+	};
 	union {
 		efx_qword_t option;
 		dma_addr_t dma_addr;
@@ -160,6 +165,7 @@ struct efx_tx_buffer {
 #define EFX_TX_BUF_SKB		2	/* buffer is last part of skb */
 #define EFX_TX_BUF_MAP_SINGLE	8	/* buffer was mapped with dma_map_single() */
 #define EFX_TX_BUF_OPTION	0x10	/* empty buffer for option descriptor */
+#define EFX_TX_BUF_XDP		0x20	/* buffer was sent with XDP */
 
 /**
  * struct efx_tx_queue - An Efx TX queue
@@ -189,6 +195,7 @@ struct efx_tx_buffer {
  * @piobuf_offset: Buffer offset to be specified in PIO descriptors
  * @initialised: Has hardware queue been initialised?
  * @timestamping: Is timestamping enabled for this channel?
+ * @xdp_tx: Is this an XDP tx queue?
  * @handle_tso: TSO xmit preparation handler.  Sets up the TSO metadata and
  *	may also map tx data, depending on the nature of the TSO implementation.
  * @read_count: Current read pointer.
@@ -250,6 +257,7 @@ struct efx_tx_queue {
 	unsigned int piobuf_offset;
 	bool initialised;
 	bool timestamping;
+	bool xdp_tx;
 
 	/* Function pointers used in the fast path. */
 	int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
@@ -363,6 +371,8 @@ struct efx_rx_page_state {
  *	refill was triggered.
  * @recycle_count: RX buffer recycle counter.
  * @slow_fill: Timer used to defer efx_nic_generate_fill_event().
+ * @xdp_rxq_info: XDP specific RX queue information.
+ * @xdp_rxq_info_valid: Is xdp_rxq_info valid data?.
  */
 struct efx_rx_queue {
 	struct efx_nic *efx;
@@ -394,6 +404,8 @@ struct efx_rx_queue {
 	unsigned int slow_fill_count;
 	/* Statistics to supplement MAC stats */
 	unsigned long rx_packets;
+	struct xdp_rxq_info xdp_rxq_info;
+	bool xdp_rxq_info_valid;
 };
 
 enum efx_sync_events_state {
@@ -441,6 +453,10 @@ enum efx_sync_events_state {
  *	lack of descriptors
  * @n_rx_merge_events: Number of RX merged completion events
  * @n_rx_merge_packets: Number of RX packets completed by merged events
+ * @n_rx_xdp_drops: Count of RX packets intentionally dropped due to XDP
+ * @n_rx_xdp_bad_drops: Count of RX packets dropped due to XDP errors
+ * @n_rx_xdp_tx: Count of RX packets retransmitted due to XDP
+ * @n_rx_xdp_redirect: Count of RX packets redirected to a different NIC by XDP
  * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
  *	__efx_rx_packet(), or zero if there is none
  * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
@@ -494,6 +510,10 @@ struct efx_channel {
 	unsigned int n_rx_nodesc_trunc;
 	unsigned int n_rx_merge_events;
 	unsigned int n_rx_merge_packets;
+	unsigned int n_rx_xdp_drops;
+	unsigned int n_rx_xdp_bad_drops;
+	unsigned int n_rx_xdp_tx;
+	unsigned int n_rx_xdp_redirect;
 
 	unsigned int rx_pkt_n_frags;
 	unsigned int rx_pkt_index;
@@ -818,6 +838,8 @@ struct efx_async_filter_insertion {
  * @msi_context: Context for each MSI
  * @extra_channel_types: Types of extra (non-traffic) channels that
  *	should be allocated for this NIC
+ * @xdp_tx_queue_count: Number of entries in %xdp_tx_queues.
+ * @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit.
  * @rxq_entries: Size of receive queues requested by user.
  * @txq_entries: Size of transmit queues requested by user.
  * @txq_stop_thresh: TX queue fill level at or above which we stop it.
@@ -830,6 +852,9 @@ struct efx_async_filter_insertion {
  * @n_rx_channels: Number of channels used for RX (= number of RX queues)
  * @n_tx_channels: Number of channels used for TX
  * @n_extra_tx_channels: Number of extra channels with TX queues
+ * @n_xdp_channels: Number of channels used for XDP TX
+ * @xdp_channel_offset: Offset of zeroth channel used for XPD TX.
+ * @xdp_tx_per_channel: Max number of TX queues on an XDP TX channel.
  * @rx_ip_align: RX DMA address offset to have IP header aligned in
  *	in accordance with NET_IP_ALIGN
  * @rx_dma_len: Current maximum RX DMA length
@@ -894,6 +919,7 @@ struct efx_async_filter_insertion {
  * @loopback_mode: Loopback status
  * @loopback_modes: Supported loopback mode bitmask
  * @loopback_selftest: Offline self-test private state
+ * @xdp_prog: Current XDP programme for this interface
  * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state
  * @filter_state: Architecture-dependent filter table state
  * @rps_mutex: Protects RPS state of all channels
@@ -919,6 +945,8 @@ struct efx_async_filter_insertion {
  * @ptp_data: PTP state data
  * @ptp_warned: has this NIC seen and warned about unexpected PTP events?
  * @vpd_sn: Serial number read from VPD
+ * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
+ *      xdp_rxq_info structures?
  * @monitor_work: Hardware monitor workitem
  * @biu_lock: BIU (bus interface unit) lock
  * @last_irq_cpu: Last CPU to handle a possible test interrupt.  This
@@ -966,6 +994,9 @@ struct efx_nic {
 	const struct efx_channel_type *
 	extra_channel_type[EFX_MAX_EXTRA_CHANNELS];
 
+	unsigned int xdp_tx_queue_count;
+	struct efx_tx_queue **xdp_tx_queues;
+
 	unsigned rxq_entries;
 	unsigned txq_entries;
 	unsigned int txq_stop_thresh;
@@ -984,6 +1015,9 @@ struct efx_nic {
 	unsigned tx_channel_offset;
 	unsigned n_tx_channels;
 	unsigned n_extra_tx_channels;
+	unsigned int n_xdp_channels;
+	unsigned int xdp_channel_offset;
+	unsigned int xdp_tx_per_channel;
 	unsigned int rx_ip_align;
 	unsigned int rx_dma_len;
 	unsigned int rx_buffer_order;
@@ -1053,6 +1087,10 @@ struct efx_nic {
 	u64 loopback_modes;
 
 	void *loopback_selftest;
+	/* We access loopback_selftest immediately before running XDP,
+	 * so we want them next to each other.
+	 */
+	struct bpf_prog __rcu *xdp_prog;
 
 	struct rw_semaphore filter_sem;
 	void *filter_state;
@@ -1082,6 +1120,7 @@ struct efx_nic {
 	bool ptp_warned;
 
 	char *vpd_sn;
+	bool xdp_rxq_info_failed;
 
 	/* The following fields may be written more often */
 
@@ -1473,10 +1512,24 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type)
 	return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type];
 }
 
+static inline struct efx_channel *
+efx_get_xdp_channel(struct efx_nic *efx, unsigned int index)
+{
+	EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_xdp_channels);
+	return efx->channel[efx->xdp_channel_offset + index];
+}
+
+static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel)
+{
+	return channel->channel - channel->efx->xdp_channel_offset <
+	       channel->efx->n_xdp_channels;
+}
+
 static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
 {
-	return channel->type && channel->type->want_txqs &&
-				channel->type->want_txqs(channel);
+	return efx_channel_is_xdp_tx(channel) ||
+	       (channel->type && channel->type->want_txqs &&
+		channel->type->want_txqs(channel));
 }
 
 static inline struct efx_tx_queue *
@@ -1500,7 +1553,8 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue)
 	else								\
 		for (_tx_queue = (_channel)->tx_queue;			\
 		     _tx_queue < (_channel)->tx_queue + EFX_TXQ_TYPES && \
-			     efx_tx_queue_used(_tx_queue);		\
+			     (efx_tx_queue_used(_tx_queue) ||            \
+			      efx_channel_is_xdp_tx(_channel));		\
 		     _tx_queue++)
 
 /* Iterate over all possible TX queues belonging to a channel */
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 85ec07f5a674..a7d9841105d8 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -17,6 +17,8 @@
 #include <linux/iommu.h>
 #include <net/ip.h>
 #include <net/checksum.h>
+#include <net/xdp.h>
+#include <linux/bpf_trace.h>
 #include "net_driver.h"
 #include "efx.h"
 #include "filter.h"
@@ -27,6 +29,9 @@
 /* Preferred number of descriptors to fill at once */
 #define EFX_RX_PREFERRED_BATCH 8U
 
+/* Maximum rx prefix used by any architecture. */
+#define EFX_MAX_RX_PREFIX_SIZE 16
+
 /* Number of RX buffers to recycle pages for.  When creating the RX page recycle
  * ring, this number is divided by the number of buffers per page to calculate
  * the number of pages to store in the RX page recycle ring.
@@ -95,7 +100,7 @@ void efx_rx_config_page_split(struct efx_nic *efx)
 				      EFX_RX_BUF_ALIGNMENT);
 	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
 		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
-		 efx->rx_page_buf_step);
+		(efx->rx_page_buf_step + XDP_PACKET_HEADROOM));
 	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
 		efx->rx_bufs_per_page;
 	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
@@ -185,6 +190,9 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
 		page_offset = sizeof(struct efx_rx_page_state);
 
 		do {
+			page_offset += XDP_PACKET_HEADROOM;
+			dma_addr += XDP_PACKET_HEADROOM;
+
 			index = rx_queue->added_count & rx_queue->ptr_mask;
 			rx_buf = efx_rx_buffer(rx_queue, index);
 			rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
@@ -635,6 +643,123 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
 		netif_receive_skb(skb);
 }
 
+/** efx_do_xdp: perform XDP processing on a received packet
+ *
+ * Returns true if packet should still be delivered.
+ */
+static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
+		       struct efx_rx_buffer *rx_buf, u8 **ehp)
+{
+	u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE];
+	struct efx_rx_queue *rx_queue;
+	struct bpf_prog *xdp_prog;
+	struct xdp_frame *xdpf;
+	struct xdp_buff xdp;
+	u32 xdp_act;
+	s16 offset;
+	int err;
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(efx->xdp_prog);
+	if (!xdp_prog) {
+		rcu_read_unlock();
+		return true;
+	}
+
+	rx_queue = efx_channel_get_rx_queue(channel);
+
+	if (unlikely(channel->rx_pkt_n_frags > 1)) {
+		/* We can't do XDP on fragmented packets - drop. */
+		rcu_read_unlock();
+		efx_free_rx_buffers(rx_queue, rx_buf,
+				    channel->rx_pkt_n_frags);
+		if (net_ratelimit())
+			netif_err(efx, rx_err, efx->net_dev,
+				  "XDP is not possible with multiple receive fragments (%d)\n",
+				  channel->rx_pkt_n_frags);
+		channel->n_rx_xdp_bad_drops++;
+		return false;
+	}
+
+	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr,
+				rx_buf->len, DMA_FROM_DEVICE);
+
+	/* Save the rx prefix. */
+	EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE);
+	memcpy(rx_prefix, *ehp - efx->rx_prefix_size,
+	       efx->rx_prefix_size);
+
+	xdp.data = *ehp;
+	xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+
+	/* No support yet for XDP metadata */
+	xdp_set_data_meta_invalid(&xdp);
+	xdp.data_end = xdp.data + rx_buf->len;
+	xdp.rxq = &rx_queue->xdp_rxq_info;
+
+	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	rcu_read_unlock();
+
+	offset = (u8 *)xdp.data - *ehp;
+
+	switch (xdp_act) {
+	case XDP_PASS:
+		/* Fix up rx prefix. */
+		if (offset) {
+			*ehp += offset;
+			rx_buf->page_offset += offset;
+			rx_buf->len -= offset;
+			memcpy(*ehp - efx->rx_prefix_size, rx_prefix,
+			       efx->rx_prefix_size);
+		}
+		break;
+
+	case XDP_TX:
+		/* Buffer ownership passes to tx on success. */
+		xdpf = convert_to_xdp_frame(&xdp);
+		err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
+		if (unlikely(err != 1)) {
+			efx_free_rx_buffers(rx_queue, rx_buf, 1);
+			if (net_ratelimit())
+				netif_err(efx, rx_err, efx->net_dev,
+					  "XDP TX failed (%d)\n", err);
+			channel->n_rx_xdp_bad_drops++;
+		} else {
+			channel->n_rx_xdp_tx++;
+		}
+		break;
+
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog);
+		if (unlikely(err)) {
+			efx_free_rx_buffers(rx_queue, rx_buf, 1);
+			if (net_ratelimit())
+				netif_err(efx, rx_err, efx->net_dev,
+					  "XDP redirect failed (%d)\n", err);
+			channel->n_rx_xdp_bad_drops++;
+		} else {
+			channel->n_rx_xdp_redirect++;
+		}
+		break;
+
+	default:
+		bpf_warn_invalid_xdp_action(xdp_act);
+		efx_free_rx_buffers(rx_queue, rx_buf, 1);
+		channel->n_rx_xdp_bad_drops++;
+		break;
+
+	case XDP_ABORTED:
+		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		/* Fall through */
+	case XDP_DROP:
+		efx_free_rx_buffers(rx_queue, rx_buf, 1);
+		channel->n_rx_xdp_drops++;
+		break;
+	}
+
+	return xdp_act == XDP_PASS;
+}
+
 /* Handle a received packet.  Second half: Touches packet payload. */
 void __efx_rx_packet(struct efx_channel *channel)
 {
@@ -663,6 +788,9 @@ void __efx_rx_packet(struct efx_channel *channel)
 		goto out;
 	}
 
+	if (!efx_do_xdp(efx, channel, rx_buf, &eh))
+		goto out;
+
 	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
 		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
 
@@ -731,6 +859,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
 {
 	struct efx_nic *efx = rx_queue->efx;
 	unsigned int max_fill, trigger, max_trigger;
+	int rc = 0;
 
 	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
 		  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
@@ -764,6 +893,19 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
 	rx_queue->fast_fill_trigger = trigger;
 	rx_queue->refill_enabled = true;
 
+	/* Initialise XDP queue information */
+	rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
+			      rx_queue->core_index);
+
+	if (rc) {
+		netif_err(efx, rx_err, efx->net_dev,
+			  "Failure to initialise XDP queue information rc=%d\n",
+			  rc);
+		efx->xdp_rxq_info_failed = true;
+	} else {
+		rx_queue->xdp_rxq_info_valid = true;
+	}
+
 	/* Set up RX descriptor ring */
 	efx_nic_init_rx(rx_queue);
 }
@@ -805,6 +947,11 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
 	}
 	kfree(rx_queue->page_ring);
 	rx_queue->page_ring = NULL;
+
+	if (rx_queue->xdp_rxq_info_valid)
+		xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
+
+	rx_queue->xdp_rxq_info_valid = false;
 }
 
 void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 65e81ec1b314..00c1c4402451 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -95,6 +95,8 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
 			   "TX queue %d transmission id %x complete\n",
 			   tx_queue->queue, tx_queue->read_count);
+	} else if (buffer->flags & EFX_TX_BUF_XDP) {
+		xdp_return_frame_rx_napi(buffer->xdpf);
 	}
 
 	buffer->len = 0;
@@ -597,6 +599,94 @@ err:
 	return NETDEV_TX_OK;
 }
 
+static void efx_xdp_return_frames(int n,  struct xdp_frame **xdpfs)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		xdp_return_frame_rx_napi(xdpfs[i]);
+}
+
+/* Transmit a packet from an XDP buffer
+ *
+ * Returns number of packets sent on success, error code otherwise.
+ * Runs in NAPI context, either in our poll (for XDP TX) or a different NIC
+ * (for XDP redirect).
+ */
+int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
+		       bool flush)
+{
+	struct efx_tx_buffer *tx_buffer;
+	struct efx_tx_queue *tx_queue;
+	struct xdp_frame *xdpf;
+	dma_addr_t dma_addr;
+	unsigned int len;
+	int space;
+	int cpu;
+	int i;
+
+	cpu = raw_smp_processor_id();
+
+	if (!efx->xdp_tx_queue_count ||
+	    unlikely(cpu >= efx->xdp_tx_queue_count))
+		return -EINVAL;
+
+	tx_queue = efx->xdp_tx_queues[cpu];
+	if (unlikely(!tx_queue))
+		return -EINVAL;
+
+	if (unlikely(n && !xdpfs))
+		return -EINVAL;
+
+	if (!n)
+		return 0;
+
+	/* Check for available space. We should never need multiple
+	 * descriptors per frame.
+	 */
+	space = efx->txq_entries +
+		tx_queue->read_count - tx_queue->insert_count;
+
+	for (i = 0; i < n; i++) {
+		xdpf = xdpfs[i];
+
+		if (i >= space)
+			break;
+
+		/* We'll want a descriptor for this tx. */
+		prefetchw(__efx_tx_queue_get_insert_buffer(tx_queue));
+
+		len = xdpf->len;
+
+		/* Map for DMA. */
+		dma_addr = dma_map_single(&efx->pci_dev->dev,
+					  xdpf->data, len,
+					  DMA_TO_DEVICE);
+		if (dma_mapping_error(&efx->pci_dev->dev, dma_addr))
+			break;
+
+		/*  Create descriptor and set up for unmapping DMA. */
+		tx_buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+		tx_buffer->xdpf = xdpf;
+		tx_buffer->flags = EFX_TX_BUF_XDP |
+				   EFX_TX_BUF_MAP_SINGLE;
+		tx_buffer->dma_offset = 0;
+		tx_buffer->unmap_len = len;
+		tx_queue->tx_packets++;
+	}
+
+	/* Pass mapped frames to hardware. */
+	if (flush && i > 0)
+		efx_nic_push_buffers(tx_queue);
+
+	if (i == 0)
+		return -EIO;
+
+	efx_xdp_return_frames(n - i, xdpfs + i);
+
+	return i;
+}
+
 /* Remove packets from the TX queue
  *
  * This removes packets from the TX queue, up to and including the
@@ -857,6 +947,8 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 	tx_queue->completed_timestamp_major = 0;
 	tx_queue->completed_timestamp_minor = 0;
 
+	tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
+
 	/* Set up default function pointers. These may get replaced by
 	 * efx_nic_init_tx() based off NIC/queue capabilities.
 	 */
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index deb636d653f3..d242906ae233 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -48,7 +48,7 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/skbuff.h>
-#include <linux/dma-direct.h>
+#include <linux/dma-mapping.h>
 
 #include <net/ip.h>
 
@@ -89,6 +89,7 @@ struct ioc3_private {
 	struct device *dma_dev;
 	u32 *ssram;
 	unsigned long *rxr;		/* pointer to receiver ring */
+	void *tx_ring;
 	struct ioc3_etxd *txr;
 	dma_addr_t rxr_dma;
 	dma_addr_t txr_dma;
@@ -1173,26 +1174,14 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct ioc3 *ioc3;
 	unsigned long ioc3_base, ioc3_size;
 	u32 vendor, model, rev;
-	int err, pci_using_dac;
+	int err;
 
 	/* Configure DMA attributes. */
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (!err) {
-		pci_using_dac = 1;
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-		if (err < 0) {
-			pr_err("%s: Unable to obtain 64 bit DMA for consistent allocations\n",
-			       pci_name(pdev));
-			goto out;
-		}
-	} else {
-		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (err) {
-			pr_err("%s: No usable DMA configuration, aborting.\n",
-			       pci_name(pdev));
-			goto out;
-		}
-		pci_using_dac = 0;
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		pr_err("%s: No usable DMA configuration, aborting.\n",
+		       pci_name(pdev));
+		goto out;
 	}
 
 	if (pci_enable_device(pdev))
@@ -1204,9 +1193,6 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_disable;
 	}
 
-	if (pci_using_dac)
-		dev->features |= NETIF_F_HIGHDMA;
-
 	err = pci_request_regions(pdev, "ioc3");
 	if (err)
 		goto out_free;
@@ -1242,8 +1228,8 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	ioc3_stop(ip);
 
 	/* Allocate rx ring.  4kb = 512 entries, must be 4kb aligned */
-	ip->rxr = dma_direct_alloc_pages(ip->dma_dev, RX_RING_SIZE,
-					 &ip->rxr_dma, GFP_ATOMIC, 0);
+	ip->rxr = dma_alloc_coherent(ip->dma_dev, RX_RING_SIZE, &ip->rxr_dma,
+				     GFP_KERNEL);
 	if (!ip->rxr) {
 		pr_err("ioc3-eth: rx ring allocation failed\n");
 		err = -ENOMEM;
@@ -1251,14 +1237,16 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	/* Allocate tx rings.  16kb = 128 bufs, must be 16kb aligned  */
-	ip->txr = dma_direct_alloc_pages(ip->dma_dev, TX_RING_SIZE,
-					 &ip->txr_dma,
-					 GFP_KERNEL | __GFP_ZERO, 0);
-	if (!ip->txr) {
+	ip->tx_ring = dma_alloc_coherent(ip->dma_dev, TX_RING_SIZE + SZ_16K - 1,
+					 &ip->txr_dma, GFP_KERNEL);
+	if (!ip->tx_ring) {
 		pr_err("ioc3-eth: tx ring allocation failed\n");
 		err = -ENOMEM;
 		goto out_stop;
 	}
+	/* Align TX ring */
+	ip->txr = PTR_ALIGN(ip->tx_ring, SZ_16K);
+	ip->txr_dma = ALIGN(ip->txr_dma, SZ_16K);
 
 	ioc3_init(dev);
 
@@ -1288,7 +1276,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->netdev_ops		= &ioc3_netdev_ops;
 	dev->ethtool_ops	= &ioc3_ethtool_ops;
 	dev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
-	dev->features		= NETIF_F_IP_CSUM;
+	dev->features		= NETIF_F_IP_CSUM | NETIF_F_HIGHDMA;
 
 	sw_physid1 = ioc3_mdio_read(dev, ip->mii.phy_id, MII_PHYSID1);
 	sw_physid2 = ioc3_mdio_read(dev, ip->mii.phy_id, MII_PHYSID2);
@@ -1313,11 +1301,11 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 out_stop:
 	del_timer_sync(&ip->ioc3_timer);
 	if (ip->rxr)
-		dma_direct_free_pages(ip->dma_dev, RX_RING_SIZE, ip->rxr,
-				      ip->rxr_dma, 0);
-	if (ip->txr)
-		dma_direct_free_pages(ip->dma_dev, TX_RING_SIZE, ip->txr,
-				      ip->txr_dma, 0);
+		dma_free_coherent(ip->dma_dev, RX_RING_SIZE, ip->rxr,
+				  ip->rxr_dma);
+	if (ip->tx_ring)
+		dma_free_coherent(ip->dma_dev, TX_RING_SIZE, ip->tx_ring,
+				  ip->txr_dma);
 out_res:
 	pci_release_regions(pdev);
 out_free:
@@ -1335,10 +1323,8 @@ static void ioc3_remove_one(struct pci_dev *pdev)
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct ioc3_private *ip = netdev_priv(dev);
 
-	dma_direct_free_pages(ip->dma_dev, RX_RING_SIZE, ip->rxr,
-			      ip->rxr_dma, 0);
-	dma_direct_free_pages(ip->dma_dev, TX_RING_SIZE, ip->txr,
-			      ip->txr_dma, 0);
+	dma_free_coherent(ip->dma_dev, RX_RING_SIZE, ip->rxr, ip->rxr_dma);
+	dma_free_coherent(ip->dma_dev, TX_RING_SIZE, ip->tx_ring, ip->txr_dma);
 
 	unregister_netdev(dev);
 	del_timer_sync(&ip->ioc3_timer);
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index f9e6744d8fd6..869a498e3b5e 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -252,7 +252,6 @@
 #define NETSEC_XDP_CONSUMED      BIT(0)
 #define NETSEC_XDP_TX            BIT(1)
 #define NETSEC_XDP_REDIR         BIT(2)
-#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR)
 
 enum ring_id {
 	NETSEC_RING_TX = 0,
@@ -661,6 +660,7 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv)
 			bytes += desc->skb->len;
 			dev_kfree_skb(desc->skb);
 		} else {
+			bytes += desc->xdpf->len;
 			xdp_return_frame(desc->xdpf);
 		}
 next:
@@ -847,8 +847,8 @@ static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
 		enum dma_data_direction dma_dir =
 			page_pool_get_dma_dir(rx_ring->page_pool);
 
-		dma_handle = page_pool_get_dma_addr(page) +
-			NETSEC_RXBUF_HEADROOM;
+		dma_handle = page_pool_get_dma_addr(page) + xdpf->headroom +
+			sizeof(*xdpf);
 		dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len,
 					   dma_dir);
 		tx_desc.buf_type = TYPE_NETSEC_XDP_TX;
@@ -858,6 +858,7 @@ static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
 	tx_desc.addr = xdpf->data;
 	tx_desc.len = xdpf->len;
 
+	netdev_sent_queue(priv->ndev, xdpf->len);
 	netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf);
 
 	return NETSEC_XDP_TX;
@@ -1030,7 +1031,7 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 
 next:
 		if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) ||
-		    xdp_result & NETSEC_XDP_RX_OK) {
+		    xdp_result) {
 			ndev->stats.rx_packets++;
 			ndev->stats.rx_bytes += xdp.data_end - xdp.data;
 		}
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 6e984d5a729f..f7e927ad67fa 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1565,10 +1565,10 @@ static int ave_probe(struct platform_device *pdev)
 		return -EINVAL;
 
 	np = dev->of_node;
-	phy_mode = of_get_phy_mode(np);
-	if ((int)phy_mode < 0) {
+	ret = of_get_phy_mode(np, &phy_mode);
+	if (ret) {
 		dev_err(dev, "phy-mode not found\n");
-		return -EINVAL;
+		return ret;
 	}
 
 	irq = platform_get_irq(pdev, 0);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
index 527f93320a5a..d0d2d0fc5f0a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -61,9 +61,10 @@ static void anarion_gmac_exit(struct platform_device *pdev, void *priv)
 
 static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev)
 {
-	int phy_mode;
-	void __iomem *ctl_block;
 	struct anarion_gmac *gmac;
+	phy_interface_t phy_mode;
+	void __iomem *ctl_block;
+	int err;
 
 	ctl_block = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(ctl_block)) {
@@ -78,7 +79,10 @@ static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev)
 
 	gmac->ctl_block = (uintptr_t)ctl_block;
 
-	phy_mode = of_get_phy_mode(pdev->dev.of_node);
+	err = of_get_phy_mode(pdev->dev.of_node, &phy_mode);
+	if (err)
+		return ERR_PTR(err);
+
 	switch (phy_mode) {
 	case PHY_INTERFACE_MODE_RGMII:		/* Fall through */
 	case PHY_INTERFACE_MODE_RGMII_ID	/* Fall through */:
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 0d21082ceb93..6ae13dc19510 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -189,9 +189,10 @@ static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed)
 static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 {
 	struct device *dev = &gmac->pdev->dev;
+	int ret;
 
-	gmac->phy_mode = of_get_phy_mode(dev->of_node);
-	if ((int)gmac->phy_mode < 0) {
+	ret = of_get_phy_mode(dev->of_node, &gmac->phy_mode);
+	if (ret) {
 		dev_err(dev, "missing phy mode property\n");
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 79f2ee37afed..bdb80421acac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -54,7 +54,7 @@ struct mediatek_dwmac_plat_data {
 	struct device_node *np;
 	struct regmap *peri_regmap;
 	struct device *dev;
-	int phy_mode;
+	phy_interface_t phy_mode;
 	bool rmii_rxc;
 };
 
@@ -130,6 +130,31 @@ static void mt2712_delay_ps2stage(struct mediatek_dwmac_plat_data *plat)
 	}
 }
 
+static void mt2712_delay_stage2ps(struct mediatek_dwmac_plat_data *plat)
+{
+	struct mac_delay_struct *mac_delay = &plat->mac_delay;
+
+	switch (plat->phy_mode) {
+	case PHY_INTERFACE_MODE_MII:
+	case PHY_INTERFACE_MODE_RMII:
+		/* 550ps per stage for MII/RMII */
+		mac_delay->tx_delay *= 550;
+		mac_delay->rx_delay *= 550;
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+		/* 170ps per stage for RGMII */
+		mac_delay->tx_delay *= 170;
+		mac_delay->rx_delay *= 170;
+		break;
+	default:
+		dev_err(plat->dev, "phy interface not supported\n");
+		break;
+	}
+}
+
 static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat)
 {
 	struct mac_delay_struct *mac_delay = &plat->mac_delay;
@@ -199,6 +224,8 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat)
 	regmap_write(plat->peri_regmap, PERI_ETH_DLY, delay_val);
 	regmap_write(plat->peri_regmap, PERI_ETH_DLY_FINE, fine_val);
 
+	mt2712_delay_stage2ps(plat);
+
 	return 0;
 }
 
@@ -216,6 +243,7 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat)
 {
 	struct mac_delay_struct *mac_delay = &plat->mac_delay;
 	u32 tx_delay_ps, rx_delay_ps;
+	int err;
 
 	plat->peri_regmap = syscon_regmap_lookup_by_phandle(plat->np, "mediatek,pericfg");
 	if (IS_ERR(plat->peri_regmap)) {
@@ -223,10 +251,10 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat)
 		return PTR_ERR(plat->peri_regmap);
 	}
 
-	plat->phy_mode = of_get_phy_mode(plat->np);
-	if (plat->phy_mode < 0) {
+	err = of_get_phy_mode(plat->np, &plat->phy_mode);
+	if (err) {
 		dev_err(plat->dev, "not find phy-mode\n");
-		return -EINVAL;
+		return err;
 	}
 
 	if (!of_property_read_u32(plat->np, "mediatek,tx-delay-ps", &tx_delay_ps)) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 306da8f6b7d5..bd6c01004913 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -338,10 +338,9 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 	}
 
 	dwmac->dev = &pdev->dev;
-	dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-	if ((int)dwmac->phy_mode < 0) {
+	ret = of_get_phy_mode(pdev->dev.of_node, &dwmac->phy_mode);
+	if (ret) {
 		dev_err(&pdev->dev, "missing phy-mode property\n");
-		ret = -EINVAL;
 		goto err_remove_config_dt;
 	}
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index e2e469c37a4d..dc50ba13a746 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -37,7 +37,7 @@ struct rk_gmac_ops {
 
 struct rk_priv_data {
 	struct platform_device *pdev;
-	int phy_iface;
+	phy_interface_t phy_iface;
 	struct regulator *regulator;
 	bool suspended;
 	const struct rk_gmac_ops *ops;
@@ -1224,7 +1224,7 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
 	if (!bsp_priv)
 		return ERR_PTR(-ENOMEM);
 
-	bsp_priv->phy_iface = of_get_phy_mode(dev->of_node);
+	of_get_phy_mode(dev->of_node, &bsp_priv->phy_iface);
 	bsp_priv->ops = ops;
 
 	bsp_priv->regulator = devm_regulator_get_optional(dev, "phy");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index e9fd661f7995..e1b63df6f96f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -116,7 +116,7 @@
 #define ETH_PHY_SEL_MII		0x0
 
 struct sti_dwmac {
-	int interface;		/* MII interface */
+	phy_interface_t interface;	/* MII interface */
 	bool ext_phyclk;	/* Clock from external PHY */
 	u32 tx_retime_src;	/* TXCLK Retiming*/
 	struct clk *clk;	/* PHY clock */
@@ -269,7 +269,12 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
 		return err;
 	}
 
-	dwmac->interface = of_get_phy_mode(np);
+	err = of_get_phy_mode(np, &dwmac->interface);
+	if (err && err != -ENODEV) {
+		dev_err(dev, "Can't get phy-mode\n");
+		return err;
+	}
+
 	dwmac->regmap = regmap;
 	dwmac->gmac_en = of_property_read_bool(np, "st,gmac_en");
 	dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 4ef041bdf6a1..95ffaec4ab42 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -155,18 +155,14 @@ static int stm32mp1_clk_prepare(struct stm32_dwmac *dwmac, bool prepare)
 		ret = clk_prepare_enable(dwmac->syscfg_clk);
 		if (ret)
 			return ret;
-
-		if (dwmac->clk_eth_ck) {
-			ret = clk_prepare_enable(dwmac->clk_eth_ck);
-			if (ret) {
-				clk_disable_unprepare(dwmac->syscfg_clk);
+		ret = clk_prepare_enable(dwmac->clk_eth_ck);
+		if (ret) {
+			clk_disable_unprepare(dwmac->syscfg_clk);
 				return ret;
-			}
 		}
 	} else {
 		clk_disable_unprepare(dwmac->syscfg_clk);
-		if (dwmac->clk_eth_ck)
-			clk_disable_unprepare(dwmac->clk_eth_ck);
+		clk_disable_unprepare(dwmac->clk_eth_ck);
 	}
 	return ret;
 }
@@ -175,7 +171,7 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 {
 	struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
 	u32 reg = dwmac->mode_reg;
-	int val, ret;
+	int val;
 
 	switch (plat_dat->interface) {
 	case PHY_INTERFACE_MODE_MII:
@@ -211,8 +207,8 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 	}
 
 	/* Need to update PMCCLRR (clear register) */
-	ret = regmap_write(dwmac->regmap, reg + SYSCFG_PMCCLRR_OFFSET,
-			   dwmac->ops->syscfg_eth_mask);
+	regmap_write(dwmac->regmap, reg + SYSCFG_PMCCLRR_OFFSET,
+		     dwmac->ops->syscfg_eth_mask);
 
 	/* Update PMCSETR (set register) */
 	return regmap_update_bits(dwmac->regmap, reg,
@@ -320,12 +316,10 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
 		return PTR_ERR(dwmac->clk_ethstp);
 	}
 
-	/*  Clock for sysconfig */
+	/*  Optional Clock for sysconfig */
 	dwmac->syscfg_clk = devm_clk_get(dev, "syscfg-clk");
-	if (IS_ERR(dwmac->syscfg_clk)) {
-		dev_err(dev, "No syscfg clock provided...\n");
-		return PTR_ERR(dwmac->syscfg_clk);
-	}
+	if (IS_ERR(dwmac->syscfg_clk))
+		dwmac->syscfg_clk = NULL;
 
 	/* Get IRQ information early to have an ability to ask for deferred
 	 * probe if needed before we went too far with resource allocation.
@@ -437,8 +431,7 @@ static int stm32mp1_suspend(struct stm32_dwmac *dwmac)
 
 	clk_disable_unprepare(dwmac->clk_tx);
 	clk_disable_unprepare(dwmac->syscfg_clk);
-	if (dwmac->clk_eth_ck)
-		clk_disable_unprepare(dwmac->clk_eth_ck);
+	clk_disable_unprepare(dwmac->clk_eth_ck);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index ddcc191febdb..eefb06d918c8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1105,6 +1105,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	struct stmmac_resources stmmac_res;
 	struct sunxi_priv_data *gmac;
 	struct device *dev = &pdev->dev;
+	phy_interface_t interface;
 	int ret;
 	struct stmmac_priv *priv;
 	struct net_device *ndev;
@@ -1178,10 +1179,10 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = of_get_phy_mode(dev->of_node);
-	if (ret < 0)
+	ret = of_get_phy_mode(dev->of_node, &interface);
+	if (ret)
 		return -EINVAL;
-	plat_dat->interface = ret;
+	plat_dat->interface = interface;
 
 	/* platform data specifying hardware features and callbacks.
 	 * hardware features were copied from Allwinner drivers.
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index a299da3971b4..26353ef616b8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -18,7 +18,7 @@
 #include "stmmac_platform.h"
 
 struct sunxi_priv_data {
-	int interface;
+	phy_interface_t interface;
 	int clk_enabled;
 	struct clk *tx_clk;
 	struct regulator *regulator;
@@ -118,7 +118,11 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
 		goto err_remove_config_dt;
 	}
 
-	gmac->interface = of_get_phy_mode(dev->of_node);
+	ret = of_get_phy_mode(dev->of_node, &gmac->interface);
+	if (ret && ret != -ENODEV) {
+		dev_err(dev, "Can't get phy-mode\n");
+		goto err_remove_config_dt;
+	}
 
 	gmac->tx_clk = devm_clk_get(dev, "allwinner_gmac_tx");
 	if (IS_ERR(gmac->tx_clk)) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 3d69da112625..d0356fbd1e43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -130,7 +130,6 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits,
 		writel(mcfilterbits[0], ioaddr + GMAC_HASH_LOW);
 		writel(mcfilterbits[1], ioaddr + GMAC_HASH_HIGH);
 		return;
-		break;
 	case 7:
 		numhashregs = 4;
 		break;
@@ -140,7 +139,6 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits,
 	default:
 		pr_debug("STMMAC: err in setting multicast filter\n");
 		return;
-		break;
 	}
 	for (regs = 0; regs < numhashregs; regs++)
 		writel(mcfilterbits[regs],
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 89a3420eba42..07e97f45755d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -43,6 +43,10 @@
 #define GMAC_ARP_ADDR			0x00000210
 #define GMAC_ADDR_HIGH(reg)		(0x300 + reg * 8)
 #define GMAC_ADDR_LOW(reg)		(0x304 + reg * 8)
+#define GMAC_L3L4_CTRL(reg)		(0x900 + (reg) * 0x30)
+#define GMAC_L4_ADDR(reg)		(0x904 + (reg) * 0x30)
+#define GMAC_L3_ADDR0(reg)		(0x910 + (reg) * 0x30)
+#define GMAC_L3_ADDR1(reg)		(0x914 + (reg) * 0x30)
 
 /* RX Queues Routing */
 #define GMAC_RXQCTRL_AVCPQ_MASK		GENMASK(2, 0)
@@ -67,6 +71,7 @@
 #define GMAC_PACKET_FILTER_PCF		BIT(7)
 #define GMAC_PACKET_FILTER_HPF		BIT(10)
 #define GMAC_PACKET_FILTER_VTFE		BIT(16)
+#define GMAC_PACKET_FILTER_IPFE		BIT(20)
 
 #define GMAC_MAX_PERFECT_ADDRESSES	128
 
@@ -202,9 +207,11 @@ enum power_event {
 #define GMAC_HW_FEAT_MIISEL		BIT(0)
 
 /* MAC HW features1 bitmap */
+#define GMAC_HW_FEAT_L3L4FNUM		GENMASK(30, 27)
 #define GMAC_HW_HASH_TB_SZ		GENMASK(25, 24)
 #define GMAC_HW_FEAT_AVSEL		BIT(20)
 #define GMAC_HW_TSOEN			BIT(18)
+#define GMAC_HW_ADDR64			GENMASK(15, 14)
 #define GMAC_HW_TXFIFOSIZE		GENMASK(10, 6)
 #define GMAC_HW_RXFIFOSIZE		GENMASK(4, 0)
 
@@ -227,6 +234,21 @@ enum power_event {
 #define GMAC_HI_DCS_SHIFT		16
 #define GMAC_HI_REG_AE			BIT(31)
 
+/* L3/L4 Filters regs */
+#define GMAC_L4DPIM0			BIT(21)
+#define GMAC_L4DPM0			BIT(20)
+#define GMAC_L4SPIM0			BIT(19)
+#define GMAC_L4SPM0			BIT(18)
+#define GMAC_L4PEN0			BIT(16)
+#define GMAC_L3DAIM0			BIT(5)
+#define GMAC_L3DAM0			BIT(4)
+#define GMAC_L3SAIM0			BIT(3)
+#define GMAC_L3SAM0			BIT(2)
+#define GMAC_L3PEN0			BIT(0)
+#define GMAC_L4DP0			GENMASK(31, 16)
+#define GMAC_L4DP0_SHIFT		16
+#define GMAC_L4SP0			GENMASK(15, 0)
+
 /*  MTL registers */
 #define MTL_OPERATION_MODE		0x00000c00
 #define MTL_FRPE			BIT(15)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 66e60c7e9850..a2ecab5dc8c8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -733,7 +733,7 @@ static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable)
 }
 
 static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash,
-				    bool is_double)
+				    u16 perfect_match, bool is_double)
 {
 	void __iomem *ioaddr = hw->pcsr;
 
@@ -748,6 +748,16 @@ static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash,
 		}
 
 		writel(value, ioaddr + GMAC_VLAN_TAG);
+	} else if (perfect_match) {
+		u32 value = GMAC_VLAN_ETV;
+
+		if (is_double) {
+			value |= GMAC_VLAN_EDVLP;
+			value |= GMAC_VLAN_ESVL;
+			value |= GMAC_VLAN_DOVLTC;
+		}
+
+		writel(value | perfect_match, ioaddr + GMAC_VLAN_TAG);
 	} else {
 		u32 value = readl(ioaddr + GMAC_VLAN_TAG);
 
@@ -799,6 +809,106 @@ static void dwmac4_set_arp_offload(struct mac_device_info *hw, bool en,
 	writel(value, ioaddr + GMAC_CONFIG);
 }
 
+static int dwmac4_config_l3_filter(struct mac_device_info *hw, u32 filter_no,
+				   bool en, bool ipv6, bool sa, bool inv,
+				   u32 match)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	value = readl(ioaddr + GMAC_PACKET_FILTER);
+	value |= GMAC_PACKET_FILTER_IPFE;
+	writel(value, ioaddr + GMAC_PACKET_FILTER);
+
+	value = readl(ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+	/* For IPv6 not both SA/DA filters can be active */
+	if (ipv6) {
+		value |= GMAC_L3PEN0;
+		value &= ~(GMAC_L3SAM0 | GMAC_L3SAIM0);
+		value &= ~(GMAC_L3DAM0 | GMAC_L3DAIM0);
+		if (sa) {
+			value |= GMAC_L3SAM0;
+			if (inv)
+				value |= GMAC_L3SAIM0;
+		} else {
+			value |= GMAC_L3DAM0;
+			if (inv)
+				value |= GMAC_L3DAIM0;
+		}
+	} else {
+		value &= ~GMAC_L3PEN0;
+		if (sa) {
+			value |= GMAC_L3SAM0;
+			if (inv)
+				value |= GMAC_L3SAIM0;
+		} else {
+			value |= GMAC_L3DAM0;
+			if (inv)
+				value |= GMAC_L3DAIM0;
+		}
+	}
+
+	writel(value, ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+	if (sa) {
+		writel(match, ioaddr + GMAC_L3_ADDR0(filter_no));
+	} else {
+		writel(match, ioaddr + GMAC_L3_ADDR1(filter_no));
+	}
+
+	if (!en)
+		writel(0, ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+	return 0;
+}
+
+static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no,
+				   bool en, bool udp, bool sa, bool inv,
+				   u32 match)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+
+	value = readl(ioaddr + GMAC_PACKET_FILTER);
+	value |= GMAC_PACKET_FILTER_IPFE;
+	writel(value, ioaddr + GMAC_PACKET_FILTER);
+
+	value = readl(ioaddr + GMAC_L3L4_CTRL(filter_no));
+	if (udp) {
+		value |= GMAC_L4PEN0;
+	} else {
+		value &= ~GMAC_L4PEN0;
+	}
+
+	value &= ~(GMAC_L4SPM0 | GMAC_L4SPIM0);
+	value &= ~(GMAC_L4DPM0 | GMAC_L4DPIM0);
+	if (sa) {
+		value |= GMAC_L4SPM0;
+		if (inv)
+			value |= GMAC_L4SPIM0;
+	} else {
+		value |= GMAC_L4DPM0;
+		if (inv)
+			value |= GMAC_L4DPIM0;
+	}
+
+	writel(value, ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+	if (sa) {
+		value = match & GMAC_L4SP0;
+	} else {
+		value = (match << GMAC_L4DP0_SHIFT) & GMAC_L4DP0;
+	}
+
+	writel(value, ioaddr + GMAC_L4_ADDR(filter_no));
+
+	if (!en)
+		writel(0, ioaddr + GMAC_L3L4_CTRL(filter_no));
+
+	return 0;
+}
+
 const struct stmmac_ops dwmac4_ops = {
 	.core_init = dwmac4_core_init,
 	.set_mac = stmmac_set_mac,
@@ -828,11 +938,14 @@ const struct stmmac_ops dwmac4_ops = {
 	.pcs_get_adv_lp = dwmac4_get_adv_lp,
 	.debug = dwmac4_debug,
 	.set_filter = dwmac4_set_filter,
+	.flex_pps_config = dwmac5_flex_pps_config,
 	.set_mac_loopback = dwmac4_set_mac_loopback,
 	.update_vlan_hash = dwmac4_update_vlan_hash,
 	.sarc_configure = dwmac4_sarc_configure,
 	.enable_vlan = dwmac4_enable_vlan,
 	.set_arp_offload = dwmac4_set_arp_offload,
+	.config_l3_filter = dwmac4_config_l3_filter,
+	.config_l4_filter = dwmac4_config_l4_filter,
 };
 
 const struct stmmac_ops dwmac410_ops = {
@@ -869,6 +982,8 @@ const struct stmmac_ops dwmac410_ops = {
 	.sarc_configure = dwmac4_sarc_configure,
 	.enable_vlan = dwmac4_enable_vlan,
 	.set_arp_offload = dwmac4_set_arp_offload,
+	.config_l3_filter = dwmac4_config_l3_filter,
+	.config_l4_filter = dwmac4_config_l4_filter,
 };
 
 const struct stmmac_ops dwmac510_ops = {
@@ -910,6 +1025,8 @@ const struct stmmac_ops dwmac510_ops = {
 	.sarc_configure = dwmac4_sarc_configure,
 	.enable_vlan = dwmac4_enable_vlan,
 	.set_arp_offload = dwmac4_set_arp_offload,
+	.config_l3_filter = dwmac4_config_l3_filter,
+	.config_l4_filter = dwmac4_config_l4_filter,
 };
 
 int dwmac4_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 15eb1abba91d..707ab5eba8da 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -431,8 +431,8 @@ static void dwmac4_get_addr(struct dma_desc *p, unsigned int *addr)
 
 static void dwmac4_set_addr(struct dma_desc *p, dma_addr_t addr)
 {
-	p->des0 = cpu_to_le32(addr);
-	p->des1 = 0;
+	p->des0 = cpu_to_le32(lower_32_bits(addr));
+	p->des1 = cpu_to_le32(upper_32_bits(addr));
 }
 
 static void dwmac4_clear(struct dma_desc *p)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 68c157979b94..b24c89572745 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -79,6 +79,10 @@ static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr,
 	value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
 	writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
+	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame))
+		writel(upper_32_bits(dma_rx_phy),
+		       ioaddr + DMA_CHAN_RX_BASE_ADDR_HI(chan));
+
 	writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RX_BASE_ADDR(chan));
 }
 
@@ -97,6 +101,10 @@ static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr,
 
 	writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
+	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && likely(dma_cfg->eame))
+		writel(upper_32_bits(dma_tx_phy),
+		       ioaddr + DMA_CHAN_TX_BASE_ADDR_HI(chan));
+
 	writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan));
 }
 
@@ -132,6 +140,9 @@ static void dwmac4_dma_init(void __iomem *ioaddr,
 	if (dma_cfg->aal)
 		value |= DMA_SYS_BUS_AAL;
 
+	if (dma_cfg->eame)
+		value |= DMA_SYS_BUS_EAME;
+
 	writel(value, ioaddr + DMA_SYS_BUS_MODE);
 }
 
@@ -353,9 +364,27 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
 
 	/* MAC HW feature1 */
 	hw_cap = readl(ioaddr + GMAC_HW_FEATURE1);
+	dma_cap->l3l4fnum = (hw_cap & GMAC_HW_FEAT_L3L4FNUM) >> 27;
 	dma_cap->hash_tb_sz = (hw_cap & GMAC_HW_HASH_TB_SZ) >> 24;
 	dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20;
 	dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18;
+
+	dma_cap->addr64 = (hw_cap & GMAC_HW_ADDR64) >> 14;
+	switch (dma_cap->addr64) {
+	case 0:
+		dma_cap->addr64 = 32;
+		break;
+	case 1:
+		dma_cap->addr64 = 40;
+		break;
+	case 2:
+		dma_cap->addr64 = 48;
+		break;
+	default:
+		dma_cap->addr64 = 32;
+		break;
+	}
+
 	/* RX and TX FIFO sizes are encoded as log2(n / 128). Undo that by
 	 * shifting and store the sizes in bytes.
 	 */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index b66da0237d2a..5299fa1001a3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -65,6 +65,7 @@
 #define DMA_SYS_BUS_MB			BIT(14)
 #define DMA_AXI_1KBBE			BIT(13)
 #define DMA_SYS_BUS_AAL			BIT(12)
+#define DMA_SYS_BUS_EAME		BIT(11)
 #define DMA_AXI_BLEN256			BIT(7)
 #define DMA_AXI_BLEN128			BIT(6)
 #define DMA_AXI_BLEN64			BIT(5)
@@ -91,7 +92,9 @@
 #define DMA_CHAN_CONTROL(x)		DMA_CHANX_BASE_ADDR(x)
 #define DMA_CHAN_TX_CONTROL(x)		(DMA_CHANX_BASE_ADDR(x) + 0x4)
 #define DMA_CHAN_RX_CONTROL(x)		(DMA_CHANX_BASE_ADDR(x) + 0x8)
+#define DMA_CHAN_TX_BASE_ADDR_HI(x)	(DMA_CHANX_BASE_ADDR(x) + 0x10)
 #define DMA_CHAN_TX_BASE_ADDR(x)	(DMA_CHANX_BASE_ADDR(x) + 0x14)
+#define DMA_CHAN_RX_BASE_ADDR_HI(x)	(DMA_CHANX_BASE_ADDR(x) + 0x18)
 #define DMA_CHAN_RX_BASE_ADDR(x)	(DMA_CHANX_BASE_ADDR(x) + 0x1c)
 #define DMA_CHAN_TX_END_ADDR(x)		(DMA_CHANX_BASE_ADDR(x) + 0x20)
 #define DMA_CHAN_RX_END_ADDR(x)		(DMA_CHANX_BASE_ADDR(x) + 0x28)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 070bd7d1ae4c..9553d2bec1a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -556,7 +556,7 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw,
 }
 
 static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash,
-				      bool is_double)
+				      u16 perfect_match, bool is_double)
 {
 	void __iomem *ioaddr = hw->pcsr;
 
@@ -577,6 +577,21 @@ static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash,
 		}
 
 		writel(value, ioaddr + XGMAC_VLAN_TAG);
+	} else if (perfect_match) {
+		u32 value = readl(ioaddr + XGMAC_PACKET_FILTER);
+
+		value |= XGMAC_FILTER_VTFE;
+
+		writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+		value = XGMAC_VLAN_ETV;
+		if (is_double) {
+			value |= XGMAC_VLAN_EDVLP;
+			value |= XGMAC_VLAN_ESVL;
+			value |= XGMAC_VLAN_DOVLTC;
+		}
+
+		writel(value | perfect_match, ioaddr + XGMAC_VLAN_TAG);
 	} else {
 		u32 value = readl(ioaddr + XGMAC_PACKET_FILTER);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index f70ca5300b82..f148cb2061d8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -27,7 +27,10 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr,
 	if (dma_cfg->aal)
 		value |= XGMAC_AAL;
 
-	writel(value | XGMAC_EAME, ioaddr + XGMAC_DMA_SYSBUS_MODE);
+	if (dma_cfg->eame)
+		value |= XGMAC_EAME;
+
+	writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
 }
 
 static void dwxgmac2_dma_init_chan(void __iomem *ioaddr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index ddb851d99618..1303d1e9a18f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -357,7 +357,7 @@ struct stmmac_ops {
 			     struct stmmac_rss *cfg, u32 num_rxq);
 	/* VLAN */
 	void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash,
-				 bool is_double);
+				 u16 perfect_match, bool is_double);
 	void (*enable_vlan)(struct mac_device_info *hw, u32 type);
 	/* TX Timestamp */
 	int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f826365c979d..199c4f938bb2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4214,15 +4214,25 @@ static u32 stmmac_vid_crc32_le(__le16 vid_le)
 static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double)
 {
 	u32 crc, hash = 0;
-	u16 vid;
+	int count = 0;
+	u16 vid = 0;
 
 	for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
 		__le16 vid_le = cpu_to_le16(vid);
 		crc = bitrev32(~stmmac_vid_crc32_le(vid_le)) >> 28;
 		hash |= (1 << crc);
+		count++;
+	}
+
+	if (!priv->dma_cap.vlhash) {
+		if (count > 2) /* VID = 0 always passes filter */
+			return -EOPNOTSUPP;
+
+		vid = cpu_to_le16(vid);
+		hash = 0;
 	}
 
-	return stmmac_update_vlan_hash(priv, priv->hw, hash, is_double);
+	return stmmac_update_vlan_hash(priv, priv->hw, hash, vid, is_double);
 }
 
 static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid)
@@ -4231,8 +4241,6 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
 	bool is_double = false;
 	int ret;
 
-	if (!priv->dma_cap.vlhash)
-		return -EOPNOTSUPP;
 	if (be16_to_cpu(proto) == ETH_P_8021AD)
 		is_double = true;
 
@@ -4251,8 +4259,6 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	bool is_double = false;
 
-	if (!priv->dma_cap.vlhash)
-		return -EOPNOTSUPP;
 	if (be16_to_cpu(proto) == ETH_P_8021AD)
 		is_double = true;
 
@@ -4522,6 +4528,13 @@ int stmmac_dvr_probe(struct device *device,
 		if (!ret) {
 			dev_info(priv->device, "Using %d bits DMA width\n",
 				 priv->dma_cap.addr64);
+
+			/*
+			 * If more than 32 bits can be addressed, make sure to
+			 * enable enhanced addressing mode.
+			 */
+			if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+				priv->plat->dma_cfg->eame = true;
 		} else {
 			ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32));
 			if (ret) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 170c3a052b14..bedaff0c13bd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -412,9 +412,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 		*mac = NULL;
 	}
 
-	plat->phy_interface = of_get_phy_mode(np);
-	if (plat->phy_interface < 0)
-		return ERR_PTR(plat->phy_interface);
+	rc = of_get_phy_mode(np, &plat->phy_interface);
+	if (rc)
+		return ERR_PTR(rc);
 
 	plat->interface = stmmac_of_get_mac_mode(np);
 	if (plat->interface < 0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index ac3f658105c0..f3d8b9336b8e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -877,16 +877,13 @@ out:
 	return 0;
 }
 
-static int stmmac_test_vlanfilt(struct stmmac_priv *priv)
+static int __stmmac_test_vlanfilt(struct stmmac_priv *priv)
 {
 	struct stmmac_packet_attrs attr = { };
 	struct stmmac_test_priv *tpriv;
 	struct sk_buff *skb = NULL;
 	int ret = 0, i;
 
-	if (!priv->dma_cap.vlhash)
-		return -EOPNOTSUPP;
-
 	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
 	if (!tpriv)
 		return -ENOMEM;
@@ -952,16 +949,32 @@ cleanup:
 	return ret;
 }
 
-static int stmmac_test_dvlanfilt(struct stmmac_priv *priv)
+static int stmmac_test_vlanfilt(struct stmmac_priv *priv)
+{
+	if (!priv->dma_cap.vlhash)
+		return -EOPNOTSUPP;
+
+	return __stmmac_test_vlanfilt(priv);
+}
+
+static int stmmac_test_vlanfilt_perfect(struct stmmac_priv *priv)
+{
+	int ret, prev_cap = priv->dma_cap.vlhash;
+
+	priv->dma_cap.vlhash = 0;
+	ret = __stmmac_test_vlanfilt(priv);
+	priv->dma_cap.vlhash = prev_cap;
+
+	return ret;
+}
+
+static int __stmmac_test_dvlanfilt(struct stmmac_priv *priv)
 {
 	struct stmmac_packet_attrs attr = { };
 	struct stmmac_test_priv *tpriv;
 	struct sk_buff *skb = NULL;
 	int ret = 0, i;
 
-	if (!priv->dma_cap.vlhash)
-		return -EOPNOTSUPP;
-
 	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
 	if (!tpriv)
 		return -ENOMEM;
@@ -1028,6 +1041,25 @@ cleanup:
 	return ret;
 }
 
+static int stmmac_test_dvlanfilt(struct stmmac_priv *priv)
+{
+	if (!priv->dma_cap.vlhash)
+		return -EOPNOTSUPP;
+
+	return __stmmac_test_dvlanfilt(priv);
+}
+
+static int stmmac_test_dvlanfilt_perfect(struct stmmac_priv *priv)
+{
+	int ret, prev_cap = priv->dma_cap.vlhash;
+
+	priv->dma_cap.vlhash = 0;
+	ret = __stmmac_test_dvlanfilt(priv);
+	priv->dma_cap.vlhash = prev_cap;
+
+	return ret;
+}
+
 #ifdef CONFIG_NET_CLS_ACT
 static int stmmac_test_rxp(struct stmmac_priv *priv)
 {
@@ -1702,119 +1734,127 @@ static const struct stmmac_test {
 	int (*fn)(struct stmmac_priv *priv);
 } stmmac_selftests[] = {
 	{
-		.name = "MAC Loopback         ",
+		.name = "MAC Loopback               ",
 		.lb = STMMAC_LOOPBACK_MAC,
 		.fn = stmmac_test_mac_loopback,
 	}, {
-		.name = "PHY Loopback         ",
+		.name = "PHY Loopback               ",
 		.lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */
 		.fn = stmmac_test_phy_loopback,
 	}, {
-		.name = "MMC Counters         ",
+		.name = "MMC Counters               ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_mmc,
 	}, {
-		.name = "EEE                  ",
+		.name = "EEE                        ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_eee,
 	}, {
-		.name = "Hash Filter MC       ",
+		.name = "Hash Filter MC             ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_hfilt,
 	}, {
-		.name = "Perfect Filter UC    ",
+		.name = "Perfect Filter UC          ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_pfilt,
 	}, {
-		.name = "MC Filter            ",
+		.name = "MC Filter                  ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_mcfilt,
 	}, {
-		.name = "UC Filter            ",
+		.name = "UC Filter                  ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_ucfilt,
 	}, {
-		.name = "Flow Control         ",
+		.name = "Flow Control               ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_flowctrl,
 	}, {
-		.name = "RSS                  ",
+		.name = "RSS                        ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_rss,
 	}, {
-		.name = "VLAN Filtering       ",
+		.name = "VLAN Filtering             ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_vlanfilt,
 	}, {
-		.name = "Double VLAN Filtering",
+		.name = "VLAN Filtering (perf)      ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_vlanfilt_perfect,
+	}, {
+		.name = "Double VLAN Filter         ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_dvlanfilt,
 	}, {
-		.name = "Flexible RX Parser   ",
+		.name = "Double VLAN Filter (perf)  ",
+		.lb = STMMAC_LOOPBACK_PHY,
+		.fn = stmmac_test_dvlanfilt_perfect,
+	}, {
+		.name = "Flexible RX Parser         ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_rxp,
 	}, {
-		.name = "SA Insertion (desc)  ",
+		.name = "SA Insertion (desc)        ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_desc_sai,
 	}, {
-		.name = "SA Replacement (desc)",
+		.name = "SA Replacement (desc)      ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_desc_sar,
 	}, {
-		.name = "SA Insertion (reg)  ",
+		.name = "SA Insertion (reg)         ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_reg_sai,
 	}, {
-		.name = "SA Replacement (reg)",
+		.name = "SA Replacement (reg)       ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_reg_sar,
 	}, {
-		.name = "VLAN TX Insertion   ",
+		.name = "VLAN TX Insertion          ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_vlanoff,
 	}, {
-		.name = "SVLAN TX Insertion  ",
+		.name = "SVLAN TX Insertion         ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_svlanoff,
 	}, {
-		.name = "L3 DA Filtering     ",
+		.name = "L3 DA Filtering            ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_l3filt_da,
 	}, {
-		.name = "L3 SA Filtering     ",
+		.name = "L3 SA Filtering            ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_l3filt_sa,
 	}, {
-		.name = "L4 DA TCP Filtering ",
+		.name = "L4 DA TCP Filtering        ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_l4filt_da_tcp,
 	}, {
-		.name = "L4 SA TCP Filtering ",
+		.name = "L4 SA TCP Filtering        ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_l4filt_sa_tcp,
 	}, {
-		.name = "L4 DA UDP Filtering ",
+		.name = "L4 DA UDP Filtering        ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_l4filt_da_udp,
 	}, {
-		.name = "L4 SA UDP Filtering ",
+		.name = "L4 SA UDP Filtering        ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_l4filt_sa_udp,
 	}, {
-		.name = "ARP Offload         ",
+		.name = "ARP Offload                ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_arpoffload,
 	}, {
-		.name = "Jumbo Frame         ",
+		.name = "Jumbo Frame                ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_jumbo,
 	}, {
-		.name = "Multichannel Jumbo  ",
+		.name = "Multichannel Jumbo         ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_mjumbo,
 	}, {
-		.name = "Split Header        ",
+		.name = "Split Header               ",
 		.lb = STMMAC_LOOPBACK_PHY,
 		.fn = stmmac_test_sph,
 	},
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index f298d714efd6..329671e66fe4 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2619,11 +2619,10 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 				i);
 			goto no_phy_slave;
 		}
-		slave_data->phy_if = of_get_phy_mode(slave_node);
-		if (slave_data->phy_if < 0) {
+		ret = of_get_phy_mode(slave_node, &slave_data->phy_if);
+		if (ret) {
 			dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n",
 				i);
-			ret = slave_data->phy_if;
 			goto err_node_put;
 		}
 
diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h
index 362c5a986869..8bfa761fa552 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.h
+++ b/drivers/net/ethernet/ti/cpsw_priv.h
@@ -275,7 +275,7 @@ struct cpsw_slave_data {
 	struct device_node *slave_node;
 	struct device_node *phy_node;
 	char		phy_id[MII_BUS_ID_SIZE];
-	int		phy_if;
+	phy_interface_t	phy_if;
 	u8		mac_addr[ETH_ALEN];
 	u16		dual_emac_res_vlan;	/* Reserved VLAN for DualEMAC */
 	struct phy	*ifphy;
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 61136428e2c0..729ce09dded9 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -459,7 +459,7 @@ int cpts_register(struct cpts *cpts)
 	cpts_write32(cpts, CPTS_EN, control);
 	cpts_write32(cpts, TS_PEND_EN, int_enable);
 
-	timecounter_init(&cpts->tc, &cpts->cc, ktime_to_ns(ktime_get_real()));
+	timecounter_init(&cpts->tc, &cpts->cc, ktime_get_real_ns());
 
 	cpts->clock = ptp_clock_register(&cpts->info, cpts->dev);
 	if (IS_ERR(cpts->clock)) {
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 2c1fac33136c..86a3f42a3dcc 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2291,6 +2291,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
 	struct gbe_slave *slave = gbe_intf->slave;
 	phy_interface_t phy_mode;
 	bool has_phy = false;
+	int err;
 
 	void (*hndlr)(struct net_device *) = gbe_adjust_link;
 
@@ -2320,11 +2321,11 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
 		slave->phy_port_t = PORT_MII;
 	} else if (slave->link_interface == RGMII_LINK_MAC_PHY) {
 		has_phy = true;
-		phy_mode = of_get_phy_mode(slave->node);
+		err = of_get_phy_mode(slave->node, &phy_mode);
 		/* if phy-mode is not present, default to
 		 * PHY_INTERFACE_MODE_RGMII
 		 */
-		if (phy_mode < 0)
+		if (err)
 			phy_mode = PHY_INTERFACE_MODE_RGMII;
 
 		if (!phy_interface_mode_is_rgmii(phy_mode)) {
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 676006f32f91..8f32db6d2c45 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1761,11 +1761,9 @@ static int axienet_probe(struct platform_device *pdev)
 			goto free_netdev;
 		}
 	} else {
-		lp->phy_mode = of_get_phy_mode(pdev->dev.of_node);
-		if ((int)lp->phy_mode < 0) {
-			ret = -EINVAL;
+		ret = of_get_phy_mode(pdev->dev.of_node, &lp->phy_mode);
+		if (ret)
 			goto free_netdev;
-		}
 	}
 
 	/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
@@ -1790,10 +1788,6 @@ static int axienet_probe(struct platform_device *pdev)
 		/* Check for these resources directly on the Ethernet node. */
 		struct resource *res = platform_get_resource(pdev,
 							     IORESOURCE_MEM, 1);
-		if (!res) {
-			dev_err(&pdev->dev, "unable to get DMA memory resource\n");
-			goto free_netdev;
-		}
 		lp->dma_regs = devm_ioremap_resource(&pdev->dev, res);
 		lp->rx_irq = platform_get_irq(pdev, 1);
 		lp->tx_irq = platform_get_irq(pdev, 0);