Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next

Pull networking updates from David Miller: "Another merge window, another pull full of stuff: 1) Support alternative names for network devices, from Jiri Pirko. 2) Introduce per-netns netdev notifiers, also from Jiri Pirko. 3) Support MSG_PEEK in vsock/virtio, from Matias Ezequiel Vara Larsen. 4) Allow compiling out the TLS TOE code, from Jakub Kicinski. 5) Add several new tracepoints to the kTLS code, also from Jakub. 6) Support set channels ethtool callback in ena driver, from Sameeh Jubran. 7) New SCTP events SCTP_ADDR_ADDED, SCTP_ADDR_REMOVED, SCTP_ADDR_MADE_PRIM, and SCTP_SEND_FAILED_EVENT. From Xin Long. 8) Add XDP support to mvneta driver, from Lorenzo Bianconi. 9) Lots of netfilter hw offload fixes, cleanups and enhancements, from Pablo Neira Ayuso. 10) PTP support for aquantia chips, from Egor Pomozov. 11) Add UDP segmentation offload support to igb, ixgbe, and i40e. From Josh Hunt. 12) Add smart nagle to tipc, from Jon Maloy. 13) Support L2 field rewrite by TC offloads in bnxt_en, from Venkat Duvvuru. 14) Add a flow mask cache to OVS, from Tonghao Zhang. 15) Add XDP support to ice driver, from Maciej Fijalkowski. 16) Add AF_XDP support to ice driver, from Krzysztof Kazimierczak. 17) Support UDP GSO offload in atlantic driver, from Igor Russkikh. 18) Support it in stmmac driver too, from Jose Abreu. 19) Support TIPC encryption and auth, from Tuong Lien. 20) Introduce BPF trampolines, from Alexei Starovoitov. 21) Make page_pool API more numa friendly, from Saeed Mahameed. 22) Introduce route hints to ipv4 and ipv6, from Paolo Abeni. 
23) Add UDP segmentation offload to cxgb4, from Rahul Lakkireddy" * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1857 commits) libbpf: Fix usage of u32 in userspace code mm: Implement no-MMU variant of vmalloc_user_node_flags slip: Fix use-after-free Read in slip_open net: dsa: sja1105: fix sja1105_parse_rgmii_delays() macvlan: schedule bc_work even if error enetc: add support Credit Based Shaper(CBS) for hardware offload net: phy: add helpers phy_(un)lock_mdio_bus mdio_bus: don't use managed reset-controller ax88179_178a: add ethtool_op_get_ts_info() mlxsw: spectrum_router: Fix use of uninitialized adjacency index mlxsw: spectrum_router: After underlay moves, demote conflicting tunnels bpf: Simplify __bpf_arch_text_poke poke type handling bpf: Introduce BPF_TRACE_x helper for the tracing tests bpf: Add bpf_jit_blinding_enabled for !CONFIG_BPF_JIT bpf, testing: Add various tail call test cases bpf, x86: Emit patchable direct jump as tail call bpf: Constant map key tracking for prog array pokes bpf: Add poke dependency tracking for prog array maps bpf: Add initial poke descriptor table for jit images bpf: Move owner type, jited info into array auxiliary data ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2019-11-25 20:02:57 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-11-25 20:02:57 -0800
commit: 386403a115f95997c2715691226e11a7b5cffcfd (patch)
tree: a685df70bd3d5b295683713818ddf0752c3d75b6 /drivers/net/ethernet/chelsio
parent: Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6 (diff)
parent: Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next (diff)
download: linux-dev-386403a115f95997c2715691226e11a7b5cffcfd.tar.xz
linux-dev-386403a115f95997c2715691226e11a7b5cffcfd.zip
25 files changed, 3251 insertions, 656 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 20390f6afbb4..a4b4d475abf8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -8,7 +8,8 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
 	      cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
 	      cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o cxgb4_mps.o \
-	      cudbg_common.o cudbg_lib.o cudbg_zlib.o
+	      cudbg_common.o cudbg_lib.o cudbg_zlib.o cxgb4_tc_mqprio.o \
+	      cxgb4_tc_matchall.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
index 69746696a929..f5be3ee1bdb4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -325,6 +325,9 @@ enum cudbg_qdesc_qtype {
 	CUDBG_QTYPE_CRYPTO_FLQ,
 	CUDBG_QTYPE_TLS_RXQ,
 	CUDBG_QTYPE_TLS_FLQ,
+	CUDBG_QTYPE_ETHOFLD_TXQ,
+	CUDBG_QTYPE_ETHOFLD_RXQ,
+	CUDBG_QTYPE_ETHOFLD_FLQ,
 	CUDBG_QTYPE_MAX,
 };
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index c2e92786608b..19c11568113a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/sort.h>
+#include <linux/string.h>
 
 #include "t4_regs.h"
 #include "cxgb4.h"
@@ -776,24 +777,18 @@ static int cudbg_get_mem_region(struct adapter *padap,
 				struct cudbg_mem_desc *mem_desc)
 {
 	u8 mc, found = 0;
-	u32 i, idx = 0;
-	int rc;
+	u32 idx = 0;
+	int rc, i;
 
 	rc = cudbg_meminfo_get_mem_index(padap, meminfo, mem_type, &mc);
 	if (rc)
 		return rc;
 
-	for (i = 0; i < ARRAY_SIZE(cudbg_region); i++) {
-		if (!strcmp(cudbg_region[i], region_name)) {
-			found = 1;
-			idx = i;
-			break;
-		}
-	}
-	if (!found)
+	i = match_string(cudbg_region, ARRAY_SIZE(cudbg_region), region_name);
+	if (i < 0)
 		return -EINVAL;
 
-	found = 0;
+	idx = i;
 	for (i = 0; i < meminfo->mem_c; i++) {
 		if (meminfo->mem[i].idx >= ARRAY_SIZE(cudbg_region))
 			continue; /* Skip holes */
@@ -2930,6 +2925,10 @@ void cudbg_fill_qdesc_num_and_size(const struct adapter *padap,
 	tot_size += CXGB4_ULD_MAX * MAX_ULD_QSETS * SGE_MAX_IQ_SIZE *
 		    MAX_RXQ_DESC_SIZE;
 
+	/* ETHOFLD TXQ, RXQ, and FLQ */
+	tot_entries += MAX_OFLD_QSETS * 3;
+	tot_size += MAX_OFLD_QSETS * MAX_TXQ_ENTRIES * MAX_TXQ_DESC_SIZE;
+
 	tot_size += sizeof(struct cudbg_ver_hdr) +
 		    sizeof(struct cudbg_qdesc_info) +
 		    sizeof(struct cudbg_qdesc_entry) * tot_entries;
@@ -3087,6 +3086,23 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
 		}
 	}
 
+	/* ETHOFLD TXQ */
+	if (s->eohw_txq)
+		for (i = 0; i < s->eoqsets; i++)
+			QDESC_GET_TXQ(&s->eohw_txq[i].q,
+				      CUDBG_QTYPE_ETHOFLD_TXQ, out);
+
+	/* ETHOFLD RXQ and FLQ */
+	if (s->eohw_rxq) {
+		for (i = 0; i < s->eoqsets; i++)
+			QDESC_GET_RXQ(&s->eohw_rxq[i].rspq,
+				      CUDBG_QTYPE_ETHOFLD_RXQ, out);
+
+		for (i = 0; i < s->eoqsets; i++)
+			QDESC_GET_FLQ(&s->eohw_rxq[i].fl,
+				      CUDBG_QTYPE_ETHOFLD_FLQ, out);
+	}
+
 out_unlock:
 	mutex_unlock(&uld_mutex);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 1fbb640e896a..a70ac2097892 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -392,6 +392,7 @@ struct adapter_params {
 	struct arch_specific_params arch;  /* chip specific params */
 	unsigned char offload;
 	unsigned char crypto;		/* HW capability for crypto */
+	unsigned char ethofld;		/* QoS support */
 
 	unsigned char bypass;
 	unsigned char hash_filter;
@@ -602,6 +603,8 @@ struct port_info {
 	u8 vivld;
 	u8 smt_idx;
 	u8 rx_cchan;
+
+	bool tc_block_shared;
 };
 
 struct dentry;
@@ -711,6 +714,7 @@ struct sge_eth_rxq {                /* SW Ethernet Rx queue */
 	struct sge_rspq rspq;
 	struct sge_fl fl;
 	struct sge_eth_stats stats;
+	struct msix_info *msix;
 } ____cacheline_aligned_in_smp;
 
 struct sge_ofld_stats {             /* offload queue statistics */
@@ -724,13 +728,19 @@ struct sge_ofld_rxq {               /* SW offload Rx queue */
 	struct sge_rspq rspq;
 	struct sge_fl fl;
 	struct sge_ofld_stats stats;
+	struct msix_info *msix;
 } ____cacheline_aligned_in_smp;
 
 struct tx_desc {
 	__be64 flit[8];
 };
 
-struct tx_sw_desc;
+struct ulptx_sgl;
+
+struct tx_sw_desc {
+	struct sk_buff *skb; /* SKB to free after getting completion */
+	dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* DMA mapped addresses */
+};
 
 struct sge_txq {
 	unsigned int  in_use;       /* # of in-use Tx descriptors */
@@ -762,6 +772,7 @@ struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
 	u8 dbqt;                    /* SGE Doorbell Queue Timer in use */
 	unsigned int dbqtimerix;    /* SGE Doorbell Queue Timer Index */
 	unsigned long tso;          /* # of TSO requests */
+	unsigned long uso;          /* # of USO requests */
 	unsigned long tx_cso;       /* # of Tx checksum offloads */
 	unsigned long vlan_ins;     /* # of Tx VLAN insertions */
 	unsigned long mapping_err;  /* # of I/O MMU packet mapping errors */
@@ -788,7 +799,6 @@ struct sge_ctrl_txq {               /* state for an SGE control Tx queue */
 struct sge_uld_rxq_info {
 	char name[IFNAMSIZ];	/* name of ULD driver */
 	struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */
-	u16 *msix_tbl;		/* msix_tbl for uld */
 	u16 *rspq_id;		/* response queue id's of rxq */
 	u16 nrxq;		/* # of ingress uld queues */
 	u16 nciq;		/* # of completion queues */
@@ -801,6 +811,51 @@ struct sge_uld_txq_info {
 	u16 ntxq;		/* # of egress uld queues */
 };
 
+enum sge_eosw_state {
+	CXGB4_EO_STATE_CLOSED = 0, /* Not ready to accept traffic */
+	CXGB4_EO_STATE_FLOWC_OPEN_SEND, /* Send FLOWC open request */
+	CXGB4_EO_STATE_FLOWC_OPEN_REPLY, /* Waiting for FLOWC open reply */
+	CXGB4_EO_STATE_ACTIVE, /* Ready to accept traffic */
+	CXGB4_EO_STATE_FLOWC_CLOSE_SEND, /* Send FLOWC close request */
+	CXGB4_EO_STATE_FLOWC_CLOSE_REPLY, /* Waiting for FLOWC close reply */
+};
+
+struct sge_eosw_txq {
+	spinlock_t lock; /* Per queue lock to synchronize completions */
+	enum sge_eosw_state state; /* Current ETHOFLD State */
+	struct tx_sw_desc *desc; /* Descriptor ring to hold packets */
+	u32 ndesc; /* Number of descriptors */
+	u32 pidx; /* Current Producer Index */
+	u32 last_pidx; /* Last successfully transmitted Producer Index */
+	u32 cidx; /* Current Consumer Index */
+	u32 last_cidx; /* Last successfully reclaimed Consumer Index */
+	u32 flowc_idx; /* Descriptor containing a FLOWC request */
+	u32 inuse; /* Number of packets held in ring */
+
+	u32 cred; /* Current available credits */
+	u32 ncompl; /* # of completions posted */
+	u32 last_compl; /* # of credits consumed since last completion req */
+
+	u32 eotid; /* Index into EOTID table in software */
+	u32 hwtid; /* Hardware EOTID index */
+
+	u32 hwqid; /* Underlying hardware queue index */
+	struct net_device *netdev; /* Pointer to netdevice */
+	struct tasklet_struct qresume_tsk; /* Restarts the queue */
+	struct completion completion; /* completion for FLOWC rendezvous */
+};
+
+struct sge_eohw_txq {
+	spinlock_t lock; /* Per queue lock */
+	struct sge_txq q; /* HW Txq */
+	struct adapter *adap; /* Backpointer to adapter */
+	unsigned long tso; /* # of TSO requests */
+	unsigned long uso; /* # of USO requests */
+	unsigned long tx_cso; /* # of Tx checksum offloads */
+	unsigned long vlan_ins; /* # of Tx VLAN insertions */
+	unsigned long mapping_err; /* # of I/O MMU packet mapping errors */
+};
+
 struct sge {
 	struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
 	struct sge_eth_txq ptptxq;
@@ -814,11 +869,16 @@ struct sge {
 	struct sge_rspq intrq ____cacheline_aligned_in_smp;
 	spinlock_t intrq_lock;
 
+	struct sge_eohw_txq *eohw_txq;
+	struct sge_ofld_rxq *eohw_rxq;
+
 	u16 max_ethqsets;           /* # of available Ethernet queue sets */
 	u16 ethqsets;               /* # of active Ethernet queue sets */
 	u16 ethtxq_rover;           /* Tx queue to clean up next */
 	u16 ofldqsets;              /* # of active ofld queue sets */
 	u16 nqs_per_uld;	    /* # of Rx queues per ULD */
+	u16 eoqsets;                /* # of ETHOFLD queues */
+
 	u16 timer_val[SGE_NTIMERS];
 	u8 counter_val[SGE_NCOUNTERS];
 	u16 dbqtimer_tick;
@@ -841,6 +901,9 @@ struct sge {
 	unsigned long *blocked_fl;
 	struct timer_list rx_timer; /* refills starving FLs */
 	struct timer_list tx_timer; /* checks Tx queues */
+
+	int fwevtq_msix_idx; /* Index to firmware event queue MSI-X info */
+	int nd_msix_idx; /* Index to non-data interrupts MSI-X info */
 };
 
 #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
@@ -870,13 +933,13 @@ struct hash_mac_addr {
 	unsigned int iface_mac;
 };
 
-struct uld_msix_bmap {
+struct msix_bmap {
 	unsigned long *msix_bmap;
 	unsigned int mapsize;
 	spinlock_t lock; /* lock for acquiring bitmap */
 };
 
-struct uld_msix_info {
+struct msix_info {
 	unsigned short vec;
 	char desc[IFNAMSIZ + 10];
 	unsigned int idx;
@@ -945,14 +1008,9 @@ struct adapter {
 	struct cxgb4_virt_res vres;
 	unsigned int swintr;
 
-	struct msix_info {
-		unsigned short vec;
-		char desc[IFNAMSIZ + 10];
-		cpumask_var_t aff_mask;
-	} msix_info[MAX_INGQ + 1];
-	struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
-	struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
-	int msi_idx;
+	/* MSI-X Info for NIC and OFLD queues */
+	struct msix_info *msix_info;
+	struct msix_bmap msix_bmap;
 
 	struct doorbell_stats db_stats;
 	struct sge sge;
@@ -1044,6 +1102,12 @@ struct adapter {
 #if IS_ENABLED(CONFIG_THERMAL)
 	struct ch_thermal ch_thermal;
 #endif
+
+	/* TC MQPRIO offload */
+	struct cxgb4_tc_mqprio *tc_mqprio;
+
+	/* TC MATCHALL classifier offload */
+	struct cxgb4_tc_matchall *tc_matchall;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1073,10 +1137,12 @@ enum {
 
 enum {
 	SCHED_CLASS_LEVEL_CL_RL = 0,    /* class rate limiter */
+	SCHED_CLASS_LEVEL_CH_RL = 2,    /* channel rate limiter */
 };
 
 enum {
 	SCHED_CLASS_MODE_CLASS = 0,     /* per-class scheduling */
+	SCHED_CLASS_MODE_FLOW,          /* per-flow scheduling */
 };
 
 enum {
@@ -1087,11 +1153,6 @@ enum {
 	SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
 };
 
-struct tx_sw_desc {                /* SW state per Tx descriptor */
-	struct sk_buff *skb;
-	struct ulptx_sgl *sgl;
-};
-
 /* Support for "sched_queue" command to allow one or more NIC TX Queues
  * to be bound to a TX Scheduling Class.
  */
@@ -1100,6 +1161,14 @@ struct ch_sched_queue {
 	s8   class;    /* class index */
 };
 
+/* Support for "sched_flowc" command to allow one or more FLOWC
+ * to be bound to a TX Scheduling Class.
+ */
+struct ch_sched_flowc {
+	s32 tid;   /* TID to bind */
+	s8  class; /* class index */
+};
+
 /* Defined bit width of user definable filter tuples
  */
 #define ETHTYPE_BITWIDTH 16
@@ -1214,8 +1283,11 @@ struct ch_filter_specification {
 	u16 nat_lport;		/* local port to use after NAT'ing */
 	u16 nat_fport;		/* foreign port to use after NAT'ing */
 
+	u32 tc_prio;		/* TC's filter priority index */
+	u64 tc_cookie;		/* Unique cookie identifying TC rules */
+
 	/* reservation for future additions */
-	u8 rsvd[24];
+	u8 rsvd[12];
 
 	/* Filter rule value/mask pairs.
 	 */
@@ -1293,6 +1365,11 @@ static inline int is_uld(const struct adapter *adap)
 	return (adap->params.offload || adap->params.crypto);
 }
 
+static inline int is_ethofld(const struct adapter *adap)
+{
+	return adap->params.ethofld;
+}
+
 static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
 {
 	return readl(adap->regs + reg_addr);
@@ -1426,6 +1503,9 @@ int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
 int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
 			 struct net_device *dev, unsigned int iqid,
 			 unsigned int uld_type);
+int t4_sge_alloc_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq,
+			     struct net_device *dev, u32 iqid);
+void t4_sge_free_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq);
 irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
 int t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
@@ -1890,6 +1970,12 @@ int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port,
 void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
 void free_tx_desc(struct adapter *adap, struct sge_txq *q,
 		  unsigned int n, bool unmap);
+void cxgb4_eosw_txq_free_desc(struct adapter *adap, struct sge_eosw_txq *txq,
+			      u32 ndesc);
+int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc);
+void cxgb4_ethofld_restart(unsigned long data);
+int cxgb4_ethofld_rx_handler(struct sge_rspq *q, const __be64 *rsp,
+			     const struct pkt_gl *si);
 void free_txq(struct adapter *adap, struct sge_txq *q);
 void cxgb4_reclaim_completed_tx(struct adapter *adap,
 				struct sge_txq *q, bool unmap);
@@ -1948,5 +2034,10 @@ int cxgb4_alloc_raw_mac_filt(struct adapter *adap,
 int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid,
 			  int *tcam_idx, const u8 *addr,
 			  bool persistent, u8 *smt_idx);
-
+int cxgb4_get_msix_idx_from_bmap(struct adapter *adap);
+void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, u32 msix_idx);
+int cxgb_open(struct net_device *dev);
+int cxgb_close(struct net_device *dev);
+void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q);
+void cxgb4_quiesce_rx(struct sge_rspq *q);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index ae6a47dd7dc9..93868dca186a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2658,6 +2658,7 @@ static int sge_qinfo_uld_ciq_entries(const struct adapter *adap, int uld)
 
 static int sge_qinfo_show(struct seq_file *seq, void *v)
 {
+	int eth_entries, ctrl_entries, eo_entries = 0;
 	int uld_rxq_entries[CXGB4_ULD_MAX] = { 0 };
 	int uld_ciq_entries[CXGB4_ULD_MAX] = { 0 };
 	int uld_txq_entries[CXGB4_TX_MAX] = { 0 };
@@ -2665,11 +2666,12 @@ static int sge_qinfo_show(struct seq_file *seq, void *v)
 	const struct sge_uld_rxq_info *urxq_info;
 	struct adapter *adap = seq->private;
 	int i, n, r = (uintptr_t)v - 1;
-	int eth_entries, ctrl_entries;
 	struct sge *s = &adap->sge;
 
 	eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
 	ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
+	if (adap->sge.eohw_txq)
+		eo_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
 
 	mutex_lock(&uld_mutex);
 	if (s->uld_txq_info)
@@ -2746,6 +2748,7 @@ do { \
 		RL("RxDrops:", stats.rx_drops);
 		RL("RxBadPkts:", stats.bad_rx_pkts);
 		TL("TSO:", tso);
+		TL("USO:", uso);
 		TL("TxCSO:", tx_cso);
 		TL("VLANins:", vlan_ins);
 		TL("TxQFull:", q.stops);
@@ -2761,6 +2764,55 @@ do { \
 	}
 
 	r -= eth_entries;
+	if (r < eo_entries) {
+		int base_qset = r * 4;
+		const struct sge_ofld_rxq *rx = &s->eohw_rxq[base_qset];
+		const struct sge_eohw_txq *tx = &s->eohw_txq[base_qset];
+
+		n = min(4, s->eoqsets - 4 * r);
+
+		S("QType:", "ETHOFLD");
+		S("Interface:",
+		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
+		T("TxQ ID:", q.cntxt_id);
+		T("TxQ size:", q.size);
+		T("TxQ inuse:", q.in_use);
+		T("TxQ CIDX:", q.cidx);
+		T("TxQ PIDX:", q.pidx);
+		R("RspQ ID:", rspq.abs_id);
+		R("RspQ size:", rspq.size);
+		R("RspQE size:", rspq.iqe_len);
+		R("RspQ CIDX:", rspq.cidx);
+		R("RspQ Gen:", rspq.gen);
+		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+		S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
+		R("FL ID:", fl.cntxt_id);
+		S3("u", "FL size:", rx->fl.size ? rx->fl.size - 8 : 0);
+		R("FL pend:", fl.pend_cred);
+		R("FL avail:", fl.avail);
+		R("FL PIDX:", fl.pidx);
+		R("FL CIDX:", fl.cidx);
+		RL("RxPackets:", stats.pkts);
+		RL("RxImm:", stats.imm);
+		RL("RxAN", stats.an);
+		RL("RxNoMem", stats.nomem);
+		TL("TSO:", tso);
+		TL("USO:", uso);
+		TL("TxCSO:", tx_cso);
+		TL("VLANins:", vlan_ins);
+		TL("TxQFull:", q.stops);
+		TL("TxQRestarts:", q.restarts);
+		TL("TxMapErr:", mapping_err);
+		RL("FLAllocErr:", fl.alloc_failed);
+		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLMapErr:", fl.mapping_err);
+		RL("FLLow:", fl.low);
+		RL("FLStarving:", fl.starving);
+
+		goto unlock;
+	}
+
+	r -= eo_entries;
 	if (r < uld_txq_entries[CXGB4_TX_OFLD]) {
 		const struct sge_uld_txq *tx;
 
@@ -3007,6 +3059,7 @@ static int sge_queue_entries(const struct adapter *adap)
 	mutex_unlock(&uld_mutex);
 
 	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
+	       (adap->sge.eohw_txq ? DIV_ROUND_UP(adap->sge.eoqsets, 4) : 0) +
 	       tot_uld_entries +
 	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 76538f4cd595..20ab3b6285a2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -91,6 +91,7 @@ static const char stats_strings[][ETH_GSTRING_LEN] = {
 	"rx_bg3_frames_trunc    ",
 
 	"tso                    ",
+	"uso                    ",
 	"tx_csum_offload        ",
 	"rx_csum_good           ",
 	"vlan_extractions       ",
@@ -220,6 +221,7 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
  */
 struct queue_port_stats {
 	u64 tso;
+	u64 uso;
 	u64 tx_csum;
 	u64 rx_csum;
 	u64 vlan_ex;
@@ -240,13 +242,15 @@ static void collect_sge_port_stats(const struct adapter *adap,
 				   const struct port_info *p,
 				   struct queue_port_stats *s)
 {
-	int i;
 	const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
 	const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];
+	struct sge_eohw_txq *eohw_tx;
+	unsigned int i;
 
 	memset(s, 0, sizeof(*s));
 	for (i = 0; i < p->nqsets; i++, rx++, tx++) {
 		s->tso += tx->tso;
+		s->uso += tx->uso;
 		s->tx_csum += tx->tx_cso;
 		s->rx_csum += rx->stats.rx_cso;
 		s->vlan_ex += rx->stats.vlan_ex;
@@ -254,6 +258,16 @@ static void collect_sge_port_stats(const struct adapter *adap,
 		s->gro_pkts += rx->stats.lro_pkts;
 		s->gro_merged += rx->stats.lro_merged;
 	}
+
+	if (adap->sge.eohw_txq) {
+		eohw_tx = &adap->sge.eohw_txq[p->first_qset];
+		for (i = 0; i < p->nqsets; i++, eohw_tx++) {
+			s->tso += eohw_tx->tso;
+			s->uso += eohw_tx->uso;
+			s->tx_csum += eohw_tx->tx_cso;
+			s->vlan_ins += eohw_tx->vlan_ins;
+		}
+	}
 }
 
 static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 43b0f8c57da7..1d39fca11810 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -440,36 +440,48 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family)
 {
 	struct adapter *adap = netdev2adap(dev);
 	struct tid_info *t = &adap->tids;
+	bool found = false;
+	u8 i, n, cnt;
 	int ftid;
 
-	spin_lock_bh(&t->ftid_lock);
-	if (family == PF_INET) {
-		ftid = find_first_zero_bit(t->ftid_bmap, t->nftids);
-		if (ftid >= t->nftids)
-			ftid = -1;
-	} else {
-		if (is_t6(adap->params.chip)) {
-			ftid = bitmap_find_free_region(t->ftid_bmap,
-						       t->nftids, 1);
-			if (ftid < 0)
-				goto out_unlock;
-
-			/* this is only a lookup, keep the found region
-			 * unallocated
-			 */
-			bitmap_release_region(t->ftid_bmap, ftid, 1);
-		} else {
-			ftid = bitmap_find_free_region(t->ftid_bmap,
-						       t->nftids, 2);
-			if (ftid < 0)
-				goto out_unlock;
+	/* IPv4 occupy 1 slot. IPv6 occupy 2 slots on T6 and 4 slots
+	 * on T5.
+	 */
+	n = 1;
+	if (family == PF_INET6) {
+		n++;
+		if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6)
+			n += 2;
+	}
+
+	if (n > t->nftids)
+		return -ENOMEM;
 
-			bitmap_release_region(t->ftid_bmap, ftid, 2);
+	/* Find free filter slots from the end of TCAM. Appropriate
+	 * checks must be done by caller later to ensure the prio
+	 * passed by TC doesn't conflict with prio saved by existing
+	 * rules in the TCAM.
+	 */
+	spin_lock_bh(&t->ftid_lock);
+	ftid = t->nftids - 1;
+	while (ftid >= n - 1) {
+		cnt = 0;
+		for (i = 0; i < n; i++) {
+			if (test_bit(ftid - i, t->ftid_bmap))
+				break;
+			cnt++;
 		}
+		if (cnt == n) {
+			ftid &= ~(n - 1);
+			found = true;
+			break;
+		}
+
+		ftid -= n;
 	}
-out_unlock:
 	spin_unlock_bh(&t->ftid_lock);
-	return ftid;
+
+	return found ? ftid : -ENOMEM;
 }
 
 static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family,
@@ -510,6 +522,60 @@ static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family,
 	spin_unlock_bh(&t->ftid_lock);
 }
 
+bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct filter_entry *prev_fe, *next_fe;
+	struct tid_info *t = &adap->tids;
+	u32 prev_ftid, next_ftid;
+	bool valid = true;
+
+	/* Only insert the rule if both of the following conditions
+	 * are met:
+	 * 1. The immediate previous rule has priority <= @prio.
+	 * 2. The immediate next rule has priority >= @prio.
+	 */
+	spin_lock_bh(&t->ftid_lock);
+	/* Don't insert if there's a rule already present at @idx. */
+	if (test_bit(idx, t->ftid_bmap)) {
+		valid = false;
+		goto out_unlock;
+	}
+
+	next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx);
+	if (next_ftid >= t->nftids)
+		next_ftid = idx;
+
+	next_fe = &adap->tids.ftid_tab[next_ftid];
+
+	prev_ftid = find_last_bit(t->ftid_bmap, idx);
+	if (prev_ftid >= idx)
+		prev_ftid = idx;
+
+	/* See if the filter entry belongs to an IPv6 rule, which
+	 * occupy 4 slots on T5 and 2 slots on T6. Adjust the
+	 * reference to the previously inserted filter entry
+	 * accordingly.
+	 */
+	if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) {
+		prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x3];
+		if (!prev_fe->fs.type)
+			prev_fe = &adap->tids.ftid_tab[prev_ftid];
+	} else {
+		prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x1];
+		if (!prev_fe->fs.type)
+			prev_fe = &adap->tids.ftid_tab[prev_ftid];
+	}
+
+	if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) ||
+	    (next_fe->valid && prio > next_fe->fs.tc_prio))
+		valid = false;
+
+out_unlock:
+	spin_unlock_bh(&t->ftid_lock);
+	return valid;
+}
+
 /* Delete the filter at a specified index. */
 static int del_filter_wr(struct adapter *adapter, int fidx)
 {
@@ -806,6 +872,12 @@ static void fill_default_mask(struct ch_filter_specification *fs)
 		fs->mask.tos |= ~0;
 	if (fs->val.proto && !fs->mask.proto)
 		fs->mask.proto |= ~0;
+	if (fs->val.pfvf_vld && !fs->mask.pfvf_vld)
+		fs->mask.pfvf_vld |= ~0;
+	if (fs->val.pf && !fs->mask.pf)
+		fs->mask.pf |= ~0;
+	if (fs->val.vf && !fs->mask.vf)
+		fs->mask.vf |= ~0;
 
 	for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) {
 		lip |= fs->val.lip[i];
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
index b0751c0611ec..b3e4a645043d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
@@ -53,4 +53,5 @@ void clear_all_filters(struct adapter *adapter);
 void init_hash_filter(struct adapter *adap);
 bool is_filter_exact_match(struct adapter *adap,
 			   struct ch_filter_specification *fs);
+bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio);
 #endif /* __CXGB4_FILTER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 38024877751c..12ff69b3ba91 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -65,6 +65,7 @@
 #include <linux/uaccess.h>
 #include <linux/crash_dump.h>
 #include <net/udp_tunnel.h>
+#include <net/xfrm.h>
 
 #include "cxgb4.h"
 #include "cxgb4_filter.h"
@@ -82,6 +83,8 @@
 #include "sched.h"
 #include "cxgb4_tc_u32.h"
 #include "cxgb4_tc_flower.h"
+#include "cxgb4_tc_mqprio.h"
+#include "cxgb4_tc_matchall.h"
 #include "cxgb4_ptp.h"
 #include "cxgb4_cudbg.h"
 
@@ -184,6 +187,8 @@ static struct dentry *cxgb4_debugfs_root;
 LIST_HEAD(adapter_list);
 DEFINE_MUTEX(uld_mutex);
 
+static int cfg_queues(struct adapter *adap);
+
 static void link_report(struct net_device *dev)
 {
 	if (!netif_carrier_ok(dev))
@@ -683,31 +688,6 @@ static irqreturn_t t4_nondata_intr(int irq, void *cookie)
 	return IRQ_HANDLED;
 }
 
-/*
- * Name the MSI-X interrupts.
- */
-static void name_msix_vecs(struct adapter *adap)
-{
-	int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc);
-
-	/* non-data interrupts */
-	snprintf(adap->msix_info[0].desc, n, "%s", adap->port[0]->name);
-
-	/* FW events */
-	snprintf(adap->msix_info[1].desc, n, "%s-FWeventq",
-		 adap->port[0]->name);
-
-	/* Ethernet queues */
-	for_each_port(adap, j) {
-		struct net_device *d = adap->port[j];
-		const struct port_info *pi = netdev_priv(d);
-
-		for (i = 0; i < pi->nqsets; i++, msi_idx++)
-			snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
-				 d->name, i);
-	}
-}
-
 int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec,
 		       cpumask_var_t *aff_mask, int idx)
 {
@@ -741,15 +721,19 @@ static int request_msix_queue_irqs(struct adapter *adap)
 	struct sge *s = &adap->sge;
 	struct msix_info *minfo;
 	int err, ethqidx;
-	int msi_index = 2;
 
-	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
-			  adap->msix_info[1].desc, &s->fw_evtq);
+	if (s->fwevtq_msix_idx < 0)
+		return -ENOMEM;
+
+	err = request_irq(adap->msix_info[s->fwevtq_msix_idx].vec,
+			  t4_sge_intr_msix, 0,
+			  adap->msix_info[s->fwevtq_msix_idx].desc,
+			  &s->fw_evtq);
 	if (err)
 		return err;
 
 	for_each_ethrxq(s, ethqidx) {
-		minfo = &adap->msix_info[msi_index];
+		minfo = s->ethrxq[ethqidx].msix;
 		err = request_irq(minfo->vec,
 				  t4_sge_intr_msix, 0,
 				  minfo->desc,
@@ -759,18 +743,16 @@ static int request_msix_queue_irqs(struct adapter *adap)
 
 		cxgb4_set_msix_aff(adap, minfo->vec,
 				   &minfo->aff_mask, ethqidx);
-		msi_index++;
 	}
 	return 0;
 
 unwind:
 	while (--ethqidx >= 0) {
-		msi_index--;
-		minfo = &adap->msix_info[msi_index];
+		minfo = s->ethrxq[ethqidx].msix;
 		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
 		free_irq(minfo->vec, &s->ethrxq[ethqidx].rspq);
 	}
-	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
+	free_irq(adap->msix_info[s->fwevtq_msix_idx].vec, &s->fw_evtq);
 	return err;
 }
 
@@ -778,11 +760,11 @@ static void free_msix_queue_irqs(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
 	struct msix_info *minfo;
-	int i, msi_index = 2;
+	int i;
 
-	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
+	free_irq(adap->msix_info[s->fwevtq_msix_idx].vec, &s->fw_evtq);
 	for_each_ethrxq(s, i) {
-		minfo = &adap->msix_info[msi_index++];
+		minfo = s->ethrxq[i].msix;
 		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
 		free_irq(minfo->vec, &s->ethrxq[i].rspq);
 	}
@@ -899,6 +881,12 @@ static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
 	return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
 }
 
+void cxgb4_quiesce_rx(struct sge_rspq *q)
+{
+	if (q->handler)
+		napi_disable(&q->napi);
+}
+
 /*
  * Wait until all NAPI handlers are descheduled.
  */
@@ -909,19 +897,24 @@ static void quiesce_rx(struct adapter *adap)
 	for (i = 0; i < adap->sge.ingr_sz; i++) {
 		struct sge_rspq *q = adap->sge.ingr_map[i];
 
-		if (q && q->handler)
-			napi_disable(&q->napi);
+		if (!q)
+			continue;
+
+		cxgb4_quiesce_rx(q);
 	}
 }
 
 /* Disable interrupt and napi handler */
 static void disable_interrupts(struct adapter *adap)
 {
+	struct sge *s = &adap->sge;
+
 	if (adap->flags & CXGB4_FULL_INIT_DONE) {
 		t4_intr_disable(adap);
 		if (adap->flags & CXGB4_USING_MSIX) {
 			free_msix_queue_irqs(adap);
-			free_irq(adap->msix_info[0].vec, adap);
+			free_irq(adap->msix_info[s->nd_msix_idx].vec,
+				 adap);
 		} else {
 			free_irq(adap->pdev->irq, adap);
 		}
@@ -929,6 +922,17 @@ static void disable_interrupts(struct adapter *adap)
 	}
 }
 
+void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q)
+{
+	if (q->handler)
+		napi_enable(&q->napi);
+
+	/* 0-increment GTS to start the timer and enable interrupts */
+	t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
+		     SEINTARM_V(q->intr_params) |
+		     INGRESSQID_V(q->cntxt_id));
+}
+
 /*
  * Enable NAPI scheduling and interrupt generation for all Rx queues.
  */
@@ -941,37 +945,63 @@ static void enable_rx(struct adapter *adap)
 
 		if (!q)
 			continue;
-		if (q->handler)
-			napi_enable(&q->napi);
 
-		/* 0-increment GTS to start the timer and enable interrupts */
-		t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
-			     SEINTARM_V(q->intr_params) |
-			     INGRESSQID_V(q->cntxt_id));
+		cxgb4_enable_rx(adap, q);
 	}
 }
 
+/* Reserve the MSI-X index for the non-data interrupt; no-op without MSI-X. */
+static int setup_non_data_intr(struct adapter *adap)
+{
+	struct msix_info *minfo;
+	int idx;
+
+	adap->sge.nd_msix_idx = -1;
+	if (!(adap->flags & CXGB4_USING_MSIX))
+		return 0;
+
+	idx = cxgb4_get_msix_idx_from_bmap(adap);
+	if (idx < 0)
+		return -ENOMEM;
+
+	/* The vector's description is the first port's name */
+	minfo = &adap->msix_info[idx];
+	snprintf(minfo->desc, sizeof(minfo->desc), "%s", adap->port[0]->name);
+	adap->sge.nd_msix_idx = idx;
+	return 0;
+}
 
 static int setup_fw_sge_queues(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
-	int err = 0;
+	int msix, err = 0;
 
 	bitmap_zero(s->starving_fl, s->egr_sz);
 	bitmap_zero(s->txq_maperr, s->egr_sz);
 
-	if (adap->flags & CXGB4_USING_MSIX)
-		adap->msi_idx = 1;         /* vector 0 is for non-queue interrupts */
-	else {
+	if (adap->flags & CXGB4_USING_MSIX) {
+		s->fwevtq_msix_idx = -1;
+		msix = cxgb4_get_msix_idx_from_bmap(adap);
+		if (msix < 0)
+			return -ENOMEM;
+
+		snprintf(adap->msix_info[msix].desc,
+			 sizeof(adap->msix_info[msix].desc),
+			 "%s-FWeventq", adap->port[0]->name);
+	} else {
 		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
 				       NULL, NULL, NULL, -1);
 		if (err)
 			return err;
-		adap->msi_idx = -((int)s->intrq.abs_id + 1);
+		msix = -((int)s->intrq.abs_id + 1);
 	}
 
 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
-			       adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
+			       msix, NULL, fwevtq_handler, NULL, -1);
+	if (err && msix >= 0)
+		cxgb4_free_msix_idx_in_bmap(adap, msix);
+
+	s->fwevtq_msix_idx = msix;
 	return err;
 }
 
@@ -985,14 +1015,17 @@ static int setup_fw_sge_queues(struct adapter *adap)
  */
 static int setup_sge_queues(struct adapter *adap)
 {
-	int err, i, j;
-	struct sge *s = &adap->sge;
 	struct sge_uld_rxq_info *rxq_info = NULL;
+	struct sge *s = &adap->sge;
 	unsigned int cmplqid = 0;
+	int err, i, j, msix = 0;
 
 	if (is_uld(adap))
 		rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
 
+	if (!(adap->flags & CXGB4_USING_MSIX))
+		msix = -((int)s->intrq.abs_id + 1);
+
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
 		struct port_info *pi = netdev_priv(dev);
@@ -1000,10 +1033,21 @@ static int setup_sge_queues(struct adapter *adap)
 		struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
 
 		for (j = 0; j < pi->nqsets; j++, q++) {
-			if (adap->msi_idx > 0)
-				adap->msi_idx++;
+			if (msix >= 0) {
+				msix = cxgb4_get_msix_idx_from_bmap(adap);
+				if (msix < 0) {
+					err = msix;
+					goto freeout;
+				}
+
+				snprintf(adap->msix_info[msix].desc,
+					 sizeof(adap->msix_info[msix].desc),
+					 "%s-Rx%d", dev->name, j);
+				q->msix = &adap->msix_info[msix];
+			}
+
 			err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
-					       adap->msi_idx, &q->fl,
+					       msix, &q->fl,
 					       t4_ethrx_handler,
 					       NULL,
 					       t4_get_tp_ch_map(adap,
@@ -1090,6 +1134,24 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 #endif /* CONFIG_CHELSIO_T4_DCB */
 
+	if (dev->num_tc) {
+		struct port_info *pi = netdev2pinfo(dev);
+		u8 ver, proto;
+
+		ver = ip_hdr(skb)->version;
+		proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr :
+				     ip_hdr(skb)->protocol;
+
+		/* Send unsupported traffic pattern to normal NIC queues. */
+		txq = netdev_pick_tx(dev, skb, sb_dev);
+		if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
+		    skb->encapsulation ||
+		    (proto != IPPROTO_TCP && proto != IPPROTO_UDP))
+			txq = txq % pi->nqsets;
+
+		return txq;
+	}
+
 	if (select_queue) {
 		txq = (skb_rx_queue_recorded(skb)
 			? skb_get_rx_queue(skb)
@@ -1456,19 +1518,23 @@ static int tid_init(struct tid_info *t)
 	struct adapter *adap = container_of(t, struct adapter, tids);
 	unsigned int max_ftids = t->nftids + t->nsftids;
 	unsigned int natids = t->natids;
+	unsigned int eotid_bmap_size;
 	unsigned int stid_bmap_size;
 	unsigned int ftid_bmap_size;
 	size_t size;
 
 	stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
 	ftid_bmap_size = BITS_TO_LONGS(t->nftids);
+	eotid_bmap_size = BITS_TO_LONGS(t->neotids);
 	size = t->ntids * sizeof(*t->tid_tab) +
 	       natids * sizeof(*t->atid_tab) +
 	       t->nstids * sizeof(*t->stid_tab) +
 	       t->nsftids * sizeof(*t->stid_tab) +
 	       stid_bmap_size * sizeof(long) +
 	       max_ftids * sizeof(*t->ftid_tab) +
-	       ftid_bmap_size * sizeof(long);
+	       ftid_bmap_size * sizeof(long) +
+	       t->neotids * sizeof(*t->eotid_tab) +
+	       eotid_bmap_size * sizeof(long);
 
 	t->tid_tab = kvzalloc(size, GFP_KERNEL);
 	if (!t->tid_tab)
@@ -1479,6 +1545,8 @@ static int tid_init(struct tid_info *t)
 	t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
 	t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
 	t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
+	t->eotid_tab = (struct eotid_entry *)&t->ftid_bmap[ftid_bmap_size];
+	t->eotid_bmap = (unsigned long *)&t->eotid_tab[t->neotids];
 	spin_lock_init(&t->stid_lock);
 	spin_lock_init(&t->atid_lock);
 	spin_lock_init(&t->ftid_lock);
@@ -1505,6 +1573,9 @@ static int tid_init(struct tid_info *t)
 		if (!t->stid_base &&
 		    CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
 			__set_bit(0, t->stid_bmap);
+
+		if (t->neotids)
+			bitmap_zero(t->eotid_bmap, t->neotids);
 	}
 
 	bitmap_zero(t->ftid_bmap, t->nftids);
@@ -2361,6 +2432,7 @@ static void update_clip(const struct adapter *adap)
  */
 static int cxgb_up(struct adapter *adap)
 {
+	struct sge *s = &adap->sge;
 	int err;
 
 	mutex_lock(&uld_mutex);
@@ -2372,16 +2444,20 @@ static int cxgb_up(struct adapter *adap)
 		goto freeq;
 
 	if (adap->flags & CXGB4_USING_MSIX) {
-		name_msix_vecs(adap);
-		err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
-				  adap->msix_info[0].desc, adap);
+		if (s->nd_msix_idx < 0) {
+			err = -ENOMEM;
+			goto irq_err;
+		}
+
+		err = request_irq(adap->msix_info[s->nd_msix_idx].vec,
+				  t4_nondata_intr, 0,
+				  adap->msix_info[s->nd_msix_idx].desc, adap);
 		if (err)
 			goto irq_err;
+
 		err = request_msix_queue_irqs(adap);
-		if (err) {
-			free_irq(adap->msix_info[0].vec, adap);
-			goto irq_err;
-		}
+		if (err)
+			goto irq_err_free_nd_msix;
 	} else {
 		err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
 				  (adap->flags & CXGB4_USING_MSI) ? 0
@@ -2403,11 +2479,13 @@ static int cxgb_up(struct adapter *adap)
 #endif
 	return err;
 
- irq_err:
+irq_err_free_nd_msix:
+	free_irq(adap->msix_info[s->nd_msix_idx].vec, adap);
+irq_err:
 	dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
- freeq:
+freeq:
 	t4_free_sge_resources(adap);
- rel_lock:
+rel_lock:
 	mutex_unlock(&uld_mutex);
 	return err;
 }
@@ -2429,11 +2507,11 @@ static void cxgb_down(struct adapter *adapter)
 /*
  * net_device operations
  */
-static int cxgb_open(struct net_device *dev)
+int cxgb_open(struct net_device *dev)
 {
-	int err;
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
+	int err;
 
 	netif_carrier_off(dev);
 
@@ -2456,7 +2534,7 @@ static int cxgb_open(struct net_device *dev)
 	return err;
 }
 
-static int cxgb_close(struct net_device *dev)
+int cxgb_close(struct net_device *dev)
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
@@ -3163,8 +3241,33 @@ static int cxgb_setup_tc_cls_u32(struct net_device *dev,
 	}
 }
 
-static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
-				  void *cb_priv)
+/* Route a matchall offload command to the cxgb4 matchall code.
+ * Statistics are only provided for ingress rules.
+ */
+static int cxgb_setup_tc_matchall(struct net_device *dev,
+				  struct tc_cls_matchall_offload *cls_matchall,
+				  bool ingress)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	if (!adap->tc_matchall)
+		return -ENOMEM;
+
+	if (cls_matchall->command == TC_CLSMATCHALL_REPLACE)
+		return cxgb4_tc_matchall_replace(dev, cls_matchall, ingress);
+
+	if (cls_matchall->command == TC_CLSMATCHALL_DESTROY)
+		return cxgb4_tc_matchall_destroy(dev, cls_matchall, ingress);
+
+	if (cls_matchall->command == TC_CLSMATCHALL_STATS && ingress)
+		return cxgb4_tc_matchall_stats(dev, cls_matchall);
+
+	/* Unknown command, or statistics requested for an egress rule */
+	return -EOPNOTSUPP;
+}
+
+static int cxgb_setup_tc_block_ingress_cb(enum tc_setup_type type,
+					  void *type_data, void *cb_priv)
 {
 	struct net_device *dev = cb_priv;
 	struct port_info *pi = netdev2pinfo(dev);
@@ -3185,24 +3288,81 @@ static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 		return cxgb_setup_tc_cls_u32(dev, type_data);
 	case TC_SETUP_CLSFLOWER:
 		return cxgb_setup_tc_flower(dev, type_data);
+	case TC_SETUP_CLSMATCHALL:
+		return cxgb_setup_tc_matchall(dev, type_data, true);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
+/* Egress flow-block callback. Requires a fully initialised adapter and
+ * forwards only matchall classifiers; every other type is unsupported.
+ */
+static int cxgb_setup_tc_block_egress_cb(enum tc_setup_type type,
+					 void *type_data, void *cb_priv)
+{
+	struct net_device *dev = cb_priv;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+
+	if (!(adap->flags & CXGB4_FULL_INIT_DONE)) {
+		dev_err(adap->pdev_dev,
+			"Failed to setup tc on port %d. Link Down?\n",
+			pi->port_id);
+		return -EINVAL;
+	}
+
+	if (!tc_cls_can_offload_and_chain0(dev, type_data))
+		return -EOPNOTSUPP;
+
+	if (type != TC_SETUP_CLSMATCHALL)
+		return -EOPNOTSUPP;
+
+	/* 'false' is the handler's ingress argument, i.e. an egress rule */
+	return cxgb_setup_tc_matchall(dev, type_data, false);
+}
+
+/* Hand an mqprio request to cxgb4_setup_tc_mqprio() when ETHOFLD is usable. */
+static int cxgb_setup_tc_mqprio(struct net_device *dev,
+				struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	if (is_ethofld(adap) && adap->tc_mqprio)
+		return cxgb4_setup_tc_mqprio(dev, mqprio);
+	return -ENOMEM;
+}
+
 static LIST_HEAD(cxgb_block_cb_list);
 
+/* Bind a flow block: egress clsact binders get the egress callback, every
+ * other binder type gets the ingress callback (registered ingress-only).
+ */
+static int cxgb_setup_tc_block(struct net_device *dev,
+			       struct flow_block_offload *f)
+{
+	bool egress = f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
+	struct port_info *pi = netdev_priv(dev);
+	flow_setup_cb_t *cb;
+
+	pi->tc_block_shared = f->block_shared;
+	if (egress)
+		cb = cxgb_setup_tc_block_egress_cb;
+	else
+		cb = cxgb_setup_tc_block_ingress_cb;
+
+	return flow_block_cb_setup_simple(f, &cxgb_block_cb_list,
+					  cb, pi, dev, !egress);
+}
+
 static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			 void *type_data)
 {
-	struct port_info *pi = netdev2pinfo(dev);
-
 	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return cxgb_setup_tc_mqprio(dev, type_data);
 	case TC_SETUP_BLOCK:
-		return flow_block_cb_setup_simple(type_data,
-						  &cxgb_block_cb_list,
-						  cxgb_setup_tc_block_cb,
-						  pi, dev, true);
+		return cxgb_setup_tc_block(dev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -4286,14 +4446,14 @@ static struct fw_info *find_fw_info(int chip)
 /*
  * Phase 0 of initialization: contact FW, obtain config, perform basic init.
  */
-static int adap_init0(struct adapter *adap)
+static int adap_init0(struct adapter *adap, int vpd_skip)
 {
-	int ret;
-	u32 v, port_vec;
-	enum dev_state state;
-	u32 params[7], val[7];
 	struct fw_caps_config_cmd caps_cmd;
+	u32 params[7], val[7];
+	enum dev_state state;
+	u32 v, port_vec;
 	int reset = 1;
+	int ret;
 
 	/* Grab Firmware Device Log parameters as early as possible so we have
 	 * access to it for debugging, etc.
@@ -4448,9 +4608,11 @@ static int adap_init0(struct adapter *adap)
 	 * could have FLASHed a new VPD which won't be read by the firmware
 	 * until we do the RESET ...
 	 */
-	ret = t4_get_vpd_params(adap, &adap->params.vpd);
-	if (ret < 0)
-		goto bye;
+	if (!vpd_skip) {
+		ret = t4_get_vpd_params(adap, &adap->params.vpd);
+		if (ret < 0)
+			goto bye;
+	}
 
 	/* Find out what ports are available to us.  Note that we need to do
 	 * this before calling adap_init0_no_config() since it needs nports
@@ -4600,11 +4762,18 @@ static int adap_init0(struct adapter *adap)
 	adap->clipt_start = val[0];
 	adap->clipt_end = val[1];
 
-	/* We don't yet have a PARAMs calls to retrieve the number of Traffic
-	 * Classes supported by the hardware/firmware so we hard code it here
-	 * for now.
-	 */
-	adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
+	/* Get the supported number of traffic classes */
+	params[0] = FW_PARAM_DEV(NUM_TM_CLASS);
+	ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params, val);
+	if (ret < 0) {
+		/* We couldn't retrieve the number of Traffic Classes
+		 * supported by the hardware/firmware. So we hard
+		 * code it here.
+		 */
+		adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
+	} else {
+		adap->params.nsched_cls = val[0];
+	}
 
 	/* query params related to active filter region */
 	params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
@@ -4689,7 +4858,8 @@ static int adap_init0(struct adapter *adap)
 		adap->params.offload = 1;
 
 	if (caps_cmd.ofldcaps ||
-	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) {
+	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) ||
+	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_ETHOFLD))) {
 		/* query offload-related parameters */
 		params[0] = FW_PARAM_DEV(NTID);
 		params[1] = FW_PARAM_PFVF(SERVER_START);
@@ -4731,6 +4901,19 @@ static int adap_init0(struct adapter *adap)
 		} else {
 			adap->num_ofld_uld += 1;
 		}
+
+		if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_ETHOFLD)) {
+			params[0] = FW_PARAM_PFVF(ETHOFLD_START);
+			params[1] = FW_PARAM_PFVF(ETHOFLD_END);
+			ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+					      params, val);
+			if (!ret) {
+				adap->tids.eotid_base = val[0];
+				adap->tids.neotids = min_t(u32, MAX_ATIDS,
+							   val[1] - val[0] + 1);
+				adap->params.ethofld = 1;
+			}
+		}
 	}
 	if (caps_cmd.rdmacaps) {
 		params[0] = FW_PARAM_PFVF(STAG_START);
@@ -5050,10 +5233,93 @@ static void eeh_resume(struct pci_dev *pdev)
 	rtnl_unlock();
 }
 
+/* PCI reset_prepare hook (PF 4 only): quiesce the adapter before a reset. */
+static void eeh_reset_prepare(struct pci_dev *pdev)
+{
+	struct adapter *adap = pci_get_drvdata(pdev);
+	int pidx;
+
+	if (adap->pf != 4)
+		return;
+
+	adap->flags &= ~CXGB4_FW_OK;
+	notify_ulds(adap, CXGB4_STATE_DOWN);
+
+	/* Close only the ports that are registered net devices */
+	for_each_port(adap, pidx)
+		if (adap->port[pidx]->reg_state == NETREG_REGISTERED)
+			cxgb_close(adap->port[pidx]);
+
+	disable_interrupts(adap);
+	cxgb4_free_mps_ref_entries(adap);
+	adap_free_hma_mem(adap);
+
+	if (adap->flags & CXGB4_FULL_INIT_DONE)
+		cxgb_down(adap);
+}
+
+/* PCI reset_done hook (PF 4 only): bring the adapter back up after a reset. */
+static void eeh_reset_done(struct pci_dev *pdev)
+{
+	struct adapter *adapter = pci_get_drvdata(pdev);
+	int err, i;
+
+	if (adapter->pf != 4)
+		return;
+
+	err = t4_wait_dev_ready(adapter->regs);
+	if (err < 0) {
+		dev_err(adapter->pdev_dev, "Device not ready, err %d\n", err);
+		return;
+	}
+
+	setup_memwin(adapter);
+
+	/* Second argument is vpd_skip: the VPD parameters are not re-read */
+	err = adap_init0(adapter, 1);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"Adapter init failed, err %d\n", err);
+		return;
+	}
+
+	setup_memwin_rdma(adapter);
+
+	if (adapter->flags & CXGB4_FW_OK) {
+		err = t4_port_init(adapter, adapter->pf, adapter->pf, 0);
+		if (err) {
+			dev_err(adapter->pdev_dev,
+				"Port init failed, err %d\n", err);
+			return;
+		}
+	}
+
+	err = cfg_queues(adapter);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"Config queues failed, err %d\n", err);
+		return;
+	}
+
+	cxgb4_init_mps_ref_entries(adapter);
+	err = setup_fw_sge_queues(adapter);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"FW sge queue allocation failed, err %d\n", err);
+		return;
+	}
+
+	for_each_port(adapter, i)
+		if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+			cxgb_open(adapter->port[i]);
+}
+
 static const struct pci_error_handlers cxgb4_eeh = {
 	.error_detected = eeh_err_detected,
 	.slot_reset     = eeh_slot_reset,
 	.resume         = eeh_resume,
+	.reset_prepare  = eeh_reset_prepare,
+	.reset_done     = eeh_reset_done,
 };
 
 /* Return true if the Link Configuration supports "High Speeds" (those greater
@@ -5070,26 +5336,25 @@ static inline bool is_x_10g_port(const struct link_config *lc)
 	return high_speeds != 0;
 }
 
-/*
- * Perform default configuration of DMA queues depending on the number and type
+/* Perform default configuration of DMA queues depending on the number and type
  * of ports we found and the number of available CPUs.  Most settings can be
  * modified by the admin prior to actual use.
  */
 static int cfg_queues(struct adapter *adap)
 {
+	u32 avail_qsets, avail_eth_qsets, avail_uld_qsets;
+	u32 niqflint, neq, num_ulds;
 	struct sge *s = &adap->sge;
-	int i, n10g = 0, qidx = 0;
-	int niqflint, neq, avail_eth_qsets;
-	int max_eth_qsets = 32;
+	u32 i, n10g = 0, qidx = 0;
 #ifndef CONFIG_CHELSIO_T4_DCB
 	int q10g = 0;
 #endif
 
-	/* Reduce memory usage in kdump environment, disable all offload.
-	 */
+	/* Reduce memory usage in kdump environment, disable all offload. */
 	if (is_kdump_kernel() || (is_uld(adap) && t4_uld_mem_alloc(adap))) {
 		adap->params.offload = 0;
 		adap->params.crypto = 0;
+		adap->params.ethofld = 0;
 	}
 
 	/* Calculate the number of Ethernet Queue Sets available based on
@@ -5108,14 +5373,11 @@ static int cfg_queues(struct adapter *adap)
 	if (!(adap->flags & CXGB4_USING_MSIX))
 		niqflint--;
 	neq = adap->params.pfres.neq / 2;
-	avail_eth_qsets = min(niqflint, neq);
+	avail_qsets = min(niqflint, neq);
 
-	if (avail_eth_qsets > max_eth_qsets)
-		avail_eth_qsets = max_eth_qsets;
-
-	if (avail_eth_qsets < adap->params.nports) {
+	if (avail_qsets < adap->params.nports) {
 		dev_err(adap->pdev_dev, "avail_eth_qsets=%d < nports=%d\n",
-			avail_eth_qsets, adap->params.nports);
+			avail_qsets, adap->params.nports);
 		return -ENOMEM;
 	}
 
@@ -5123,6 +5385,7 @@ static int cfg_queues(struct adapter *adap)
 	for_each_port(adap, i)
 		n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
 
+	avail_eth_qsets = min_t(u32, avail_qsets, MAX_ETH_QSETS);
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging support we need to be able to support up
 	 * to 8 Traffic Priorities; each of which will be assigned to its
@@ -5142,8 +5405,7 @@ static int cfg_queues(struct adapter *adap)
 		qidx += pi->nqsets;
 	}
 #else /* !CONFIG_CHELSIO_T4_DCB */
-	/*
-	 * We default to 1 queue per non-10G port and up to # of cores queues
+	/* We default to 1 queue per non-10G port and up to # of cores queues
 	 * per 10G port.
 	 */
 	if (n10g)
@@ -5165,19 +5427,40 @@ static int cfg_queues(struct adapter *adap)
 
 	s->ethqsets = qidx;
 	s->max_ethqsets = qidx;   /* MSI-X may lower it later */
+	avail_qsets -= qidx;
 
 	if (is_uld(adap)) {
-		/*
-		 * For offload we use 1 queue/channel if all ports are up to 1G,
+		/* For offload we use 1 queue/channel if all ports are up to 1G,
 		 * otherwise we divide all available queues amongst the channels
 		 * capped by the number of available cores.
 		 */
-		if (n10g) {
-			i = min_t(int, MAX_OFLD_QSETS, num_online_cpus());
-			s->ofldqsets = roundup(i, adap->params.nports);
-		} else {
+		num_ulds = adap->num_uld + adap->num_ofld_uld;
+		i = min_t(u32, MAX_OFLD_QSETS, num_online_cpus());
+		avail_uld_qsets = roundup(i, adap->params.nports);
+		if (avail_qsets < num_ulds * adap->params.nports) {
+			adap->params.offload = 0;
+			adap->params.crypto = 0;
+			s->ofldqsets = 0;
+		} else if (avail_qsets < num_ulds * avail_uld_qsets || !n10g) {
 			s->ofldqsets = adap->params.nports;
+		} else {
+			s->ofldqsets = avail_uld_qsets;
+		}
+
+		avail_qsets -= num_ulds * s->ofldqsets;
+	}
+
+	/* ETHOFLD Queues used for QoS offload should follow same
+	 * allocation scheme as normal Ethernet Queues.
+	 */
+	if (is_ethofld(adap)) {
+		if (avail_qsets < s->max_ethqsets) {
+			adap->params.ethofld = 0;
+			s->eoqsets = 0;
+		} else {
+			s->eoqsets = s->max_ethqsets;
 		}
+		avail_qsets -= s->eoqsets;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
@@ -5230,42 +5513,62 @@ static void reduce_ethqs(struct adapter *adap, int n)
 	}
 }
 
-static int get_msix_info(struct adapter *adap)
+static int alloc_msix_info(struct adapter *adap, u32 num_vec)
 {
-	struct uld_msix_info *msix_info;
-	unsigned int max_ingq = 0;
-
-	if (is_offload(adap))
-		max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld;
-	if (is_pci_uld(adap))
-		max_ingq += MAX_OFLD_QSETS * adap->num_uld;
-
-	if (!max_ingq)
-		goto out;
+	struct msix_info *msix_info;
 
-	msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL);
+	msix_info = kcalloc(num_vec, sizeof(*msix_info), GFP_KERNEL);
 	if (!msix_info)
 		return -ENOMEM;
 
-	adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq),
-						 sizeof(long), GFP_KERNEL);
-	if (!adap->msix_bmap_ulds.msix_bmap) {
+	adap->msix_bmap.msix_bmap = kcalloc(BITS_TO_LONGS(num_vec),
+					    sizeof(long), GFP_KERNEL);
+	if (!adap->msix_bmap.msix_bmap) {
 		kfree(msix_info);
 		return -ENOMEM;
 	}
-	spin_lock_init(&adap->msix_bmap_ulds.lock);
-	adap->msix_info_ulds = msix_info;
-out:
+
+	spin_lock_init(&adap->msix_bmap.lock);
+	adap->msix_bmap.mapsize = num_vec;
+
+	adap->msix_info = msix_info;
 	return 0;
 }
 
 static void free_msix_info(struct adapter *adap)
 {
-	if (!(adap->num_uld && adap->num_ofld_uld))
-		return;
+	kfree(adap->msix_bmap.msix_bmap);
+	kfree(adap->msix_info);
+}
 
-	kfree(adap->msix_info_ulds);
-	kfree(adap->msix_bmap_ulds.msix_bmap);
+/* Claim the lowest free MSI-X index in the adapter bitmap, or -ENOSPC. */
+int cxgb4_get_msix_idx_from_bmap(struct adapter *adap)
+{
+	struct msix_bmap *map = &adap->msix_bmap;
+	unsigned long irq_flags;
+	unsigned int idx;
+	int ret = -ENOSPC;
+
+	spin_lock_irqsave(&map->lock, irq_flags);
+	idx = find_first_zero_bit(map->msix_bmap, map->mapsize);
+	if (idx < map->mapsize) {
+		__set_bit(idx, map->msix_bmap);
+		ret = idx;
+	}
+	spin_unlock_irqrestore(&map->lock, irq_flags);
+
+	return ret;
+}
+
+/* Return @msix_idx to the adapter's MSI-X bitmap, under the bitmap lock. */
+void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx)
+{
+	struct msix_bmap *map = &adap->msix_bmap;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&map->lock, irq_flags);
+	__clear_bit(msix_idx, map->msix_bmap);
+	spin_unlock_irqrestore(&map->lock, irq_flags);
 }
 
 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
@@ -5273,88 +5576,161 @@ static void free_msix_info(struct adapter *adap)
 
 static int enable_msix(struct adapter *adap)
 {
-	int ofld_need = 0, uld_need = 0;
-	int i, j, want, need, allocated;
+	u32 eth_need, uld_need = 0, ethofld_need = 0;
+	u32 ethqsets = 0, ofldqsets = 0, eoqsets = 0;
+	u8 num_uld = 0, nchan = adap->params.nports;
+	u32 i, want, need, num_vec;
 	struct sge *s = &adap->sge;
-	unsigned int nchan = adap->params.nports;
 	struct msix_entry *entries;
-	int max_ingq = MAX_INGQ;
-
-	if (is_pci_uld(adap))
-		max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
-	if (is_offload(adap))
-		max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld);
-	entries = kmalloc_array(max_ingq + 1, sizeof(*entries),
-				GFP_KERNEL);
-	if (!entries)
-		return -ENOMEM;
-
-	/* map for msix */
-	if (get_msix_info(adap)) {
-		adap->params.offload = 0;
-		adap->params.crypto = 0;
-	}
-
-	for (i = 0; i < max_ingq + 1; ++i)
-		entries[i].entry = i;
+	struct port_info *pi;
+	int allocated, ret;
 
-	want = s->max_ethqsets + EXTRA_VECS;
-	if (is_offload(adap)) {
-		want += adap->num_ofld_uld * s->ofldqsets;
-		ofld_need = adap->num_ofld_uld * nchan;
-	}
-	if (is_pci_uld(adap)) {
-		want += adap->num_uld * s->ofldqsets;
-		uld_need = adap->num_uld * nchan;
-	}
+	want = s->max_ethqsets;
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
 	 * each port.
 	 */
-	need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
+	need = 8 * nchan;
 #else
-	need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
+	need = nchan;
 #endif
+	eth_need = need;
+	if (is_uld(adap)) {
+		num_uld = adap->num_ofld_uld + adap->num_uld;
+		want += num_uld * s->ofldqsets;
+		uld_need = num_uld * nchan;
+		need += uld_need;
+	}
+
+	if (is_ethofld(adap)) {
+		want += s->eoqsets;
+		ethofld_need = eth_need;
+		need += ethofld_need;
+	}
+
+	want += EXTRA_VECS;
+	need += EXTRA_VECS;
+
+	entries = kmalloc_array(want, sizeof(*entries), GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < want; i++)
+		entries[i].entry = i;
+
 	allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
 	if (allocated < 0) {
-		dev_info(adap->pdev_dev, "not enough MSI-X vectors left,"
-			 " not using MSI-X\n");
-		kfree(entries);
-		return allocated;
+		/* Disable offload and attempt to get vectors for NIC
+		 * only mode.
+		 */
+		want = s->max_ethqsets + EXTRA_VECS;
+		need = eth_need + EXTRA_VECS;
+		allocated = pci_enable_msix_range(adap->pdev, entries,
+						  need, want);
+		if (allocated < 0) {
+			dev_info(adap->pdev_dev,
+				 "Disabling MSI-X due to insufficient MSI-X vectors\n");
+			ret = allocated;
+			goto out_free;
+		}
+
+		dev_info(adap->pdev_dev,
+			 "Disabling offload due to insufficient MSI-X vectors\n");
+		adap->params.offload = 0;
+		adap->params.crypto = 0;
+		adap->params.ethofld = 0;
+		s->ofldqsets = 0;
+		s->eoqsets = 0;
+		uld_need = 0;
+		ethofld_need = 0;
+	}
+
+	num_vec = allocated;
+	if (num_vec < want) {
+		/* Distribute available vectors to the various queue groups.
+		 * Every group gets its minimum requirement and NIC gets top
+		 * priority for leftovers.
+		 */
+		ethqsets = eth_need;
+		if (is_uld(adap))
+			ofldqsets = nchan;
+		if (is_ethofld(adap))
+			eoqsets = ethofld_need;
+
+		num_vec -= need;
+		while (num_vec) {
+			if (num_vec < eth_need + ethofld_need ||
+			    ethqsets > s->max_ethqsets)
+				break;
+
+			for_each_port(adap, i) {
+				pi = adap2pinfo(adap, i);
+				if (pi->nqsets < 2)
+					continue;
+
+				ethqsets++;
+				num_vec--;
+				if (ethofld_need) {
+					eoqsets++;
+					num_vec--;
+				}
+			}
+		}
+
+		if (is_uld(adap)) {
+			while (num_vec) {
+				if (num_vec < uld_need ||
+				    ofldqsets > s->ofldqsets)
+					break;
+
+				ofldqsets++;
+				num_vec -= uld_need;
+			}
+		}
+	} else {
+		ethqsets = s->max_ethqsets;
+		if (is_uld(adap))
+			ofldqsets = s->ofldqsets;
+		if (is_ethofld(adap))
+			eoqsets = s->eoqsets;
 	}
 
-	/* Distribute available vectors to the various queue groups.
-	 * Every group gets its minimum requirement and NIC gets top
-	 * priority for leftovers.
-	 */
-	i = allocated - EXTRA_VECS - ofld_need - uld_need;
-	if (i < s->max_ethqsets) {
-		s->max_ethqsets = i;
-		if (i < s->ethqsets)
-			reduce_ethqs(adap, i);
+	if (ethqsets < s->max_ethqsets) {
+		s->max_ethqsets = ethqsets;
+		reduce_ethqs(adap, ethqsets);
 	}
+
 	if (is_uld(adap)) {
-		if (allocated < want)
-			s->nqs_per_uld = nchan;
-		else
-			s->nqs_per_uld = s->ofldqsets;
+		s->ofldqsets = ofldqsets;
+		s->nqs_per_uld = s->ofldqsets;
 	}
 
-	for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i)
+	if (is_ethofld(adap))
+		s->eoqsets = eoqsets;
+
+	/* map for msix */
+	ret = alloc_msix_info(adap, allocated);
+	if (ret)
+		goto out_disable_msix;
+
+	for (i = 0; i < allocated; i++) {
 		adap->msix_info[i].vec = entries[i].vector;
-	if (is_uld(adap)) {
-		for (j = 0 ; i < allocated; ++i, j++) {
-			adap->msix_info_ulds[j].vec = entries[i].vector;
-			adap->msix_info_ulds[j].idx = i;
-		}
-		adap->msix_bmap_ulds.mapsize = j;
+		adap->msix_info[i].idx = i;
 	}
-	dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, "
-		 "nic %d per uld %d\n",
-		 allocated, s->max_ethqsets, s->nqs_per_uld);
+
+	dev_info(adap->pdev_dev,
+		 "%d MSI-X vectors allocated, nic %d eoqsets %d per uld %d\n",
+		 allocated, s->max_ethqsets, s->eoqsets, s->nqs_per_uld);
 
 	kfree(entries);
 	return 0;
+
+out_disable_msix:
+	pci_disable_msix(adap->pdev);
+
+out_free:
+	kfree(entries);
+	return ret;
 }
 
 #undef EXTRA_VECS
@@ -5441,6 +5817,8 @@ static void free_some_resources(struct adapter *adapter)
 	kvfree(adapter->srq);
 	t4_cleanup_sched(adapter);
 	kvfree(adapter->tids.tid_tab);
+	cxgb4_cleanup_tc_matchall(adapter);
+	cxgb4_cleanup_tc_mqprio(adapter);
 	cxgb4_cleanup_tc_flower(adapter);
 	cxgb4_cleanup_tc_u32(adapter);
 	kfree(adapter->sge.egr_map);
@@ -5466,7 +5844,8 @@ static void free_some_resources(struct adapter *adapter)
 		t4_fw_bye(adapter, adapter->pf);
 }
 
-#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \
+		   NETIF_F_GSO_UDP_L4)
 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
 		   NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
 #define SEGMENT_SIZE 128
@@ -5837,7 +6216,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	setup_memwin(adapter);
-	err = adap_init0(adapter);
+	err = adap_init0(adapter, 0);
 #ifdef CONFIG_DEBUG_FS
 	bitmap_zero(adapter->sge.blocked_fl, adapter->sge.egr_sz);
 #endif
@@ -5855,8 +6234,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	INIT_LIST_HEAD(&adapter->mac_hlist);
 
 	for_each_port(adapter, i) {
+		/* For supporting MQPRIO Offload, need some extra
+		 * queues for each ETHOFLD TIDs. Keep it equal to
+		 * MAX_ATIDs for now. Once we connect to firmware
+		 * later and query the EOTID params, we'll come to
+		 * know the actual # of EOTIDs supported.
+		 */
 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
-					   MAX_ETH_QSETS);
+					   MAX_ETH_QSETS + MAX_ATIDS);
 		if (!netdev) {
 			err = -ENOMEM;
 			goto out_free_dev;
@@ -6004,6 +6389,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		if (cxgb4_init_tc_flower(adapter))
 			dev_warn(&pdev->dev,
 				 "could not offload tc flower, continuing\n");
+
+		if (cxgb4_init_tc_mqprio(adapter))
+			dev_warn(&pdev->dev,
+				 "could not offload tc mqprio, continuing\n");
+
+		if (cxgb4_init_tc_matchall(adapter))
+			dev_warn(&pdev->dev,
+				 "could not offload tc matchall, continuing\n");
 	}
 
 	if (is_offload(adapter) || is_hashfilter(adapter)) {
@@ -6040,6 +6433,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto out_free_dev;
 
+	err = setup_non_data_intr(adapter);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"Non Data interrupt allocation failed, err: %d\n", err);
+		goto out_free_dev;
+	}
+
 	err = setup_fw_sge_queues(adapter);
 	if (err) {
 		dev_err(adapter->pdev_dev,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index e447976bdd3e..0fa80bef575d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -378,15 +378,14 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
 	}
 }
 
-static void cxgb4_process_flow_actions(struct net_device *in,
-				       struct flow_cls_offload *cls,
-				       struct ch_filter_specification *fs)
+void cxgb4_process_flow_actions(struct net_device *in,
+				struct flow_action *actions,
+				struct ch_filter_specification *fs)
 {
-	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
 	struct flow_action_entry *act;
 	int i;
 
-	flow_action_for_each(i, act, &rule->action) {
+	flow_action_for_each(i, act, actions) {
 		switch (act->id) {
 		case FLOW_ACTION_ACCEPT:
 			fs->action = FILTER_PASS;
@@ -544,17 +543,16 @@ static bool valid_pedit_action(struct net_device *dev,
 	return true;
 }
 
-static int cxgb4_validate_flow_actions(struct net_device *dev,
-				       struct flow_cls_offload *cls)
+int cxgb4_validate_flow_actions(struct net_device *dev,
+				struct flow_action *actions)
 {
-	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
 	struct flow_action_entry *act;
 	bool act_redir = false;
 	bool act_pedit = false;
 	bool act_vlan = false;
 	int i;
 
-	flow_action_for_each(i, act, &rule->action) {
+	flow_action_for_each(i, act, actions) {
 		switch (act->id) {
 		case FLOW_ACTION_ACCEPT:
 		case FLOW_ACTION_DROP:
@@ -636,14 +634,15 @@ static int cxgb4_validate_flow_actions(struct net_device *dev,
 int cxgb4_tc_flower_replace(struct net_device *dev,
 			    struct flow_cls_offload *cls)
 {
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct netlink_ext_ack *extack = cls->common.extack;
 	struct adapter *adap = netdev2adap(dev);
 	struct ch_tc_flower_entry *ch_flower;
 	struct ch_filter_specification *fs;
 	struct filter_ctx ctx;
-	int fidx;
-	int ret;
+	int fidx, ret;
 
-	if (cxgb4_validate_flow_actions(dev, cls))
+	if (cxgb4_validate_flow_actions(dev, &rule->action))
 		return -EOPNOTSUPP;
 
 	if (cxgb4_validate_flow_match(dev, cls))
@@ -658,20 +657,41 @@ int cxgb4_tc_flower_replace(struct net_device *dev,
 	fs = &ch_flower->fs;
 	fs->hitcnts = 1;
 	cxgb4_process_flow_match(dev, cls, fs);
-	cxgb4_process_flow_actions(dev, cls, fs);
+	cxgb4_process_flow_actions(dev, &rule->action, fs);
 
 	fs->hash = is_filter_exact_match(adap, fs);
 	if (fs->hash) {
 		fidx = 0;
 	} else {
-		fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET);
-		if (fidx < 0) {
-			netdev_err(dev, "%s: No fidx for offload.\n", __func__);
+		u8 inet_family;
+
+		inet_family = fs->type ? PF_INET6 : PF_INET;
+
+		/* Note that TC uses prio 0 to indicate stack to
+		 * generate automatic prio and hence doesn't pass prio
+		 * 0 to driver. However, the hardware TCAM index
+		 * starts from 0. Hence, the -1 here.
+		 */
+		if (cls->common.prio <= adap->tids.nftids)
+			fidx = cls->common.prio - 1;
+		else
+			fidx = cxgb4_get_free_ftid(dev, inet_family);
+
+		/* Only insert FLOWER rule if its priority doesn't
+		 * conflict with existing rules in the LETCAM.
+		 */
+		if (fidx < 0 ||
+		    !cxgb4_filter_prio_in_range(dev, fidx, cls->common.prio)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "No free LETCAM index available");
 			ret = -ENOMEM;
 			goto free_entry;
 		}
 	}
 
+	fs->tc_prio = cls->common.prio;
+	fs->tc_cookie = cls->cookie;
+
 	init_completion(&ctx.completion);
 	ret = __cxgb4_set_filter(dev, fidx, fs, &ctx);
 	if (ret) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
index eb4c95248baf..e132516e9868 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
@@ -108,6 +108,12 @@ struct ch_tc_pedit_fields {
 #define PEDIT_TCP_SPORT_DPORT		0x0
 #define PEDIT_UDP_SPORT_DPORT		0x0
 
+void cxgb4_process_flow_actions(struct net_device *in,
+				struct flow_action *actions,
+				struct ch_filter_specification *fs);
+int cxgb4_validate_flow_actions(struct net_device *dev,
+				struct flow_action *actions);
+
 int cxgb4_tc_flower_replace(struct net_device *dev,
 			    struct flow_cls_offload *cls);
 int cxgb4_tc_flower_destroy(struct net_device *dev,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
new file mode 100644
index 000000000000..102b370fbd3e
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2019 Chelsio Communications.  All rights reserved. */
+
+#include "cxgb4.h"
+#include "cxgb4_tc_matchall.h"
+#include "sched.h"
+#include "cxgb4_uld.h"
+#include "cxgb4_filter.h"
+#include "cxgb4_tc_flower.h"
+
+/* Validate the single policing action of an egress MATCHALL rule. */
+static int cxgb4_matchall_egress_validate(struct net_device *dev,
+					  struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct flow_action *actions = &cls->rule->action;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct flow_action_entry *entry;
+	u64 max_link_rate;
+	u32 i, speed;
+
+	if (!flow_action_has_entries(actions)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Egress MATCHALL offload needs at least 1 policing action");
+		return -EINVAL;
+	}
+
+	if (!flow_offload_has_one_action(actions)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Egress MATCHALL offload only supports 1 policing action");
+		return -EINVAL;
+	}
+
+	if (pi->tc_block_shared) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Egress MATCHALL offload not supported with shared blocks");
+		return -EINVAL;
+	}
+
+	if (t4_get_link_params(pi, NULL, &speed, NULL)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Failed to get max speed supported by the link");
+		return -EINVAL;
+	}
+
+	/* Link speed is reported in Mbps; scale it to bps */
+	max_link_rate = (u64)speed * 1000 * 1000;
+
+	flow_action_for_each(i, entry, actions) {
+		if (entry->id != FLOW_ACTION_POLICE) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only policing action supported with Egress MATCHALL offload");
+			return -EOPNOTSUPP;
+		}
+		/* Policer rate is in bytes per second; compare in bits */
+		if (entry->police.rate_bytes_ps * 8 > max_link_rate) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Specified policing max rate is larger than underlying link speed");
+			return -ERANGE;
+		}
+	}
+
+	return 0;
+}
+
+/* Allocate a CH_RL-level scheduler class capped at the policer's rate. */
+static int cxgb4_matchall_alloc_tc(struct net_device *dev,
+				   struct tc_cls_matchall_offload *cls)
+{
+	struct ch_sched_params p = {
+		.type = SCHED_CLASS_TYPE_PACKET,
+		.u.params.level = SCHED_CLASS_LEVEL_CH_RL,
+		.u.params.mode = SCHED_CLASS_MODE_CLASS,
+		.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS,
+		.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS,
+		.u.params.class = SCHED_CLS_NONE,
+		.u.params.minrate = 0,
+		.u.params.weight = 0,
+		.u.params.pktsize = dev->mtu,
+	};
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct cxgb4_matchall_egress_entry *egress;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct flow_action_entry *act;
+	struct sched_class *sched;
+	u32 idx;
+
+	flow_action_for_each(idx, act, &cls->rule->action)
+		if (act->id == FLOW_ACTION_POLICE)
+			break;
+
+	/* Policer rate is in bytes per second; scale it to Kbps */
+	p.u.params.maxrate = div_u64(act->police.rate_bytes_ps * 8, 1000);
+	p.u.params.channel = pi->tx_chan;
+	sched = cxgb4_sched_class_alloc(dev, &p);
+	if (!sched) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "No free traffic class available for policing action");
+		return -ENOMEM;
+	}
+
+	egress = &adap->tc_matchall->port_matchall[pi->port_id].egress;
+	egress->hwtc = sched->idx;
+	egress->cookie = cls->cookie;
+	egress->state = CXGB4_MATCHALL_STATE_ENABLED;
+	return 0;
+}
+
+/* Release the egress policer's scheduler class and reset its bookkeeping. */
+static void cxgb4_matchall_free_tc(struct net_device *dev)
+{
+	struct cxgb4_matchall_egress_entry *egress;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+
+	egress = &adap->tc_matchall->port_matchall[pi->port_id].egress;
+	cxgb4_sched_class_free(dev, egress->hwtc);
+	egress->hwtc = SCHED_CLS_NONE;
+	egress->cookie = 0;
+	egress->state = CXGB4_MATCHALL_STATE_DISABLED;
+}
+
+/* Install a wildcard PF/VF LETCAM filter for an ingress MATCHALL rule. */
+static int cxgb4_matchall_alloc_filter(struct net_device *dev,
+				       struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct cxgb4_matchall_ingress_entry *ingress;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct ch_filter_specification *fs;
+	int ret, fidx;
+
+	/* TC never hands prio 0 to the driver (0 asks the stack to pick
+	 * one), while hardware TCAM indices start at 0. A prio that fits
+	 * within nftids therefore maps to index prio - 1; otherwise take
+	 * any free index. A wildcard matchall VIID rule needs only 1
+	 * slot.
+	 */
+	if (cls->common.prio > adap->tids.nftids)
+		fidx = cxgb4_get_free_ftid(dev, PF_INET);
+	else
+		fidx = cls->common.prio - 1;
+
+	/* Refuse the rule when no index is available or when its priority
+	 * would conflict with rules already in the LETCAM.
+	 */
+	if (fidx < 0 ||
+	    !cxgb4_filter_prio_in_range(dev, fidx, cls->common.prio)) {
+		NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available");
+		return -ENOMEM;
+	}
+
+	ingress = &adap->tc_matchall->port_matchall[pi->port_id].ingress;
+	fs = &ingress->fs;
+	memset(fs, 0, sizeof(*fs));
+
+	fs->tc_prio = cls->common.prio;
+	fs->tc_cookie = cls->cookie;
+	fs->hitcnts = 1;
+
+	fs->val.pfvf_vld = 1;
+	fs->val.pf = adap->pf;
+	fs->val.vf = pi->vin;
+
+	cxgb4_process_flow_actions(dev, &cls->rule->action, fs);
+
+	ret = cxgb4_set_filter(dev, fidx, fs);
+	if (ret)
+		return ret;
+
+	ingress->tid = fidx;
+	ingress->state = CXGB4_MATCHALL_STATE_ENABLED;
+	return 0;
+}
+
+/* Delete the ingress MATCHALL filter and clear its cached statistics. */
+static int cxgb4_matchall_free_filter(struct net_device *dev)
+{
+	struct cxgb4_matchall_ingress_entry *ingress;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	int ret;
+
+	ingress = &adap->tc_matchall->port_matchall[pi->port_id].ingress;
+	ret = cxgb4_del_filter(dev, ingress->tid, &ingress->fs);
+	if (ret)
+		return ret;
+
+	/* Only forget the filter once the delete has succeeded */
+	ingress->packets = 0;
+	ingress->bytes = 0;
+	ingress->last_used = 0;
+	ingress->tid = 0;
+	ingress->state = CXGB4_MATCHALL_STATE_DISABLED;
+	return 0;
+}
+
+/* Offload a MATCHALL rule; a port holds at most one rule per direction. */
+int cxgb4_tc_matchall_replace(struct net_device *dev,
+			      struct tc_cls_matchall_offload *cls_matchall,
+			      bool ingress)
+{
+	struct netlink_ext_ack *extack = cls_matchall->common.extack;
+	struct cxgb4_tc_port_matchall *port;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	int ret;
+
+	port = &adap->tc_matchall->port_matchall[pi->port_id];
+	if (!ingress) {
+		if (port->egress.state == CXGB4_MATCHALL_STATE_ENABLED) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only 1 Egress MATCHALL can be offloaded");
+			return -ENOMEM;
+		}
+
+		ret = cxgb4_matchall_egress_validate(dev, cls_matchall);
+		if (ret)
+			return ret;
+
+		return cxgb4_matchall_alloc_tc(dev, cls_matchall);
+	}
+
+	if (port->ingress.state == CXGB4_MATCHALL_STATE_ENABLED) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only 1 Ingress MATCHALL can be offloaded");
+		return -ENOMEM;
+	}
+
+	ret = cxgb4_validate_flow_actions(dev,
+					  &cls_matchall->rule->action);
+	if (ret)
+		return ret;
+
+	return cxgb4_matchall_alloc_filter(dev, cls_matchall);
+}
+
+/* Remove the offloaded MATCHALL rule identified by the TC cookie. */
+int cxgb4_tc_matchall_destroy(struct net_device *dev,
+			      struct tc_cls_matchall_offload *cls_matchall,
+			      bool ingress)
+{
+	struct cxgb4_tc_port_matchall *port;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+
+	port = &adap->tc_matchall->port_matchall[pi->port_id];
+	if (!ingress) {
+		if (cls_matchall->cookie != port->egress.cookie)
+			return -ENOENT;
+
+		cxgb4_matchall_free_tc(dev);
+		return 0;
+	}
+
+	if (cls_matchall->cookie != port->ingress.fs.tc_cookie)
+		return -ENOENT;
+
+	return cxgb4_matchall_free_filter(dev);
+}
+
+/* Report ingress filter hit counters to TC as deltas since the last poll. */
+int cxgb4_tc_matchall_stats(struct net_device *dev,
+			    struct tc_cls_matchall_offload *cls_matchall)
+{
+	struct cxgb4_matchall_ingress_entry *ingress;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	u64 packets, bytes;
+	int ret;
+
+	ingress = &adap->tc_matchall->port_matchall[pi->port_id].ingress;
+	if (ingress->state == CXGB4_MATCHALL_STATE_DISABLED)
+		return -ENOENT;
+
+	ret = cxgb4_get_filter_counters(dev, ingress->tid, &packets, &bytes,
+					ingress->fs.hash);
+	if (ret)
+		return ret;
+
+	/* Nothing to report when the packet count has not moved */
+	if (ingress->packets == packets)
+		return 0;
+
+	flow_stats_update(&cls_matchall->stats, bytes - ingress->bytes,
+			  packets - ingress->packets, ingress->last_used);
+
+	ingress->packets = packets;
+	ingress->bytes = bytes;
+	ingress->last_used = jiffies;
+
+	return 0;
+}
+
+/* Tear down whatever MATCHALL offloads are still enabled on the port. */
+static void cxgb4_matchall_disable_offload(struct net_device *dev)
+{
+	struct cxgb4_tc_port_matchall *port;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+
+	port = &adap->tc_matchall->port_matchall[pi->port_id];
+	if (port->egress.state == CXGB4_MATCHALL_STATE_ENABLED)
+		cxgb4_matchall_free_tc(dev);
+	if (port->ingress.state == CXGB4_MATCHALL_STATE_ENABLED)
+		cxgb4_matchall_free_filter(dev);
+}
+
+/* Allocate the adapter-wide MATCHALL offload state, with one zeroed
+ * entry per port. Returns 0 on success or -ENOMEM.
+ */
+int cxgb4_init_tc_matchall(struct adapter *adap)
+{
+	struct cxgb4_tc_port_matchall *ports;
+	struct cxgb4_tc_matchall *matchall;
+
+	matchall = kzalloc(sizeof(*matchall), GFP_KERNEL);
+	if (!matchall)
+		return -ENOMEM;
+
+	ports = kcalloc(adap->params.nports, sizeof(*ports),
+			GFP_KERNEL);
+	if (!ports) {
+		/* Nothing has been published yet; just drop the container */
+		kfree(matchall);
+		return -ENOMEM;
+	}
+
+	/* Publish to the adapter only after both allocations succeeded */
+	matchall->port_matchall = ports;
+	adap->tc_matchall = matchall;
+
+	return 0;
+}
+
+/* Disable MATCHALL offload on every port and free the adapter state. */
+void cxgb4_cleanup_tc_matchall(struct adapter *adap)
+{
+	u8 i;
+
+	if (!adap->tc_matchall)
+		return;
+
+	if (adap->tc_matchall->port_matchall) {
+		for (i = 0; i < adap->params.nports; i++) {
+			if (adap->port[i])
+				cxgb4_matchall_disable_offload(adap->port[i]);
+		}
+		kfree(adap->tc_matchall->port_matchall);
+	}
+	kfree(adap->tc_matchall);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.h
new file mode 100644
index 000000000000..ab6b5683dfd3
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2019 Chelsio Communications.  All rights reserved. */
+
+#ifndef __CXGB4_TC_MATCHALL_H__
+#define __CXGB4_TC_MATCHALL_H__
+
+#include <net/pkt_cls.h>
+
+enum cxgb4_matchall_state {
+	CXGB4_MATCHALL_STATE_DISABLED = 0, /* No rule offloaded (zeroed default) */
+	CXGB4_MATCHALL_STATE_ENABLED, /* A rule is currently offloaded */
+};
+
+struct cxgb4_matchall_egress_entry {
+	enum cxgb4_matchall_state state; /* Current MATCHALL offload state */
+	u8 hwtc; /* Scheduler class index backing the egress policer */
+	u64 cookie; /* TC cookie of the offloaded rule; compared on destroy */
+};
+
+struct cxgb4_matchall_ingress_entry {
+	enum cxgb4_matchall_state state; /* Current MATCHALL offload state */
+	u32 tid; /* Index of the hardware filter entry */
+	struct ch_filter_specification fs; /* Filter spec handed to hardware */
+	u64 bytes; /* Byte count recorded at the last stats update */
+	u64 packets; /* Packet count recorded at the last stats update */
+	u64 last_used; /* jiffies of the last stats poll that saw new hits */
+};
+
+struct cxgb4_tc_port_matchall {
+	struct cxgb4_matchall_egress_entry egress; /* Egress policer state */
+	struct cxgb4_matchall_ingress_entry ingress; /* Ingress filter state */
+};
+
+struct cxgb4_tc_matchall {
+	struct cxgb4_tc_port_matchall *port_matchall; /* Array, one per port */
+};
+
+int cxgb4_tc_matchall_replace(struct net_device *dev,
+			      struct tc_cls_matchall_offload *cls_matchall,
+			      bool ingress);
+int cxgb4_tc_matchall_destroy(struct net_device *dev,
+			      struct tc_cls_matchall_offload *cls_matchall,
+			      bool ingress);
+int cxgb4_tc_matchall_stats(struct net_device *dev,
+			    struct tc_cls_matchall_offload *cls_matchall);
+
+int cxgb4_init_tc_matchall(struct adapter *adap);
+void cxgb4_cleanup_tc_matchall(struct adapter *adap);
+#endif /* __CXGB4_TC_MATCHALL_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
new file mode 100644
index 000000000000..477973d2e341
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -0,0 +1,650 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2019 Chelsio Communications.  All rights reserved. */
+
+#include "cxgb4.h"
+#include "cxgb4_tc_mqprio.h"
+#include "sched.h"
+
+/* Check an MQPRIO request against what the hardware is able to offload. */
+static int cxgb4_mqprio_validate(struct net_device *dev,
+				 struct tc_mqprio_qopt_offload *mqprio)
+{
+	u64 min_rate = 0, max_rate = 0, max_link_rate;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	u32 speed, qcount = 0, qoffset = 0;
+	int ret;
+	u8 tc;
+
+	if (!mqprio->qopt.num_tc)
+		return 0;
+
+	if (mqprio->qopt.hw != TC_MQPRIO_HW_OFFLOAD_TCS) {
+		netdev_err(dev, "Only full TC hardware offload is supported\n");
+		return -EINVAL;
+	} else if (mqprio->mode != TC_MQPRIO_MODE_CHANNEL) {
+		netdev_err(dev, "Only channel mode offload is supported\n");
+		return -EINVAL;
+	} else if (mqprio->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
+		netdev_err(dev, "Only bandwidth rate shaper supported\n");
+		return -EINVAL;
+	} else if (mqprio->qopt.num_tc > adap->params.nsched_cls) {
+		netdev_err(dev,
+			   "Only %u traffic classes supported by hardware\n",
+			   adap->params.nsched_cls);
+		return -ERANGE;
+	}
+
+	ret = t4_get_link_params(pi, NULL, &speed, NULL);
+	if (ret) {
+		netdev_err(dev, "Failed to get link speed, ret: %d\n", ret);
+		return -EINVAL;
+	}
+
+	/* Link speed is reported in Mbps; scale it to bps */
+	max_link_rate = (u64)speed * 1000 * 1000;
+
+	/* Sum queue counts and rates (bytes/s scaled to bits/s) per class */
+	for (tc = 0; tc < mqprio->qopt.num_tc; tc++) {
+		qoffset = max_t(u16, mqprio->qopt.offset[tc], qoffset);
+		qcount += mqprio->qopt.count[tc];
+		min_rate += mqprio->min_rate[tc] * 8;
+		max_rate += mqprio->max_rate[tc] * 8;
+	}
+
+	if (qoffset >= adap->tids.neotids || qcount > adap->tids.neotids)
+		return -ENOMEM;
+
+	if (min_rate > max_link_rate || max_rate > max_link_rate) {
+		netdev_err(dev,
+			   "Total Min/Max (%llu/%llu) Rate > supported (%llu)\n",
+			   min_rate, max_rate, max_link_rate);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Reset a software ETHOFLD Tx queue and attach a fresh descriptor ring. */
+static int cxgb4_init_eosw_txq(struct net_device *dev,
+			       struct sge_eosw_txq *eosw_txq,
+			       u32 eotid, u32 hwqid)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct tx_sw_desc *descs;
+
+	memset(eosw_txq, 0, sizeof(*eosw_txq));
+	descs = kcalloc(CXGB4_EOSW_TXQ_DEFAULT_DESC_NUM, sizeof(*descs),
+			GFP_KERNEL);
+	if (!descs)
+		return -ENOMEM;
+
+	eosw_txq->netdev = dev;
+	eosw_txq->desc = descs;
+	eosw_txq->ndesc = CXGB4_EOSW_TXQ_DEFAULT_DESC_NUM;
+	eosw_txq->state = CXGB4_EO_STATE_CLOSED;
+	eosw_txq->cred = adap->params.ofldq_wr_cred;
+	eosw_txq->hwqid = hwqid;
+	eosw_txq->eotid = eotid;
+	eosw_txq->hwtid = adap->tids.eotid_base + eosw_txq->eotid;
+	spin_lock_init(&eosw_txq->lock);
+	tasklet_init(&eosw_txq->qresume_tsk, cxgb4_ethofld_restart,
+		     (unsigned long)eosw_txq);
+	return 0;
+}
+
+static void cxgb4_clean_eosw_txq(struct net_device *dev,
+				 struct sge_eosw_txq *eosw_txq)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	cxgb4_eosw_txq_free_desc(adap, eosw_txq, eosw_txq->ndesc);
+	eosw_txq->pidx = 0; /* ring indices and counters restart from zero */
+	eosw_txq->last_pidx = 0;
+	eosw_txq->cidx = 0;
+	eosw_txq->last_cidx = 0;
+	eosw_txq->flowc_idx = 0;
+	eosw_txq->inuse = 0;
+	eosw_txq->cred = adap->params.ofldq_wr_cred; /* same value as init */
+	eosw_txq->ncompl = 0;
+	eosw_txq->last_compl = 0;
+	eosw_txq->state = CXGB4_EO_STATE_CLOSED; /* same state as init */
+}
+
+static void cxgb4_free_eosw_txq(struct net_device *dev,
+				struct sge_eosw_txq *eosw_txq)
+{
+	spin_lock_bh(&eosw_txq->lock); /* clean and free under the lock */
+	cxgb4_clean_eosw_txq(dev, eosw_txq);
+	kfree(eosw_txq->desc); /* NOTE(review): desc pointer is not cleared */
+	spin_unlock_bh(&eosw_txq->lock);
+	tasklet_kill(&eosw_txq->qresume_tsk); /* after dropping the lock */
+}
+
+/* Allocate and start the ETHOFLD hardware queue sets used by this port. */
+static int cxgb4_mqprio_alloc_hw_resources(struct net_device *dev)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_ofld_rxq *eorxq;
+	struct sge_eohw_txq *eotxq;
+	int ret, msix = 0;
+	u32 i;
+
+	/* First user allocates the adapter-wide arrays and sets the count to
+	 * 1 (refcount_inc() on zero is a bug); later users take a reference.
+	 */
+	if (!refcount_read(&adap->tc_mqprio->refcnt)) {
+		adap->sge.eohw_rxq = kcalloc(adap->sge.eoqsets,
+					     sizeof(struct sge_ofld_rxq),
+					     GFP_KERNEL);
+		if (!adap->sge.eohw_rxq)
+			return -ENOMEM;
+
+		adap->sge.eohw_txq = kcalloc(adap->sge.eoqsets,
+					     sizeof(struct sge_eohw_txq),
+					     GFP_KERNEL);
+		if (!adap->sge.eohw_txq) {
+			kfree(adap->sge.eohw_rxq);
+			return -ENOMEM;
+		}
+
+		refcount_set(&adap->tc_mqprio->refcnt, 1);
+	} else {
+		refcount_inc(&adap->tc_mqprio->refcnt);
+	}
+
+	if (!(adap->flags & CXGB4_USING_MSIX))
+		msix = -((int)adap->sge.intrq.abs_id + 1);
+
+	for (i = 0; i < pi->nqsets; i++) {
+		eorxq = &adap->sge.eohw_rxq[pi->first_qset + i];
+		eotxq = &adap->sge.eohw_txq[pi->first_qset + i];
+		/* Allocate Rxqs for receiving ETHOFLD Tx completions */
+		if (msix >= 0) {
+			msix = cxgb4_get_msix_idx_from_bmap(adap);
+			if (msix < 0) {
+				ret = msix;
+				goto out_free_queues;
+			}
+
+			eorxq->msix = &adap->msix_info[msix];
+			snprintf(eorxq->msix->desc, sizeof(eorxq->msix->desc),
+				 "%s-eorxq%d", dev->name, i);
+		}
+
+		init_rspq(adap, &eorxq->rspq, CXGB4_EOHW_RXQ_DEFAULT_INTR_USEC,
+			  CXGB4_EOHW_RXQ_DEFAULT_PKT_CNT,
+			  CXGB4_EOHW_RXQ_DEFAULT_DESC_NUM,
+			  CXGB4_EOHW_RXQ_DEFAULT_DESC_SIZE);
+		eorxq->fl.size = CXGB4_EOHW_FLQ_DEFAULT_DESC_NUM;
+		ret = t4_sge_alloc_rxq(adap, &eorxq->rspq, false, dev, msix,
+				       &eorxq->fl, cxgb4_ethofld_rx_handler,
+				       NULL, 0);
+		if (ret)
+			goto out_free_queues;
+
+		/* Allocate ETHOFLD hardware Txqs */
+		eotxq->q.size = CXGB4_EOHW_TXQ_DEFAULT_DESC_NUM;
+		ret = t4_sge_alloc_ethofld_txq(adap, eotxq, dev,
+					       eorxq->rspq.cntxt_id);
+		if (ret)
+			goto out_free_queues;
+
+		/* Allocate IRQs, set IRQ affinity, and start Rx */
+		if (adap->flags & CXGB4_USING_MSIX) {
+			ret = request_irq(eorxq->msix->vec, t4_sge_intr_msix, 0,
+					  eorxq->msix->desc, &eorxq->rspq);
+			if (ret)
+				goto out_free_msix;
+
+			cxgb4_set_msix_aff(adap, eorxq->msix->vec,
+					   &eorxq->msix->aff_mask, i);
+		}
+
+		if (adap->flags & CXGB4_FULL_INIT_DONE)
+			cxgb4_enable_rx(adap, &eorxq->rspq);
+	}
+
+	return 0;
+
+out_free_msix:
+	while (i-- > 0) {
+		eorxq = &adap->sge.eohw_rxq[pi->first_qset + i];
+		if (adap->flags & CXGB4_FULL_INIT_DONE)
+			cxgb4_quiesce_rx(&eorxq->rspq);
+
+		if (adap->flags & CXGB4_USING_MSIX) {
+			cxgb4_clear_msix_aff(eorxq->msix->vec,
+					     eorxq->msix->aff_mask);
+			free_irq(eorxq->msix->vec, &eorxq->rspq);
+		}
+	}
+
+out_free_queues:
+	for (i = 0; i < pi->nqsets; i++) {
+		eorxq = &adap->sge.eohw_rxq[pi->first_qset + i];
+		eotxq = &adap->sge.eohw_txq[pi->first_qset + i];
+
+		if (eorxq->rspq.desc)
+			free_rspq_fl(adap, &eorxq->rspq, &eorxq->fl);
+		if (eorxq->msix)
+			cxgb4_free_msix_idx_in_bmap(adap, eorxq->msix->idx);
+		t4_sge_free_ethofld_txq(adap, eotxq);
+	}
+
+	if (refcount_dec_and_test(&adap->tc_mqprio->refcnt)) {
+		kfree(adap->sge.eohw_txq);
+		kfree(adap->sge.eohw_rxq);
+	}
+	return ret;
+}
+
+/* Stop and free the ETHOFLD hardware queue sets used by this port. */
+static void cxgb4_mqprio_free_hw_resources(struct net_device *dev)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_ofld_rxq *rxq;
+	struct sge_eohw_txq *txq;
+	u32 i;
+
+	/* Nothing to do before the ETHOFLD structures exist, or when this
+	 * port's hardware queues were never allocated.
+	 */
+	if (!refcount_read(&adap->tc_mqprio->refcnt) ||
+	    !adap->sge.eohw_rxq[pi->first_qset].rspq.desc)
+		return;
+
+	for (i = 0; i < pi->nqsets; i++) {
+		rxq = &adap->sge.eohw_rxq[pi->first_qset + i];
+		txq = &adap->sge.eohw_txq[pi->first_qset + i];
+
+		/* The device removal path has already disabled NAPI
+		 * before unregistering the netdevice, so only quiesce
+		 * Rx when not on that path.
+		 */
+		if (!(adap->flags & CXGB4_SHUTTING_DOWN))
+			cxgb4_quiesce_rx(&rxq->rspq);
+
+		if (adap->flags & CXGB4_USING_MSIX) {
+			cxgb4_clear_msix_aff(rxq->msix->vec,
+					     rxq->msix->aff_mask);
+			free_irq(rxq->msix->vec, &rxq->rspq);
+		}
+
+		free_rspq_fl(adap, &rxq->rspq, &rxq->fl);
+		t4_sge_free_ethofld_txq(adap, txq);
+	}
+
+	/* The last user frees the ETHOFLD structures */
+	if (refcount_dec_and_test(&adap->tc_mqprio->refcnt)) {
+		kfree(adap->sge.eohw_txq);
+		kfree(adap->sge.eohw_rxq);
+	}
+}
+
+/* Allocate one flow-mode scheduler class per MQPRIO traffic class and
+ * record the tc to hardware class mapping. Unwinds fully on failure.
+ */
+static int cxgb4_mqprio_alloc_tc(struct net_device *dev,
+				 struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct ch_sched_params p = {
+		.type = SCHED_CLASS_TYPE_PACKET,
+		.u.params.level = SCHED_CLASS_LEVEL_CL_RL,
+		.u.params.mode = SCHED_CLASS_MODE_FLOW,
+		.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS,
+		.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS,
+		.u.params.class = SCHED_CLS_NONE,
+		.u.params.weight = 0,
+		.u.params.pktsize = dev->mtu,
+	};
+	struct cxgb4_tc_port_mqprio *port;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sched_class *sched;
+	u8 tc;
+
+	port = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	p.u.params.channel = pi->tx_chan;
+	for (tc = 0; tc < mqprio->qopt.num_tc; tc++) {
+		/* Rates arrive in bytes per second; scale them to Kbps */
+		p.u.params.minrate = div_u64(mqprio->min_rate[tc] * 8, 1000);
+		p.u.params.maxrate = div_u64(mqprio->max_rate[tc] * 8, 1000);
+
+		sched = cxgb4_sched_class_alloc(dev, &p);
+		if (!sched)
+			goto out_err;
+
+		port->tc_hwtc_map[tc] = sched->idx;
+	}
+
+	return 0;
+
+out_err:
+	while (tc--)
+		cxgb4_sched_class_free(dev, port->tc_hwtc_map[tc]);
+
+	return -ENOMEM;
+}
+
+static void cxgb4_mqprio_free_tc(struct net_device *dev)
+{
+	struct cxgb4_tc_port_mqprio *tc_port_mqprio;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	u8 i; /* walks the traffic classes of the port's stored config */
+
+	tc_port_mqprio = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	for (i = 0; i < tc_port_mqprio->mqprio.qopt.num_tc; i++)
+		cxgb4_sched_class_free(dev, tc_port_mqprio->tc_hwtc_map[i]);
+}
+
+/* Bind a software Tx queue to hardware class @tc and await completion. */
+static int cxgb4_mqprio_class_bind(struct net_device *dev,
+				   struct sge_eosw_txq *eosw_txq,
+				   u8 tc)
+{
+	struct ch_sched_flowc fe;
+	int ret;
+
+	/* Re-arm the completion before issuing the bind request */
+	init_completion(&eosw_txq->completion);
+
+	fe.class = tc;
+	fe.tid = eosw_txq->eotid;
+	ret = cxgb4_sched_class_bind(dev, &fe, SCHED_FLOWC);
+	if (ret)
+		return ret;
+
+	if (!wait_for_completion_timeout(&eosw_txq->completion,
+					 CXGB4_FLOWC_WAIT_TIMEOUT))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/* Unbind a software Tx queue from hardware class @tc. */
+static void cxgb4_mqprio_class_unbind(struct net_device *dev,
+				      struct sge_eosw_txq *eosw_txq,
+				      u8 tc)
+{
+	struct adapter *adap = netdev2adap(dev);
+	bool wait = !(adap->flags & CXGB4_SHUTTING_DOWN);
+	struct ch_sched_flowc fe;
+
+	/* No completions arrive during shutdown (interrupts are off) */
+	if (wait)
+		init_completion(&eosw_txq->completion);
+
+	fe.class = tc;
+	fe.tid = eosw_txq->eotid;
+	cxgb4_sched_class_unbind(dev, &fe, SCHED_FLOWC);
+
+	if (wait)
+		wait_for_completion_timeout(&eosw_txq->completion,
+					    CXGB4_FLOWC_WAIT_TIMEOUT);
+}
+
+/* Create the software Tx queues for every MQPRIO traffic class, bind each
+ * to its hardware class, then publish the tc to queue mapping to the stack.
+ */
+static int cxgb4_mqprio_enable_offload(struct net_device *dev,
+				       struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	u32 qoffset, qcount, tot_qcount, hwqid;
+	struct cxgb4_tc_port_mqprio *port;
+	struct sge_eosw_txq *eosw_txq;
+	int eotid, ret;
+	u16 i, j;
+	u8 hwtc;
+
+	ret = cxgb4_mqprio_alloc_hw_resources(dev);
+	if (ret)
+		return -ENOMEM;
+
+	port = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	for (i = 0; i < mqprio->qopt.num_tc; i++) {
+		qoffset = mqprio->qopt.offset[i];
+		qcount = mqprio->qopt.count[i];
+		for (j = 0; j < qcount; j++) {
+			eotid = cxgb4_get_free_eotid(&adap->tids);
+			if (eotid < 0) {
+				ret = -ENOMEM;
+				goto out_free_eotids;
+			}
+
+			/* Spread the software queues over the port's
+			 * hardware queue sets by EOTID.
+			 */
+			hwqid = pi->first_qset + (eotid % pi->nqsets);
+			eosw_txq = &port->eosw_txq[qoffset + j];
+			ret = cxgb4_init_eosw_txq(dev, eosw_txq, eotid, hwqid);
+			if (ret)
+				goto out_free_eotids;
+
+			cxgb4_alloc_eotid(&adap->tids, eotid, eosw_txq);
+
+			hwtc = port->tc_hwtc_map[i];
+			ret = cxgb4_mqprio_class_bind(dev, eosw_txq, hwtc);
+			if (ret)
+				goto out_free_eotids;
+		}
+	}
+
+	memcpy(&port->mqprio, mqprio, sizeof(struct tc_mqprio_qopt_offload));
+
+	/* Tell the stack about the configured traffic classes and their
+	 * queue ranges. ETHOFLD queues are numbered after the port's
+	 * nqsets NIC queues; a class with no queue count is pointed at
+	 * the default NIC queues instead.
+	 */
+	ret = netdev_set_num_tc(dev, mqprio->qopt.num_tc);
+	if (ret)
+		goto out_free_eotids;
+
+	tot_qcount = pi->nqsets;
+	for (i = 0; i < mqprio->qopt.num_tc; i++) {
+		qcount = mqprio->qopt.count[i];
+		qoffset = qcount ? mqprio->qopt.offset[i] + pi->nqsets : 0;
+		if (!qcount)
+			qcount = pi->nqsets;
+
+		ret = netdev_set_tc_queue(dev, i, qcount, qoffset);
+		if (ret)
+			goto out_reset_tc;
+
+		tot_qcount += mqprio->qopt.count[i];
+	}
+
+	ret = netif_set_real_num_tx_queues(dev, tot_qcount);
+	if (ret)
+		goto out_reset_tc;
+
+	port->state = CXGB4_MQPRIO_STATE_ACTIVE;
+	return 0;
+
+out_reset_tc:
+	netdev_reset_tc(dev);
+	i = mqprio->qopt.num_tc;
+
+out_free_eotids:
+	/* Unwinds classes below i only. NOTE(review): queues already set up
+	 * for a partially built class i appear to be left behind - confirm.
+	 */
+	while (i-- > 0) {
+		qoffset = mqprio->qopt.offset[i];
+		qcount = mqprio->qopt.count[i];
+		for (j = 0; j < qcount; j++) {
+			eosw_txq = &port->eosw_txq[qoffset + j];
+			hwtc = port->tc_hwtc_map[i];
+			cxgb4_mqprio_class_unbind(dev, eosw_txq, hwtc);
+			cxgb4_free_eotid(&adap->tids, eosw_txq->eotid);
+			cxgb4_free_eosw_txq(dev, eosw_txq);
+		}
+	}
+
+	cxgb4_mqprio_free_hw_resources(dev);
+	return ret;
+}
+
+/* Undo an active MQPRIO offload: queues, hardware resources and classes. */
+static void cxgb4_mqprio_disable_offload(struct net_device *dev)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct cxgb4_tc_port_mqprio *port;
+	struct sge_eosw_txq *eosw_txq;
+	u32 qoffset, qcount;
+	u16 i, j;
+	u8 hwtc;
+
+	port = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	if (port->state != CXGB4_MQPRIO_STATE_ACTIVE)
+		return;
+
+	/* Reset the stack's tc mapping and Tx queue count to the NIC queues */
+	netdev_reset_tc(dev);
+	netif_set_real_num_tx_queues(dev, pi->nqsets);
+
+	for (i = 0; i < port->mqprio.qopt.num_tc; i++) {
+		qoffset = port->mqprio.qopt.offset[i];
+		qcount = port->mqprio.qopt.count[i];
+		hwtc = port->tc_hwtc_map[i];
+		for (j = 0; j < qcount; j++) {
+			eosw_txq = &port->eosw_txq[qoffset + j];
+
+			cxgb4_mqprio_class_unbind(dev, eosw_txq, hwtc);
+			cxgb4_free_eotid(&adap->tids, eosw_txq->eotid);
+			cxgb4_free_eosw_txq(dev, eosw_txq);
+		}
+	}
+
+	cxgb4_mqprio_free_hw_resources(dev);
+
+	/* Free the traffic classes while the stored config is still intact */
+	cxgb4_mqprio_free_tc(dev);
+
+	memset(&port->mqprio, 0, sizeof(struct tc_mqprio_qopt_offload));
+
+	port->state = CXGB4_MQPRIO_STATE_DISABLED;
+}
+
+/* Apply (or, with num_tc == 0, clear) an MQPRIO offload request on @dev. */
+int cxgb4_setup_tc_mqprio(struct net_device *dev,
+			  struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	bool needs_bring_up;
+	u8 *hwtc_map;
+	int ret;
+	u8 i;
+
+	ret = cxgb4_mqprio_validate(dev, mqprio);
+	if (ret)
+		return ret;
+
+	/* EOTIDs can't be freed while traffic is running, so take the
+	 * interface down before reconfiguring and restore it afterwards.
+	 */
+	needs_bring_up = netif_running(dev);
+	if (needs_bring_up)
+		cxgb_close(dev);
+
+	cxgb4_mqprio_disable_offload(dev);
+
+	/* A clear request is complete once the old offload is torn down */
+	if (!mqprio->qopt.num_tc)
+		goto out;
+
+	ret = cxgb4_mqprio_alloc_tc(dev, mqprio);
+	if (ret)
+		goto out;
+
+	hwtc_map = adap->tc_mqprio->port_mqprio[pi->port_id].tc_hwtc_map;
+	ret = cxgb4_mqprio_enable_offload(dev, mqprio);
+	if (ret) {
+		/* Free by the request's class count: the stored config that
+		 * cxgb4_mqprio_free_tc() walks may not hold this request yet.
+		 */
+		for (i = 0; i < mqprio->qopt.num_tc; i++)
+			cxgb4_sched_class_free(dev, hwtc_map[i]);
+	}
+
+out:
+	if (needs_bring_up)
+		cxgb_open(dev);
+	return ret;
+}
+
+/* Allocate the adapter's MQPRIO state: one entry per port, each with an
+ * array of neotids software Tx queues. Returns 0 or -ENOMEM.
+ */
+int cxgb4_init_tc_mqprio(struct adapter *adap)
+{
+	struct cxgb4_tc_port_mqprio *ports;
+	struct cxgb4_tc_mqprio *tc_mqprio;
+	u8 i;
+
+	tc_mqprio = kzalloc(sizeof(*tc_mqprio), GFP_KERNEL);
+	if (!tc_mqprio)
+		return -ENOMEM;
+
+	ports = kcalloc(adap->params.nports, sizeof(*ports), GFP_KERNEL);
+	if (!ports)
+		goto out_free_mqprio;
+
+	/* Each port gets its own array of software ETHOFLD Tx queues */
+	tc_mqprio->port_mqprio = ports;
+	for (i = 0; i < adap->params.nports; i++) {
+		ports[i].eosw_txq = kcalloc(adap->tids.neotids,
+					    sizeof(*ports[i].eosw_txq),
+					    GFP_KERNEL);
+		if (!ports[i].eosw_txq)
+			goto out_free_ports;
+	}
+
+	adap->tc_mqprio = tc_mqprio;
+
+	/* The count tracks users of the ETHOFLD hardware queue arrays,
+	 * which are allocated on demand; there are none yet.
+	 */
+	refcount_set(&adap->tc_mqprio->refcnt, 0);
+	return 0;
+
+out_free_ports:
+	/* Entries never allocated are still NULL, which kfree() accepts */
+	for (i = 0; i < adap->params.nports; i++)
+		kfree(ports[i].eosw_txq);
+
+	kfree(ports);
+
+out_free_mqprio:
+	kfree(tc_mqprio);
+	return -ENOMEM;
+}
+
+/* Disable MQPRIO offload on every port and free the adapter's state. */
+void cxgb4_cleanup_tc_mqprio(struct adapter *adap)
+{
+	struct cxgb4_tc_port_mqprio *ports;
+	u8 i;
+
+	if (!adap->tc_mqprio)
+		return;
+
+	ports = adap->tc_mqprio->port_mqprio;
+	if (ports) {
+		for (i = 0; i < adap->params.nports; i++) {
+			if (adap->port[i])
+				cxgb4_mqprio_disable_offload(adap->port[i]);
+			kfree(ports[i].eosw_txq);
+		}
+		kfree(ports);
+	}
+	kfree(adap->tc_mqprio);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
new file mode 100644
index 000000000000..c532f1ef8451
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2019 Chelsio Communications.  All rights reserved. */
+
+#ifndef __CXGB4_TC_MQPRIO_H__
+#define __CXGB4_TC_MQPRIO_H__
+
+#include <net/pkt_cls.h>
+
+#define CXGB4_EOSW_TXQ_DEFAULT_DESC_NUM 128
+
+#define CXGB4_EOHW_TXQ_DEFAULT_DESC_NUM 1024
+
+#define CXGB4_EOHW_RXQ_DEFAULT_DESC_NUM 1024
+#define CXGB4_EOHW_RXQ_DEFAULT_DESC_SIZE 64
+#define CXGB4_EOHW_RXQ_DEFAULT_INTR_USEC 5
+#define CXGB4_EOHW_RXQ_DEFAULT_PKT_CNT 8
+
+#define CXGB4_EOHW_FLQ_DEFAULT_DESC_NUM 72
+
+#define CXGB4_FLOWC_WAIT_TIMEOUT (5 * HZ)
+
+enum cxgb4_mqprio_state {
+	CXGB4_MQPRIO_STATE_DISABLED = 0,
+	CXGB4_MQPRIO_STATE_ACTIVE,
+};
+
+struct cxgb4_tc_port_mqprio {
+	enum cxgb4_mqprio_state state; /* Current MQPRIO offload state */
+	struct tc_mqprio_qopt_offload mqprio; /* MQPRIO offload params */
+	struct sge_eosw_txq *eosw_txq; /* Netdev SW Tx queue array */
+	u8 tc_hwtc_map[TC_QOPT_MAX_QUEUE]; /* MQPRIO tc to hardware tc map */
+};
+
+struct cxgb4_tc_mqprio {
+	refcount_t refcnt; /* Refcount for adapter-wide resources */
+	struct cxgb4_tc_port_mqprio *port_mqprio; /* Per port MQPRIO info */
+};
+
+int cxgb4_setup_tc_mqprio(struct net_device *dev,
+			  struct tc_mqprio_qopt_offload *mqprio);
+int cxgb4_init_tc_mqprio(struct adapter *adap);
+void cxgb4_cleanup_tc_mqprio(struct adapter *adap);
+#endif /* __CXGB4_TC_MQPRIO_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index 02fc63fa7f25..133f8623ba86 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -36,6 +36,7 @@
 #include <net/tc_act/tc_mirred.h>
 
 #include "cxgb4.h"
+#include "cxgb4_filter.h"
 #include "cxgb4_tc_u32_parse.h"
 #include "cxgb4_tc_u32.h"
 
@@ -148,6 +149,7 @@ static int fill_action_fields(struct adapter *adap,
 int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 {
 	const struct cxgb4_match_field *start, *link_start = NULL;
+	struct netlink_ext_ack *extack = cls->common.extack;
 	struct adapter *adapter = netdev2adap(dev);
 	__be16 protocol = cls->common.protocol;
 	struct ch_filter_specification fs;
@@ -164,14 +166,21 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 	if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6))
 		return -EOPNOTSUPP;
 
-	/* Fetch the location to insert the filter. */
-	filter_id = cls->knode.handle & 0xFFFFF;
+	/* Note that TC uses prio 0 to indicate stack to generate
+	 * automatic prio and hence doesn't pass prio 0 to driver.
+	 * However, the hardware TCAM index starts from 0. Hence, the
+	 * -1 here.
+	 */
+	filter_id = TC_U32_NODE(cls->knode.handle) - 1;
 
-	if (filter_id > adapter->tids.nftids) {
-		dev_err(adapter->pdev_dev,
-			"Location %d out of range for insertion. Max: %d\n",
-			filter_id, adapter->tids.nftids);
-		return -ERANGE;
+	/* Only insert U32 rule if its priority doesn't conflict with
+	 * existing rules in the LETCAM.
+	 */
+	if (filter_id >= adapter->tids.nftids ||
+	    !cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "No free LETCAM index available");
+		return -ENOMEM;
 	}
 
 	t = adapter->tc_u32;
@@ -190,6 +199,9 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 
 	memset(&fs, 0, sizeof(fs));
 
+	fs.tc_prio = cls->common.prio;
+	fs.tc_cookie = cls->knode.handle;
+
 	if (protocol == htons(ETH_P_IPV6)) {
 		start = cxgb4_ipv6_fields;
 		is_ipv6 = true;
@@ -350,14 +362,10 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 		return -EOPNOTSUPP;
 
 	/* Fetch the location to delete the filter. */
-	filter_id = cls->knode.handle & 0xFFFFF;
-
-	if (filter_id > adapter->tids.nftids) {
-		dev_err(adapter->pdev_dev,
-			"Location %d out of range for deletion. Max: %d\n",
-			filter_id, adapter->tids.nftids);
+	filter_id = TC_U32_NODE(cls->knode.handle) - 1;
+	if (filter_id >= adapter->tids.nftids ||
+	    cls->knode.handle != adapter->tids.ftid_tab[filter_id].fs.tc_cookie)
 		return -ERANGE;
-	}
 
 	t = adapter->tc_u32;
 	handle = cls->knode.handle;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 86b528d8364c..cce33d279094 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -53,35 +53,6 @@
 
 #define for_each_uldrxq(m, i) for (i = 0; i < ((m)->nrxq + (m)->nciq); i++)
 
-static int get_msix_idx_from_bmap(struct adapter *adap)
-{
-	struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds;
-	unsigned long flags;
-	unsigned int msix_idx;
-
-	spin_lock_irqsave(&bmap->lock, flags);
-	msix_idx = find_first_zero_bit(bmap->msix_bmap, bmap->mapsize);
-	if (msix_idx < bmap->mapsize) {
-		__set_bit(msix_idx, bmap->msix_bmap);
-	} else {
-		spin_unlock_irqrestore(&bmap->lock, flags);
-		return -ENOSPC;
-	}
-
-	spin_unlock_irqrestore(&bmap->lock, flags);
-	return msix_idx;
-}
-
-static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx)
-{
-	struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds;
-	unsigned long flags;
-
-	spin_lock_irqsave(&bmap->lock, flags);
-	__clear_bit(msix_idx, bmap->msix_bmap);
-	spin_unlock_irqrestore(&bmap->lock, flags);
-}
-
 /* Flush the aggregated lro sessions */
 static void uldrx_flush_handler(struct sge_rspq *q)
 {
@@ -138,9 +109,9 @@ static int alloc_uld_rxqs(struct adapter *adap,
 			  struct sge_uld_rxq_info *rxq_info, bool lro)
 {
 	unsigned int nq = rxq_info->nrxq + rxq_info->nciq;
-	int i, err, msi_idx, que_idx = 0, bmap_idx = 0;
 	struct sge_ofld_rxq *q = rxq_info->uldrxq;
 	unsigned short *ids = rxq_info->rspq_id;
+	int i, err, msi_idx, que_idx = 0;
 	struct sge *s = &adap->sge;
 	unsigned int per_chan;
 
@@ -159,12 +130,18 @@ static int alloc_uld_rxqs(struct adapter *adap,
 		}
 
 		if (msi_idx >= 0) {
-			bmap_idx = get_msix_idx_from_bmap(adap);
-			if (bmap_idx < 0) {
+			msi_idx = cxgb4_get_msix_idx_from_bmap(adap);
+			if (msi_idx < 0) {
 				err = -ENOSPC;
 				goto freeout;
 			}
-			msi_idx = adap->msix_info_ulds[bmap_idx].idx;
+
+			snprintf(adap->msix_info[msi_idx].desc,
+				 sizeof(adap->msix_info[msi_idx].desc),
+				 "%s-%s%d",
+				 adap->port[0]->name, rxq_info->name, i);
+
+			q->msix = &adap->msix_info[msi_idx];
 		}
 		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
 				       adap->port[que_idx++ / per_chan],
@@ -175,8 +152,7 @@ static int alloc_uld_rxqs(struct adapter *adap,
 				       0);
 		if (err)
 			goto freeout;
-		if (msi_idx >= 0)
-			rxq_info->msix_tbl[i] = bmap_idx;
+
 		memset(&q->stats, 0, sizeof(q->stats));
 		if (ids)
 			ids[i] = q->rspq.abs_id;
@@ -188,6 +164,8 @@ freeout:
 		if (q->rspq.desc)
 			free_rspq_fl(adap, &q->rspq,
 				     q->fl.size ? &q->fl : NULL);
+		if (q->msix)
+			cxgb4_free_msix_idx_in_bmap(adap, q->msix->idx);
 	}
 	return err;
 }
@@ -198,14 +176,6 @@ setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 	int i, ret = 0;
 
-	if (adap->flags & CXGB4_USING_MSIX) {
-		rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq),
-					     sizeof(unsigned short),
-					     GFP_KERNEL);
-		if (!rxq_info->msix_tbl)
-			return -ENOMEM;
-	}
-
 	ret = !(!alloc_uld_rxqs(adap, rxq_info, lro));
 
 	/* Tell uP to route control queue completions to rdma rspq */
@@ -261,8 +231,6 @@ static void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type)
 		t4_free_uld_rxqs(adap, rxq_info->nciq,
 				 rxq_info->uldrxq + rxq_info->nrxq);
 	t4_free_uld_rxqs(adap, rxq_info->nrxq, rxq_info->uldrxq);
-	if (adap->flags & CXGB4_USING_MSIX)
-		kfree(rxq_info->msix_tbl);
 }
 
 static int cfg_queues_uld(struct adapter *adap, unsigned int uld_type,
@@ -355,13 +323,12 @@ static int
 request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	struct uld_msix_info *minfo;
+	struct msix_info *minfo;
+	unsigned int idx;
 	int err = 0;
-	unsigned int idx, bmap_idx;
 
 	for_each_uldrxq(rxq_info, idx) {
-		bmap_idx = rxq_info->msix_tbl[idx];
-		minfo = &adap->msix_info_ulds[bmap_idx];
+		minfo = rxq_info->uldrxq[idx].msix;
 		err = request_irq(minfo->vec,
 				  t4_sge_intr_msix, 0,
 				  minfo->desc,
@@ -376,10 +343,9 @@ request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 
 unwind:
 	while (idx-- > 0) {
-		bmap_idx = rxq_info->msix_tbl[idx];
-		minfo = &adap->msix_info_ulds[bmap_idx];
+		minfo = rxq_info->uldrxq[idx].msix;
 		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
-		free_msix_idx_in_bmap(adap, bmap_idx);
+		cxgb4_free_msix_idx_in_bmap(adap, minfo->idx);
 		free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq);
 	}
 	return err;
@@ -389,69 +355,45 @@ static void
 free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	struct uld_msix_info *minfo;
-	unsigned int idx, bmap_idx;
+	struct msix_info *minfo;
+	unsigned int idx;
 
 	for_each_uldrxq(rxq_info, idx) {
-		bmap_idx = rxq_info->msix_tbl[idx];
-		minfo = &adap->msix_info_ulds[bmap_idx];
-
+		minfo = rxq_info->uldrxq[idx].msix;
 		cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
-		free_msix_idx_in_bmap(adap, bmap_idx);
+		cxgb4_free_msix_idx_in_bmap(adap, minfo->idx);
 		free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq);
 	}
 }
 
-static void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type)
+static void enable_rx_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	int n = sizeof(adap->msix_info_ulds[0].desc);
-	unsigned int idx, bmap_idx;
+	int idx;
 
 	for_each_uldrxq(rxq_info, idx) {
-		bmap_idx = rxq_info->msix_tbl[idx];
-
-		snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d",
-			 adap->port[0]->name, rxq_info->name, idx);
-	}
-}
-
-static void enable_rx(struct adapter *adap, struct sge_rspq *q)
-{
-	if (!q)
-		return;
+		struct sge_rspq *q = &rxq_info->uldrxq[idx].rspq;
 
-	if (q->handler)
-		napi_enable(&q->napi);
-
-	/* 0-increment GTS to start the timer and enable interrupts */
-	t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
-		     SEINTARM_V(q->intr_params) |
-		     INGRESSQID_V(q->cntxt_id));
-}
+		if (!q)
+			continue;
 
-static void quiesce_rx(struct adapter *adap, struct sge_rspq *q)
-{
-	if (q && q->handler)
-		napi_disable(&q->napi);
+		cxgb4_enable_rx(adap, q);
+	}
 }
 
-static void enable_rx_uld(struct adapter *adap, unsigned int uld_type)
+static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 	int idx;
 
-	for_each_uldrxq(rxq_info, idx)
-		enable_rx(adap, &rxq_info->uldrxq[idx].rspq);
-}
+	for_each_uldrxq(rxq_info, idx) {
+		struct sge_rspq *q = &rxq_info->uldrxq[idx].rspq;
 
-static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
-{
-	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	int idx;
+		if (!q)
+			continue;
 
-	for_each_uldrxq(rxq_info, idx)
-		quiesce_rx(adap, &rxq_info->uldrxq[idx].rspq);
+		cxgb4_quiesce_rx(q);
+	}
 }
 
 static void
@@ -750,7 +692,6 @@ void cxgb4_register_uld(enum cxgb4_uld type,
 		if (ret)
 			goto free_queues;
 		if (adap->flags & CXGB4_USING_MSIX) {
-			name_msix_vecs_uld(adap, type);
 			ret = request_msix_queue_irqs_uld(adap, type);
 			if (ret)
 				goto free_rxq;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index cee582e36134..861b25d28ed6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -89,6 +89,10 @@ union aopen_entry {
 	union aopen_entry *next;
 };
 
+struct eotid_entry {
+	void *data;
+};
+
 /*
  * Holds the size, base address, free list start, etc of the TID, server TID,
  * and active-open TID tables.  The tables themselves are allocated dynamically.
@@ -126,6 +130,12 @@ struct tid_info {
 	unsigned int v6_stids_in_use;
 	unsigned int sftids_in_use;
 
+	/* ETHOFLD range */
+	struct eotid_entry *eotid_tab;
+	unsigned long *eotid_bmap;
+	unsigned int eotid_base;
+	unsigned int neotids;
+
 	/* TIDs in the TCAM */
 	atomic_t tids_in_use;
 	/* TIDs in the HASH */
@@ -176,6 +186,35 @@ static inline void cxgb4_insert_tid(struct tid_info *t, void *data,
 	atomic_inc(&t->conns_in_use);
 }
 
+static inline struct eotid_entry *cxgb4_lookup_eotid(struct tid_info *t,
+						     u32 eotid)
+{
+	return eotid < t->neotids ? &t->eotid_tab[eotid] : NULL;
+}
+
+static inline int cxgb4_get_free_eotid(struct tid_info *t)
+{
+	int eotid;
+
+	eotid = find_first_zero_bit(t->eotid_bmap, t->neotids);
+	if (eotid >= t->neotids)
+		eotid = -1; /* no clear bit among the neotids entries */
+
+	return eotid; /* bit is not set here; see cxgb4_alloc_eotid() */
+}
+
+static inline void cxgb4_alloc_eotid(struct tid_info *t, u32 eotid, void *data)
+{
+	set_bit(eotid, t->eotid_bmap); /* eotid is not range-checked here */
+	t->eotid_tab[eotid].data = data;
+}
+
+static inline void cxgb4_free_eotid(struct tid_info *t, u32 eotid)
+{
+	clear_bit(eotid, t->eotid_bmap); /* eotid is not range-checked here */
+	t->eotid_tab[eotid].data = NULL;
+}
+
 int cxgb4_alloc_atid(struct tid_info *t, void *data);
 int cxgb4_alloc_stid(struct tid_info *t, int family, void *data);
 int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 1a407d3c1d67..e9e45006632d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -351,15 +351,13 @@ exists:
 static void _t4_l2e_free(struct l2t_entry *e)
 {
 	struct l2t_data *d;
-	struct sk_buff *skb;
 
 	if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
 		if (e->neigh) {
 			neigh_release(e->neigh);
 			e->neigh = NULL;
 		}
-		while ((skb = __skb_dequeue(&e->arpq)) != NULL)
-			kfree_skb(skb);
+		__skb_queue_purge(&e->arpq);
 	}
 
 	d = container_of(e, struct l2t_data, l2tab[e->idx]);
@@ -370,7 +368,6 @@ static void _t4_l2e_free(struct l2t_entry *e)
 static void t4_l2e_free(struct l2t_entry *e)
 {
 	struct l2t_data *d;
-	struct sk_buff *skb;
 
 	spin_lock_bh(&e->lock);
 	if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
@@ -378,8 +375,7 @@ static void t4_l2e_free(struct l2t_entry *e)
 			neigh_release(e->neigh);
 			e->neigh = NULL;
 		}
-		while ((skb = __skb_dequeue(&e->arpq)) != NULL)
-			kfree_skb(skb);
+		__skb_queue_purge(&e->arpq);
 	}
 	spin_unlock_bh(&e->lock);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
index 60218dc676a8..3e61bd5d0c29 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -50,6 +50,7 @@ static int t4_sched_class_fw_cmd(struct port_info *pi,
 	e = &s->tab[p->u.params.class];
 	switch (op) {
 	case SCHED_FW_OP_ADD:
+	case SCHED_FW_OP_DEL:
 		err = t4_sched_params(adap, p->type,
 				      p->u.params.level, p->u.params.mode,
 				      p->u.params.rateunit,
@@ -92,45 +93,69 @@ static int t4_sched_bind_unbind_op(struct port_info *pi, void *arg,
 
 		pf = adap->pf;
 		vf = 0;
+
+		err = t4_set_params(adap, adap->mbox, pf, vf, 1,
+				    &fw_param, &fw_class);
+		break;
+	}
+	case SCHED_FLOWC: {
+		struct sched_flowc_entry *fe;
+
+		fe = (struct sched_flowc_entry *)arg;
+
+		fw_class = bind ? fe->param.class : FW_SCHED_CLS_NONE;
+		err = cxgb4_ethofld_send_flowc(adap->port[pi->port_id],
+					       fe->param.tid, fw_class);
 		break;
 	}
 	default:
 		err = -ENOTSUPP;
-		goto out;
+		break;
 	}
 
-	err = t4_set_params(adap, adap->mbox, pf, vf, 1, &fw_param, &fw_class);
-
-out:
 	return err;
 }
 
-static struct sched_class *t4_sched_queue_lookup(struct port_info *pi,
-						 const unsigned int qid,
-						 int *index)
+static void *t4_sched_entry_lookup(struct port_info *pi,
+				   enum sched_bind_type type,
+				   const u32 val)
 {
 	struct sched_table *s = pi->sched_tbl;
 	struct sched_class *e, *end;
-	struct sched_class *found = NULL;
-	int i;
+	void *found = NULL;
 
-	/* Look for a class with matching bound queue parameters */
+	/* Look for an entry with matching @val */
 	end = &s->tab[s->sched_size];
 	for (e = &s->tab[0]; e != end; ++e) {
-		struct sched_queue_entry *qe;
-
-		i = 0;
-		if (e->state == SCHED_STATE_UNUSED)
+		if (e->state == SCHED_STATE_UNUSED ||
+		    e->bind_type != type)
 			continue;
 
-		list_for_each_entry(qe, &e->queue_list, list) {
-			if (qe->cntxt_id == qid) {
-				found = e;
-				if (index)
-					*index = i;
-				break;
+		switch (type) {
+		case SCHED_QUEUE: {
+			struct sched_queue_entry *qe;
+
+			list_for_each_entry(qe, &e->entry_list, list) {
+				if (qe->cntxt_id == val) {
+					found = qe;
+					break;
+				}
+			}
+			break;
+		}
+		case SCHED_FLOWC: {
+			struct sched_flowc_entry *fe;
+
+			list_for_each_entry(fe, &e->entry_list, list) {
+				if (fe->param.tid == val) {
+					found = fe;
+					break;
+				}
 			}
-			i++;
+			break;
+		}
+		default:
+			return NULL;
 		}
 
 		if (found)
@@ -142,52 +167,41 @@ static struct sched_class *t4_sched_queue_lookup(struct port_info *pi,
 
 static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p)
 {
-	struct adapter *adap = pi->adapter;
-	struct sched_class *e;
 	struct sched_queue_entry *qe = NULL;
+	struct adapter *adap = pi->adapter;
 	struct sge_eth_txq *txq;
-	unsigned int qid;
-	int index = -1;
+	struct sched_class *e;
 	int err = 0;
 
 	if (p->queue < 0 || p->queue >= pi->nqsets)
 		return -ERANGE;
 
 	txq = &adap->sge.ethtxq[pi->first_qset + p->queue];
-	qid = txq->q.cntxt_id;
 
-	/* Find the existing class that the queue is bound to */
-	e = t4_sched_queue_lookup(pi, qid, &index);
-	if (e && index >= 0) {
-		int i = 0;
-
-		list_for_each_entry(qe, &e->queue_list, list) {
-			if (i == index)
-				break;
-			i++;
-		}
+	/* Find the existing entry that the queue is bound to */
+	qe = t4_sched_entry_lookup(pi, SCHED_QUEUE, txq->q.cntxt_id);
+	if (qe) {
 		err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE,
 					      false);
 		if (err)
 			return err;
 
+		e = &pi->sched_tbl->tab[qe->param.class];
 		list_del(&qe->list);
 		kvfree(qe);
-		if (atomic_dec_and_test(&e->refcnt)) {
-			e->state = SCHED_STATE_UNUSED;
-			memset(&e->info, 0, sizeof(e->info));
-		}
+		if (atomic_dec_and_test(&e->refcnt))
+			cxgb4_sched_class_free(adap->port[pi->port_id], e->idx);
 	}
 	return err;
 }
 
 static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
 {
-	struct adapter *adap = pi->adapter;
 	struct sched_table *s = pi->sched_tbl;
-	struct sched_class *e;
 	struct sched_queue_entry *qe = NULL;
+	struct adapter *adap = pi->adapter;
 	struct sge_eth_txq *txq;
+	struct sched_class *e;
 	unsigned int qid;
 	int err = 0;
 
@@ -215,7 +229,8 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
 	if (err)
 		goto out_err;
 
-	list_add_tail(&qe->list, &e->queue_list);
+	list_add_tail(&qe->list, &e->entry_list);
+	e->bind_type = SCHED_QUEUE;
 	atomic_inc(&e->refcnt);
 	return err;
 
@@ -224,6 +239,71 @@ out_err:
 	return err;
 }
 
+static int t4_sched_flowc_unbind(struct port_info *pi, struct ch_sched_flowc *p)
+{
+	struct sched_flowc_entry *fe = NULL;
+	struct adapter *adap = pi->adapter;
+	struct sched_class *e;
+	int err = 0;
+
+	if (p->tid < 0 || p->tid >= adap->tids.neotids)
+		return -ERANGE;
+
+	/* Find the existing entry that the flowc is bound to */
+	fe = t4_sched_entry_lookup(pi, SCHED_FLOWC, p->tid);
+	if (fe) {
+		err = t4_sched_bind_unbind_op(pi, (void *)fe, SCHED_FLOWC,
+					      false);
+		if (err)
+			return err; /* entry stays bound on failure */
+
+		e = &pi->sched_tbl->tab[fe->param.class];
+		list_del(&fe->list);
+		kvfree(fe); /* callers may pass p == &fe->param */
+		if (atomic_dec_and_test(&e->refcnt)) /* last user */
+			cxgb4_sched_class_free(adap->port[pi->port_id], e->idx);
+	}
+	return err; /* 0 when no entry was bound */
+}
+
+static int t4_sched_flowc_bind(struct port_info *pi, struct ch_sched_flowc *p)
+{
+	struct sched_table *s = pi->sched_tbl;
+	struct sched_flowc_entry *fe = NULL;
+	struct adapter *adap = pi->adapter;
+	struct sched_class *e;
+	int err = 0;
+
+	if (p->tid < 0 || p->tid >= adap->tids.neotids)
+		return -ERANGE;
+
+	fe = kvzalloc(sizeof(*fe), GFP_KERNEL);
+	if (!fe)
+		return -ENOMEM;
+
+	/* Unbind flowc from any existing class */
+	err = t4_sched_flowc_unbind(pi, p);
+	if (err)
+		goto out_err;
+
+	/* Bind flowc to specified class */
+	memcpy(&fe->param, p, sizeof(fe->param));
+
+	e = &s->tab[fe->param.class]; /* class index is not checked here */
+	err = t4_sched_bind_unbind_op(pi, (void *)fe, SCHED_FLOWC, true);
+	if (err)
+		goto out_err;
+
+	list_add_tail(&fe->list, &e->entry_list);
+	e->bind_type = SCHED_FLOWC;
+	atomic_inc(&e->refcnt);
+	return err;
+
+out_err:
+	kvfree(fe); /* fe was never linked into a class list */
+	return err;
+}
+
 static void t4_sched_class_unbind_all(struct port_info *pi,
 				      struct sched_class *e,
 				      enum sched_bind_type type)
@@ -235,10 +315,17 @@ static void t4_sched_class_unbind_all(struct port_info *pi,
 	case SCHED_QUEUE: {
 		struct sched_queue_entry *qe;
 
-		list_for_each_entry(qe, &e->queue_list, list)
+		list_for_each_entry(qe, &e->entry_list, list)
 			t4_sched_queue_unbind(pi, &qe->param);
 		break;
 	}
+	case SCHED_FLOWC: {
+		struct sched_flowc_entry *fe, *tmp; /* unbind frees fe */
+
+		list_for_each_entry_safe(fe, tmp, &e->entry_list, list)
+			t4_sched_flowc_unbind(pi, &fe->param);
+		break;
+	}
 	default:
 		break;
 	}
@@ -262,6 +349,15 @@ static int t4_sched_class_bind_unbind_op(struct port_info *pi, void *arg,
 			err = t4_sched_queue_unbind(pi, qe);
 		break;
 	}
+	case SCHED_FLOWC: {
+		struct ch_sched_flowc *fe = (struct ch_sched_flowc *)arg;
+
+		if (bind)
+			err = t4_sched_flowc_bind(pi, fe);
+		else
+			err = t4_sched_flowc_unbind(pi, fe);
+		break;
+	}
 	default:
 		err = -ENOTSUPP;
 		break;
@@ -299,6 +395,12 @@ int cxgb4_sched_class_bind(struct net_device *dev, void *arg,
 		class_id = qe->class;
 		break;
 	}
+	case SCHED_FLOWC: {
+		struct ch_sched_flowc *fe = (struct ch_sched_flowc *)arg;
+
+		class_id = fe->class;
+		break;
+	}
 	default:
 		return -ENOTSUPP;
 	}
@@ -340,6 +442,12 @@ int cxgb4_sched_class_unbind(struct net_device *dev, void *arg,
 		class_id = qe->class;
 		break;
 	}
+	case SCHED_FLOWC: {
+		struct ch_sched_flowc *fe = (struct ch_sched_flowc *)arg;
+
+		class_id = fe->class;
+		break;
+	}
 	default:
 		return -ENOTSUPP;
 	}
@@ -355,8 +463,8 @@ static struct sched_class *t4_sched_class_lookup(struct port_info *pi,
 						const struct ch_sched_params *p)
 {
 	struct sched_table *s = pi->sched_tbl;
-	struct sched_class *e, *end;
 	struct sched_class *found = NULL;
+	struct sched_class *e, *end;
 
 	if (!p) {
 		/* Get any available unused class */
@@ -400,7 +508,7 @@ static struct sched_class *t4_sched_class_lookup(struct port_info *pi,
 static struct sched_class *t4_sched_class_alloc(struct port_info *pi,
 						struct ch_sched_params *p)
 {
-	struct sched_class *e;
+	struct sched_class *e = NULL;
 	u8 class_id;
 	int err;
 
@@ -415,10 +523,13 @@ static struct sched_class *t4_sched_class_alloc(struct port_info *pi,
 	if (class_id != SCHED_CLS_NONE)
 		return NULL;
 
-	/* See if there's an exisiting class with same
-	 * requested sched params
+	/* See if there's an existing class with same requested sched
+	 * params. Classes can only be shared among FLOWC types. For
+	 * other types, always request a new class.
 	 */
-	e = t4_sched_class_lookup(pi, p);
+	if (p->u.params.mode == SCHED_CLASS_MODE_FLOW)
+		e = t4_sched_class_lookup(pi, p);
+
 	if (!e) {
 		struct ch_sched_params np;
 
@@ -467,9 +578,57 @@ struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev,
 	return t4_sched_class_alloc(pi, p);
 }
 
-static void t4_sched_class_free(struct port_info *pi, struct sched_class *e)
+/**
+ * cxgb4_sched_class_free - free a scheduling class
+ * @dev: net_device pointer
+ * @classid: index of the scheduling class in the port's sched table
+ *
+ * Resets and releases the class if it is in use but has no remaining users.
+ */
+void cxgb4_sched_class_free(struct net_device *dev, u8 classid)
 {
-	t4_sched_class_unbind_all(pi, e, SCHED_QUEUE);
+	struct port_info *pi = netdev2pinfo(dev);
+	struct sched_table *s = pi->sched_tbl;
+	struct ch_sched_params p;
+	struct sched_class *e;
+	u32 speed;
+	int ret;
+
+	e = &s->tab[classid];
+	if (!atomic_read(&e->refcnt) && e->state != SCHED_STATE_UNUSED) {
+		/* Port based rate limiting needs explicit reset back
+		 * to max rate. But, we'll do explicit reset for all
+		 * types, instead of just port based type, to be on
+		 * the safer side.
+		 */
+		memcpy(&p, &e->info, sizeof(p));
+		/* Always reset mode to 0. Otherwise, FLOWC mode will
+		 * still be enabled even after resetting the traffic
+		 * class.
+		 */
+		p.u.params.mode = 0;
+		p.u.params.minrate = 0;
+		p.u.params.pktsize = 0;
+
+		ret = t4_get_link_params(pi, NULL, &speed, NULL);
+		if (!ret)
+			p.u.params.maxrate = speed * 1000; /* Mbps to Kbps */
+		else
+			p.u.params.maxrate = SCHED_MAX_RATE_KBPS;
+
+		t4_sched_class_fw_cmd(pi, &p, SCHED_FW_OP_DEL);
+
+		e->state = SCHED_STATE_UNUSED;
+		memset(&e->info, 0, sizeof(e->info));
+	}
+}
+
+static void t4_sched_class_free(struct net_device *dev, struct sched_class *e)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+
+	t4_sched_class_unbind_all(pi, e, e->bind_type);
+	cxgb4_sched_class_free(dev, e->idx); /* no-op unless refcnt is 0 */
+}
 
 struct sched_table *t4_init_sched(unsigned int sched_size)
@@ -487,7 +646,7 @@ struct sched_table *t4_init_sched(unsigned int sched_size)
 		memset(&s->tab[i], 0, sizeof(struct sched_class));
 		s->tab[i].idx = i;
 		s->tab[i].state = SCHED_STATE_UNUSED;
-		INIT_LIST_HEAD(&s->tab[i].queue_list);
+		INIT_LIST_HEAD(&s->tab[i].entry_list);
 		atomic_set(&s->tab[i].refcnt, 0);
 	}
 	return s;
@@ -510,7 +669,7 @@ void t4_cleanup_sched(struct adapter *adap)
 
 			e = &s->tab[i];
 			if (e->state == SCHED_STATE_ACTIVE)
-				t4_sched_class_free(pi, e);
+				t4_sched_class_free(adap->port[j], e);
 		}
 		kvfree(s);
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h
index 168fb4ce3759..e92ff68bdd0a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h
@@ -52,10 +52,12 @@ enum {
 
 enum sched_fw_ops {
 	SCHED_FW_OP_ADD,
+	SCHED_FW_OP_DEL,
 };
 
 enum sched_bind_type {
 	SCHED_QUEUE,
+	SCHED_FLOWC,
 };
 
 struct sched_queue_entry {
@@ -64,11 +66,17 @@ struct sched_queue_entry {
 	struct ch_sched_queue param;
 };
 
+struct sched_flowc_entry {
+	struct list_head list;
+	struct ch_sched_flowc param;
+};
+
 struct sched_class {
 	u8 state;
 	u8 idx;
 	struct ch_sched_params info;
-	struct list_head queue_list;
+	enum sched_bind_type bind_type;
+	struct list_head entry_list;
 	atomic_t refcnt;
 };
 
@@ -102,6 +110,7 @@ int cxgb4_sched_class_unbind(struct net_device *dev, void *arg,
 
 struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev,
 					    struct ch_sched_params *p);
+void cxgb4_sched_class_free(struct net_device *dev, u8 classid);
 
 struct sched_table *t4_init_sched(unsigned int size);
 void t4_cleanup_sched(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 928bfea5457b..97cda501e7e8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -55,6 +55,8 @@
 #include "t4fw_api.h"
 #include "cxgb4_ptp.h"
 #include "cxgb4_uld.h"
+#include "cxgb4_tc_mqprio.h"
+#include "sched.h"
 
 /*
  * Rx buffer size.  We use largish buffers if possible but settle for single
@@ -269,7 +271,6 @@ out_err:
 }
 EXPORT_SYMBOL(cxgb4_map_skb);
 
-#ifdef CONFIG_NEED_DMA_MAP_STATE
 static void unmap_skb(struct device *dev, const struct sk_buff *skb,
 		      const dma_addr_t *addr)
 {
@@ -284,6 +285,7 @@ static void unmap_skb(struct device *dev, const struct sk_buff *skb,
 		dma_unmap_page(dev, *addr++, skb_frag_size(fp), DMA_TO_DEVICE);
 }
 
+#ifdef CONFIG_NEED_DMA_MAP_STATE
 /**
  *	deferred_unmap_destructor - unmap a packet when it is freed
  *	@skb: the packet
@@ -298,65 +300,6 @@ static void deferred_unmap_destructor(struct sk_buff *skb)
 }
 #endif
 
-static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
-		      const struct ulptx_sgl *sgl, const struct sge_txq *q)
-{
-	const struct ulptx_sge_pair *p;
-	unsigned int nfrags = skb_shinfo(skb)->nr_frags;
-
-	if (likely(skb_headlen(skb)))
-		dma_unmap_single(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
-				 DMA_TO_DEVICE);
-	else {
-		dma_unmap_page(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
-			       DMA_TO_DEVICE);
-		nfrags--;
-	}
-
-	/*
-	 * the complexity below is because of the possibility of a wrap-around
-	 * in the middle of an SGL
-	 */
-	for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
-		if (likely((u8 *)(p + 1) <= (u8 *)q->stat)) {
-unmap:			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
-				       ntohl(p->len[0]), DMA_TO_DEVICE);
-			dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
-				       ntohl(p->len[1]), DMA_TO_DEVICE);
-			p++;
-		} else if ((u8 *)p == (u8 *)q->stat) {
-			p = (const struct ulptx_sge_pair *)q->desc;
-			goto unmap;
-		} else if ((u8 *)p + 8 == (u8 *)q->stat) {
-			const __be64 *addr = (const __be64 *)q->desc;
-
-			dma_unmap_page(dev, be64_to_cpu(addr[0]),
-				       ntohl(p->len[0]), DMA_TO_DEVICE);
-			dma_unmap_page(dev, be64_to_cpu(addr[1]),
-				       ntohl(p->len[1]), DMA_TO_DEVICE);
-			p = (const struct ulptx_sge_pair *)&addr[2];
-		} else {
-			const __be64 *addr = (const __be64 *)q->desc;
-
-			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
-				       ntohl(p->len[0]), DMA_TO_DEVICE);
-			dma_unmap_page(dev, be64_to_cpu(addr[0]),
-				       ntohl(p->len[1]), DMA_TO_DEVICE);
-			p = (const struct ulptx_sge_pair *)&addr[1];
-		}
-	}
-	if (nfrags) {
-		__be64 addr;
-
-		if ((u8 *)p == (u8 *)q->stat)
-			p = (const struct ulptx_sge_pair *)q->desc;
-		addr = (u8 *)p + 16 <= (u8 *)q->stat ? p->addr[0] :
-						       *(const __be64 *)q->desc;
-		dma_unmap_page(dev, be64_to_cpu(addr), ntohl(p->len[0]),
-			       DMA_TO_DEVICE);
-	}
-}
-
 /**
  *	free_tx_desc - reclaims Tx descriptors and their buffers
  *	@adapter: the adapter
@@ -370,15 +313,16 @@ unmap:			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
 void free_tx_desc(struct adapter *adap, struct sge_txq *q,
 		  unsigned int n, bool unmap)
 {
-	struct tx_sw_desc *d;
 	unsigned int cidx = q->cidx;
-	struct device *dev = adap->pdev_dev;
+	struct tx_sw_desc *d;
 
 	d = &q->sdesc[cidx];
 	while (n--) {
 		if (d->skb) {                       /* an SGL is present */
-			if (unmap)
-				unmap_sgl(dev, d->skb, d->sgl, q);
+			if (unmap && d->addr[0]) {
+				unmap_skb(adap->pdev_dev, d->skb, d->addr);
+				memset(d->addr, 0, sizeof(d->addr));
+			}
 			dev_consume_skb_any(d->skb);
 			d->skb = NULL;
 		}
@@ -790,6 +734,8 @@ static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver)
 	    chip_ver > CHELSIO_T5) {
 		hdrlen = sizeof(struct cpl_tx_tnl_lso);
 		hdrlen += sizeof(struct cpl_tx_pkt_core);
+	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		return 0;
 	} else {
 		hdrlen = skb_shinfo(skb)->gso_size ?
 			 sizeof(struct cpl_tx_pkt_lso_core) : 0;
@@ -831,12 +777,20 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb,
 	 */
 	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
 	if (skb_shinfo(skb)->gso_size) {
-		if (skb->encapsulation && chip_ver > CHELSIO_T5)
+		if (skb->encapsulation && chip_ver > CHELSIO_T5) {
 			hdrlen = sizeof(struct fw_eth_tx_pkt_wr) +
 				 sizeof(struct cpl_tx_tnl_lso);
-		else
+		} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+			u32 pkt_hdrlen;
+
+			pkt_hdrlen = eth_get_headlen(skb->dev, skb->data,
+						     skb_headlen(skb));
+			hdrlen = sizeof(struct fw_eth_tx_eo_wr) +
+				 round_up(pkt_hdrlen, 16);
+		} else {
 			hdrlen = sizeof(struct fw_eth_tx_pkt_wr) +
 				 sizeof(struct cpl_tx_pkt_lso_core);
+		}
 
 		hdrlen += sizeof(struct cpl_tx_pkt_core);
 		flits += (hdrlen / sizeof(__be64));
@@ -1309,6 +1263,35 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
 	tnl_lso->EthLenOffset_Size = htonl(CPL_TX_TNL_LSO_SIZE_V(skb->len));
 }
 
+static inline void *write_tso_wr(struct adapter *adap, struct sk_buff *skb,
+				 struct cpl_tx_pkt_lso_core *lso)
+{
+	int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
+	int l3hdr_len = skb_network_header_len(skb);
+	const struct skb_shared_info *ssi;
+	bool ipv6 = false;
+
+	ssi = skb_shinfo(skb);
+	if (ssi->gso_type & SKB_GSO_TCPV6)
+		ipv6 = true;
+
+	lso->lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
+			      LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F |
+			      LSO_IPV6_V(ipv6) |
+			      LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
+			      LSO_IPHDR_LEN_V(l3hdr_len / 4) |
+			      LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
+	lso->ipid_ofst = htons(0);
+	lso->mss = htons(ssi->gso_size);
+	lso->seqno_offset = htonl(0);
+	if (is_t4(adap->params.chip))
+		lso->len = htonl(skb->len);
+	else
+		lso->len = htonl(LSO_T5_XFER_SIZE_V(skb->len));
+
+	return (void *)(lso + 1);
+}
+
 /**
  *	t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update
  *	@adap: the adapter
@@ -1347,6 +1330,50 @@ int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
 	return reclaimed;
 }
 
+static inline int cxgb4_validate_skb(struct sk_buff *skb,
+				     struct net_device *dev,
+				     u32 min_pkt_len)
+{
+	u32 max_pkt_len;
+
+	/* The chip min packet length is 10 octets but some firmware
+	 * commands have a minimum packet length requirement. So, play
+	 * safe and reject (-EINVAL) anything shorter than @min_pkt_len.
+	 */
+	if (unlikely(skb->len < min_pkt_len))
+		return -EINVAL;
+
+	/* Non-GSO packets must fit in the MTU plus Ethernet/VLAN header */
+	max_pkt_len = ETH_HLEN + dev->mtu;
+
+	if (skb_vlan_tagged(skb))
+		max_pkt_len += VLAN_HLEN;
+
+	if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void *write_eo_udp_wr(struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
+			     u32 hdr_len)
+{
+	wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG;
+	wr->u.udpseg.ethlen = skb_network_offset(skb);
+	wr->u.udpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
+	wr->u.udpseg.udplen = sizeof(struct udphdr);
+	wr->u.udpseg.rtplen = 0;
+	wr->u.udpseg.r4 = 0;
+	if (skb_shinfo(skb)->gso_size)
+		wr->u.udpseg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+	else
+		wr->u.udpseg.mss = cpu_to_be16(skb->len - hdr_len);
+	wr->u.udpseg.schedpktsize = wr->u.udpseg.mss;
+	wr->u.udpseg.plen = cpu_to_be32(skb->len - hdr_len);
+
+	return (void *)(wr + 1);
+}
+
 /**
  *	cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
  *	@skb: the packet
@@ -1356,41 +1383,25 @@ int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
  */
 static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	u32 wr_mid, ctrl0, op;
-	u64 cntrl, *end, *sgl;
-	int qidx, credits;
-	unsigned int flits, ndesc;
-	struct adapter *adap;
-	struct sge_eth_txq *q;
-	const struct port_info *pi;
+	enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE;
+	bool ptp_enabled = is_ptp_enabled(skb, dev);
+	unsigned int last_desc, flits, ndesc;
+	u32 wr_mid, ctrl0, op, sgl_off = 0;
+	const struct skb_shared_info *ssi;
+	int len, qidx, credits, ret, left;
+	struct tx_sw_desc *sgl_sdesc;
+	struct fw_eth_tx_eo_wr *eowr;
 	struct fw_eth_tx_pkt_wr *wr;
 	struct cpl_tx_pkt_core *cpl;
-	const struct skb_shared_info *ssi;
-	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	const struct port_info *pi;
 	bool immediate = false;
-	int len, max_pkt_len;
-	bool ptp_enabled = is_ptp_enabled(skb, dev);
+	u64 cntrl, *end, *sgl;
+	struct sge_eth_txq *q;
 	unsigned int chip_ver;
-	enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE;
-
-#ifdef CONFIG_CHELSIO_T4_FCOE
-	int err;
-#endif /* CONFIG_CHELSIO_T4_FCOE */
-
-	/*
-	 * The chip min packet length is 10 octets but play safe and reject
-	 * anything shorter than an Ethernet header.
-	 */
-	if (unlikely(skb->len < ETH_HLEN)) {
-out_free:	dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
+	struct adapter *adap;
 
-	/* Discard the packet if the length is greater than mtu */
-	max_pkt_len = ETH_HLEN + dev->mtu;
-	if (skb_vlan_tagged(skb))
-		max_pkt_len += VLAN_HLEN;
-	if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+	ret = cxgb4_validate_skb(skb, dev, ETH_HLEN);
+	if (ret)
 		goto out_free;
 
 	pi = netdev_priv(dev);
@@ -1421,8 +1432,8 @@ out_free:	dev_kfree_skb_any(skb);
 	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 
 #ifdef CONFIG_CHELSIO_T4_FCOE
-	err = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
-	if (unlikely(err == -ENOTSUPP)) {
+	ret = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
+	if (unlikely(ret == -ENOTSUPP)) {
 		if (ptp_enabled)
 			spin_unlock(&adap->ptp_lock);
 		goto out_free;
@@ -1450,8 +1461,14 @@ out_free:	dev_kfree_skb_any(skb);
 	if (skb->encapsulation && chip_ver > CHELSIO_T5)
 		tnl_type = cxgb_encap_offload_supported(skb);
 
+	last_desc = q->q.pidx + ndesc - 1;
+	if (last_desc >= q->q.size)
+		last_desc -= q->q.size;
+	sgl_sdesc = &q->q.sdesc[last_desc];
+
 	if (!immediate &&
-	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) {
+		memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
 		q->mapping_err++;
 		if (ptp_enabled)
 			spin_unlock(&adap->ptp_lock);
@@ -1482,17 +1499,18 @@ out_free:	dev_kfree_skb_any(skb);
 	}
 
 	wr = (void *)&q->q.desc[q->q.pidx];
+	eowr = (void *)&q->q.desc[q->q.pidx];
 	wr->equiq_to_len16 = htonl(wr_mid);
 	wr->r3 = cpu_to_be64(0);
-	end = (u64 *)wr + flits;
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+		end = (u64 *)eowr + flits;
+	else
+		end = (u64 *)wr + flits;
 
 	len = immediate ? skb->len : 0;
 	len += sizeof(*cpl);
-	if (ssi->gso_size) {
+	if (ssi->gso_size && !(ssi->gso_type & SKB_GSO_UDP_L4)) {
 		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
-		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
-		int l3hdr_len = skb_network_header_len(skb);
-		int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
 		struct cpl_tx_tnl_lso *tnl_lso = (void *)(wr + 1);
 
 		if (tnl_type)
@@ -1519,46 +1537,33 @@ out_free:	dev_kfree_skb_any(skb);
 			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				cntrl = hwcsum(adap->params.chip, skb);
 		} else {
-			lso->lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
-					LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F |
-					LSO_IPV6_V(v6) |
-					LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
-					LSO_IPHDR_LEN_V(l3hdr_len / 4) |
-					LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
-			lso->ipid_ofst = htons(0);
-			lso->mss = htons(ssi->gso_size);
-			lso->seqno_offset = htonl(0);
-			if (is_t4(adap->params.chip))
-				lso->len = htonl(skb->len);
-			else
-				lso->len = htonl(LSO_T5_XFER_SIZE_V(skb->len));
-			cpl = (void *)(lso + 1);
-
-			if (CHELSIO_CHIP_VERSION(adap->params.chip)
-			    <= CHELSIO_T5)
-				cntrl =	TXPKT_ETHHDR_LEN_V(eth_xtra_len);
-			else
-				cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
-
-			cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
-				 TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
-				 TXPKT_IPHDR_LEN_V(l3hdr_len);
+			cpl = write_tso_wr(adap, skb, lso);
+			cntrl = hwcsum(adap->params.chip, skb);
 		}
 		sgl = (u64 *)(cpl + 1); /* sgl start here */
-		if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
-			/* If current position is already at the end of the
-			 * txq, reset the current to point to start of the queue
-			 * and update the end ptr as well.
-			 */
-			if (sgl == (u64 *)q->q.stat) {
-				int left = (u8 *)end - (u8 *)q->q.stat;
-
-				end = (void *)q->q.desc + left;
-				sgl = (void *)q->q.desc;
-			}
-		}
 		q->tso++;
 		q->tx_cso += ssi->gso_segs;
+	} else if (ssi->gso_size) {
+		u64 *start;
+		u32 hdrlen;
+
+		hdrlen = eth_get_headlen(dev, skb->data, skb_headlen(skb));
+		len += hdrlen;
+		wr->op_immdlen = cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_EO_WR) |
+					     FW_ETH_TX_EO_WR_IMMDLEN_V(len));
+		cpl = write_eo_udp_wr(skb, eowr, hdrlen);
+		cntrl = hwcsum(adap->params.chip, skb);
+
+		start = (u64 *)(cpl + 1);
+		sgl = (u64 *)inline_tx_skb_header(skb, &q->q, (void *)start,
+						  hdrlen);
+		if (unlikely(start > sgl)) {
+			left = (u8 *)end - (u8 *)q->q.stat;
+			end = (void *)q->q.desc + left;
+		}
+		sgl_off = hdrlen;
+		q->uso++;
+		q->tx_cso += ssi->gso_segs;
 	} else {
 		if (ptp_enabled)
 			op = FW_PTP_TX_PKT_WR;
@@ -1575,6 +1580,16 @@ out_free:	dev_kfree_skb_any(skb);
 		}
 	}
 
+	if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
+		/* If current position is already at the end of the
+		 * txq, reset the current to point to start of the queue
+		 * and update the end ptr as well.
+		 */
+		left = (u8 *)end - (u8 *)q->q.stat;
+		end = (void *)q->q.desc + left;
+		sgl = (void *)q->q.desc;
+	}
+
 	if (skb_vlan_tag_present(skb)) {
 		q->vlan_ins++;
 		cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
@@ -1604,16 +1619,10 @@ out_free:	dev_kfree_skb_any(skb);
 		cxgb4_inline_tx_skb(skb, &q->q, sgl);
 		dev_consume_skb_any(skb);
 	} else {
-		int last_desc;
-
-		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0, addr);
+		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, sgl_off,
+				sgl_sdesc->addr);
 		skb_orphan(skb);
-
-		last_desc = q->q.pidx + ndesc - 1;
-		if (last_desc >= q->q.size)
-			last_desc -= q->q.size;
-		q->q.sdesc[last_desc].skb = skb;
-		q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl;
+		sgl_sdesc->skb = skb;
 	}
 
 	txq_advance(&q->q, ndesc);
@@ -1622,6 +1631,10 @@ out_free:	dev_kfree_skb_any(skb);
 	if (ptp_enabled)
 		spin_unlock(&adap->ptp_lock);
 	return NETDEV_TX_OK;
+
+out_free:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
 }
 
 /* Constants ... */
@@ -1707,35 +1720,28 @@ static inline unsigned int t4vf_calc_tx_flits(const struct sk_buff *skb)
 static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 				     struct net_device *dev)
 {
-	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	unsigned int last_desc, flits, ndesc;
 	const struct skb_shared_info *ssi;
 	struct fw_eth_tx_pkt_vm_wr *wr;
-	int qidx, credits, max_pkt_len;
+	struct tx_sw_desc *sgl_sdesc;
 	struct cpl_tx_pkt_core *cpl;
 	const struct port_info *pi;
-	unsigned int flits, ndesc;
 	struct sge_eth_txq *txq;
 	struct adapter *adapter;
+	int qidx, credits, ret;
+	size_t fw_hdr_copy_len;
 	u64 cntrl, *end;
 	u32 wr_mid;
-	const size_t fw_hdr_copy_len = sizeof(wr->ethmacdst) +
-				       sizeof(wr->ethmacsrc) +
-				       sizeof(wr->ethtype) +
-				       sizeof(wr->vlantci);
 
 	/* The chip minimum packet length is 10 octets but the firmware
 	 * command that we are using requires that we copy the Ethernet header
 	 * (including the VLAN tag) into the header so we reject anything
 	 * smaller than that ...
 	 */
-	if (unlikely(skb->len < fw_hdr_copy_len))
-		goto out_free;
-
-	/* Discard the packet if the length is greater than mtu */
-	max_pkt_len = ETH_HLEN + dev->mtu;
-	if (skb_vlan_tag_present(skb))
-		max_pkt_len += VLAN_HLEN;
-	if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+	fw_hdr_copy_len = sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
+			  sizeof(wr->ethtype) + sizeof(wr->vlantci);
+	ret = cxgb4_validate_skb(skb, dev, fw_hdr_copy_len);
+	if (ret)
 		goto out_free;
 
 	/* Figure out which TX Queue we're going to use. */
@@ -1771,12 +1777,19 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 		return NETDEV_TX_BUSY;
 	}
 
+	last_desc = txq->q.pidx + ndesc - 1;
+	if (last_desc >= txq->q.size)
+		last_desc -= txq->q.size;
+	sgl_sdesc = &txq->q.sdesc[last_desc];
+
 	if (!t4vf_is_eth_imm(skb) &&
-	    unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adapter->pdev_dev, skb,
+				   sgl_sdesc->addr) < 0)) {
 		/* We need to map the skb into PCI DMA space (because it can't
 		 * be in-lined directly into the Work Request) and the mapping
 		 * operation failed.  Record the error and drop the packet.
 		 */
+		memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
 		txq->mapping_err++;
 		goto out_free;
 	}
@@ -1951,7 +1964,6 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 		 */
 		struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
 		struct sge_txq *tq = &txq->q;
-		int last_desc;
 
 		/* If the Work Request header was an exact multiple of our TX
 		 * Descriptor length, then it's possible that the starting SGL
@@ -1965,14 +1977,9 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 				       ((void *)end - (void *)tq->stat));
 		}
 
-		cxgb4_write_sgl(skb, tq, sgl, end, 0, addr);
+		cxgb4_write_sgl(skb, tq, sgl, end, 0, sgl_sdesc->addr);
 		skb_orphan(skb);
-
-		last_desc = tq->pidx + ndesc - 1;
-		if (last_desc >= tq->size)
-			last_desc -= tq->size;
-		tq->sdesc[last_desc].skb = skb;
-		tq->sdesc[last_desc].sgl = sgl;
+		sgl_sdesc->skb = skb;
 	}
 
 	/* Advance our internal TX Queue state, tell the hardware about
@@ -1991,34 +1998,473 @@ out_free:
 	return NETDEV_TX_OK;
 }
 
+/**
+ * reclaim_completed_tx_imm - reclaim completed Tx descs holding no skbs
+ * @q: the SGE Tx queue
+ *
+ * This is a variant of cxgb4_reclaim_completed_tx() for Tx queues that
+ * have no sk_buffs of their own to release (control queues, ETHOFLD hw
+ * queues): it only syncs @q->cidx and @q->in_use with the hardware cidx.
+ */
+static inline void reclaim_completed_tx_imm(struct sge_txq *q)
+{
+	int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
+	int reclaim = hw_cidx - q->cidx;
+
+	if (reclaim < 0)		/* hardware index has wrapped */
+		reclaim += q->size;
+
+	q->in_use -= reclaim;
+	q->cidx = hw_cidx;
+}
+
+static inline void eosw_txq_advance_index(u32 *idx, u32 n, u32 max)
+{
+	u32 val = *idx + n;
+
+	if (val >= max)		/* wraps once only, so needs n <= max */
+		val -= max;
+
+	*idx = val;
+}
+
+void cxgb4_eosw_txq_free_desc(struct adapter *adap,
+			      struct sge_eosw_txq *eosw_txq, u32 ndesc)
+{
+	struct tx_sw_desc *d;
+
+	d = &eosw_txq->desc[eosw_txq->last_cidx];
+	while (ndesc--) {	/* walk @ndesc slots from last_cidx */
+		if (d->skb) {
+			if (d->addr[0]) {	/* DMA mappings to undo */
+				unmap_skb(adap->pdev_dev, d->skb, d->addr);
+				memset(d->addr, 0, sizeof(d->addr));
+			}
+			dev_consume_skb_any(d->skb);
+			d->skb = NULL;	/* slot is empty again */
+		}
+		eosw_txq_advance_index(&eosw_txq->last_cidx, 1,
+				       eosw_txq->ndesc);
+		d = &eosw_txq->desc[eosw_txq->last_cidx];
+	}
+}
+
+static inline void eosw_txq_advance(struct sge_eosw_txq *eosw_txq, u32 n)
+{
+	eosw_txq_advance_index(&eosw_txq->pidx, n, eosw_txq->ndesc);
+	eosw_txq->inuse += n;
+}
+
+static inline int eosw_txq_enqueue(struct sge_eosw_txq *eosw_txq,
+				   struct sk_buff *skb)
+{
+	if (eosw_txq->inuse == eosw_txq->ndesc)	/* every slot is taken */
+		return -ENOMEM;
+
+	eosw_txq->desc[eosw_txq->pidx].skb = skb; /* pidx moves in caller */
+	return 0;
+}
+
+static inline struct sk_buff *eosw_txq_peek(struct sge_eosw_txq *eosw_txq)
+{
+	return eosw_txq->desc[eosw_txq->last_pidx].skb;
+}
+
+static inline u8 ethofld_calc_tx_flits(struct adapter *adap,
+				       struct sk_buff *skb, u32 hdr_len)
+{
+	u8 flits, nsgl = 0;
+	u32 wrlen;
+
+	wrlen = sizeof(struct fw_eth_tx_eo_wr) + sizeof(struct cpl_tx_pkt_core);
+	if (skb_shinfo(skb)->gso_size &&
+	    !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4))
+		wrlen += sizeof(struct cpl_tx_pkt_lso_core); /* non-UDP GSO */
+
+	wrlen += roundup(hdr_len, 16);
+
+	/* Packet headers + WR + CPLs, counted in 8-byte flits */
+	flits = DIV_ROUND_UP(wrlen, 8);
+
+	if (skb_shinfo(skb)->nr_frags > 0) {
+		if (skb_headlen(skb) - hdr_len)	/* linear data past hdrs */
+			nsgl = sgl_len(skb_shinfo(skb)->nr_frags + 1);
+		else
+			nsgl = sgl_len(skb_shinfo(skb)->nr_frags);
+	} else if (skb->len - hdr_len) {
+		nsgl = sgl_len(1);
+	}
+
+	return flits + nsgl;
+}
+
+static inline void *write_eo_wr(struct adapter *adap,
+				struct sge_eosw_txq *eosw_txq,
+				struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
+				u32 hdr_len, u32 wrlen)
+{
+	const struct skb_shared_info *ssi = skb_shinfo(skb);
+	struct cpl_tx_pkt_core *cpl;
+	u32 immd_len, wrlen16;
+	bool compl = false;
+	u8 ver, proto;
+
+	ver = ip_hdr(skb)->version;
+	proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr : ip_hdr(skb)->protocol;
+
+	wrlen16 = DIV_ROUND_UP(wrlen, 16);
+	immd_len = sizeof(struct cpl_tx_pkt_core);
+	if (skb_shinfo(skb)->gso_size &&
+	    !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4))
+		immd_len += sizeof(struct cpl_tx_pkt_lso_core);
+	immd_len += hdr_len;
+
+	if (!eosw_txq->ncompl ||
+	    eosw_txq->last_compl >= adap->params.ofldq_wr_cred / 2) {
+		compl = true;
+		eosw_txq->ncompl++;
+		eosw_txq->last_compl = 0;
+	}
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_EO_WR) |
+				     FW_ETH_TX_EO_WR_IMMDLEN_V(immd_len) |
+				     FW_WR_COMPL_V(compl));
+	wr->equiq_to_len16 = cpu_to_be32(FW_WR_LEN16_V(wrlen16) |
+					 FW_WR_FLOWID_V(eosw_txq->hwtid));
+	wr->r3 = 0;
+	if (proto == IPPROTO_UDP) {
+		cpl = write_eo_udp_wr(skb, wr, hdr_len);
+	} else {
+		wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
+		wr->u.tcpseg.ethlen = skb_network_offset(skb);
+		wr->u.tcpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
+		wr->u.tcpseg.tcplen = tcp_hdrlen(skb);
+		wr->u.tcpseg.tsclk_tsoff = 0;
+		wr->u.tcpseg.r4 = 0;
+		wr->u.tcpseg.r5 = 0;
+		wr->u.tcpseg.plen = cpu_to_be32(skb->len - hdr_len);
+
+		if (ssi->gso_size) {
+			struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
+
+			wr->u.tcpseg.mss = cpu_to_be16(ssi->gso_size);
+			cpl = write_tso_wr(adap, skb, lso);
+		} else {
+			wr->u.tcpseg.mss = cpu_to_be16(0xffff);
+			cpl = (void *)(wr + 1);
+		}
+	}
+
+	eosw_txq->cred -= wrlen16;
+	eosw_txq->last_compl += wrlen16;
+	return cpl;
+}
+
+static void ethofld_hard_xmit(struct net_device *dev,
+			      struct sge_eosw_txq *eosw_txq)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	u32 wrlen, wrlen16, hdr_len, data_len;
+	enum sge_eosw_state next_state;
+	u64 cntrl, *start, *end, *sgl;
+	struct sge_eohw_txq *eohw_txq;
+	struct cpl_tx_pkt_core *cpl;
+	struct fw_eth_tx_eo_wr *wr;
+	bool skip_eotx_wr = false;
+	struct tx_sw_desc *d;
+	struct sk_buff *skb;
+	u8 flits, ndesc;
+	int left;
+
+	eohw_txq = &adap->sge.eohw_txq[eosw_txq->hwqid];
+	spin_lock(&eohw_txq->lock);
+	reclaim_completed_tx_imm(&eohw_txq->q);
+
+	d = &eosw_txq->desc[eosw_txq->last_pidx];
+	skb = d->skb;
+	skb_tx_timestamp(skb);
+
+	wr = (struct fw_eth_tx_eo_wr *)&eohw_txq->q.desc[eohw_txq->q.pidx];
+	if (unlikely(eosw_txq->state != CXGB4_EO_STATE_ACTIVE &&
+		     eosw_txq->last_pidx == eosw_txq->flowc_idx)) {
+		hdr_len = skb->len;	/* the FLOWC skb is all "header" */
+		data_len = 0;
+		flits = DIV_ROUND_UP(hdr_len, 8);
+		if (eosw_txq->state == CXGB4_EO_STATE_FLOWC_OPEN_SEND)
+			next_state = CXGB4_EO_STATE_FLOWC_OPEN_REPLY;
+		else
+			next_state = CXGB4_EO_STATE_FLOWC_CLOSE_REPLY;
+		skip_eotx_wr = true;
+	} else {
+		hdr_len = eth_get_headlen(dev, skb->data, skb_headlen(skb));
+		data_len = skb->len - hdr_len;
+		flits = ethofld_calc_tx_flits(adap, skb, hdr_len);
+	}
+	ndesc = flits_to_desc(flits);
+	wrlen = flits * 8;
+	wrlen16 = DIV_ROUND_UP(wrlen, 16);
+
+	/* If there are no CPL credits, bail out with last_pidx untouched
+	 * so this skb is retried. NOTE(review): hw ring space is unchecked.
+	 */
+	if (unlikely(wrlen16 > eosw_txq->cred))
+		goto out_unlock;
+
+	if (unlikely(skip_eotx_wr)) {
+		start = (u64 *)wr;
+		eosw_txq->state = next_state;
+		goto write_wr_headers;
+	}
+
+	cpl = write_eo_wr(adap, eosw_txq, skb, wr, hdr_len, wrlen);
+	cntrl = hwcsum(adap->params.chip, skb);
+	if (skb_vlan_tag_present(skb))
+		cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
+
+	cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) |
+				 TXPKT_INTF_V(pi->tx_chan) |
+				 TXPKT_PF_V(adap->pf));
+	cpl->pack = 0;
+	cpl->len = cpu_to_be16(skb->len);
+	cpl->ctrl1 = cpu_to_be64(cntrl);
+
+	start = (u64 *)(cpl + 1);
+
+write_wr_headers:
+	sgl = (u64 *)inline_tx_skb_header(skb, &eohw_txq->q, (void *)start,
+					  hdr_len);
+	if (data_len) {
+		if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, d->addr))) {
+			memset(d->addr, 0, sizeof(d->addr));
+			eohw_txq->mapping_err++;
+			goto out_unlock; /* NOTE(review): credits not restored */
+		}
+
+		end = (u64 *)wr + flits;
+		if (unlikely(start > sgl)) {	/* header copy wrapped */
+			left = (u8 *)end - (u8 *)eohw_txq->q.stat;
+			end = (void *)eohw_txq->q.desc + left;
+		}
+
+		if (unlikely((u8 *)sgl >= (u8 *)eohw_txq->q.stat)) {
+			/* If current position is already at the end of the
+			 * txq, reset the current to point to start of the queue
+			 * and update the end ptr as well.
+			 */
+			left = (u8 *)end - (u8 *)eohw_txq->q.stat;
+
+			end = (void *)eohw_txq->q.desc + left;
+			sgl = (void *)eohw_txq->q.desc;
+		}
+
+		cxgb4_write_sgl(skb, &eohw_txq->q, (void *)sgl, end, hdr_len,
+				d->addr);
+	}
+
+	if (skb_shinfo(skb)->gso_size) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+			eohw_txq->uso++;
+		else
+			eohw_txq->tso++;
+		eohw_txq->tx_cso += skb_shinfo(skb)->gso_segs;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		eohw_txq->tx_cso++;
+	}
+
+	if (skb_vlan_tag_present(skb))
+		eohw_txq->vlan_ins++;
+
+	txq_advance(&eohw_txq->q, ndesc);
+	cxgb4_ring_tx_db(adap, &eohw_txq->q, ndesc);
+	eosw_txq_advance_index(&eosw_txq->last_pidx, 1, eosw_txq->ndesc);
+
+out_unlock:
+	spin_unlock(&eohw_txq->lock);
+}
+
+static void ethofld_xmit(struct net_device *dev, struct sge_eosw_txq *eosw_txq)
+{
+	struct sk_buff *skb;
+	int pktcount;
+
+	switch (eosw_txq->state) {
+	case CXGB4_EO_STATE_ACTIVE:
+	case CXGB4_EO_STATE_FLOWC_OPEN_SEND:
+	case CXGB4_EO_STATE_FLOWC_CLOSE_SEND:
+		pktcount = eosw_txq->pidx - eosw_txq->last_pidx;
+		if (pktcount < 0)	/* pidx has wrapped */
+			pktcount += eosw_txq->ndesc;
+		break;
+	case CXGB4_EO_STATE_FLOWC_OPEN_REPLY:
+	case CXGB4_EO_STATE_FLOWC_CLOSE_REPLY:
+	case CXGB4_EO_STATE_CLOSED:
+	default:
+		return;		/* nothing is sent in these states */
+	}
+
+	while (pktcount--) { /* NOTE(review): keeps looping if xmit fails */
+		skb = eosw_txq_peek(eosw_txq);
+		if (!skb) {	/* empty slot: step over it */
+			eosw_txq_advance_index(&eosw_txq->last_pidx, 1,
+					       eosw_txq->ndesc);
+			continue;
+		}
+
+		ethofld_hard_xmit(dev, eosw_txq);
+	}
+}
+
+static netdev_tx_t cxgb4_ethofld_xmit(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	struct cxgb4_tc_port_mqprio *tc_port_mqprio;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_eosw_txq *eosw_txq;
+	u32 qid;
+	int ret;
+
+	ret = cxgb4_validate_skb(skb, dev, ETH_HLEN);
+	if (ret)
+		goto out_free;
+
+	tc_port_mqprio = &adap->tc_mqprio->port_mqprio[pi->port_id];
+	qid = skb_get_queue_mapping(skb) - pi->nqsets;
+	eosw_txq = &tc_port_mqprio->eosw_txq[qid];
+	spin_lock_bh(&eosw_txq->lock);
+	if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE)
+		goto out_unlock;	/* not ACTIVE: drop the packet */
+
+	ret = eosw_txq_enqueue(eosw_txq, skb);
+	if (ret)
+		goto out_unlock;	/* ring is full: drop the packet */
+
+	/* SKB is queued for processing until credits are available.
+	 * So, call the destructor now and we'll free the skb later
+	 * after the firmware has acknowledged its transmission.
+	 */
+	skb_orphan(skb);
+
+	eosw_txq_advance(eosw_txq, 1);
+	ethofld_xmit(dev, eosw_txq);
+	spin_unlock_bh(&eosw_txq->lock);
+	return NETDEV_TX_OK;
+
+out_unlock:
+	spin_unlock_bh(&eosw_txq->lock);
+out_free:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
 netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct port_info *pi = netdev_priv(dev);
+	u16 qid = skb_get_queue_mapping(skb);
 
 	if (unlikely(pi->eth_flags & PRIV_FLAG_PORT_TX_VM))
 		return cxgb4_vf_eth_xmit(skb, dev);
 
+	if (unlikely(qid >= pi->nqsets))
+		return cxgb4_ethofld_xmit(skb, dev);
+
 	return cxgb4_eth_xmit(skb, dev);
 }
 
 /**
- *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
- *	@q: the SGE control Tx queue
+ * cxgb4_ethofld_send_flowc - Send ETHOFLD flowc request to bind eotid to tc.
+ * @dev: netdevice
+ * @eotid: ETHOFLD tid to bind/unbind
+ * @tc: traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid
  *
- *	This is a variant of cxgb4_reclaim_completed_tx() that is used
- *	for Tx queues that send only immediate data (presently just
- *	the control queues) and	thus do not have any sk_buffs to release.
+ * Queue a FLOWC work request that binds @eotid to traffic class @tc, or
+ * unbinds it when @tc is FW_SCHED_CLS_NONE.
+ * Return: 0 (also when the queue state rules the request out) or -ENOMEM.
  */
-static inline void reclaim_completed_tx_imm(struct sge_txq *q)
+int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
 {
-	int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
-	int reclaim = hw_cidx - q->cidx;
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+	enum sge_eosw_state next_state;
+	struct sge_eosw_txq *eosw_txq;
+	u32 len, len16, nparams = 6;
+	struct fw_flowc_wr *flowc;
+	struct eotid_entry *entry;
+	struct sge_ofld_rxq *rxq;
+	struct sk_buff *skb;
+	int ret = 0;
 
-	if (reclaim < 0)
-		reclaim += q->size;
+	len = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval) * nparams;
+	len16 = DIV_ROUND_UP(len, 16);
 
-	q->in_use -= reclaim;
-	q->cidx = hw_cidx;
+	entry = cxgb4_lookup_eotid(&adap->tids, eotid);
+	if (!entry)
+		return -ENOMEM;
+
+	eosw_txq = (struct sge_eosw_txq *)entry->data;
+	if (!eosw_txq)
+		return -ENOMEM;
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	spin_lock_bh(&eosw_txq->lock);
+	if (tc != FW_SCHED_CLS_NONE) {
+		if (eosw_txq->state != CXGB4_EO_STATE_CLOSED)
+			goto out_free_skb;	/* unused skb must not leak */
+
+		next_state = CXGB4_EO_STATE_FLOWC_OPEN_SEND;
+	} else {
+		if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE)
+			goto out_free_skb;	/* unused skb must not leak */
+
+		next_state = CXGB4_EO_STATE_FLOWC_CLOSE_SEND;
+	}
+
+	flowc = __skb_put(skb, len);
+	memset(flowc, 0, len);
+
+	rxq = &adap->sge.eohw_rxq[eosw_txq->hwqid];
+	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(len16) |
+					  FW_WR_FLOWID_V(eosw_txq->hwtid));
+	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
+					   FW_FLOWC_WR_NPARAMS_V(nparams) |
+					   FW_WR_COMPL_V(1));
+	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
+	flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V(adap->pf));
+	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
+	flowc->mnemval[1].val = cpu_to_be32(pi->tx_chan);
+	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
+	flowc->mnemval[2].val = cpu_to_be32(pi->tx_chan);
+	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
+	flowc->mnemval[3].val = cpu_to_be32(rxq->rspq.abs_id);
+	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
+	flowc->mnemval[4].val = cpu_to_be32(tc);
+	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_EOSTATE;
+	flowc->mnemval[5].val = cpu_to_be32(tc == FW_SCHED_CLS_NONE ?
+					    FW_FLOWC_MNEM_EOSTATE_CLOSING :
+					    FW_FLOWC_MNEM_EOSTATE_ESTABLISHED);
+
+	ret = eosw_txq_enqueue(eosw_txq, skb);
+	if (ret)
+		goto out_free_skb;
+
+	eosw_txq->cred -= len16;	/* charged only once it is queued */
+	eosw_txq->ncompl++;
+	eosw_txq->last_compl = 0;
+	eosw_txq->state = next_state;
+	eosw_txq->flowc_idx = eosw_txq->pidx;
+	eosw_txq_advance(eosw_txq, 1);
+	ethofld_xmit(dev, eosw_txq);
+	goto out_unlock;		/* the queue owns the skb now */
+
+out_free_skb:
+	dev_consume_skb_any(skb);
+out_unlock:
+	spin_unlock_bh(&eosw_txq->lock);
+	return ret;
 }
 
 /**
@@ -3311,6 +3757,112 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
+void cxgb4_ethofld_restart(unsigned long data)
+{
+	struct sge_eosw_txq *eosw_txq = (struct sge_eosw_txq *)data;
+	int pktcount;
+
+	spin_lock(&eosw_txq->lock);
+	pktcount = eosw_txq->cidx - eosw_txq->last_cidx; /* done, not freed */
+	if (pktcount < 0)	/* cidx has wrapped */
+		pktcount += eosw_txq->ndesc;
+
+	if (pktcount) {
+		cxgb4_eosw_txq_free_desc(netdev2adap(eosw_txq->netdev),
+					 eosw_txq, pktcount);
+		eosw_txq->inuse -= pktcount;
+	}
+
+	/* Packets may have been held back for lack of credits. So,
+	 * attempt to send them now.
+	 */
+	ethofld_xmit(eosw_txq->netdev, eosw_txq);
+	spin_unlock(&eosw_txq->lock);
+}
+
+/* cxgb4_ethofld_rx_handler - Process ETHOFLD Tx completions
+ * @q: the response queue that received the packet
+ * @rsp: the response queue descriptor holding the CPL message
+ * @si: the gather list of packet fragments (not used here)
+ *
+ * Process an ETHOFLD Tx completion (CPL_FW4_ACK). Advance the cidx and
+ * return the credits here; a tasklet frees the descriptors later.
+ */
+int cxgb4_ethofld_rx_handler(struct sge_rspq *q, const __be64 *rsp,
+			     const struct pkt_gl *si)
+{
+	u8 opcode = ((const struct rss_header *)rsp)->opcode;
+
+	/* skip RSS header */
+	rsp++;
+
+	if (opcode == CPL_FW4_ACK) {
+		const struct cpl_fw4_ack *cpl;
+		struct sge_eosw_txq *eosw_txq;
+		struct eotid_entry *entry;
+		struct sk_buff *skb;
+		u32 hdr_len, eotid;
+		u8 flits, wrlen16;
+		int credits;
+
+		cpl = (const struct cpl_fw4_ack *)rsp;
+		eotid = CPL_FW4_ACK_FLOWID_G(ntohl(OPCODE_TID(cpl))) -
+			q->adap->tids.eotid_base;
+		entry = cxgb4_lookup_eotid(&q->adap->tids, eotid);
+		if (!entry)
+			goto out_done;
+
+		eosw_txq = (struct sge_eosw_txq *)entry->data;
+		if (!eosw_txq)
+			goto out_done;
+
+		spin_lock(&eosw_txq->lock);
+		credits = cpl->credits;
+		while (credits > 0) {
+			skb = eosw_txq->desc[eosw_txq->cidx].skb;
+			if (!skb)
+				break;
+
+			if (unlikely((eosw_txq->state ==
+				      CXGB4_EO_STATE_FLOWC_OPEN_REPLY ||
+				      eosw_txq->state ==
+				      CXGB4_EO_STATE_FLOWC_CLOSE_REPLY) &&
+				     eosw_txq->cidx == eosw_txq->flowc_idx)) {
+				flits = DIV_ROUND_UP(skb->len, 8);
+				if (eosw_txq->state ==
+				    CXGB4_EO_STATE_FLOWC_OPEN_REPLY)
+					eosw_txq->state = CXGB4_EO_STATE_ACTIVE;
+				else
+					eosw_txq->state = CXGB4_EO_STATE_CLOSED;
+				complete(&eosw_txq->completion);
+			} else {
+				hdr_len = eth_get_headlen(eosw_txq->netdev,
+							  skb->data,
+							  skb_headlen(skb));
+				flits = ethofld_calc_tx_flits(q->adap, skb,
+							      hdr_len);
+			}
+			eosw_txq_advance_index(&eosw_txq->cidx, 1,
+					       eosw_txq->ndesc);
+			wrlen16 = DIV_ROUND_UP(flits * 8, 16);
+			credits -= wrlen16;	/* same size the Tx path charged */
+		}
+
+		eosw_txq->cred += cpl->credits;
+		eosw_txq->ncompl--;
+
+		spin_unlock(&eosw_txq->lock);
+
+		/* Schedule the tasklet that reclaims the completed SKBs and
+		 * retries any ETHOFLD Tx that was waiting for these credits.
+		 */
+		tasklet_schedule(&eosw_txq->qresume_tsk);
+	}
+
+out_done:
+	return 0;
+}
+
 /*
  * The MSI-X interrupt handler for an SGE response queue.
  */
@@ -3835,7 +4387,10 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 	txq->q.q_type = CXGB4_TXQ_ETH;
 	init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd)));
 	txq->txq = netdevq;
-	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
+	txq->tso = 0;
+	txq->uso = 0;
+	txq->tx_cso = 0;
+	txq->vlan_ins = 0;
 	txq->mapping_err = 0;
 	txq->dbqt = dbqt;
 
@@ -3912,30 +4467,30 @@ int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
 	return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
 }
 
-int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
-			 struct net_device *dev, unsigned int iqid,
-			 unsigned int uld_type)
+static int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_txq *q,
+				 struct net_device *dev, u32 cmd, u32 iqid)
 {
 	unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
-	int ret, nentries;
-	struct fw_eq_ofld_cmd c;
-	struct sge *s = &adap->sge;
 	struct port_info *pi = netdev_priv(dev);
-	int cmd = FW_EQ_OFLD_CMD;
+	struct sge *s = &adap->sge;
+	struct fw_eq_ofld_cmd c;
+	u32 fb_min, nentries;
+	int ret;
 
 	/* Add status entries */
-	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
-
-	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
-			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
-			&txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
-			NUMA_NO_NODE);
-	if (!txq->q.desc)
+	nentries = q->size + s->stat_len / sizeof(struct tx_desc);
+	q->desc = alloc_ring(adap->pdev_dev, q->size, sizeof(struct tx_desc),
+			     sizeof(struct tx_sw_desc), &q->phys_addr,
+			     &q->sdesc, s->stat_len, NUMA_NO_NODE);
+	if (!q->desc)
 		return -ENOMEM;
 
+	if (chip_ver <= CHELSIO_T5)
+		fb_min = FETCHBURSTMIN_64B_X;
+	else
+		fb_min = FETCHBURSTMIN_64B_T6_X;
+
 	memset(&c, 0, sizeof(c));
-	if (unlikely(uld_type == CXGB4_TX_CRYPTO))
-		cmd = FW_EQ_CTRL_CMD;
 	c.op_to_vfn = htonl(FW_CMD_OP_V(cmd) | FW_CMD_REQUEST_F |
 			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
 			    FW_EQ_OFLD_CMD_PFN_V(adap->pf) |
@@ -3947,27 +4502,42 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
 		      FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
 		      FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid));
 	c.dcaen_to_eqsize =
-		htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
-					     ? FETCHBURSTMIN_64B_X
-					     : FETCHBURSTMIN_64B_T6_X) |
+		htonl(FW_EQ_OFLD_CMD_FBMIN_V(fb_min) |
 		      FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
 		      FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
 		      FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
-	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
+	c.eqaddr = cpu_to_be64(q->phys_addr);
 
 	ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
 	if (ret) {
-		kfree(txq->q.sdesc);
-		txq->q.sdesc = NULL;
+		kfree(q->sdesc);
+		q->sdesc = NULL;
 		dma_free_coherent(adap->pdev_dev,
 				  nentries * sizeof(struct tx_desc),
-				  txq->q.desc, txq->q.phys_addr);
-		txq->q.desc = NULL;
+				  q->desc, q->phys_addr);
+		q->desc = NULL;
 		return ret;
 	}
 
+	init_txq(adap, q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
+	return 0;
+}
+
+int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
+			 struct net_device *dev, unsigned int iqid,
+			 unsigned int uld_type)
+{
+	u32 cmd = FW_EQ_OFLD_CMD;
+	int ret;
+
+	if (unlikely(uld_type == CXGB4_TX_CRYPTO))
+		cmd = FW_EQ_CTRL_CMD;
+
+	ret = t4_sge_alloc_ofld_txq(adap, &txq->q, dev, cmd, iqid);
+	if (ret)
+		return ret;
+
 	txq->q.q_type = CXGB4_TXQ_ULD;
-	init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
 	txq->adap = adap;
 	skb_queue_head_init(&txq->sendq);
 	tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
@@ -3976,6 +4546,26 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
 	return 0;
 }
 
+int t4_sge_alloc_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq,
+			     struct net_device *dev, u32 iqid)
+{
+	int ret;
+
+	ret = t4_sge_alloc_ofld_txq(adap, &txq->q, dev, FW_EQ_OFLD_CMD, iqid);
+	if (ret)
+		return ret;
+
+	txq->q.q_type = CXGB4_TXQ_ULD;
+	spin_lock_init(&txq->lock);
+	txq->adap = adap;
+	txq->tso = 0;
+	txq->uso = 0;
+	txq->tx_cso = 0;
+	txq->vlan_ins = 0;
+	txq->mapping_err = 0;
+	return 0;
+}
+
 void free_txq(struct adapter *adap, struct sge_txq *q)
 {
 	struct sge *s = &adap->sge;
@@ -4031,6 +4621,17 @@ void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q)
 				     q->fl.size ? &q->fl : NULL);
 }
 
+void t4_sge_free_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq)
+{
+	if (txq->q.desc) {
+		t4_ofld_eq_free(adap, adap->mbox, adap->pf, 0,
+				txq->q.cntxt_id);
+		free_tx_desc(adap, &txq->q, txq->q.in_use, false);
+		kfree(txq->q.sdesc);
+		free_txq(adap, &txq->q);
+	}
+}
+
 /**
  *	t4_free_sge_resources - free SGE resources
  *	@adap: the adapter
@@ -4060,6 +4661,10 @@ void t4_free_sge_resources(struct adapter *adap)
 		if (eq->rspq.desc)
 			free_rspq_fl(adap, &eq->rspq,
 				     eq->fl.size ? &eq->fl : NULL);
+		if (eq->msix) {
+			cxgb4_free_msix_idx_in_bmap(adap, eq->msix->idx);
+			eq->msix = NULL;
+		}
 
 		etq = &adap->sge.ethtxq[i];
 		if (etq->q.desc) {
@@ -4086,8 +4691,15 @@ void t4_free_sge_resources(struct adapter *adap)
 		}
 	}
 
-	if (adap->sge.fw_evtq.desc)
+	if (adap->sge.fw_evtq.desc) {
 		free_rspq_fl(adap, &adap->sge.fw_evtq, NULL);
+		if (adap->sge.fwevtq_msix_idx >= 0)
+			cxgb4_free_msix_idx_in_bmap(adap,
+						    adap->sge.fwevtq_msix_idx);
+	}
+
+	if (adap->sge.nd_msix_idx >= 0)
+		cxgb4_free_msix_idx_in_bmap(adap, adap->sge.nd_msix_idx);
 
 	if (adap->sge.intrq.desc)
 		free_rspq_fl(adap, &adap->sge.intrq, NULL);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index f2a7824da42b..19d18acfc9a6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -8777,8 +8777,8 @@ int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
 		       unsigned int *speedp, unsigned int *mtup)
 {
 	unsigned int fw_caps = pi->adapter->params.fw_caps_support;
-	struct fw_port_cmd port_cmd;
 	unsigned int action, link_ok, mtu;
+	struct fw_port_cmd port_cmd;
 	fw_port_cap32_t linkattr;
 	int ret;
 
@@ -8813,9 +8813,12 @@ int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
 			be32_to_cpu(port_cmd.u.info32.auxlinfo32_mtu32));
 	}
 
-	*link_okp = link_ok;
-	*speedp = fwcap_to_speed(linkattr);
-	*mtup = mtu;
+	if (link_okp)
+		*link_okp = link_ok;
+	if (speedp)
+		*speedp = fwcap_to_speed(linkattr);
+	if (mtup)
+		*mtup = mtu;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 38dd41eb959e..575c6abcdae7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -1421,6 +1421,11 @@ enum {
 	CPL_FW4_ACK_FLAGS_FLOWC		= 0x4,	/* fw_flowc_wr complete */
 };
 
+#define CPL_FW4_ACK_FLOWID_S    0
+#define CPL_FW4_ACK_FLOWID_M    0xffffff
+#define CPL_FW4_ACK_FLOWID_G(x) \
+	(((x) >> CPL_FW4_ACK_FLOWID_S) & CPL_FW4_ACK_FLOWID_M)
+
 struct cpl_fw6_msg {
 	u8 opcode;
 	u8 type;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 65313f6b5704..ac4fb43bdec6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -87,6 +87,7 @@ enum fw_wr_opcodes {
 	FW_ULPTX_WR                    = 0x04,
 	FW_TP_WR                       = 0x05,
 	FW_ETH_TX_PKT_WR               = 0x08,
+	FW_ETH_TX_EO_WR                = 0x1c,
 	FW_OFLD_CONNECTION_WR          = 0x2f,
 	FW_FLOWC_WR                    = 0x0a,
 	FW_OFLD_TX_DATA_WR             = 0x0b,
@@ -534,6 +535,47 @@ struct fw_eth_tx_pkt_wr {
 	__be64 r3;
 };
 
+enum fw_eth_tx_eo_type {
+	FW_ETH_TX_EO_TYPE_UDPSEG = 0,
+	FW_ETH_TX_EO_TYPE_TCPSEG,
+};
+
+struct fw_eth_tx_eo_wr {
+	__be32 op_immdlen;
+	__be32 equiq_to_len16;
+	__be64 r3;
+	union fw_eth_tx_eo {
+		struct fw_eth_tx_eo_udpseg {
+			__u8   type;
+			__u8   ethlen;
+			__be16 iplen;
+			__u8   udplen;
+			__u8   rtplen;
+			__be16 r4;
+			__be16 mss;
+			__be16 schedpktsize;
+			__be32 plen;
+		} udpseg;
+		struct fw_eth_tx_eo_tcpseg {
+			__u8   type;
+			__u8   ethlen;
+			__be16 iplen;
+			__u8   tcplen;
+			__u8   tsclk_tsoff;
+			__be16 r4;
+			__be16 mss;
+			__be16 r5;
+			__be32 plen;
+		} tcpseg;
+	} u;
+};
+
+#define FW_ETH_TX_EO_WR_IMMDLEN_S	0
+#define FW_ETH_TX_EO_WR_IMMDLEN_M	0x1ff
+#define FW_ETH_TX_EO_WR_IMMDLEN_V(x)	((x) << FW_ETH_TX_EO_WR_IMMDLEN_S)
+#define FW_ETH_TX_EO_WR_IMMDLEN_G(x)	\
+	(((x) >> FW_ETH_TX_EO_WR_IMMDLEN_S) & FW_ETH_TX_EO_WR_IMMDLEN_M)
+
 struct fw_ofld_connection_wr {
 	__be32 op_compl;
 	__be32 len16_pkd;
@@ -660,6 +702,12 @@ enum fw_flowc_mnem_tcpstate {
 	FW_FLOWC_MNEM_TCPSTATE_TIMEWAIT = 10, /* not expected */
 };
 
+enum fw_flowc_mnem_eostate {
+	FW_FLOWC_MNEM_EOSTATE_ESTABLISHED = 1, /* default */
+	/* graceful close, after sending outstanding payload */
+	FW_FLOWC_MNEM_EOSTATE_CLOSING = 2,
+};
+
 enum fw_flowc_mnem {
 	FW_FLOWC_MNEM_PFNVFN,		/* PFN [15:8] VFN [7:0] */
 	FW_FLOWC_MNEM_CH,
@@ -1134,6 +1182,7 @@ enum fw_caps_config_nic {
 	FW_CAPS_CONFIG_NIC		= 0x00000001,
 	FW_CAPS_CONFIG_NIC_VM		= 0x00000002,
 	FW_CAPS_CONFIG_NIC_HASHFILTER	= 0x00000020,
+	FW_CAPS_CONFIG_NIC_ETHOFLD	= 0x00000040,
 };
 
 enum fw_caps_config_ofld {
@@ -1276,6 +1325,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28,
 	FW_PARAMS_PARAM_DEV_DBQ_TIMER	= 0x29,
 	FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
+	FW_PARAMS_PARAM_DEV_NUM_TM_CLASS = 0x2B,
 	FW_PARAMS_PARAM_DEV_FILTER = 0x2E,
 };
author	Linus Torvalds <torvalds@linux-foundation.org>	2019-11-25 20:02:57 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-11-25 20:02:57 -0800
commit	386403a115f95997c2715691226e11a7b5cffcfd (patch)
tree	a685df70bd3d5b295683713818ddf0752c3d75b6 /drivers/net/ethernet/chelsio
parent	Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6 (diff)
parent	Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next (diff)
download	linux-dev-386403a115f95997c2715691226e11a7b5cffcfd.tar.xz linux-dev-386403a115f95997c2715691226e11a7b5cffcfd.zip