24 files changed, 5146 insertions, 4063 deletions
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index ebdfe5b26937..41d849f283f6 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -84,8 +84,6 @@ struct lpfc_sli2_slim;
 #define LPFC_HB_MBOX_INTERVAL   5	/* Heart beat interval in seconds. */
 #define LPFC_HB_MBOX_TIMEOUT    30	/* Heart beat timeout  in seconds. */
 
-#define LPFC_LOOK_AHEAD_OFF	0	/* Look ahead logic is turned off */
-
 /* Error Attention event polling interval */
 #define LPFC_ERATT_POLL_INTERVAL	5 /* EATT poll interval in seconds */
 
@@ -146,6 +144,7 @@ struct lpfc_nvmet_ctxbuf {
 	struct lpfc_nvmet_rcv_ctx *context;
 	struct lpfc_iocbq *iocbq;
 	struct lpfc_sglq *sglq;
+	struct work_struct defer_work;
 };
 
 struct lpfc_dma_pool {
@@ -235,8 +234,6 @@ typedef struct lpfc_vpd {
 	} sli3Feat;
 } lpfc_vpd_t;
 
-struct lpfc_scsi_buf;
-
 
 /*
  * lpfc stat counters
@@ -466,6 +463,7 @@ struct lpfc_vport {
 	uint32_t cfg_use_adisc;
 	uint32_t cfg_discovery_threads;
 	uint32_t cfg_log_verbose;
+	uint32_t cfg_enable_fc4_type;
 	uint32_t cfg_max_luns;
 	uint32_t cfg_enable_da_id;
 	uint32_t cfg_max_scsicmpl_time;
@@ -479,6 +477,7 @@ struct lpfc_vport {
 	struct dentry *debug_disc_trc;
 	struct dentry *debug_nodelist;
 	struct dentry *debug_nvmestat;
+	struct dentry *debug_scsistat;
 	struct dentry *debug_nvmektime;
 	struct dentry *debug_cpucheck;
 	struct dentry *vport_debugfs_root;
@@ -596,6 +595,13 @@ struct lpfc_mbox_ext_buf_ctx {
 	struct list_head ext_dmabuf_list;
 };
 
+struct lpfc_epd_pool {
+	/* Expedite pool */
+	struct list_head list;
+	u32 count;
+	spinlock_t lock;	/* lock for expedite pool */
+};
+
 struct lpfc_ras_fwlog {
 	uint8_t *fwlog_buff;
 	uint32_t fw_buffcount; /* Buffer size posted to FW */
@@ -617,20 +623,19 @@ struct lpfc_ras_fwlog {
 
 struct lpfc_hba {
 	/* SCSI interface function jump table entries */
-	int (*lpfc_new_scsi_buf)
-		(struct lpfc_vport *, int);
-	struct lpfc_scsi_buf * (*lpfc_get_scsi_buf)
-		(struct lpfc_hba *, struct lpfc_nodelist *);
+	struct lpfc_io_buf * (*lpfc_get_scsi_buf)
+		(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		struct scsi_cmnd *cmnd);
 	int (*lpfc_scsi_prep_dma_buf)
-		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+		(struct lpfc_hba *, struct lpfc_io_buf *);
 	void (*lpfc_scsi_unprep_dma_buf)
-		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+		(struct lpfc_hba *, struct lpfc_io_buf *);
 	void (*lpfc_release_scsi_buf)
-		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+		(struct lpfc_hba *, struct lpfc_io_buf *);
 	void (*lpfc_rampdown_queue_depth)
 		(struct lpfc_hba *);
 	void (*lpfc_scsi_prep_cmnd)
-		(struct lpfc_vport *, struct lpfc_scsi_buf *,
+		(struct lpfc_vport *, struct lpfc_io_buf *,
 		 struct lpfc_nodelist *);
 
 	/* IOCB interface function jump table entries */
@@ -673,13 +678,17 @@ struct lpfc_hba {
 		(struct lpfc_hba *);
 
 	int (*lpfc_bg_scsi_prep_dma_buf)
-		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+		(struct lpfc_hba *, struct lpfc_io_buf *);
 	/* Add new entries here */
 
+	/* expedite pool */
+	struct lpfc_epd_pool epd_pool;
+
 	/* SLI4 specific HBA data structure */
 	struct lpfc_sli4_hba sli4_hba;
 
 	struct workqueue_struct *wq;
+	struct delayed_work     eq_delay_work;
 
 	struct lpfc_sli sli;
 	uint8_t pci_dev_grp;	/* lpfc PCI dev group: 0x0, 0x1, 0x2,... */
@@ -713,7 +722,6 @@ struct lpfc_hba {
 #define HBA_FCOE_MODE		0x4 /* HBA function in FCoE Mode */
 #define HBA_SP_QUEUE_EVT	0x8 /* Slow-path qevt posted to worker thread*/
 #define HBA_POST_RECEIVE_BUFFER 0x10 /* Rcv buffers need to be posted */
-#define FCP_XRI_ABORT_EVENT	0x20
 #define ELS_XRI_ABORT_EVENT	0x40
 #define ASYNC_EVENT		0x80
 #define LINK_DISABLED		0x100 /* Link disabled by user */
@@ -784,12 +792,12 @@ struct lpfc_hba {
 	uint8_t  nvmet_support;	/* driver supports NVMET */
 #define LPFC_NVMET_MAX_PORTS	32
 	uint8_t  mds_diags_support;
-	uint32_t initial_imax;
 	uint8_t  bbcredit_support;
 	uint8_t  enab_exp_wqcq_pages;
 
 	/* HBA Config Parameters */
 	uint32_t cfg_ack0;
+	uint32_t cfg_xri_rebalancing;
 	uint32_t cfg_enable_npiv;
 	uint32_t cfg_enable_rrq;
 	uint32_t cfg_topology;
@@ -811,12 +819,14 @@ struct lpfc_hba {
 	uint32_t cfg_use_msi;
 	uint32_t cfg_auto_imax;
 	uint32_t cfg_fcp_imax;
+	uint32_t cfg_cq_poll_threshold;
+	uint32_t cfg_cq_max_proc_limit;
 	uint32_t cfg_fcp_cpu_map;
-	uint32_t cfg_fcp_io_channel;
+	uint32_t cfg_hdw_queue;
+	uint32_t cfg_irq_chann;
 	uint32_t cfg_suppress_rsp;
 	uint32_t cfg_nvme_oas;
 	uint32_t cfg_nvme_embed_cmd;
-	uint32_t cfg_nvme_io_channel;
 	uint32_t cfg_nvmet_mrq_post;
 	uint32_t cfg_nvmet_mrq;
 	uint32_t cfg_enable_nvmet;
@@ -852,6 +862,7 @@ struct lpfc_hba {
 	uint32_t cfg_prot_guard;
 	uint32_t cfg_hostmem_hgp;
 	uint32_t cfg_log_verbose;
+	uint32_t cfg_enable_fc4_type;
 	uint32_t cfg_aer_support;
 	uint32_t cfg_sriov_nr_virtfn;
 	uint32_t cfg_request_firmware_upgrade;
@@ -872,15 +883,12 @@ struct lpfc_hba {
 	uint32_t cfg_ras_fwlog_level;
 	uint32_t cfg_ras_fwlog_buffsize;
 	uint32_t cfg_ras_fwlog_func;
-	uint32_t cfg_enable_fc4_type;
 	uint32_t cfg_enable_bbcr;	/* Enable BB Credit Recovery */
 	uint32_t cfg_enable_dpp;	/* Enable Direct Packet Push */
-	uint32_t cfg_xri_split;
 #define LPFC_ENABLE_FCP  1
 #define LPFC_ENABLE_NVME 2
 #define LPFC_ENABLE_BOTH 3
 	uint32_t cfg_enable_pbde;
-	uint32_t io_channel_irqs;	/* number of irqs for io channels */
 	struct nvmet_fc_target_port *targetport;
 	lpfc_vpd_t vpd;		/* vital product data */
 
@@ -952,14 +960,6 @@ struct lpfc_hba {
 	struct timer_list eratt_poll;
 	uint32_t eratt_poll_interval;
 
-	/*
-	 * stat  counters
-	 */
-	atomic_t fc4ScsiInputRequests;
-	atomic_t fc4ScsiOutputRequests;
-	atomic_t fc4ScsiControlRequests;
-	atomic_t fc4ScsiIoCmpls;
-
 	uint64_t bg_guard_err_cnt;
 	uint64_t bg_apptag_err_cnt;
 	uint64_t bg_reftag_err_cnt;
@@ -970,13 +970,6 @@ struct lpfc_hba {
 	struct list_head lpfc_scsi_buf_list_get;
 	struct list_head lpfc_scsi_buf_list_put;
 	uint32_t total_scsi_bufs;
-	spinlock_t nvme_buf_list_get_lock;  /* NVME buf alloc list lock */
-	spinlock_t nvme_buf_list_put_lock;  /* NVME buf free list lock */
-	struct list_head lpfc_nvme_buf_list_get;
-	struct list_head lpfc_nvme_buf_list_put;
-	uint32_t total_nvme_bufs;
-	uint32_t get_nvme_bufs;
-	uint32_t put_nvme_bufs;
 	struct list_head lpfc_iocb_list;
 	uint32_t total_iocbq_bufs;
 	struct list_head active_rrq_list;
@@ -1033,6 +1026,7 @@ struct lpfc_hba {
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	struct dentry *hba_debugfs_root;
 	atomic_t debugfs_vport_count;
+	struct dentry *debug_multixri_pools;
 	struct dentry *debug_hbqinfo;
 	struct dentry *debug_dumpHostSlim;
 	struct dentry *debug_dumpHBASlim;
@@ -1050,6 +1044,10 @@ struct lpfc_hba {
 
 	struct dentry *debug_nvmeio_trc;
 	struct lpfc_debugfs_nvmeio_trc *nvmeio_trc;
+	struct dentry *debug_hdwqinfo;
+#ifdef LPFC_HDWQ_LOCK_STAT
+	struct dentry *debug_lockstat;
+#endif
 	atomic_t nvmeio_trc_cnt;
 	uint32_t nvmeio_trc_size;
 	uint32_t nvmeio_trc_output_idx;
@@ -1090,7 +1088,6 @@ struct lpfc_hba {
 
 	uint8_t temp_sensor_support;
 	/* Fields used for heart beat. */
-	unsigned long last_eqdelay_time;
 	unsigned long last_completion_time;
 	unsigned long skipped_hb;
 	struct timer_list hb_tmofunc;
@@ -1164,16 +1161,12 @@ struct lpfc_hba {
 	uint16_t sfp_warning;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-#define LPFC_CHECK_CPU_CNT    32
-	uint32_t cpucheck_rcv_io[LPFC_CHECK_CPU_CNT];
-	uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT];
-	uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT];
-	uint32_t cpucheck_ccmpl_io[LPFC_CHECK_CPU_CNT];
 	uint16_t cpucheck_on;
 #define LPFC_CHECK_OFF		0
 #define LPFC_CHECK_NVME_IO	1
 #define LPFC_CHECK_NVMET_RCV	2
 #define LPFC_CHECK_NVMET_IO	4
+#define LPFC_CHECK_SCSI_IO	8
 	uint16_t ktime_on;
 	uint64_t ktime_data_samples;
 	uint64_t ktime_status_samples;
@@ -1297,3 +1290,23 @@ lpfc_phba_elsring(struct lpfc_hba *phba)
 	}
 	return &phba->sli.sli3_ring[LPFC_ELS_RING];
 }
+
+/**
+ * lpfc_sli4_mod_hba_eq_delay - update EQ delay
+ * @phba: Pointer to HBA context object.
+ * @q: The Event Queue to update.
+ * @delay: The delay value (in us) to be written.
+ *
+ **/
+static inline void
+lpfc_sli4_mod_hba_eq_delay(struct lpfc_hba *phba, struct lpfc_queue *eq,
+			   u32 delay)
+{
+	struct lpfc_register reg_data;
+
+	reg_data.word0 = 0;
+	bf_set(lpfc_sliport_eqdelay_id, &reg_data, eq->queue_id);
+	bf_set(lpfc_sliport_eqdelay_delay, &reg_data, delay);
+	writel(reg_data.word0, phba->sli4_hba.u.if_type2.EQDregaddr);
+	eq->q_mode = delay;
+}
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 4bae72cbf3f6..ce3e541434dc 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -64,9 +64,6 @@
 #define LPFC_MIN_MRQ_POST	512
 #define LPFC_MAX_MRQ_POST	2048
 
-#define LPFC_MAX_NVME_INFO_TMP_LEN	100
-#define LPFC_NVME_INFO_MORE_STR		"\nCould be more info...\n"
-
 /*
  * Write key size should be multiple of 4. If write key is changed
  * make sure that library write key is also changed.
@@ -155,7 +152,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 	struct lpfc_nvme_rport *rport;
 	struct lpfc_nodelist *ndlp;
 	struct nvme_fc_remote_port *nrport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	uint64_t data1, data2, data3;
 	uint64_t totin, totout, tot;
 	char *statep;
@@ -163,7 +160,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 	int len = 0;
 	char tmp[LPFC_MAX_NVME_INFO_TMP_LEN] = {0};
 
-	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
+	if (!(vport->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
 		len = scnprintf(buf, PAGE_SIZE, "NVME Disabled\n");
 		return len;
 	}
@@ -334,11 +331,10 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 
 	rcu_read_lock();
 	scnprintf(tmp, sizeof(tmp),
-		  "XRI Dist lpfc%d Total %d NVME %d SCSI %d ELS %d\n",
+		  "XRI Dist lpfc%d Total %d IO %d ELS %d\n",
 		  phba->brd_no,
 		  phba->sli4_hba.max_cfg_param.max_xri,
-		  phba->sli4_hba.nvme_xri_max,
-		  phba->sli4_hba.scsi_xri_max,
+		  phba->sli4_hba.io_xri_max,
 		  lpfc_sli4_get_els_iocb_cnt(phba));
 	if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
 		goto buffer_done;
@@ -457,13 +453,13 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 
 	totin = 0;
 	totout = 0;
-	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-		cstat = &lport->cstat[i];
-		tot = atomic_read(&cstat->fc4NvmeIoCmpls);
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		cstat = &phba->sli4_hba.hdwq[i].nvme_cstat;
+		tot = cstat->io_cmpls;
 		totin += tot;
-		data1 = atomic_read(&cstat->fc4NvmeInputRequests);
-		data2 = atomic_read(&cstat->fc4NvmeOutputRequests);
-		data3 = atomic_read(&cstat->fc4NvmeControlRequests);
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
 		totout += (data1 + data2 + data3);
 	}
 	scnprintf(tmp, sizeof(tmp),
@@ -510,6 +506,57 @@ buffer_done:
 }
 
 static ssize_t
+lpfc_scsi_stat_show(struct device *dev, struct device_attribute *attr,
+		    char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = shost_priv(shost);
+	struct lpfc_hba *phba = vport->phba;
+	int len;
+	struct lpfc_fc4_ctrl_stat *cstat;
+	u64 data1, data2, data3;
+	u64 tot, totin, totout;
+	int i;
+	char tmp[LPFC_MAX_SCSI_INFO_TMP_LEN] = {0};
+
+	if (!(vport->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ||
+	    (phba->sli_rev != LPFC_SLI_REV4))
+		return 0;
+
+	scnprintf(buf, PAGE_SIZE, "SCSI HDWQ Statistics\n");
+
+	totin = 0;
+	totout = 0;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		cstat = &phba->sli4_hba.hdwq[i].scsi_cstat;
+		tot = cstat->io_cmpls;
+		totin += tot;
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
+		totout += (data1 + data2 + data3);
+
+		scnprintf(tmp, sizeof(tmp), "HDWQ (%d): Rd %016llx Wr %016llx "
+			  "IO %016llx ", i, data1, data2, data3);
+		if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+			goto buffer_done;
+
+		scnprintf(tmp, sizeof(tmp), "Cmpl %016llx OutIO %016llx\n",
+			  tot, ((data1 + data2 + data3) - tot));
+		if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+			goto buffer_done;
+	}
+	scnprintf(tmp, sizeof(tmp), "Total FCP Cmpl %016llx Issue %016llx "
+		  "OutIO %016llx\n", totin, totout, totout - totin);
+	strlcat(buf, tmp, PAGE_SIZE);
+
+buffer_done:
+	len = strnlen(buf, PAGE_SIZE);
+
+	return len;
+}
+
+static ssize_t
 lpfc_bg_info_show(struct device *dev, struct device_attribute *attr,
 		  char *buf)
 {
@@ -2574,6 +2621,7 @@ lpfc_##attr##_store(struct device *dev, struct device_attribute *attr, \
 
 
 static DEVICE_ATTR(nvme_info, 0444, lpfc_nvme_info_show, NULL);
+static DEVICE_ATTR(scsi_stat, 0444, lpfc_scsi_stat_show, NULL);
 static DEVICE_ATTR(bg_info, S_IRUGO, lpfc_bg_info_show, NULL);
 static DEVICE_ATTR(bg_guard_err, S_IRUGO, lpfc_bg_guard_err_show, NULL);
 static DEVICE_ATTR(bg_apptag_err, S_IRUGO, lpfc_bg_apptag_err_show, NULL);
@@ -3724,29 +3772,13 @@ LPFC_ATTR_R(nvmet_mrq_post,
  * lpfc_enable_fc4_type: Defines what FC4 types are supported.
  * Supported Values:  1 - register just FCP
  *                    3 - register both FCP and NVME
- * Supported values are [1,3]. Default value is 1
+ * Supported values are [1,3]. Default value is 3
  */
-LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
+LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH,
 	    LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
 	    "Enable FC4 Protocol support - FCP / NVME");
 
 /*
- * lpfc_xri_split: Defines the division of XRI resources between SCSI and NVME
- * This parameter is only used if:
- *     lpfc_enable_fc4_type is 3 - register both FCP and NVME and
- *     port is not configured for NVMET.
- *
- * ELS/CT always get 10% of XRIs, up to a maximum of 250
- * The remaining XRIs get split up based on lpfc_xri_split per port:
- *
- * Supported Values are in percentages
- * the xri_split value is the percentage the SCSI port will get. The remaining
- * percentage will go to NVME.
- */
-LPFC_ATTR_R(xri_split, 50, 10, 90,
-	    "Percentage of FCP XRI resources versus NVME");
-
-/*
 # lpfc_log_verbose: Only turn this flag on if you are willing to risk being
 # deluged with LOTS of information.
 # You can set a bit mask to record specific types of verbose messages:
@@ -4903,6 +4935,8 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
 	struct Scsi_Host *shost = class_to_shost(dev);
 	struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
 	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_eq_intr_info *eqi;
+	uint32_t usdelay;
 	int val = 0, i;
 
 	/* fcp_imax is only valid for SLI4 */
@@ -4923,12 +4957,27 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
 	if (val && (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX))
 		return -EINVAL;
 
+	phba->cfg_auto_imax = (val) ? 0 : 1;
+	if (phba->cfg_fcp_imax && !val) {
+		queue_delayed_work(phba->wq, &phba->eq_delay_work,
+				   msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+
+		for_each_present_cpu(i) {
+			eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
+			eqi->icnt = 0;
+		}
+	}
+
 	phba->cfg_fcp_imax = (uint32_t)val;
-	phba->initial_imax = phba->cfg_fcp_imax;
 
-	for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
+	if (phba->cfg_fcp_imax)
+		usdelay = LPFC_SEC_TO_USEC / phba->cfg_fcp_imax;
+	else
+		usdelay = 0;
+
+	for (i = 0; i < phba->cfg_irq_chann; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
 		lpfc_modify_hba_eq_delay(phba, i, LPFC_MAX_EQ_DELAY_EQID_CNT,
-					 val);
+					 usdelay);
 
 	return strlen(buf);
 }
@@ -4982,15 +5031,119 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val)
 
 static DEVICE_ATTR_RW(lpfc_fcp_imax);
 
+/**
+ * lpfc_cq_max_proc_limit_store
+ *
+ * @dev: class device that is converted into a Scsi_host.
+ * @attr: device attribute, not used.
+ * @buf: string with the cq max processing limit of cqes
+ * @count: unused variable.
+ *
+ * Description:
+ * If val is in a valid range, then set value on each cq
+ *
+ * Returns:
+ * The length of the buf: if successful
+ * -ERANGE: if val is not in the valid range
+ * -EINVAL: if bad value format or intended mode is not supported.
+ **/
+static ssize_t
+lpfc_cq_max_proc_limit_store(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_queue *eq, *cq;
+	unsigned long val;
+	int i;
+
+	/* cq_max_proc_limit is only valid for SLI4 */
+	if (phba->sli_rev != LPFC_SLI_REV4)
+		return -EINVAL;
+
+	/* Sanity check on user data */
+	if (!isdigit(buf[0]))
+		return -EINVAL;
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	if (val < LPFC_CQ_MIN_PROC_LIMIT || val > LPFC_CQ_MAX_PROC_LIMIT)
+		return -ERANGE;
+
+	phba->cfg_cq_max_proc_limit = (uint32_t)val;
+
+	/* set the values on the cq's */
+	for (i = 0; i < phba->cfg_irq_chann; i++) {
+		eq = phba->sli4_hba.hdwq[i].hba_eq;
+		if (!eq)
+			continue;
+
+		list_for_each_entry(cq, &eq->child_list, list)
+			cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit,
+						 cq->entry_count);
+	}
+
+	return strlen(buf);
+}
+
 /*
- * lpfc_auto_imax: Controls Auto-interrupt coalescing values support.
- *       0       No auto_imax support
- *       1       auto imax on
- * Auto imax will change the value of fcp_imax on a per EQ basis, using
- * the EQ Delay Multiplier, depending on the activity for that EQ.
- * Value range [0,1]. Default value is 1.
+ * lpfc_cq_max_proc_limit: The maximum number CQE entries processed in an
+ *   itteration of CQ processing.
  */
-LPFC_ATTR_RW(auto_imax, 1, 0, 1, "Enable Auto imax");
+static int lpfc_cq_max_proc_limit = LPFC_CQ_DEF_MAX_PROC_LIMIT;
+module_param(lpfc_cq_max_proc_limit, int, 0644);
+MODULE_PARM_DESC(lpfc_cq_max_proc_limit,
+	    "Set the maximum number CQEs processed in an iteration of "
+	    "CQ processing");
+lpfc_param_show(cq_max_proc_limit)
+
+/*
+ * lpfc_cq_poll_threshold: Set the threshold of CQE completions in a
+ *   single handler call which should request a polled completion rather
+ *   than re-enabling interrupts.
+ */
+LPFC_ATTR_RW(cq_poll_threshold, LPFC_CQ_DEF_THRESHOLD_TO_POLL,
+	     LPFC_CQ_MIN_THRESHOLD_TO_POLL,
+	     LPFC_CQ_MAX_THRESHOLD_TO_POLL,
+	     "CQE Processing Threshold to enable Polling");
+
+/**
+ * lpfc_cq_max_proc_limit_init - Set the initial cq max_proc_limit
+ * @phba: lpfc_hba pointer.
+ * @val: entry limit
+ *
+ * Description:
+ * If val is in a valid range, then initialize the adapter's maximum
+ * value.
+ *
+ * Returns:
+ *  Always returns 0 for success, even if value not always set to
+ *  requested value. If value out of range or not supported, will fall
+ *  back to default.
+ **/
+static int
+lpfc_cq_max_proc_limit_init(struct lpfc_hba *phba, int val)
+{
+	phba->cfg_cq_max_proc_limit = LPFC_CQ_DEF_MAX_PROC_LIMIT;
+
+	if (phba->sli_rev != LPFC_SLI_REV4)
+		return 0;
+
+	if (val >= LPFC_CQ_MIN_PROC_LIMIT && val <= LPFC_CQ_MAX_PROC_LIMIT) {
+		phba->cfg_cq_max_proc_limit = val;
+		return 0;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"0371 "LPFC_DRIVER_NAME"_cq_max_proc_limit: "
+			"%d out of range, using default\n",
+			phba->cfg_cq_max_proc_limit);
+
+	return 0;
+}
+
+static DEVICE_ATTR_RW(lpfc_cq_max_proc_limit);
 
 /**
  * lpfc_state_show - Display current driver CPU affinity
@@ -5023,50 +5176,70 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr,
 	case 1:
 		len += snprintf(buf + len, PAGE_SIZE-len,
 				"fcp_cpu_map: HBA centric mapping (%d): "
-				"%d online CPUs\n",
-				phba->cfg_fcp_cpu_map,
-				phba->sli4_hba.num_online_cpu);
-		break;
-	case 2:
-		len += snprintf(buf + len, PAGE_SIZE-len,
-				"fcp_cpu_map: Driver centric mapping (%d): "
-				"%d online CPUs\n",
-				phba->cfg_fcp_cpu_map,
-				phba->sli4_hba.num_online_cpu);
+				"%d of %d CPUs online from %d possible CPUs\n",
+				phba->cfg_fcp_cpu_map, num_online_cpus(),
+				num_present_cpus(),
+				phba->sli4_hba.num_possible_cpu);
 		break;
 	}
 
-	while (phba->sli4_hba.curr_disp_cpu < phba->sli4_hba.num_present_cpu) {
+	while (phba->sli4_hba.curr_disp_cpu <
+	       phba->sli4_hba.num_possible_cpu) {
 		cpup = &phba->sli4_hba.cpu_map[phba->sli4_hba.curr_disp_cpu];
 
-		/* margin should fit in this and the truncated message */
-		if (cpup->irq == LPFC_VECTOR_MAP_EMPTY)
-			len += snprintf(buf + len, PAGE_SIZE-len,
-					"CPU %02d io_chan %02d "
-					"physid %d coreid %d\n",
+		if (!cpu_present(phba->sli4_hba.curr_disp_cpu))
+			len += snprintf(buf + len, PAGE_SIZE - len,
+					"CPU %02d not present\n",
+					phba->sli4_hba.curr_disp_cpu);
+		else if (cpup->irq == LPFC_VECTOR_MAP_EMPTY) {
+			if (cpup->hdwq == LPFC_VECTOR_MAP_EMPTY)
+				len += snprintf(
+					buf + len, PAGE_SIZE - len,
+					"CPU %02d hdwq None "
+					"physid %d coreid %d ht %d\n",
 					phba->sli4_hba.curr_disp_cpu,
-					cpup->channel_id, cpup->phys_id,
-					cpup->core_id);
-		else
-			len += snprintf(buf + len, PAGE_SIZE-len,
-					"CPU %02d io_chan %02d "
-					"physid %d coreid %d IRQ %d\n",
+					cpup->phys_id,
+					cpup->core_id, cpup->hyper);
+			else
+				len += snprintf(
+					buf + len, PAGE_SIZE - len,
+					"CPU %02d EQ %04d hdwq %04d "
+					"physid %d coreid %d ht %d\n",
+					phba->sli4_hba.curr_disp_cpu,
+					cpup->eq, cpup->hdwq, cpup->phys_id,
+					cpup->core_id, cpup->hyper);
+		} else {
+			if (cpup->hdwq == LPFC_VECTOR_MAP_EMPTY)
+				len += snprintf(
+					buf + len, PAGE_SIZE - len,
+					"CPU %02d hdwq None "
+					"physid %d coreid %d ht %d IRQ %d\n",
+					phba->sli4_hba.curr_disp_cpu,
+					cpup->phys_id,
+					cpup->core_id, cpup->hyper, cpup->irq);
+			else
+				len += snprintf(
+					buf + len, PAGE_SIZE - len,
+					"CPU %02d EQ %04d hdwq %04d "
+					"physid %d coreid %d ht %d IRQ %d\n",
 					phba->sli4_hba.curr_disp_cpu,
-					cpup->channel_id, cpup->phys_id,
-					cpup->core_id, cpup->irq);
+					cpup->eq, cpup->hdwq, cpup->phys_id,
+					cpup->core_id, cpup->hyper, cpup->irq);
+		}
 
 		phba->sli4_hba.curr_disp_cpu++;
 
 		/* display max number of CPUs keeping some margin */
 		if (phba->sli4_hba.curr_disp_cpu <
-				phba->sli4_hba.num_present_cpu &&
+				phba->sli4_hba.num_possible_cpu &&
 				(len >= (PAGE_SIZE - 64))) {
-			len += snprintf(buf + len, PAGE_SIZE-len, "more...\n");
+			len += snprintf(buf + len,
+					PAGE_SIZE - len, "more...\n");
 			break;
 		}
 	}
 
-	if (phba->sli4_hba.curr_disp_cpu == phba->sli4_hba.num_present_cpu)
+	if (phba->sli4_hba.curr_disp_cpu == phba->sli4_hba.num_possible_cpu)
 		phba->sli4_hba.curr_disp_cpu = 0;
 
 	return len;
@@ -5094,14 +5267,13 @@ lpfc_fcp_cpu_map_store(struct device *dev, struct device_attribute *attr,
 # lpfc_fcp_cpu_map: Defines how to map CPUs to IRQ vectors
 # for the HBA.
 #
-# Value range is [0 to 2]. Default value is LPFC_DRIVER_CPU_MAP (2).
+# Value range is [0 to 1]. Default value is LPFC_HBA_CPU_MAP (1).
 #	0 - Do not affinitze IRQ vectors
 #	1 - Affintize HBA vectors with respect to each HBA
 #	    (start with CPU0 for each HBA)
-#	2 - Affintize HBA vectors with respect to the entire driver
-#	    (round robin thru all CPUs across all HBAs)
+# This also defines how Hardware Queues are mapped to specific CPUs.
 */
-static int lpfc_fcp_cpu_map = LPFC_DRIVER_CPU_MAP;
+static int lpfc_fcp_cpu_map = LPFC_HBA_CPU_MAP;
 module_param(lpfc_fcp_cpu_map, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(lpfc_fcp_cpu_map,
 		 "Defines how to map CPUs to IRQ vectors per HBA");
@@ -5135,7 +5307,7 @@ lpfc_fcp_cpu_map_init(struct lpfc_hba *phba, int val)
 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 			"3326 lpfc_fcp_cpu_map: %d out of range, using "
 			"default\n", val);
-	phba->cfg_fcp_cpu_map = LPFC_DRIVER_CPU_MAP;
+	phba->cfg_fcp_cpu_map = LPFC_HBA_CPU_MAP;
 
 	return 0;
 }
@@ -5235,13 +5407,20 @@ static DEVICE_ATTR_RW(lpfc_max_scsicmpl_time);
 LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support");
 
 /*
+# lpfc_xri_rebalancing: enable or disable XRI rebalancing feature
+# range is [0,1]. Default value is 1.
+*/
+LPFC_ATTR_R(xri_rebalancing, 1, 0, 1, "Enable/Disable XRI rebalancing");
+
+/*
  * lpfc_io_sched: Determine scheduling algrithmn for issuing FCP cmds
  * range is [0,1]. Default value is 0.
- * For [0], FCP commands are issued to Work Queues ina round robin fashion.
+ * For [0], FCP commands are issued to Work Queues based on upper layer
+ * hardware queue index.
  * For [1], FCP commands are issued to a Work Queue associated with the
  *          current CPU.
  *
- * LPFC_FCP_SCHED_ROUND_ROBIN == 0
+ * LPFC_FCP_SCHED_BY_HDWQ == 0
  * LPFC_FCP_SCHED_BY_CPU == 1
  *
  * The driver dynamically sets this to 1 (BY_CPU) if it's able to set up cpu
@@ -5249,11 +5428,11 @@ LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support");
  * CPU. Otherwise, the default 0 (Round Robin) scheduling of FCP/NVME I/Os
  * through WQs will be used.
  */
-LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_ROUND_ROBIN,
-	     LPFC_FCP_SCHED_ROUND_ROBIN,
+LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_CPU,
+	     LPFC_FCP_SCHED_BY_HDWQ,
 	     LPFC_FCP_SCHED_BY_CPU,
 	     "Determine scheduling algorithm for "
-	     "issuing commands [0] - Round Robin, [1] - Current CPU");
+	     "issuing commands [0] - Hardware Queue, [1] - Current CPU");
 
 /*
  * lpfc_ns_query: Determine algrithmn for NameServer queries after RSCN
@@ -5415,41 +5594,39 @@ LPFC_ATTR_RW(nvme_embed_cmd, 1, 0, 2,
 	     "Embed NVME Command in WQE");
 
 /*
- * lpfc_fcp_io_channel: Set the number of FCP IO channels the driver
- * will advertise it supports to the SCSI layer. This also will map to
- * the number of WQs the driver will create.
+ * lpfc_hdw_queue: Set the number of Hardware Queues the driver
+ * will advertise it supports to the NVME and  SCSI layers. This also
+ * will map to the number of CQ/WQ pairs the driver will create.
  *
- *      0    = Configure the number of io channels to the number of active CPUs.
- *      1,32 = Manually specify how many io channels to use.
+ * The NVME Layer will try to create this many, plus 1 administrative
+ * hardware queue. The administrative queue will always map to WQ 0
+ * A hardware IO queue maps (qidx) to a specific driver CQ/WQ.
  *
- * Value range is [0,32]. Default value is 4.
+ *      0    = Configure the number of hdw queues to the number of active CPUs.
+ *      1,128 = Manually specify how many hdw queues to use.
+ *
+ * Value range is [0,128]. Default value is 0.
  */
-LPFC_ATTR_R(fcp_io_channel,
-	    LPFC_FCP_IO_CHAN_DEF,
-	    LPFC_HBA_IO_CHAN_MIN, LPFC_HBA_IO_CHAN_MAX,
-	    "Set the number of FCP I/O channels");
+LPFC_ATTR_R(hdw_queue,
+	    LPFC_HBA_HDWQ_DEF,
+	    LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX,
+	    "Set the number of I/O Hardware Queues");
 
 /*
- * lpfc_nvme_io_channel: Set the number of IO hardware queues the driver
- * will advertise it supports to the NVME layer. This also will map to
- * the number of WQs the driver will create.
- *
- * This module parameter is valid when lpfc_enable_fc4_type is set
- * to support NVME.
- *
- * The NVME Layer will try to create this many, plus 1 administrative
- * hardware queue. The administrative queue will always map to WQ 0
- * A hardware IO queue maps (qidx) to a specific driver WQ.
+ * lpfc_irq_chann: Set the number of IRQ vectors that are available
+ * for Hardware Queues to utilize.  This also will map to the number
+ * of EQ / MSI-X vectors the driver will create. This should never be
+ * more than the number of Hardware Queues
  *
- *      0    = Configure the number of io channels to the number of active CPUs.
- *      1,32 = Manually specify how many io channels to use.
+ *      0     = Configure number of IRQ Channels to the number of active CPUs.
+ *      1,128 = Manually specify how many IRQ Channels to use.
  *
- * Value range is [0,32]. Default value is 0.
+ * Value range is [0,128]. Default value is 0.
  */
-LPFC_ATTR_R(nvme_io_channel,
-	    LPFC_NVME_IO_CHAN_DEF,
-	    LPFC_HBA_IO_CHAN_MIN, LPFC_HBA_IO_CHAN_MAX,
-	    "Set the number of NVME I/O channels");
+LPFC_ATTR_R(irq_chann,
+	    LPFC_HBA_HDWQ_DEF,
+	    LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX,
+	    "Set the number of I/O IRQ Channels");
 
 /*
 # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware.
@@ -5492,16 +5669,6 @@ LPFC_ATTR_RW(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature.");
 LPFC_ATTR_R(enable_bg, 0, 0, 1, "Enable BlockGuard Support");
 
 /*
-# lpfc_fcp_look_ahead: Look ahead for completions in FCP start routine
-#       0  = disabled (default)
-#       1  = enabled
-# Value range is [0,1]. Default value is 0.
-#
-# This feature in under investigation and may be supported in the future.
-*/
-unsigned int lpfc_fcp_look_ahead = LPFC_LOOK_AHEAD_OFF;
-
-/*
 # lpfc_prot_mask: i
 #	- Bit mask of host protection capabilities used to register with the
 #	  SCSI mid-layer
@@ -5677,6 +5844,7 @@ LPFC_ATTR_RW(enable_dpp, 1, 0, 1, "Enable Direct Packet Push");
 
 struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_nvme_info,
+	&dev_attr_scsi_stat,
 	&dev_attr_bg_info,
 	&dev_attr_bg_guard_err,
 	&dev_attr_bg_apptag_err,
@@ -5704,11 +5872,11 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_nodev_tmo,
 	&dev_attr_lpfc_devloss_tmo,
 	&dev_attr_lpfc_enable_fc4_type,
-	&dev_attr_lpfc_xri_split,
 	&dev_attr_lpfc_fcp_class,
 	&dev_attr_lpfc_use_adisc,
 	&dev_attr_lpfc_first_burst_size,
 	&dev_attr_lpfc_ack0,
+	&dev_attr_lpfc_xri_rebalancing,
 	&dev_attr_lpfc_topology,
 	&dev_attr_lpfc_scan_down,
 	&dev_attr_lpfc_link_speed,
@@ -5742,12 +5910,13 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_use_msi,
 	&dev_attr_lpfc_nvme_oas,
 	&dev_attr_lpfc_nvme_embed_cmd,
-	&dev_attr_lpfc_auto_imax,
 	&dev_attr_lpfc_fcp_imax,
+	&dev_attr_lpfc_cq_poll_threshold,
+	&dev_attr_lpfc_cq_max_proc_limit,
 	&dev_attr_lpfc_fcp_cpu_map,
-	&dev_attr_lpfc_fcp_io_channel,
+	&dev_attr_lpfc_hdw_queue,
+	&dev_attr_lpfc_irq_chann,
 	&dev_attr_lpfc_suppress_rsp,
-	&dev_attr_lpfc_nvme_io_channel,
 	&dev_attr_lpfc_nvmet_mrq,
 	&dev_attr_lpfc_nvmet_mrq_post,
 	&dev_attr_lpfc_nvme_enable_fb,
@@ -6775,6 +6944,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_multi_ring_rctl_init(phba, lpfc_multi_ring_rctl);
 	lpfc_multi_ring_type_init(phba, lpfc_multi_ring_type);
 	lpfc_ack0_init(phba, lpfc_ack0);
+	lpfc_xri_rebalancing_init(phba, lpfc_xri_rebalancing);
 	lpfc_topology_init(phba, lpfc_topology);
 	lpfc_link_speed_init(phba, lpfc_link_speed);
 	lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
@@ -6787,8 +6957,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_use_msi_init(phba, lpfc_use_msi);
 	lpfc_nvme_oas_init(phba, lpfc_nvme_oas);
 	lpfc_nvme_embed_cmd_init(phba, lpfc_nvme_embed_cmd);
-	lpfc_auto_imax_init(phba, lpfc_auto_imax);
 	lpfc_fcp_imax_init(phba, lpfc_fcp_imax);
+	lpfc_cq_poll_threshold_init(phba, lpfc_cq_poll_threshold);
+	lpfc_cq_max_proc_limit_init(phba, lpfc_cq_max_proc_limit);
 	lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map);
 	lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset);
 	lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat);
@@ -6824,8 +6995,8 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	/* Initialize first burst. Target vs Initiator are different. */
 	lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb);
 	lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size);
-	lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel);
-	lpfc_nvme_io_channel_init(phba, lpfc_nvme_io_channel);
+	lpfc_hdw_queue_init(phba, lpfc_hdw_queue);
+	lpfc_irq_chann_init(phba, lpfc_irq_chann);
 	lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr);
 	lpfc_enable_dpp_init(phba, lpfc_enable_dpp);
 
@@ -6834,38 +7005,27 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 		phba->nvmet_support = 0;
 		phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP;
 		phba->cfg_enable_bbcr = 0;
+		phba->cfg_xri_rebalancing = 0;
 	} else {
 		/* We MUST have FCP support */
 		if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
 			phba->cfg_enable_fc4_type |= LPFC_ENABLE_FCP;
 	}
 
-	if (phba->cfg_auto_imax && !phba->cfg_fcp_imax)
-		phba->cfg_auto_imax = 0;
-	phba->initial_imax = phba->cfg_fcp_imax;
+	phba->cfg_auto_imax = (phba->cfg_fcp_imax) ? 0 : 1;
 
 	phba->cfg_enable_pbde = 0;
 
 	/* A value of 0 means use the number of CPUs found in the system */
-	if (phba->cfg_fcp_io_channel == 0)
-		phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu;
-	if (phba->cfg_nvme_io_channel == 0)
-		phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu;
-
-	if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)
-		phba->cfg_fcp_io_channel = 0;
-
-	if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)
-		phba->cfg_nvme_io_channel = 0;
-
-	if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel)
-		phba->io_channel_irqs = phba->cfg_fcp_io_channel;
-	else
-		phba->io_channel_irqs = phba->cfg_nvme_io_channel;
+	if (phba->cfg_hdw_queue == 0)
+		phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu;
+	if (phba->cfg_irq_chann == 0)
+		phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu;
+	if (phba->cfg_irq_chann > phba->cfg_hdw_queue)
+		phba->cfg_irq_chann = phba->cfg_hdw_queue;
 
 	phba->cfg_soft_wwnn = 0L;
 	phba->cfg_soft_wwpn = 0L;
-	lpfc_xri_split_init(phba, lpfc_xri_split);
 	lpfc_sg_seg_cnt_init(phba, lpfc_sg_seg_cnt);
 	lpfc_hba_queue_depth_init(phba, lpfc_hba_queue_depth);
 	lpfc_hba_log_verbose_init(phba, lpfc_log_verbose);
@@ -6903,16 +7063,16 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 void
 lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
 {
-	if (phba->cfg_nvme_io_channel > phba->sli4_hba.num_present_cpu)
-		phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu;
-
-	if (phba->cfg_fcp_io_channel > phba->sli4_hba.num_present_cpu)
-		phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu;
+	if (phba->cfg_hdw_queue > phba->sli4_hba.num_present_cpu)
+		phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu;
+	if (phba->cfg_irq_chann > phba->sli4_hba.num_present_cpu)
+		phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu;
+	if (phba->cfg_irq_chann > phba->cfg_hdw_queue)
+		phba->cfg_irq_chann = phba->cfg_hdw_queue;
 
 	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME &&
 	    phba->nvmet_support) {
 		phba->cfg_enable_fc4_type &= ~LPFC_ENABLE_FCP;
-		phba->cfg_fcp_io_channel = 0;
 
 		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
 				"6013 %s x%x fb_size x%x, fb_max x%x\n",
@@ -6929,11 +7089,11 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
 		}
 
 		if (!phba->cfg_nvmet_mrq)
-			phba->cfg_nvmet_mrq = phba->cfg_nvme_io_channel;
+			phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
 
 		/* Adjust lpfc_nvmet_mrq to avoid running out of WQE slots */
-		if (phba->cfg_nvmet_mrq > phba->cfg_nvme_io_channel) {
-			phba->cfg_nvmet_mrq = phba->cfg_nvme_io_channel;
+		if (phba->cfg_nvmet_mrq > phba->cfg_irq_chann) {
+			phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
 			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
 					"6018 Adjust lpfc_nvmet_mrq to %d\n",
 					phba->cfg_nvmet_mrq);
@@ -6947,11 +7107,6 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
 		phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_OFF;
 		phba->cfg_nvmet_fb_size = 0;
 	}
-
-	if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel)
-		phba->io_channel_irqs = phba->cfg_fcp_io_channel;
-	else
-		phba->io_channel_irqs = phba->cfg_nvme_io_channel;
 }
 
 /**
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 2dc564e59430..f2494d3b365c 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2947,7 +2947,7 @@ static int lpfcdiag_loop_post_rxbufs(struct lpfc_hba *phba, uint16_t rxxri,
 			cmd->un.cont64[i].addrLow = putPaddrLow(mp[i]->phys);
 			cmd->un.cont64[i].tus.f.bdeSize =
 				((struct lpfc_dmabufext *)mp[i])->size;
-					cmd->ulpBdeCount = ++i;
+			cmd->ulpBdeCount = ++i;
 
 			if ((--num_bde > 0) && (i < 2))
 				continue;
@@ -4682,7 +4682,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job,
 	 * Don't allow mailbox commands to be sent when blocked or when in
 	 * the middle of discovery
 	 */
-	 if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) {
+	if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) {
 		rc = -EAGAIN;
 		goto job_done;
 	}
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 39f3fa988732..e0b14d791b8c 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -199,11 +199,6 @@ void lpfc_reset_hba(struct lpfc_hba *);
 int lpfc_emptyq_wait(struct lpfc_hba *phba, struct list_head *hd,
 			spinlock_t *slock);
 
-int lpfc_fof_queue_create(struct lpfc_hba *);
-int lpfc_fof_queue_setup(struct lpfc_hba *);
-int lpfc_fof_queue_destroy(struct lpfc_hba *);
-irqreturn_t lpfc_sli4_fof_intr_handler(int, void *);
-
 int lpfc_sli_setup(struct lpfc_hba *);
 int lpfc_sli4_setup(struct lpfc_hba *phba);
 void lpfc_sli_queue_init(struct lpfc_hba *phba);
@@ -320,8 +315,8 @@ void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *);
 int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t,
 			struct lpfc_iocbq *, uint32_t);
-int lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t rnum,
-			struct lpfc_iocbq *iocbq);
+int lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
+			struct lpfc_iocbq *pwqe);
 struct lpfc_sglq *__lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xri);
 struct lpfc_sglq *__lpfc_sli_get_nvmet_sglq(struct lpfc_hba *phba,
 					    struct lpfc_iocbq *piocbq);
@@ -445,7 +440,6 @@ extern spinlock_t _dump_buf_lock;
 extern int _dump_buf_done;
 extern spinlock_t pgcnt_lock;
 extern unsigned int pgcnt;
-extern unsigned int lpfc_fcp_look_ahead;
 
 /* Interface exported by fabric iocb scheduler */
 void lpfc_fabric_abort_nport(struct lpfc_nodelist *);
@@ -520,8 +514,13 @@ int lpfc_sli4_read_config(struct lpfc_hba *);
 void lpfc_sli4_node_prep(struct lpfc_hba *);
 int lpfc_sli4_els_sgl_update(struct lpfc_hba *phba);
 int lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba);
-int lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba);
-int lpfc_sli4_nvme_sgl_update(struct lpfc_hba *phba);
+int lpfc_io_buf_flush(struct lpfc_hba *phba, struct list_head *sglist);
+int lpfc_io_buf_replenish(struct lpfc_hba *phba, struct list_head *cbuf);
+int lpfc_sli4_io_sgl_update(struct lpfc_hba *phba);
+int lpfc_sli4_post_io_sgl_list(struct lpfc_hba *phba,
+		struct list_head *blist, int xricnt);
+int lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc);
+void lpfc_io_free(struct lpfc_hba *phba);
 void lpfc_free_sgl_list(struct lpfc_hba *, struct list_head *);
 uint32_t lpfc_sli_port_speed_get(struct lpfc_hba *);
 int lpfc_sli4_request_firmware_update(struct lpfc_hba *, uint8_t);
@@ -574,6 +573,21 @@ void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba);
 void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
 				struct lpfc_iocbq *cmdiocb,
 				struct lpfc_wcqe_complete *abts_cmpl);
+void lpfc_create_multixri_pools(struct lpfc_hba *phba);
+void lpfc_create_destroy_pools(struct lpfc_hba *phba);
+void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid);
+void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 cnt);
+void lpfc_adjust_high_watermark(struct lpfc_hba *phba, u32 hwqid);
+void lpfc_keep_pvt_pool_above_lowwm(struct lpfc_hba *phba, u32 hwqid);
+void lpfc_adjust_pvt_pool_count(struct lpfc_hba *phba, u32 hwqid);
+#ifdef LPFC_MXP_STAT
+void lpfc_snapshot_mxp(struct lpfc_hba *, u32);
+#endif
+struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
+				struct lpfc_nodelist *ndlp, u32 hwqid,
+				int);
+void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd,
+			 struct lpfc_sli4_hdw_queue *qp);
 void lpfc_nvme_cmd_template(void);
 void lpfc_nvmet_cmd_template(void);
 extern int lpfc_enable_nvmet_cnt;
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 552da8bf43e4..7290573110fe 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -1656,16 +1656,16 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
 		CtReq->un.rft.PortId = cpu_to_be32(vport->fc_myDID);
 
 		/* Register FC4 FCP type if enabled.  */
-		if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-		    (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP))
+		if (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH ||
+		    vport->cfg_enable_fc4_type == LPFC_ENABLE_FCP)
 			CtReq->un.rft.fcpReg = 1;
 
 		/* Register NVME type if enabled.  Defined LE and swapped.
 		 * rsvd[0] is used as word1 because of the hard-coded
 		 * word0 usage in the ct_request data structure.
 		 */
-		if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-		    (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME))
+		if (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH ||
+		    vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)
 			CtReq->un.rft.rsvd[0] =
 				cpu_to_be32(LPFC_FC4_TYPE_BITMASK);
 
@@ -1732,8 +1732,8 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
 		 * caller can specify NVME (type x28) as well.  But only
 		 * these that FC4 type is supported.
 		 */
-		if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-		     (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) &&
+		if (((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+		     (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) &&
 		    (context == FC_TYPE_NVME)) {
 			if ((vport == phba->pport) && phba->nvmet_support) {
 				CtReq->un.rff.fbits = (FC4_FEATURE_TARGET |
@@ -1744,8 +1744,8 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
 			}
 			CtReq->un.rff.type_code = context;
 
-		} else if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-			    (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) &&
+		} else if (((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+			    (vport->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) &&
 			   (context == FC_TYPE_FCP))
 			CtReq->un.rff.type_code = context;
 
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index a58f0b3f03a9..1215eaa530db 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2007-2015 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -378,6 +378,271 @@ skipit:
 	return len;
 }
 
+static int lpfc_debugfs_last_xripool;
+
+/**
+ * lpfc_debugfs_common_xri_data - Dump Hardware Queue info to a buffer
+ * @phba: The HBA to gather host buffer info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine dumps the Hardware Queue info from the @phba to @buf up to
+ * @size number of bytes. A header that describes the current hdwq state will be
+ * dumped to @buf first and then info on each hdwq entry will be dumped to @buf
+ * until @size bytes have been dumped or all the hdwq info has been dumped.
+ *
+ * Notes:
+ * This routine will rotate through each configured Hardware Queue each
+ * time called.
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_commonxripools_data(struct lpfc_hba *phba, char *buf, int size)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	int len = 0;
+	int i, out;
+	unsigned long iflag;
+
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		if (len > (LPFC_DUMP_MULTIXRIPOOL_SIZE - 80))
+			break;
+		qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_xripool];
+
+		len +=  snprintf(buf + len, size - len, "HdwQ %d Info ", i);
+		spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
+		spin_lock(&qp->abts_nvme_buf_list_lock);
+		spin_lock(&qp->io_buf_list_get_lock);
+		spin_lock(&qp->io_buf_list_put_lock);
+		out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs +
+			qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs);
+		len +=  snprintf(buf + len, size - len,
+				 "tot:%d get:%d put:%d mt:%d "
+				 "ABTS scsi:%d nvme:%d Out:%d\n",
+			qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs,
+			qp->empty_io_bufs, qp->abts_scsi_io_bufs,
+			qp->abts_nvme_io_bufs, out);
+		spin_unlock(&qp->io_buf_list_put_lock);
+		spin_unlock(&qp->io_buf_list_get_lock);
+		spin_unlock(&qp->abts_nvme_buf_list_lock);
+		spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+
+		lpfc_debugfs_last_xripool++;
+		if (lpfc_debugfs_last_xripool >= phba->cfg_hdw_queue)
+			lpfc_debugfs_last_xripool = 0;
+	}
+
+	return len;
+}
+
+/**
+ * lpfc_debugfs_multixripools_data - Display multi-XRI pools information
+ * @phba: The HBA to gather host buffer info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine displays current multi-XRI pools information including XRI
+ * count in public, private and txcmplq. It also displays current high and
+ * low watermark.
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size)
+{
+	u32 i;
+	u32 hwq_count;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_pbl_pool *pbl_pool;
+	u32 txcmplq_cnt;
+	char tmp[LPFC_DEBUG_OUT_LINE_SZ] = {0};
+
+	if (phba->sli_rev != LPFC_SLI_REV4)
+		return 0;
+
+	if (!phba->sli4_hba.hdwq)
+		return 0;
+
+	if (!phba->cfg_xri_rebalancing) {
+		i = lpfc_debugfs_commonxripools_data(phba, buf, size);
+		return i;
+	}
+
+	/*
+	 * Pbl: Current number of free XRIs in public pool
+	 * Pvt: Current number of free XRIs in private pool
+	 * Busy: Current number of outstanding XRIs
+	 * HWM: Current high watermark
+	 * pvt_empty: Incremented by 1 when IO submission fails (no xri)
+	 * pbl_empty: Incremented by 1 when all pbl_pool are empty during
+	 *            IO submission
+	 */
+	scnprintf(tmp, sizeof(tmp),
+		  "HWQ:  Pbl  Pvt Busy  HWM |  pvt_empty  pbl_empty ");
+	if (strlcat(buf, tmp, size) >= size)
+		return strnlen(buf, size);
+
+#ifdef LPFC_MXP_STAT
+	/*
+	 * MAXH: Max high watermark seen so far
+	 * above_lmt: Incremented by 1 if xri_owned > xri_limit during
+	 *            IO submission
+	 * below_lmt: Incremented by 1 if xri_owned <= xri_limit  during
+	 *            IO submission
+	 * locPbl_hit: Incremented by 1 if successfully get a batch of XRI from
+	 *             local pbl_pool
+	 * othPbl_hit: Incremented by 1 if successfully get a batch of XRI from
+	 *             other pbl_pool
+	 */
+	scnprintf(tmp, sizeof(tmp),
+		  "MAXH  above_lmt  below_lmt locPbl_hit othPbl_hit");
+	if (strlcat(buf, tmp, size) >= size)
+		return strnlen(buf, size);
+
+	/*
+	 * sPbl: snapshot of Pbl 15 sec after stat gets cleared
+	 * sPvt: snapshot of Pvt 15 sec after stat gets cleared
+	 * sBusy: snapshot of Busy 15 sec after stat gets cleared
+	 */
+	scnprintf(tmp, sizeof(tmp),
+		  " | sPbl sPvt sBusy");
+	if (strlcat(buf, tmp, size) >= size)
+		return strnlen(buf, size);
+#endif
+
+	scnprintf(tmp, sizeof(tmp), "\n");
+	if (strlcat(buf, tmp, size) >= size)
+		return strnlen(buf, size);
+
+	hwq_count = phba->cfg_hdw_queue;
+	for (i = 0; i < hwq_count; i++) {
+		qp = &phba->sli4_hba.hdwq[i];
+		multixri_pool = qp->p_multixri_pool;
+		if (!multixri_pool)
+			continue;
+		pbl_pool = &multixri_pool->pbl_pool;
+		pvt_pool = &multixri_pool->pvt_pool;
+		txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+		if (qp->nvme_wq)
+			txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+
+		scnprintf(tmp, sizeof(tmp),
+			  "%03d: %4d %4d %4d %4d | %10d %10d ",
+			  i, pbl_pool->count, pvt_pool->count,
+			  txcmplq_cnt, pvt_pool->high_watermark,
+			  qp->empty_io_bufs, multixri_pool->pbl_empty_count);
+		if (strlcat(buf, tmp, size) >= size)
+			break;
+
+#ifdef LPFC_MXP_STAT
+		scnprintf(tmp, sizeof(tmp),
+			  "%4d %10d %10d %10d %10d",
+			  multixri_pool->stat_max_hwm,
+			  multixri_pool->above_limit_count,
+			  multixri_pool->below_limit_count,
+			  multixri_pool->local_pbl_hit_count,
+			  multixri_pool->other_pbl_hit_count);
+		if (strlcat(buf, tmp, size) >= size)
+			break;
+
+		scnprintf(tmp, sizeof(tmp),
+			  " | %4d %4d %5d",
+			  multixri_pool->stat_pbl_count,
+			  multixri_pool->stat_pvt_count,
+			  multixri_pool->stat_busy_count);
+		if (strlcat(buf, tmp, size) >= size)
+			break;
+#endif
+
+		scnprintf(tmp, sizeof(tmp), "\n");
+		if (strlcat(buf, tmp, size) >= size)
+			break;
+	}
+	return strnlen(buf, size);
+}
+
+
+#ifdef LPFC_HDWQ_LOCK_STAT
+static int lpfc_debugfs_last_lock;
+
+/**
+ * lpfc_debugfs_lockstat_data - Dump Hardware Queue info to a buffer
+ * @phba: The HBA to gather host buffer info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine dumps the Hardware Queue info from the @phba to @buf up to
+ * @size number of bytes. A header that describes the current hdwq state will be
+ * dumped to @buf first and then info on each hdwq entry will be dumped to @buf
+ * until @size bytes have been dumped or all the hdwq info has been dumped.
+ *
+ * Notes:
+ * This routine will rotate through each configured Hardware Queue each
+ * time called.
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_lockstat_data(struct lpfc_hba *phba, char *buf, int size)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	int len = 0;
+	int i;
+
+	if (phba->sli_rev != LPFC_SLI_REV4)
+		return 0;
+
+	if (!phba->sli4_hba.hdwq)
+		return 0;
+
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		if (len > (LPFC_HDWQINFO_SIZE - 100))
+			break;
+		qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_lock];
+
+		len +=  snprintf(buf + len, size - len, "HdwQ %03d Lock ", i);
+		if (phba->cfg_xri_rebalancing) {
+			len +=  snprintf(buf + len, size - len,
+					 "get_pvt:%d mv_pvt:%d "
+					 "mv2pub:%d mv2pvt:%d "
+					 "put_pvt:%d put_pub:%d wq:%d\n",
+					 qp->lock_conflict.alloc_pvt_pool,
+					 qp->lock_conflict.mv_from_pvt_pool,
+					 qp->lock_conflict.mv_to_pub_pool,
+					 qp->lock_conflict.mv_to_pvt_pool,
+					 qp->lock_conflict.free_pvt_pool,
+					 qp->lock_conflict.free_pub_pool,
+					 qp->lock_conflict.wq_access);
+		} else {
+			len +=  snprintf(buf + len, size - len,
+					 "get:%d put:%d free:%d wq:%d\n",
+					 qp->lock_conflict.alloc_xri_get,
+					 qp->lock_conflict.alloc_xri_put,
+					 qp->lock_conflict.free_xri,
+					 qp->lock_conflict.wq_access);
+		}
+
+		lpfc_debugfs_last_lock++;
+		if (lpfc_debugfs_last_lock >= phba->cfg_hdw_queue)
+			lpfc_debugfs_last_lock = 0;
+	}
+
+	return len;
+}
+#endif
+
 static int lpfc_debugfs_last_hba_slim_off;
 
 /**
@@ -773,11 +1038,11 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
 	struct nvme_fc_local_port *localport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	struct lpfc_nvme_lport *lport;
 	uint64_t data1, data2, data3;
 	uint64_t tot, totin, totout;
-	int cnt, i, maxch;
+	int cnt, i;
 	int len = 0;
 
 	if (phba->nvmet_support) {
@@ -863,17 +1128,17 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 		len +=  snprintf(buf + len, size - len, "\n");
 
 		cnt = 0;
-		spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		list_for_each_entry_safe(ctxp, next_ctxp,
 				&phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				list) {
 			cnt++;
 		}
-		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		if (cnt) {
 			len += snprintf(buf + len, size - len,
 					"ABORT: %d ctx entries\n", cnt);
-			spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+			spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 			list_for_each_entry_safe(ctxp, next_ctxp,
 				    &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				    list) {
@@ -885,7 +1150,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 						ctxp->oxid, ctxp->state,
 						ctxp->flag);
 			}
-			spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+			spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		}
 
 		/* Calculate outstanding IOs */
@@ -901,7 +1166,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				phba->sli4_hba.nvmet_io_wait_total,
 				tot);
 	} else {
-		if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
+		if (!(vport->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 			return len;
 
 		localport = vport->localport;
@@ -912,26 +1177,22 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 			return len;
 
 		len += snprintf(buf + len, size - len,
-				"\nNVME Lport Statistics\n");
+				"\nNVME HDWQ Statistics\n");
 
 		len += snprintf(buf + len, size - len,
 				"LS: Xmt %016x Cmpl %016x\n",
 				atomic_read(&lport->fc4NvmeLsRequests),
 				atomic_read(&lport->fc4NvmeLsCmpls));
 
-		if (phba->cfg_nvme_io_channel < 32)
-			maxch = phba->cfg_nvme_io_channel;
-		else
-			maxch = 32;
 		totin = 0;
 		totout = 0;
-		for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-			cstat = &lport->cstat[i];
-			tot = atomic_read(&cstat->fc4NvmeIoCmpls);
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			cstat = &phba->sli4_hba.hdwq[i].nvme_cstat;
+			tot = cstat->io_cmpls;
 			totin += tot;
-			data1 = atomic_read(&cstat->fc4NvmeInputRequests);
-			data2 = atomic_read(&cstat->fc4NvmeOutputRequests);
-			data3 = atomic_read(&cstat->fc4NvmeControlRequests);
+			data1 = cstat->input_requests;
+			data2 = cstat->output_requests;
+			data3 = cstat->control_requests;
 			totout += (data1 + data2 + data3);
 
 			/* Limit to 32, debugfs display buffer limitation */
@@ -939,7 +1200,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				continue;
 
 			len += snprintf(buf + len, PAGE_SIZE - len,
-					"FCP (%d): Rd %016llx Wr %016llx "
+					"HDWQ (%d): Rd %016llx Wr %016llx "
 					"IO %016llx ",
 					i, data1, data2, data3);
 			len += snprintf(buf + len, PAGE_SIZE - len,
@@ -979,6 +1240,66 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 	return len;
 }
 
+/**
+ * lpfc_debugfs_scsistat_data - Dump target node list to a buffer
+ * @vport: The vport to gather target node info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine dumps the SCSI statistics associated with @vport
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_scsistat_data(struct lpfc_vport *vport, char *buf, int size)
+{
+	int len;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_fc4_ctrl_stat *cstat;
+	u64 data1, data2, data3;
+	u64 tot, totin, totout;
+	int i;
+	char tmp[LPFC_MAX_SCSI_INFO_TMP_LEN] = {0};
+
+	if (!(vport->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ||
+	    (phba->sli_rev != LPFC_SLI_REV4))
+		return 0;
+
+	scnprintf(buf, size, "SCSI HDWQ Statistics\n");
+
+	totin = 0;
+	totout = 0;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		cstat = &phba->sli4_hba.hdwq[i].scsi_cstat;
+		tot = cstat->io_cmpls;
+		totin += tot;
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
+		totout += (data1 + data2 + data3);
+
+		scnprintf(tmp, sizeof(tmp), "HDWQ (%d): Rd %016llx Wr %016llx "
+			  "IO %016llx ", i, data1, data2, data3);
+		if (strlcat(buf, tmp, size) >= size)
+			goto buffer_done;
+
+		scnprintf(tmp, sizeof(tmp), "Cmpl %016llx OutIO %016llx\n",
+			  tot, ((data1 + data2 + data3) - tot));
+		if (strlcat(buf, tmp, size) >= size)
+			goto buffer_done;
+	}
+	scnprintf(tmp, sizeof(tmp), "Total FCP Cmpl %016llx Issue %016llx "
+		  "OutIO %016llx\n", totin, totout, totout - totin);
+	strlcat(buf, tmp, size);
+
+buffer_done:
+	len = strnlen(buf, size);
+
+	return len;
+}
 
 /**
  * lpfc_debugfs_nvmektime_data - Dump target node list to a buffer
@@ -1299,62 +1620,73 @@ static int
 lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size)
 {
 	struct lpfc_hba   *phba = vport->phba;
-	int i;
+	struct lpfc_sli4_hdw_queue *qp;
+	int i, j, max_cnt;
 	int len = 0;
-	uint32_t tot_xmt = 0;
-	uint32_t tot_rcv = 0;
-	uint32_t tot_cmpl = 0;
-	uint32_t tot_ccmpl = 0;
+	uint32_t tot_xmt;
+	uint32_t tot_rcv;
+	uint32_t tot_cmpl;
 
-	if (phba->nvmet_support == 0) {
-		/* NVME Initiator */
-		len += snprintf(buf + len, PAGE_SIZE - len,
-				"CPUcheck %s\n",
-				(phba->cpucheck_on & LPFC_CHECK_NVME_IO ?
-					"Enabled" : "Disabled"));
-		for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
-			if (i >= LPFC_CHECK_CPU_CNT)
-				break;
-			len += snprintf(buf + len, PAGE_SIZE - len,
-					"%02d: xmit x%08x cmpl x%08x\n",
-					i, phba->cpucheck_xmt_io[i],
-					phba->cpucheck_cmpl_io[i]);
-			tot_xmt += phba->cpucheck_xmt_io[i];
-			tot_cmpl += phba->cpucheck_cmpl_io[i];
-		}
+	len += snprintf(buf + len, PAGE_SIZE - len,
+			"CPUcheck %s ",
+			(phba->cpucheck_on & LPFC_CHECK_NVME_IO ?
+				"Enabled" : "Disabled"));
+	if (phba->nvmet_support) {
 		len += snprintf(buf + len, PAGE_SIZE - len,
-				"tot:xmit x%08x cmpl x%08x\n",
-				tot_xmt, tot_cmpl);
-		return len;
+				"%s\n",
+				(phba->cpucheck_on & LPFC_CHECK_NVMET_RCV ?
+					"Rcv Enabled\n" : "Rcv Disabled\n"));
+	} else {
+		len += snprintf(buf + len, PAGE_SIZE - len, "\n");
 	}
+	max_cnt = size - LPFC_DEBUG_OUT_LINE_SZ;
+
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		qp = &phba->sli4_hba.hdwq[i];
+
+		tot_rcv = 0;
+		tot_xmt = 0;
+		tot_cmpl = 0;
+		for (j = 0; j < LPFC_CHECK_CPU_CNT; j++) {
+			tot_xmt += qp->cpucheck_xmt_io[j];
+			tot_cmpl += qp->cpucheck_cmpl_io[j];
+			if (phba->nvmet_support)
+				tot_rcv += qp->cpucheck_rcv_io[j];
+		}
+
+		/* Only display Hardware Qs with something */
+		if (!tot_xmt && !tot_cmpl && !tot_rcv)
+			continue;
 
-	/* NVME Target */
-	len += snprintf(buf + len, PAGE_SIZE - len,
-			"CPUcheck %s ",
-			(phba->cpucheck_on & LPFC_CHECK_NVMET_IO ?
-				"IO Enabled - " : "IO Disabled - "));
-	len += snprintf(buf + len, PAGE_SIZE - len,
-			"%s\n",
-			(phba->cpucheck_on & LPFC_CHECK_NVMET_RCV ?
-				"Rcv Enabled\n" : "Rcv Disabled\n"));
-	for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
-		if (i >= LPFC_CHECK_CPU_CNT)
-			break;
 		len += snprintf(buf + len, PAGE_SIZE - len,
-				"%02d: xmit x%08x ccmpl x%08x "
-				"cmpl x%08x rcv x%08x\n",
-				i, phba->cpucheck_xmt_io[i],
-				phba->cpucheck_ccmpl_io[i],
-				phba->cpucheck_cmpl_io[i],
-				phba->cpucheck_rcv_io[i]);
-		tot_xmt += phba->cpucheck_xmt_io[i];
-		tot_rcv += phba->cpucheck_rcv_io[i];
-		tot_cmpl += phba->cpucheck_cmpl_io[i];
-		tot_ccmpl += phba->cpucheck_ccmpl_io[i];
+				"HDWQ %03d: ", i);
+		for (j = 0; j < LPFC_CHECK_CPU_CNT; j++) {
+			/* Only display non-zero counters */
+			if (!qp->cpucheck_xmt_io[j] &&
+			    !qp->cpucheck_cmpl_io[j] &&
+			    !qp->cpucheck_rcv_io[j])
+				continue;
+			if (phba->nvmet_support) {
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						"CPU %03d: %x/%x/%x ", j,
+						qp->cpucheck_rcv_io[j],
+						qp->cpucheck_xmt_io[j],
+						qp->cpucheck_cmpl_io[j]);
+			} else {
+				len += snprintf(buf + len, PAGE_SIZE - len,
+						"CPU %03d: %x/%x ", j,
+						qp->cpucheck_xmt_io[j],
+						qp->cpucheck_cmpl_io[j]);
+			}
+		}
+		len += snprintf(buf + len, PAGE_SIZE - len,
+				"Total: %x\n", tot_xmt);
+		if (len >= max_cnt) {
+			len += snprintf(buf + len, PAGE_SIZE - len,
+					"Truncated ...\n");
+			return len;
+		}
 	}
-	len += snprintf(buf + len, PAGE_SIZE - len,
-			"tot:xmit x%08x ccmpl x%08x cmpl x%08x rcv x%08x\n",
-			tot_xmt, tot_ccmpl, tot_cmpl, tot_rcv);
 	return len;
 }
 
@@ -1501,7 +1833,7 @@ lpfc_debugfs_disc_trc_open(struct inode *inode, struct file *file)
 	int rc = -ENOMEM;
 
 	if (!lpfc_debugfs_max_disc_trc) {
-		 rc = -ENOSPC;
+		rc = -ENOSPC;
 		goto out;
 	}
 
@@ -1551,7 +1883,7 @@ lpfc_debugfs_slow_ring_trc_open(struct inode *inode, struct file *file)
 	int rc = -ENOMEM;
 
 	if (!lpfc_debugfs_max_slow_ring_trc) {
-		 rc = -ENOSPC;
+		rc = -ENOSPC;
 		goto out;
 	}
 
@@ -1620,6 +1952,135 @@ out:
 }
 
 /**
+ * lpfc_debugfs_multixripools_open - Open the multixripool debugfs buffer
+ * @inode: The inode pointer that contains a hba pointer.
+ * @file: The file pointer to attach the log output.
+ *
+ * Description:
+ * This routine is the entry point for the debugfs open file operation. It gets
+ * the hba from the i_private field in @inode, allocates the necessary buffer
+ * for the log, fills the buffer from the in-memory log for this hba, and then
+ * returns a pointer to that log in the private_data field in @file.
+ *
+ * Returns:
+ * This function returns zero if successful. On error it will return a negative
+ * error value.
+ **/
+static int
+lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file)
+{
+	struct lpfc_hba *phba = inode->i_private;
+	struct lpfc_debug *debug;
+	int rc = -ENOMEM;
+
+	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
+	if (!debug)
+		goto out;
+
+	/* Round to page boundary */
+	debug->buffer = kzalloc(LPFC_DUMP_MULTIXRIPOOL_SIZE, GFP_KERNEL);
+	if (!debug->buffer) {
+		kfree(debug);
+		goto out;
+	}
+
+	debug->len = lpfc_debugfs_multixripools_data(
+		phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE);
+
+	debug->i_private = inode->i_private;
+	file->private_data = debug;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+#ifdef LPFC_HDWQ_LOCK_STAT
+/**
+ * lpfc_debugfs_lockstat_open - Open the lockstat debugfs buffer
+ * @inode: The inode pointer that contains a vport pointer.
+ * @file: The file pointer to attach the log output.
+ *
+ * Description:
+ * This routine is the entry point for the debugfs open file operation. It gets
+ * the vport from the i_private field in @inode, allocates the necessary buffer
+ * for the log, fills the buffer from the in-memory log for this vport, and then
+ * returns a pointer to that log in the private_data field in @file.
+ *
+ * Returns:
+ * This function returns zero if successful. On error it will return a negative
+ * error value.
+ **/
+static int
+lpfc_debugfs_lockstat_open(struct inode *inode, struct file *file)
+{
+	struct lpfc_hba *phba = inode->i_private;
+	struct lpfc_debug *debug;
+	int rc = -ENOMEM;
+
+	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
+	if (!debug)
+		goto out;
+
+	/* Round to page boundary */
+	debug->buffer = kmalloc(LPFC_HDWQINFO_SIZE, GFP_KERNEL);
+	if (!debug->buffer) {
+		kfree(debug);
+		goto out;
+	}
+
+	debug->len = lpfc_debugfs_lockstat_data(phba, debug->buffer,
+		LPFC_HBQINFO_SIZE);
+	file->private_data = debug;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static ssize_t
+lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf,
+			    size_t nbytes, loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+	struct lpfc_sli4_hdw_queue *qp;
+	char mybuf[64];
+	char *pbuf;
+	int i;
+
+	/* Protect copy from user */
+	if (!access_ok(buf, nbytes))
+		return -EFAULT;
+
+	memset(mybuf, 0, sizeof(mybuf));
+
+	if (copy_from_user(mybuf, buf, nbytes))
+		return -EFAULT;
+	pbuf = &mybuf[0];
+
+	if ((strncmp(pbuf, "reset", strlen("reset")) == 0) ||
+	    (strncmp(pbuf, "zero", strlen("zero")) == 0)) {
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			qp = &phba->sli4_hba.hdwq[i];
+			qp->lock_conflict.alloc_xri_get = 0;
+			qp->lock_conflict.alloc_xri_put = 0;
+			qp->lock_conflict.free_xri = 0;
+			qp->lock_conflict.wq_access = 0;
+			qp->lock_conflict.alloc_pvt_pool = 0;
+			qp->lock_conflict.mv_from_pvt_pool = 0;
+			qp->lock_conflict.mv_to_pub_pool = 0;
+			qp->lock_conflict.mv_to_pvt_pool = 0;
+			qp->lock_conflict.free_pvt_pool = 0;
+			qp->lock_conflict.free_pub_pool = 0;
+			qp->lock_conflict.wq_access = 0;
+		}
+	}
+	return nbytes;
+}
+#endif
+
+/**
  * lpfc_debugfs_dumpHBASlim_open - Open the Dump HBA SLIM debugfs buffer
  * @inode: The inode pointer that contains a vport pointer.
  * @file: The file pointer to attach the log output.
@@ -2008,6 +2469,75 @@ lpfc_debugfs_dumpDataDif_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/**
+ * lpfc_debugfs_multixripools_write - Clear multi-XRI pools statistics
+ * @file: The file pointer to read from.
+ * @buf: The buffer to copy the user data from.
+ * @nbytes: The number of bytes to get.
+ * @ppos: The position in the file to start reading from.
+ *
+ * Description:
+ * This routine clears multi-XRI pools statistics when buf contains "clear".
+ *
+ * Return Value:
+ * It returns the @nbytges passing in from debugfs user space when successful.
+ * In case of error conditions, it returns proper error code back to the user
+ * space.
+ **/
+static ssize_t
+lpfc_debugfs_multixripools_write(struct file *file, const char __user *buf,
+				 size_t nbytes, loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+	char mybuf[64];
+	char *pbuf;
+	u32 i;
+	u32 hwq_count;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_multixri_pool *multixri_pool;
+
+	if (nbytes > 64)
+		nbytes = 64;
+
+	/* Protect copy from user */
+	if (!access_ok(buf, nbytes))
+		return -EFAULT;
+
+	memset(mybuf, 0, sizeof(mybuf));
+
+	if (copy_from_user(mybuf, buf, nbytes))
+		return -EFAULT;
+	pbuf = &mybuf[0];
+
+	if ((strncmp(pbuf, "clear", strlen("clear"))) == 0) {
+		hwq_count = phba->cfg_hdw_queue;
+		for (i = 0; i < hwq_count; i++) {
+			qp = &phba->sli4_hba.hdwq[i];
+			multixri_pool = qp->p_multixri_pool;
+			if (!multixri_pool)
+				continue;
+
+			qp->empty_io_bufs = 0;
+			multixri_pool->pbl_empty_count = 0;
+#ifdef LPFC_MXP_STAT
+			multixri_pool->above_limit_count = 0;
+			multixri_pool->below_limit_count = 0;
+			multixri_pool->stat_max_hwm = 0;
+			multixri_pool->local_pbl_hit_count = 0;
+			multixri_pool->other_pbl_hit_count = 0;
+
+			multixri_pool->stat_pbl_count = 0;
+			multixri_pool->stat_pvt_count = 0;
+			multixri_pool->stat_busy_count = 0;
+			multixri_pool->stat_snapshot_taken = 0;
+#endif
+		}
+		return strlen(pbuf);
+	}
+
+	return -EINVAL;
+}
 
 static int
 lpfc_debugfs_nvmestat_open(struct inode *inode, struct file *file)
@@ -2098,6 +2628,64 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 }
 
 static int
+lpfc_debugfs_scsistat_open(struct inode *inode, struct file *file)
+{
+	struct lpfc_vport *vport = inode->i_private;
+	struct lpfc_debug *debug;
+	int rc = -ENOMEM;
+
+	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
+	if (!debug)
+		goto out;
+
+	 /* Round to page boundary */
+	debug->buffer = kzalloc(LPFC_SCSISTAT_SIZE, GFP_KERNEL);
+	if (!debug->buffer) {
+		kfree(debug);
+		goto out;
+	}
+
+	debug->len = lpfc_debugfs_scsistat_data(vport, debug->buffer,
+		LPFC_SCSISTAT_SIZE);
+
+	debug->i_private = inode->i_private;
+	file->private_data = debug;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static ssize_t
+lpfc_debugfs_scsistat_write(struct file *file, const char __user *buf,
+			    size_t nbytes, loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private;
+	struct lpfc_hba *phba = vport->phba;
+	char mybuf[6] = {0};
+	int i;
+
+	/* Protect copy from user */
+	if (!access_ok(buf, nbytes))
+		return -EFAULT;
+
+	if (copy_from_user(mybuf, buf, (nbytes >= sizeof(mybuf)) ?
+				       (sizeof(mybuf) - 1) : nbytes))
+		return -EFAULT;
+
+	if ((strncmp(&mybuf[0], "reset", strlen("reset")) == 0) ||
+	    (strncmp(&mybuf[0], "zero", strlen("zero")) == 0)) {
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			memset(&phba->sli4_hba.hdwq[i].scsi_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[i].scsi_cstat));
+		}
+	}
+
+	return nbytes;
+}
+
+static int
 lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file)
 {
 	struct lpfc_vport *vport = inode->i_private;
@@ -2348,7 +2936,7 @@ lpfc_debugfs_cpucheck_open(struct inode *inode, struct file *file)
 	}
 
 	debug->len = lpfc_debugfs_cpucheck_data(vport, debug->buffer,
-		LPFC_NVMEKTIME_SIZE);
+		LPFC_CPUCHECK_SIZE);
 
 	debug->i_private = inode->i_private;
 	file->private_data = debug;
@@ -2365,9 +2953,10 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf,
 	struct lpfc_debug *debug = file->private_data;
 	struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private;
 	struct lpfc_hba   *phba = vport->phba;
+	struct lpfc_sli4_hdw_queue *qp;
 	char mybuf[64];
 	char *pbuf;
-	int i;
+	int i, j;
 
 	if (nbytes > 64)
 		nbytes = 64;
@@ -2382,8 +2971,18 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf,
 		if (phba->nvmet_support)
 			phba->cpucheck_on |= LPFC_CHECK_NVMET_IO;
 		else
+			phba->cpucheck_on |= (LPFC_CHECK_NVME_IO |
+				LPFC_CHECK_SCSI_IO);
+		return strlen(pbuf);
+	} else if ((strncmp(pbuf, "nvme_on", sizeof("nvme_on") - 1) == 0)) {
+		if (phba->nvmet_support)
+			phba->cpucheck_on |= LPFC_CHECK_NVMET_IO;
+		else
 			phba->cpucheck_on |= LPFC_CHECK_NVME_IO;
 		return strlen(pbuf);
+	} else if ((strncmp(pbuf, "scsi_on", sizeof("scsi_on") - 1) == 0)) {
+		phba->cpucheck_on |= LPFC_CHECK_SCSI_IO;
+		return strlen(pbuf);
 	} else if ((strncmp(pbuf, "rcv",
 		   sizeof("rcv") - 1) == 0)) {
 		if (phba->nvmet_support)
@@ -2397,13 +2996,14 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf,
 		return strlen(pbuf);
 	} else if ((strncmp(pbuf, "zero",
 		   sizeof("zero") - 1) == 0)) {
-		for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
-			if (i >= LPFC_CHECK_CPU_CNT)
-				break;
-			phba->cpucheck_rcv_io[i] = 0;
-			phba->cpucheck_xmt_io[i] = 0;
-			phba->cpucheck_cmpl_io[i] = 0;
-			phba->cpucheck_ccmpl_io[i] = 0;
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			qp = &phba->sli4_hba.hdwq[i];
+
+			for (j = 0; j < LPFC_CHECK_CPU_CNT; j++) {
+				qp->cpucheck_rcv_io[j] = 0;
+				qp->cpucheck_xmt_io[j] = 0;
+				qp->cpucheck_cmpl_io[j] = 0;
+			}
 		}
 		return strlen(pbuf);
 	}
@@ -3166,10 +3766,10 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype,
 			(unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+			"HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index, qp->entry_repost);
+			qp->hba_index, qp->notify_interval);
 	len +=  snprintf(pbuffer + len,
 			LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 	return len;
@@ -3182,21 +3782,23 @@ lpfc_idiag_wqs_for_cq(struct lpfc_hba *phba, char *wqtype, char *pbuffer,
 	struct lpfc_queue *qp;
 	int qidx;
 
-	for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) {
-		qp = phba->sli4_hba.fcp_wq[qidx];
+	for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+		qp = phba->sli4_hba.hdwq[qidx].fcp_wq;
 		if (qp->assoc_qid != cq_id)
 			continue;
 		*len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len);
 		if (*len >= max_cnt)
 			return 1;
 	}
-	for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) {
-		qp = phba->sli4_hba.nvme_wq[qidx];
-		if (qp->assoc_qid != cq_id)
-			continue;
-		*len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len);
-		if (*len >= max_cnt)
-			return 1;
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+			qp = phba->sli4_hba.hdwq[qidx].nvme_wq;
+			if (qp->assoc_qid != cq_id)
+				continue;
+			*len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len);
+			if (*len >= max_cnt)
+				return 1;
+		}
 	}
 	return 0;
 }
@@ -3217,10 +3819,10 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+			"HST-IDX[%04d], NTFI[%03d], PLMT[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index, qp->entry_repost);
+			qp->notify_interval, qp->max_proc_limit);
 
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
@@ -3243,15 +3845,15 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
+			"HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]\n",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index, qp->entry_repost);
+			qp->host_index, qp->hba_index, qp->notify_interval);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
+			"HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]\n",
 			datqp->queue_id, datqp->entry_count,
 			datqp->entry_size, datqp->host_index,
-			datqp->hba_index, datqp->entry_repost);
+			datqp->hba_index, datqp->notify_interval);
 	return len;
 }
 
@@ -3260,31 +3862,25 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer,
 		int *len, int max_cnt, int eqidx, int eq_id)
 {
 	struct lpfc_queue *qp;
-	int qidx, rc;
+	int rc;
 
-	for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) {
-		qp = phba->sli4_hba.fcp_cq[qidx];
-		if (qp->assoc_qid != eq_id)
-			continue;
+	qp = phba->sli4_hba.hdwq[eqidx].fcp_cq;
 
-		*len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
+	*len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
 
-		/* Reset max counter */
-		qp->CQ_max_cqe = 0;
+	/* Reset max counter */
+	qp->CQ_max_cqe = 0;
 
-		if (*len >= max_cnt)
-			return 1;
+	if (*len >= max_cnt)
+		return 1;
 
-		rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
-				max_cnt, qp->queue_id);
-		if (rc)
-			return 1;
-	}
+	rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
+				   max_cnt, qp->queue_id);
+	if (rc)
+		return 1;
 
-	for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) {
-		qp = phba->sli4_hba.nvme_cq[qidx];
-		if (qp->assoc_qid != eq_id)
-			continue;
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		qp = phba->sli4_hba.hdwq[eqidx].nvme_cq;
 
 		*len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len);
 
@@ -3295,7 +3891,7 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer,
 			return 1;
 
 		rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len,
-				max_cnt, qp->queue_id);
+					   max_cnt, qp->queue_id);
 		if (rc)
 			return 1;
 	}
@@ -3338,9 +3934,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,
 			(unsigned long long)qp->q_cnt_4, qp->q_mode);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+			"HST-IDX[%04d], NTFI[%03d], PLMT[%03d], AFFIN[%03d]",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index, qp->entry_repost);
+			qp->host_index, qp->notify_interval,
+			qp->max_proc_limit, qp->chann);
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
 	return len;
@@ -3387,24 +3984,19 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes,
 	spin_lock_irq(&phba->hbalock);
 
 	/* Fast-path event queue */
-	if (phba->sli4_hba.hba_eq && phba->io_channel_irqs) {
+	if (phba->sli4_hba.hdwq && phba->cfg_hdw_queue) {
 
 		x = phba->lpfc_idiag_last_eq;
-		if (phba->cfg_fof && (x >= phba->io_channel_irqs)) {
-			phba->lpfc_idiag_last_eq = 0;
-			goto fof;
-		}
 		phba->lpfc_idiag_last_eq++;
-		if (phba->lpfc_idiag_last_eq >= phba->io_channel_irqs)
-			if (phba->cfg_fof == 0)
-				phba->lpfc_idiag_last_eq = 0;
+		if (phba->lpfc_idiag_last_eq >= phba->cfg_hdw_queue)
+			phba->lpfc_idiag_last_eq = 0;
 
 		len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-					"EQ %d out of %d HBA EQs\n",
-					x, phba->io_channel_irqs);
+					"HDWQ %d out of %d HBA HDWQs\n",
+					x, phba->cfg_hdw_queue);
 
 		/* Fast-path EQ */
-		qp = phba->sli4_hba.hba_eq[x];
+		qp = phba->sli4_hba.hdwq[x].hba_eq;
 		if (!qp)
 			goto out;
 
@@ -3479,35 +4071,6 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes,
 		goto out;
 	}
 
-fof:
-	if (phba->cfg_fof) {
-		/* FOF EQ */
-		qp = phba->sli4_hba.fof_eq;
-		len = __lpfc_idiag_print_eq(qp, "FOF", pbuffer, len);
-
-		/* Reset max counter */
-		if (qp)
-			qp->EQ_max_eqe = 0;
-
-		if (len >= max_cnt)
-			goto too_big;
-
-		/* OAS CQ */
-		qp = phba->sli4_hba.oas_cq;
-		len = __lpfc_idiag_print_cq(qp, "OAS", pbuffer, len);
-		/* Reset max counter */
-		if (qp)
-			qp->CQ_max_cqe = 0;
-		if (len >= max_cnt)
-			goto too_big;
-
-		/* OAS WQ */
-		qp = phba->sli4_hba.oas_wq;
-		len = __lpfc_idiag_print_wq(qp, "OAS", pbuffer, len);
-		if (len >= max_cnt)
-			goto too_big;
-	}
-
 	spin_unlock_irq(&phba->hbalock);
 	return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len);
 
@@ -3725,9 +4288,9 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 	switch (quetp) {
 	case LPFC_IDIAG_EQ:
 		/* HBA event queue */
-		if (phba->sli4_hba.hba_eq) {
-			for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) {
-				qp = phba->sli4_hba.hba_eq[qidx];
+		if (phba->sli4_hba.hdwq) {
+			for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+				qp = phba->sli4_hba.hdwq[qidx].hba_eq;
 				if (qp && qp->queue_id == queid) {
 					/* Sanity check */
 					rc = lpfc_idiag_que_param_check(qp,
@@ -3776,10 +4339,10 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 			goto pass_check;
 		}
 		/* FCP complete queue */
-		if (phba->sli4_hba.fcp_cq) {
-			for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
+		if (phba->sli4_hba.hdwq) {
+			for (qidx = 0; qidx < phba->cfg_hdw_queue;
 								qidx++) {
-				qp = phba->sli4_hba.fcp_cq[qidx];
+				qp = phba->sli4_hba.hdwq[qidx].fcp_cq;
 				if (qp && qp->queue_id == queid) {
 					/* Sanity check */
 					rc = lpfc_idiag_que_param_check(
@@ -3792,23 +4355,20 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 			}
 		}
 		/* NVME complete queue */
-		if (phba->sli4_hba.nvme_cq) {
+		if (phba->sli4_hba.hdwq) {
 			qidx = 0;
 			do {
-				if (phba->sli4_hba.nvme_cq[qidx] &&
-				    phba->sli4_hba.nvme_cq[qidx]->queue_id ==
-				    queid) {
+				qp = phba->sli4_hba.hdwq[qidx].nvme_cq;
+				if (qp && qp->queue_id == queid) {
 					/* Sanity check */
 					rc = lpfc_idiag_que_param_check(
-						phba->sli4_hba.nvme_cq[qidx],
-						index, count);
+						qp, index, count);
 					if (rc)
 						goto error_out;
-					idiag.ptr_private =
-						phba->sli4_hba.nvme_cq[qidx];
+					idiag.ptr_private = qp;
 					goto pass_check;
 				}
-			} while (++qidx < phba->cfg_nvme_io_channel);
+			} while (++qidx < phba->cfg_hdw_queue);
 		}
 		goto error_out;
 		break;
@@ -3849,11 +4409,11 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 			idiag.ptr_private = phba->sli4_hba.nvmels_wq;
 			goto pass_check;
 		}
-		/* FCP work queue */
-		if (phba->sli4_hba.fcp_wq) {
-			for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
-								qidx++) {
-				qp = phba->sli4_hba.fcp_wq[qidx];
+
+		if (phba->sli4_hba.hdwq) {
+			/* FCP/SCSI work queue */
+			for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+				qp = phba->sli4_hba.hdwq[qidx].fcp_wq;
 				if (qp && qp->queue_id == queid) {
 					/* Sanity check */
 					rc = lpfc_idiag_que_param_check(
@@ -3864,12 +4424,9 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 					goto pass_check;
 				}
 			}
-		}
-		/* NVME work queue */
-		if (phba->sli4_hba.nvme_wq) {
-			for (qidx = 0; qidx < phba->cfg_nvme_io_channel;
-								qidx++) {
-				qp = phba->sli4_hba.nvme_wq[qidx];
+			/* NVME work queue */
+			for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+				qp = phba->sli4_hba.hdwq[qidx].nvme_wq;
 				if (qp && qp->queue_id == queid) {
 					/* Sanity check */
 					rc = lpfc_idiag_que_param_check(
@@ -3882,26 +4439,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
 			}
 		}
 
-		/* NVME work queues */
-		if (phba->sli4_hba.nvme_wq) {
-			for (qidx = 0; qidx < phba->cfg_nvme_io_channel;
-				qidx++) {
-				if (!phba->sli4_hba.nvme_wq[qidx])
-					continue;
-				if (phba->sli4_hba.nvme_wq[qidx]->queue_id ==
-				    queid) {
-					/* Sanity check */
-					rc = lpfc_idiag_que_param_check(
-						phba->sli4_hba.nvme_wq[qidx],
-						index, count);
-					if (rc)
-						goto error_out;
-					idiag.ptr_private =
-						phba->sli4_hba.nvme_wq[qidx];
-					goto pass_check;
-				}
-			}
-		}
 		goto error_out;
 		break;
 	case LPFC_IDIAG_RQ:
@@ -4866,6 +5403,16 @@ static const struct file_operations lpfc_debugfs_op_nodelist = {
 	.release =      lpfc_debugfs_release,
 };
 
+#undef lpfc_debugfs_op_multixripools
+static const struct file_operations lpfc_debugfs_op_multixripools = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_debugfs_multixripools_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_debugfs_read,
+	.write =	lpfc_debugfs_multixripools_write,
+	.release =      lpfc_debugfs_release,
+};
+
 #undef lpfc_debugfs_op_hbqinfo
 static const struct file_operations lpfc_debugfs_op_hbqinfo = {
 	.owner =        THIS_MODULE,
@@ -4875,6 +5422,18 @@ static const struct file_operations lpfc_debugfs_op_hbqinfo = {
 	.release =      lpfc_debugfs_release,
 };
 
+#ifdef LPFC_HDWQ_LOCK_STAT
+#undef lpfc_debugfs_op_lockstat
+static const struct file_operations lpfc_debugfs_op_lockstat = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_debugfs_lockstat_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_debugfs_read,
+	.write =        lpfc_debugfs_lockstat_write,
+	.release =      lpfc_debugfs_release,
+};
+#endif
+
 #undef lpfc_debugfs_op_dumpHBASlim
 static const struct file_operations lpfc_debugfs_op_dumpHBASlim = {
 	.owner =        THIS_MODULE,
@@ -4903,6 +5462,16 @@ static const struct file_operations lpfc_debugfs_op_nvmestat = {
 	.release =      lpfc_debugfs_release,
 };
 
+#undef lpfc_debugfs_op_scsistat
+static const struct file_operations lpfc_debugfs_op_scsistat = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_debugfs_scsistat_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_debugfs_read,
+	.write =	lpfc_debugfs_scsistat_write,
+	.release =      lpfc_debugfs_release,
+};
+
 #undef lpfc_debugfs_op_nvmektime
 static const struct file_operations lpfc_debugfs_op_nvmektime = {
 	.owner =        THIS_MODULE,
@@ -5280,11 +5849,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 	if (!lpfc_debugfs_root) {
 		lpfc_debugfs_root = debugfs_create_dir("lpfc", NULL);
 		atomic_set(&lpfc_debugfs_hba_count, 0);
-		if (!lpfc_debugfs_root) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "0408 Cannot create debugfs root\n");
-			goto debug_failed;
-		}
 	}
 	if (!lpfc_debugfs_start_time)
 		lpfc_debugfs_start_time = jiffies;
@@ -5295,25 +5859,42 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 		pport_setup = true;
 		phba->hba_debugfs_root =
 			debugfs_create_dir(name, lpfc_debugfs_root);
-		if (!phba->hba_debugfs_root) {
+		atomic_inc(&lpfc_debugfs_hba_count);
+		atomic_set(&phba->debugfs_vport_count, 0);
+
+		/* Multi-XRI pools */
+		snprintf(name, sizeof(name), "multixripools");
+		phba->debug_multixri_pools =
+			debugfs_create_file(name, S_IFREG | 0644,
+					    phba->hba_debugfs_root,
+					    phba,
+					    &lpfc_debugfs_op_multixripools);
+		if (!phba->debug_multixri_pools) {
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "0412 Cannot create debugfs hba\n");
+					 "0527 Cannot create debugfs multixripools\n");
 			goto debug_failed;
 		}
-		atomic_inc(&lpfc_debugfs_hba_count);
-		atomic_set(&phba->debugfs_vport_count, 0);
 
 		/* Setup hbqinfo */
 		snprintf(name, sizeof(name), "hbqinfo");
 		phba->debug_hbqinfo =
-			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
-				 phba->hba_debugfs_root,
-				 phba, &lpfc_debugfs_op_hbqinfo);
-		if (!phba->debug_hbqinfo) {
+			debugfs_create_file(name, S_IFREG | 0644,
+					    phba->hba_debugfs_root,
+					    phba, &lpfc_debugfs_op_hbqinfo);
+
+#ifdef LPFC_HDWQ_LOCK_STAT
+		/* Setup lockstat */
+		snprintf(name, sizeof(name), "lockstat");
+		phba->debug_lockstat =
+			debugfs_create_file(name, S_IFREG | 0644,
+					    phba->hba_debugfs_root,
+					    phba, &lpfc_debugfs_op_lockstat);
+		if (!phba->debug_lockstat) {
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0411 Cannot create debugfs hbqinfo\n");
+					 "0913 Cant create debugfs lockstat\n");
 			goto debug_failed;
 		}
+#endif
 
 		/* Setup dumpHBASlim */
 		if (phba->sli_rev < LPFC_SLI_REV4) {
@@ -5323,12 +5904,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 					S_IFREG|S_IRUGO|S_IWUSR,
 					phba->hba_debugfs_root,
 					phba, &lpfc_debugfs_op_dumpHBASlim);
-			if (!phba->debug_dumpHBASlim) {
-				lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-						 "0413 Cannot create debugfs "
-						"dumpHBASlim\n");
-				goto debug_failed;
-			}
 		} else
 			phba->debug_dumpHBASlim = NULL;
 
@@ -5340,12 +5915,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 					S_IFREG|S_IRUGO|S_IWUSR,
 					phba->hba_debugfs_root,
 					phba, &lpfc_debugfs_op_dumpHostSlim);
-			if (!phba->debug_dumpHostSlim) {
-				lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-						 "0414 Cannot create debugfs "
-						 "dumpHostSlim\n");
-				goto debug_failed;
-			}
 		} else
 			phba->debug_dumpHostSlim = NULL;
 
@@ -5355,11 +5924,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				 phba->hba_debugfs_root,
 				 phba, &lpfc_debugfs_op_dumpData);
-		if (!phba->debug_dumpData) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0800 Cannot create debugfs dumpData\n");
-			goto debug_failed;
-		}
 
 		/* Setup dumpDif */
 		snprintf(name, sizeof(name), "dumpDif");
@@ -5367,11 +5931,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				 phba->hba_debugfs_root,
 				 phba, &lpfc_debugfs_op_dumpDif);
-		if (!phba->debug_dumpDif) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0801 Cannot create debugfs dumpDif\n");
-			goto debug_failed;
-		}
 
 		/* Setup DIF Error Injections */
 		snprintf(name, sizeof(name), "InjErrLBA");
@@ -5379,11 +5938,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_InjErrLBA) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0807 Cannot create debugfs InjErrLBA\n");
-			goto debug_failed;
-		}
 		phba->lpfc_injerr_lba = LPFC_INJERR_LBA_OFF;
 
 		snprintf(name, sizeof(name), "InjErrNPortID");
@@ -5391,88 +5945,48 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_InjErrNPortID) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0809 Cannot create debugfs InjErrNPortID\n");
-			goto debug_failed;
-		}
 
 		snprintf(name, sizeof(name), "InjErrWWPN");
 		phba->debug_InjErrWWPN =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_InjErrWWPN) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0810 Cannot create debugfs InjErrWWPN\n");
-			goto debug_failed;
-		}
 
 		snprintf(name, sizeof(name), "writeGuardInjErr");
 		phba->debug_writeGuard =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_writeGuard) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0802 Cannot create debugfs writeGuard\n");
-			goto debug_failed;
-		}
 
 		snprintf(name, sizeof(name), "writeAppInjErr");
 		phba->debug_writeApp =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_writeApp) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0803 Cannot create debugfs writeApp\n");
-			goto debug_failed;
-		}
 
 		snprintf(name, sizeof(name), "writeRefInjErr");
 		phba->debug_writeRef =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_writeRef) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0804 Cannot create debugfs writeRef\n");
-			goto debug_failed;
-		}
 
 		snprintf(name, sizeof(name), "readGuardInjErr");
 		phba->debug_readGuard =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_readGuard) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0808 Cannot create debugfs readGuard\n");
-			goto debug_failed;
-		}
 
 		snprintf(name, sizeof(name), "readAppInjErr");
 		phba->debug_readApp =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_readApp) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0805 Cannot create debugfs readApp\n");
-			goto debug_failed;
-		}
 
 		snprintf(name, sizeof(name), "readRefInjErr");
 		phba->debug_readRef =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 			phba->hba_debugfs_root,
 			phba, &lpfc_debugfs_op_dif_err);
-		if (!phba->debug_readRef) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				"0806 Cannot create debugfs readApp\n");
-			goto debug_failed;
-		}
 
 		/* Setup slow ring trace */
 		if (lpfc_debugfs_max_slow_ring_trc) {
@@ -5496,12 +6010,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				 phba->hba_debugfs_root,
 				 phba, &lpfc_debugfs_op_slow_ring_trc);
-		if (!phba->debug_slow_ring_trc) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "0415 Cannot create debugfs "
-					 "slow_ring_trace\n");
-			goto debug_failed;
-		}
 		if (!phba->slow_ring_trc) {
 			phba->slow_ring_trc = kmalloc(
 				(sizeof(struct lpfc_debugfs_trc) *
@@ -5524,11 +6032,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 			debugfs_create_file(name, 0644,
 					    phba->hba_debugfs_root,
 					    phba, &lpfc_debugfs_op_nvmeio_trc);
-		if (!phba->debug_nvmeio_trc) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "0574 No create debugfs nvmeio_trc\n");
-			goto debug_failed;
-		}
 
 		atomic_set(&phba->nvmeio_trc_cnt, 0);
 		if (lpfc_debugfs_max_nvmeio_trc) {
@@ -5576,11 +6079,6 @@ nvmeio_off:
 	if (!vport->vport_debugfs_root) {
 		vport->vport_debugfs_root =
 			debugfs_create_dir(name, phba->hba_debugfs_root);
-		if (!vport->vport_debugfs_root) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "0417 Can't create debugfs\n");
-			goto debug_failed;
-		}
 		atomic_inc(&phba->debugfs_vport_count);
 	}
 
@@ -5617,31 +6115,26 @@ nvmeio_off:
 		debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				 vport->vport_debugfs_root,
 				 vport, &lpfc_debugfs_op_disc_trc);
-	if (!vport->debug_disc_trc) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				 "0419 Cannot create debugfs "
-				 "discovery_trace\n");
-		goto debug_failed;
-	}
 	snprintf(name, sizeof(name), "nodelist");
 	vport->debug_nodelist =
 		debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				 vport->vport_debugfs_root,
 				 vport, &lpfc_debugfs_op_nodelist);
-	if (!vport->debug_nodelist) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				 "2985 Can't create debugfs nodelist\n");
-		goto debug_failed;
-	}
 
 	snprintf(name, sizeof(name), "nvmestat");
 	vport->debug_nvmestat =
 		debugfs_create_file(name, 0644,
 				    vport->vport_debugfs_root,
 				    vport, &lpfc_debugfs_op_nvmestat);
-	if (!vport->debug_nvmestat) {
+
+	snprintf(name, sizeof(name), "scsistat");
+	vport->debug_scsistat =
+		debugfs_create_file(name, 0644,
+				    vport->vport_debugfs_root,
+				    vport, &lpfc_debugfs_op_scsistat);
+	if (!vport->debug_scsistat) {
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				 "0811 Cannot create debugfs nvmestat\n");
+				 "0914 Cannot create debugfs scsistat\n");
 		goto debug_failed;
 	}
 
@@ -5650,22 +6143,12 @@ nvmeio_off:
 		debugfs_create_file(name, 0644,
 				    vport->vport_debugfs_root,
 				    vport, &lpfc_debugfs_op_nvmektime);
-	if (!vport->debug_nvmektime) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				 "0815 Cannot create debugfs nvmektime\n");
-		goto debug_failed;
-	}
 
 	snprintf(name, sizeof(name), "cpucheck");
 	vport->debug_cpucheck =
 		debugfs_create_file(name, 0644,
 				    vport->vport_debugfs_root,
 				    vport, &lpfc_debugfs_op_cpucheck);
-	if (!vport->debug_cpucheck) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-				 "0819 Cannot create debugfs cpucheck\n");
-		goto debug_failed;
-	}
 
 	/*
 	 * The following section is for additional directories/files for the
@@ -5685,11 +6168,6 @@ nvmeio_off:
 	if (!phba->idiag_root) {
 		phba->idiag_root =
 			debugfs_create_dir(name, phba->hba_debugfs_root);
-		if (!phba->idiag_root) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "2922 Can't create idiag debugfs\n");
-			goto debug_failed;
-		}
 		/* Initialize iDiag data structure */
 		memset(&idiag, 0, sizeof(idiag));
 	}
@@ -5700,11 +6178,6 @@ nvmeio_off:
 		phba->idiag_pci_cfg =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				phba->idiag_root, phba, &lpfc_idiag_op_pciCfg);
-		if (!phba->idiag_pci_cfg) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "2923 Can't create idiag debugfs\n");
-			goto debug_failed;
-		}
 		idiag.offset.last_rd = 0;
 	}
 
@@ -5714,11 +6187,6 @@ nvmeio_off:
 		phba->idiag_bar_acc =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				phba->idiag_root, phba, &lpfc_idiag_op_barAcc);
-		if (!phba->idiag_bar_acc) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					"3056 Can't create idiag debugfs\n");
-			goto debug_failed;
-		}
 		idiag.offset.last_rd = 0;
 	}
 
@@ -5728,11 +6196,6 @@ nvmeio_off:
 		phba->idiag_que_info =
 			debugfs_create_file(name, S_IFREG|S_IRUGO,
 			phba->idiag_root, phba, &lpfc_idiag_op_queInfo);
-		if (!phba->idiag_que_info) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "2924 Can't create idiag debugfs\n");
-			goto debug_failed;
-		}
 	}
 
 	/* iDiag access PCI function queue */
@@ -5741,11 +6204,6 @@ nvmeio_off:
 		phba->idiag_que_acc =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				phba->idiag_root, phba, &lpfc_idiag_op_queAcc);
-		if (!phba->idiag_que_acc) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "2926 Can't create idiag debugfs\n");
-			goto debug_failed;
-		}
 	}
 
 	/* iDiag access PCI function doorbell registers */
@@ -5754,11 +6212,6 @@ nvmeio_off:
 		phba->idiag_drb_acc =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				phba->idiag_root, phba, &lpfc_idiag_op_drbAcc);
-		if (!phba->idiag_drb_acc) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "2927 Can't create idiag debugfs\n");
-			goto debug_failed;
-		}
 	}
 
 	/* iDiag access PCI function control registers */
@@ -5767,11 +6220,6 @@ nvmeio_off:
 		phba->idiag_ctl_acc =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				phba->idiag_root, phba, &lpfc_idiag_op_ctlAcc);
-		if (!phba->idiag_ctl_acc) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					 "2981 Can't create idiag debugfs\n");
-			goto debug_failed;
-		}
 	}
 
 	/* iDiag access mbox commands */
@@ -5780,11 +6228,6 @@ nvmeio_off:
 		phba->idiag_mbx_acc =
 			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
 				phba->idiag_root, phba, &lpfc_idiag_op_mbxAcc);
-		if (!phba->idiag_mbx_acc) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-					"2980 Can't create idiag debugfs\n");
-			goto debug_failed;
-		}
 	}
 
 	/* iDiag extents access commands */
@@ -5796,12 +6239,6 @@ nvmeio_off:
 						    S_IFREG|S_IRUGO|S_IWUSR,
 						    phba->idiag_root, phba,
 						    &lpfc_idiag_op_extAcc);
-			if (!phba->idiag_ext_acc) {
-				lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
-						"2986 Cant create "
-						"idiag debugfs\n");
-				goto debug_failed;
-			}
 		}
 	}
 
@@ -5839,6 +6276,9 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 	debugfs_remove(vport->debug_nvmestat); /* nvmestat */
 	vport->debug_nvmestat = NULL;
 
+	debugfs_remove(vport->debug_scsistat); /* scsistat */
+	vport->debug_scsistat = NULL;
+
 	debugfs_remove(vport->debug_nvmektime); /* nvmektime */
 	vport->debug_nvmektime = NULL;
 
@@ -5853,9 +6293,16 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 
 	if (atomic_read(&phba->debugfs_vport_count) == 0) {
 
+		debugfs_remove(phba->debug_multixri_pools); /* multixripools*/
+		phba->debug_multixri_pools = NULL;
+
 		debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */
 		phba->debug_hbqinfo = NULL;
 
+#ifdef LPFC_HDWQ_LOCK_STAT
+		debugfs_remove(phba->debug_lockstat); /* lockstat */
+		phba->debug_lockstat = NULL;
+#endif
 		debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */
 		phba->debug_dumpHBASlim = NULL;
 
@@ -5988,11 +6435,13 @@ lpfc_debug_dump_all_queues(struct lpfc_hba *phba)
 	lpfc_debug_dump_wq(phba, DUMP_ELS, 0);
 	lpfc_debug_dump_wq(phba, DUMP_NVMELS, 0);
 
-	for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++)
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
 		lpfc_debug_dump_wq(phba, DUMP_FCP, idx);
 
-	for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++)
-		lpfc_debug_dump_wq(phba, DUMP_NVME, idx);
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
+			lpfc_debug_dump_wq(phba, DUMP_NVME, idx);
+	}
 
 	lpfc_debug_dump_hdr_rq(phba);
 	lpfc_debug_dump_dat_rq(phba);
@@ -6003,15 +6452,17 @@ lpfc_debug_dump_all_queues(struct lpfc_hba *phba)
 	lpfc_debug_dump_cq(phba, DUMP_ELS, 0);
 	lpfc_debug_dump_cq(phba, DUMP_NVMELS, 0);
 
-	for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++)
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
 		lpfc_debug_dump_cq(phba, DUMP_FCP, idx);
 
-	for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++)
-		lpfc_debug_dump_cq(phba, DUMP_NVME, idx);
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
+			lpfc_debug_dump_cq(phba, DUMP_NVME, idx);
+	}
 
 	/*
 	 * Dump Event Queues (EQs)
 	 */
-	for (idx = 0; idx < phba->io_channel_irqs; idx++)
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
 		lpfc_debug_dump_hba_eq(phba, idx);
 }
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index 30efc7bf91bd..93ab7dfb8ee0 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2007-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -50,6 +50,9 @@
 #define LPFC_CPUCHECK_SIZE 8192
 #define LPFC_NVMEIO_TRC_SIZE 8192
 
+/* scsistat output buffer size */
+#define LPFC_SCSISTAT_SIZE 8192
+
 #define LPFC_DEBUG_OUT_LINE_SZ	80
 
 /*
@@ -284,6 +287,9 @@ struct lpfc_idiag {
 
 #endif
 
+/* multixripool output buffer size */
+#define LPFC_DUMP_MULTIXRIPOOL_SIZE 8192
+
 enum {
 	DUMP_FCP,
 	DUMP_NVME,
@@ -410,10 +416,10 @@ lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx)
 	char *qtypestr;
 
 	if (qtype == DUMP_FCP) {
-		wq = phba->sli4_hba.fcp_wq[wqidx];
+		wq = phba->sli4_hba.hdwq[wqidx].fcp_wq;
 		qtypestr = "FCP";
 	} else if (qtype == DUMP_NVME) {
-		wq = phba->sli4_hba.nvme_wq[wqidx];
+		wq = phba->sli4_hba.hdwq[wqidx].nvme_wq;
 		qtypestr = "NVME";
 	} else if (qtype == DUMP_MBX) {
 		wq = phba->sli4_hba.mbx_wq;
@@ -454,14 +460,15 @@ lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx)
 	int eqidx;
 
 	/* fcp/nvme wq and cq are 1:1, thus same indexes */
+	eq = NULL;
 
 	if (qtype == DUMP_FCP) {
-		wq = phba->sli4_hba.fcp_wq[wqidx];
-		cq = phba->sli4_hba.fcp_cq[wqidx];
+		wq = phba->sli4_hba.hdwq[wqidx].fcp_wq;
+		cq = phba->sli4_hba.hdwq[wqidx].fcp_cq;
 		qtypestr = "FCP";
 	} else if (qtype == DUMP_NVME) {
-		wq = phba->sli4_hba.nvme_wq[wqidx];
-		cq = phba->sli4_hba.nvme_cq[wqidx];
+		wq = phba->sli4_hba.hdwq[wqidx].nvme_wq;
+		cq = phba->sli4_hba.hdwq[wqidx].nvme_cq;
 		qtypestr = "NVME";
 	} else if (qtype == DUMP_MBX) {
 		wq = phba->sli4_hba.mbx_wq;
@@ -478,17 +485,17 @@ lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx)
 	} else
 		return;
 
-	for (eqidx = 0; eqidx < phba->io_channel_irqs; eqidx++) {
-		if (cq->assoc_qid == phba->sli4_hba.hba_eq[eqidx]->queue_id)
+	for (eqidx = 0; eqidx < phba->cfg_hdw_queue; eqidx++) {
+		eq = phba->sli4_hba.hdwq[eqidx].hba_eq;
+		if (cq->assoc_qid == eq->queue_id)
 			break;
 	}
-	if (eqidx == phba->io_channel_irqs) {
+	if (eqidx == phba->cfg_hdw_queue) {
 		pr_err("Couldn't find EQ for CQ. Using EQ[0]\n");
 		eqidx = 0;
+		eq = phba->sli4_hba.hdwq[0].hba_eq;
 	}
 
-	eq = phba->sli4_hba.hba_eq[eqidx];
-
 	if (qtype == DUMP_FCP || qtype == DUMP_NVME)
 		pr_err("%s CQ: WQ[Idx:%d|Qid%d]->CQ[Idx%d|Qid%d]"
 			"->EQ[Idx:%d|Qid:%d]:\n",
@@ -516,7 +523,7 @@ lpfc_debug_dump_hba_eq(struct lpfc_hba *phba, int qidx)
 {
 	struct lpfc_queue *qp;
 
-	qp = phba->sli4_hba.hba_eq[qidx];
+	qp = phba->sli4_hba.hdwq[qidx].hba_eq;
 
 	pr_err("EQ[Idx:%d|Qid:%d]\n", qidx, qp->queue_id);
 
@@ -564,21 +571,21 @@ lpfc_debug_dump_wq_by_id(struct lpfc_hba *phba, int qid)
 {
 	int wq_idx;
 
-	for (wq_idx = 0; wq_idx < phba->cfg_fcp_io_channel; wq_idx++)
-		if (phba->sli4_hba.fcp_wq[wq_idx]->queue_id == qid)
+	for (wq_idx = 0; wq_idx < phba->cfg_hdw_queue; wq_idx++)
+		if (phba->sli4_hba.hdwq[wq_idx].fcp_wq->queue_id == qid)
 			break;
-	if (wq_idx < phba->cfg_fcp_io_channel) {
+	if (wq_idx < phba->cfg_hdw_queue) {
 		pr_err("FCP WQ[Idx:%d|Qid:%d]\n", wq_idx, qid);
-		lpfc_debug_dump_q(phba->sli4_hba.fcp_wq[wq_idx]);
+		lpfc_debug_dump_q(phba->sli4_hba.hdwq[wq_idx].fcp_wq);
 		return;
 	}
 
-	for (wq_idx = 0; wq_idx < phba->cfg_nvme_io_channel; wq_idx++)
-		if (phba->sli4_hba.nvme_wq[wq_idx]->queue_id == qid)
+	for (wq_idx = 0; wq_idx < phba->cfg_hdw_queue; wq_idx++)
+		if (phba->sli4_hba.hdwq[wq_idx].nvme_wq->queue_id == qid)
 			break;
-	if (wq_idx < phba->cfg_nvme_io_channel) {
+	if (wq_idx < phba->cfg_hdw_queue) {
 		pr_err("NVME WQ[Idx:%d|Qid:%d]\n", wq_idx, qid);
-		lpfc_debug_dump_q(phba->sli4_hba.nvme_wq[wq_idx]);
+		lpfc_debug_dump_q(phba->sli4_hba.hdwq[wq_idx].nvme_wq);
 		return;
 	}
 
@@ -646,23 +653,23 @@ lpfc_debug_dump_cq_by_id(struct lpfc_hba *phba, int qid)
 {
 	int cq_idx;
 
-	for (cq_idx = 0; cq_idx < phba->cfg_fcp_io_channel; cq_idx++)
-		if (phba->sli4_hba.fcp_cq[cq_idx]->queue_id == qid)
+	for (cq_idx = 0; cq_idx < phba->cfg_hdw_queue; cq_idx++)
+		if (phba->sli4_hba.hdwq[cq_idx].fcp_cq->queue_id == qid)
 			break;
 
-	if (cq_idx < phba->cfg_fcp_io_channel) {
+	if (cq_idx < phba->cfg_hdw_queue) {
 		pr_err("FCP CQ[Idx:%d|Qid:%d]\n", cq_idx, qid);
-		lpfc_debug_dump_q(phba->sli4_hba.fcp_cq[cq_idx]);
+		lpfc_debug_dump_q(phba->sli4_hba.hdwq[cq_idx].fcp_cq);
 		return;
 	}
 
-	for (cq_idx = 0; cq_idx < phba->cfg_nvme_io_channel; cq_idx++)
-		if (phba->sli4_hba.nvme_cq[cq_idx]->queue_id == qid)
+	for (cq_idx = 0; cq_idx < phba->cfg_hdw_queue; cq_idx++)
+		if (phba->sli4_hba.hdwq[cq_idx].nvme_cq->queue_id == qid)
 			break;
 
-	if (cq_idx < phba->cfg_nvme_io_channel) {
+	if (cq_idx < phba->cfg_hdw_queue) {
 		pr_err("NVME CQ[Idx:%d|Qid:%d]\n", cq_idx, qid);
-		lpfc_debug_dump_q(phba->sli4_hba.nvme_cq[cq_idx]);
+		lpfc_debug_dump_q(phba->sli4_hba.hdwq[cq_idx].nvme_cq);
 		return;
 	}
 
@@ -697,13 +704,13 @@ lpfc_debug_dump_eq_by_id(struct lpfc_hba *phba, int qid)
 {
 	int eq_idx;
 
-	for (eq_idx = 0; eq_idx < phba->io_channel_irqs; eq_idx++)
-		if (phba->sli4_hba.hba_eq[eq_idx]->queue_id == qid)
+	for (eq_idx = 0; eq_idx < phba->cfg_hdw_queue; eq_idx++)
+		if (phba->sli4_hba.hdwq[eq_idx].hba_eq->queue_id == qid)
 			break;
 
-	if (eq_idx < phba->io_channel_irqs) {
+	if (eq_idx < phba->cfg_hdw_queue) {
 		printk(KERN_ERR "FCP EQ[Idx:%d|Qid:%d]\n", eq_idx, qid);
-		lpfc_debug_dump_q(phba->sli4_hba.hba_eq[eq_idx]);
+		lpfc_debug_dump_q(phba->sli4_hba.hdwq[eq_idx].hba_eq);
 		return;
 	}
 }
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index b3a4789468c3..fc077cb87900 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -2827,8 +2827,8 @@ out:
 		!(vport->fc_flag & FC_PT2PT_PLOGI)) {
 		phba->pport->fc_myDID = 0;
 
-		if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-		    (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
+		if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+		    (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
 			if (phba->nvmet_support)
 				lpfc_nvmet_update_targetport(phba);
 			else
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index b183b882d506..aa4961a2caf8 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -638,8 +638,6 @@ lpfc_work_done(struct lpfc_hba *phba)
 	if (phba->pci_dev_grp == LPFC_PCI_DEV_OC) {
 		if (phba->hba_flag & HBA_RRQ_ACTIVE)
 			lpfc_handle_rrq_active(phba);
-		if (phba->hba_flag & FCP_XRI_ABORT_EVENT)
-			lpfc_sli4_fcp_xri_abort_event_proc(phba);
 		if (phba->hba_flag & ELS_XRI_ABORT_EVENT)
 			lpfc_sli4_els_xri_abort_event_proc(phba);
 		if (phba->hba_flag & ASYNC_EVENT)
@@ -859,10 +857,9 @@ lpfc_port_link_failure(struct lpfc_vport *vport)
 void
 lpfc_linkdown_port(struct lpfc_vport *vport)
 {
-	struct lpfc_hba  *phba = vport->phba;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 
-	if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME)
+	if (vport->cfg_enable_fc4_type != LPFC_ENABLE_NVME)
 		fc_host_post_event(shost, fc_get_event_number(),
 				   FCH_EVT_LINKDOWN, 0);
 
@@ -925,8 +922,8 @@ lpfc_linkdown(struct lpfc_hba *phba)
 
 			vports[i]->fc_myDID = 0;
 
-			if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-			    (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
+			if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+			    (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
 				if (phba->nvmet_support)
 					lpfc_nvmet_update_targetport(phba);
 				else
@@ -1012,7 +1009,7 @@ lpfc_linkup_port(struct lpfc_vport *vport)
 		(vport != phba->pport))
 		return;
 
-	if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME)
+	if (vport->cfg_enable_fc4_type != LPFC_ENABLE_NVME)
 		fc_host_post_event(shost, fc_get_event_number(),
 				   FCH_EVT_LINKUP, 0);
 
@@ -3660,8 +3657,8 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 		spin_unlock_irq(shost->host_lock);
 		vport->fc_myDID = 0;
 
-		if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-		    (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
+		if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+		    (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
 			if (phba->nvmet_support)
 				lpfc_nvmet_update_targetport(phba);
 			else
@@ -3923,11 +3920,9 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 int
 lpfc_issue_gidft(struct lpfc_vport *vport)
 {
-	struct lpfc_hba *phba = vport->phba;
-
 	/* Good status, issue CT Request to NameServer */
-	if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-	    (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) {
+	if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+	    (vport->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) {
 		if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, SLI_CTPT_FCP)) {
 			/* Cannot issue NameServer FCP Query, so finish up
 			 * discovery
@@ -3942,8 +3937,8 @@ lpfc_issue_gidft(struct lpfc_vport *vport)
 		vport->gidft_inp++;
 	}
 
-	if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-	    (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
+	if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+	    (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
 		if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, SLI_CTPT_NVME)) {
 			/* Cannot issue NameServer NVME Query, so finish up
 			 * discovery
@@ -4059,12 +4054,12 @@ out:
 		lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0);
 		lpfc_ns_cmd(vport, SLI_CTNS_RFT_ID, 0, 0);
 
-		if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-		    (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP))
+		if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+		    (vport->cfg_enable_fc4_type == LPFC_ENABLE_FCP))
 			lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, FC_TYPE_FCP);
 
-		if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-		    (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME))
+		if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+		    (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME))
 			lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0,
 				    FC_TYPE_NVME);
 
@@ -4100,7 +4095,7 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	struct fc_rport_identifiers rport_ids;
 	struct lpfc_hba  *phba = vport->phba;
 
-	if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)
+	if (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)
 		return;
 
 	/* Remote port has reappeared. Re-register w/ FC transport */
@@ -4175,9 +4170,8 @@ lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp)
 {
 	struct fc_rport *rport = ndlp->rport;
 	struct lpfc_vport *vport = ndlp->vport;
-	struct lpfc_hba  *phba = vport->phba;
 
-	if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)
+	if (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)
 		return;
 
 	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index c15b9b6fb840..ff875b833192 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2009-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -194,7 +194,7 @@ struct lpfc_sli_intf {
 #define LPFC_ACT_INTR_CNT	4
 
 /* Algrithmns for scheduling FCP commands to WQs */
-#define	LPFC_FCP_SCHED_ROUND_ROBIN	0
+#define	LPFC_FCP_SCHED_BY_HDWQ		0
 #define	LPFC_FCP_SCHED_BY_CPU		1
 
 /* Algrithmns for NameServer Query after RSCN */
@@ -208,12 +208,18 @@ struct lpfc_sli_intf {
 /* Configuration of Interrupts / sec for entire HBA port */
 #define LPFC_MIN_IMAX          5000
 #define LPFC_MAX_IMAX          5000000
-#define LPFC_DEF_IMAX          150000
+#define LPFC_DEF_IMAX          0
+
+#define LPFC_IMAX_THRESHOLD    1000
+#define LPFC_MAX_AUTO_EQ_DELAY 120
+#define LPFC_EQ_DELAY_STEP     15
+#define LPFC_EQD_ISR_TRIGGER   20000
+/* 1s intervals */
+#define LPFC_EQ_DELAY_MSECS    1000
 
 #define LPFC_MIN_CPU_MAP       0
-#define LPFC_MAX_CPU_MAP       2
+#define LPFC_MAX_CPU_MAP       1
 #define LPFC_HBA_CPU_MAP       1
-#define LPFC_DRIVER_CPU_MAP    2  /* Default */
 
 /* PORT_CAPABILITIES constants. */
 #define LPFC_MAX_SUPPORTED_PAGES	8
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index bede11e16349..3b5873f6751e 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -37,6 +37,7 @@
 #include <linux/miscdevice.h>
 #include <linux/percpu.h>
 #include <linux/msi.h>
+#include <linux/irq.h>
 #include <linux/bitops.h>
 
 #include <scsi/scsi.h>
@@ -71,7 +72,6 @@ unsigned long _dump_buf_dif_order;
 spinlock_t _dump_buf_lock;
 
 /* Used when mapping IRQ vectors in a driver centric manner */
-uint16_t *lpfc_used_cpu;
 uint32_t lpfc_present_cpu;
 
 static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
@@ -93,6 +93,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *);
 static void lpfc_sli4_disable_intr(struct lpfc_hba *);
 static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
 static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
+static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t);
+static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
 static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -1037,14 +1039,14 @@ lpfc_hba_down_post_s3(struct lpfc_hba *phba)
 static int
 lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 {
-	struct lpfc_scsi_buf *psb, *psb_next;
+	struct lpfc_io_buf *psb, *psb_next;
 	struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next;
+	struct lpfc_sli4_hdw_queue *qp;
 	LIST_HEAD(aborts);
 	LIST_HEAD(nvme_aborts);
 	LIST_HEAD(nvmet_aborts);
-	unsigned long iflag = 0;
 	struct lpfc_sglq *sglq_entry = NULL;
-	int cnt;
+	int cnt, idx;
 
 
 	lpfc_sli_hbqbuf_free_all(phba);
@@ -1071,55 +1073,65 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 
 
 	spin_unlock(&phba->sli4_hba.sgl_list_lock);
-	/* abts_scsi_buf_list_lock required because worker thread uses this
+
+	/* abts_xxxx_buf_list_lock required because worker thread uses this
 	 * list.
 	 */
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
-		spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock);
-		list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list,
-				 &aborts);
-		spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock);
-	}
-
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-		spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
-		list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list,
-				 &nvme_aborts);
-		list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
-				 &nvmet_aborts);
-		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
-	}
+	cnt = 0;
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		qp = &phba->sli4_hba.hdwq[idx];
 
-	spin_unlock_irq(&phba->hbalock);
-
-	list_for_each_entry_safe(psb, psb_next, &aborts, list) {
-		psb->pCmd = NULL;
-		psb->status = IOSTAT_SUCCESS;
-	}
-	spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
-	list_splice(&aborts, &phba->lpfc_scsi_buf_list_put);
-	spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+		spin_lock(&qp->abts_scsi_buf_list_lock);
+		list_splice_init(&qp->lpfc_abts_scsi_buf_list,
+				 &aborts);
 
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-		cnt = 0;
-		list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) {
+		list_for_each_entry_safe(psb, psb_next, &aborts, list) {
 			psb->pCmd = NULL;
 			psb->status = IOSTAT_SUCCESS;
 			cnt++;
 		}
-		spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag);
-		phba->put_nvme_bufs += cnt;
-		list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put);
-		spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag);
+		spin_lock(&qp->io_buf_list_put_lock);
+		list_splice_init(&aborts, &qp->lpfc_io_buf_list_put);
+		qp->put_io_bufs += qp->abts_scsi_io_bufs;
+		qp->abts_scsi_io_bufs = 0;
+		spin_unlock(&qp->io_buf_list_put_lock);
+		spin_unlock(&qp->abts_scsi_buf_list_lock);
+
+		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+			spin_lock(&qp->abts_nvme_buf_list_lock);
+			list_splice_init(&qp->lpfc_abts_nvme_buf_list,
+					 &nvme_aborts);
+			list_for_each_entry_safe(psb, psb_next, &nvme_aborts,
+						 list) {
+				psb->pCmd = NULL;
+				psb->status = IOSTAT_SUCCESS;
+				cnt++;
+			}
+			spin_lock(&qp->io_buf_list_put_lock);
+			qp->put_io_bufs += qp->abts_nvme_io_bufs;
+			qp->abts_nvme_io_bufs = 0;
+			list_splice_init(&nvme_aborts,
+					 &qp->lpfc_io_buf_list_put);
+			spin_unlock(&qp->io_buf_list_put_lock);
+			spin_unlock(&qp->abts_nvme_buf_list_lock);
+
+		}
+	}
 
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
+		list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+				 &nvmet_aborts);
+		spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
 			ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
 			lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 		}
 	}
 
+	spin_unlock_irq(&phba->hbalock);
 	lpfc_sli4_free_sp_events(phba);
-	return 0;
+	return cnt;
 }
 
 /**
@@ -1239,6 +1251,96 @@ lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
 	return;
 }
 
+static void
+lpfc_hb_eq_delay_work(struct work_struct *work)
+{
+	struct lpfc_hba *phba = container_of(to_delayed_work(work),
+					     struct lpfc_hba, eq_delay_work);
+	struct lpfc_eq_intr_info *eqi, *eqi_new;
+	struct lpfc_queue *eq, *eq_next;
+	unsigned char *eqcnt = NULL;
+	uint32_t usdelay;
+	int i;
+
+	if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING)
+		return;
+
+	if (phba->link_state == LPFC_HBA_ERROR ||
+	    phba->pport->fc_flag & FC_OFFLINE_MODE)
+		goto requeue;
+
+	eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char),
+			GFP_KERNEL);
+	if (!eqcnt)
+		goto requeue;
+
+	for (i = 0; i < phba->cfg_irq_chann; i++) {
+		eq = phba->sli4_hba.hdwq[i].hba_eq;
+		if (eq && eqcnt[eq->last_cpu] < 2)
+			eqcnt[eq->last_cpu]++;
+		continue;
+	}
+
+	for_each_present_cpu(i) {
+		if (phba->cfg_irq_chann > 1 && eqcnt[i] < 2)
+			continue;
+
+		eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
+
+		usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) *
+			   LPFC_EQ_DELAY_STEP;
+		if (usdelay > LPFC_MAX_AUTO_EQ_DELAY)
+			usdelay = LPFC_MAX_AUTO_EQ_DELAY;
+
+		eqi->icnt = 0;
+
+		list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) {
+			if (eq->last_cpu != i) {
+				eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info,
+						      eq->last_cpu);
+				list_move_tail(&eq->cpu_list, &eqi_new->list);
+				continue;
+			}
+			if (usdelay != eq->q_mode)
+				lpfc_modify_hba_eq_delay(phba, eq->hdwq, 1,
+							 usdelay);
+		}
+	}
+
+	kfree(eqcnt);
+
+requeue:
+	queue_delayed_work(phba->wq, &phba->eq_delay_work,
+			   msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+}
+
+/**
+ * lpfc_hb_mxp_handler - Multi-XRI pools handler to adjust XRI distribution
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * For each heartbeat, this routine does some heuristic methods to adjust
+ * XRI distribution. The goal is to fully utilize free XRIs.
+ **/
+static void lpfc_hb_mxp_handler(struct lpfc_hba *phba)
+{
+	u32 i;
+	u32 hwq_count;
+
+	hwq_count = phba->cfg_hdw_queue;
+	for (i = 0; i < hwq_count; i++) {
+		/* Adjust XRIs in private pool */
+		lpfc_adjust_pvt_pool_count(phba, i);
+
+		/* Adjust high watermark */
+		lpfc_adjust_high_watermark(phba, i);
+
+#ifdef LPFC_MXP_STAT
+		/* Snapshot pbl, pvt and busy count */
+		lpfc_snapshot_mxp(phba, i);
+#endif
+	}
+}
+
 /**
  * lpfc_hb_timeout_handler - The HBA-timer timeout handler
  * @phba: pointer to lpfc hba data structure.
@@ -1264,16 +1366,11 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 	int retval, i;
 	struct lpfc_sli *psli = &phba->sli;
 	LIST_HEAD(completions);
-	struct lpfc_queue *qp;
-	unsigned long time_elapsed;
-	uint32_t tick_cqe, max_cqe, val;
-	uint64_t tot, data1, data2, data3;
-	struct lpfc_nvmet_tgtport *tgtp;
-	struct lpfc_register reg_data;
-	struct nvme_fc_local_port *localport;
-	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
-	void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr;
+
+	if (phba->cfg_xri_rebalancing) {
+		/* Multi-XRI pools handler */
+		lpfc_hb_mxp_handler(phba);
+	}
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
@@ -1288,107 +1385,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 		(phba->pport->fc_flag & FC_OFFLINE_MODE))
 		return;
 
-	if (phba->cfg_auto_imax) {
-		if (!phba->last_eqdelay_time) {
-			phba->last_eqdelay_time = jiffies;
-			goto skip_eqdelay;
-		}
-		time_elapsed = jiffies - phba->last_eqdelay_time;
-		phba->last_eqdelay_time = jiffies;
-
-		tot = 0xffff;
-		/* Check outstanding IO count */
-		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-			if (phba->nvmet_support) {
-				tgtp = phba->targetport->private;
-				/* Calculate outstanding IOs */
-				tot = atomic_read(&tgtp->rcv_fcp_cmd_drop);
-				tot += atomic_read(&tgtp->xmt_fcp_release);
-				tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot;
-			} else {
-				localport = phba->pport->localport;
-				if (!localport || !localport->private)
-					goto skip_eqdelay;
-				lport = (struct lpfc_nvme_lport *)
-					localport->private;
-				tot = 0;
-				for (i = 0;
-					i < phba->cfg_nvme_io_channel; i++) {
-					cstat = &lport->cstat[i];
-					data1 = atomic_read(
-						&cstat->fc4NvmeInputRequests);
-					data2 = atomic_read(
-						&cstat->fc4NvmeOutputRequests);
-					data3 = atomic_read(
-						&cstat->fc4NvmeControlRequests);
-					tot += (data1 + data2 + data3);
-					tot -= atomic_read(
-						&cstat->fc4NvmeIoCmpls);
-				}
-			}
-		}
-
-		/* Interrupts per sec per EQ */
-		val = phba->cfg_fcp_imax / phba->io_channel_irqs;
-		tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */
-
-		/* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */
-		max_cqe = time_elapsed * tick_cqe;
-
-		for (i = 0; i < phba->io_channel_irqs; i++) {
-			/* Fast-path EQ */
-			qp = phba->sli4_hba.hba_eq[i];
-			if (!qp)
-				continue;
-
-			/* Use no EQ delay if we don't have many outstanding
-			 * IOs, or if we are only processing 1 CQE/ISR or less.
-			 * Otherwise, assume we can process up to lpfc_fcp_imax
-			 * interrupts per HBA.
-			 */
-			if (tot < LPFC_NODELAY_MAX_IO ||
-			    qp->EQ_cqe_cnt <= max_cqe)
-				val = 0;
-			else
-				val = phba->cfg_fcp_imax;
-
-			if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
-				/* Use EQ Delay Register method */
-
-				/* Convert for EQ Delay register */
-				if (val) {
-					/* First, interrupts per sec per EQ */
-					val = phba->cfg_fcp_imax /
-						phba->io_channel_irqs;
-
-					/* us delay between each interrupt */
-					val = LPFC_SEC_TO_USEC / val;
-				}
-				if (val != qp->q_mode) {
-					reg_data.word0 = 0;
-					bf_set(lpfc_sliport_eqdelay_id,
-					       &reg_data, qp->queue_id);
-					bf_set(lpfc_sliport_eqdelay_delay,
-					       &reg_data, val);
-					writel(reg_data.word0, eqdreg);
-				}
-			} else {
-				/* Use mbox command method */
-				if (val != qp->q_mode)
-					lpfc_modify_hba_eq_delay(phba, i,
-								 1, val);
-			}
-
-			/*
-			 * val is cfg_fcp_imax or 0 for mbox delay or us delay
-			 * between interrupts for EQDR.
-			 */
-			qp->q_mode = val;
-			qp->EQ_cqe_cnt = 0;
-		}
-	}
-
-skip_eqdelay:
 	spin_lock_irq(&phba->pport->work_port_lock);
 
 	if (time_after(phba->last_completion_time +
@@ -2943,7 +2939,9 @@ lpfc_sli4_stop_fcf_redisc_wait_timer(struct lpfc_hba *phba)
 void
 lpfc_stop_hba_timers(struct lpfc_hba *phba)
 {
-	lpfc_stop_vport_timers(phba->pport);
+	if (phba->pport)
+		lpfc_stop_vport_timers(phba->pport);
+	cancel_delayed_work_sync(&phba->eq_delay_work);
 	del_timer_sync(&phba->sli.mbox_tmo);
 	del_timer_sync(&phba->fabric_block_timer);
 	del_timer_sync(&phba->eratt_poll);
@@ -3071,6 +3069,242 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_create_expedite_pool - create expedite pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine moves a batch of XRIs from lpfc_io_buf_list_put of HWQ 0
+ * to expedite pool. Mark them as expedite.
+ **/
+void lpfc_create_expedite_pool(struct lpfc_hba *phba)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	struct lpfc_epd_pool *epd_pool;
+	unsigned long iflag;
+
+	epd_pool = &phba->epd_pool;
+	qp = &phba->sli4_hba.hdwq[0];
+
+	spin_lock_init(&epd_pool->lock);
+	spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+	spin_lock(&epd_pool->lock);
+	INIT_LIST_HEAD(&epd_pool->list);
+	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+				 &qp->lpfc_io_buf_list_put, list) {
+		list_move_tail(&lpfc_ncmd->list, &epd_pool->list);
+		lpfc_ncmd->expedite = true;
+		qp->put_io_bufs--;
+		epd_pool->count++;
+		if (epd_pool->count >= XRI_BATCH)
+			break;
+	}
+	spin_unlock(&epd_pool->lock);
+	spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
+}
+
+/**
+ * lpfc_destroy_expedite_pool - destroy expedite pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine returns XRIs from expedite pool to lpfc_io_buf_list_put
+ * of HWQ 0. Clear the mark.
+ **/
+void lpfc_destroy_expedite_pool(struct lpfc_hba *phba)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	struct lpfc_epd_pool *epd_pool;
+	unsigned long iflag;
+
+	epd_pool = &phba->epd_pool;
+	qp = &phba->sli4_hba.hdwq[0];
+
+	spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+	spin_lock(&epd_pool->lock);
+	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+				 &epd_pool->list, list) {
+		list_move_tail(&lpfc_ncmd->list,
+			       &qp->lpfc_io_buf_list_put);
+		lpfc_ncmd->flags = false;
+		qp->put_io_bufs++;
+		epd_pool->count--;
+	}
+	spin_unlock(&epd_pool->lock);
+	spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
+}
+
+/**
+ * lpfc_create_multixri_pools - create multi-XRI pools
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine initialize public, private per HWQ. Then, move XRIs from
+ * lpfc_io_buf_list_put to public pool. High and low watermark are also
+ * Initialized.
+ **/
+void lpfc_create_multixri_pools(struct lpfc_hba *phba)
+{
+	u32 i, j;
+	u32 hwq_count;
+	u32 count_per_hwq;
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"1234 num_hdw_queue=%d num_present_cpu=%d common_xri_cnt=%d\n",
+			phba->cfg_hdw_queue, phba->sli4_hba.num_present_cpu,
+			phba->sli4_hba.io_xri_cnt);
+
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+		lpfc_create_expedite_pool(phba);
+
+	hwq_count = phba->cfg_hdw_queue;
+	count_per_hwq = phba->sli4_hba.io_xri_cnt / hwq_count;
+
+	for (i = 0; i < hwq_count; i++) {
+		multixri_pool = kzalloc(sizeof(*multixri_pool), GFP_KERNEL);
+
+		if (!multixri_pool) {
+			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+					"1238 Failed to allocate memory for "
+					"multixri_pool\n");
+
+			if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+				lpfc_destroy_expedite_pool(phba);
+
+			j = 0;
+			while (j < i) {
+				qp = &phba->sli4_hba.hdwq[j];
+				kfree(qp->p_multixri_pool);
+				j++;
+			}
+			phba->cfg_xri_rebalancing = 0;
+			return;
+		}
+
+		qp = &phba->sli4_hba.hdwq[i];
+		qp->p_multixri_pool = multixri_pool;
+
+		multixri_pool->xri_limit = count_per_hwq;
+		multixri_pool->rrb_next_hwqid = i;
+
+		/* Deal with public free xri pool */
+		pbl_pool = &multixri_pool->pbl_pool;
+		spin_lock_init(&pbl_pool->lock);
+		spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+		spin_lock(&pbl_pool->lock);
+		INIT_LIST_HEAD(&pbl_pool->list);
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &qp->lpfc_io_buf_list_put, list) {
+			list_move_tail(&lpfc_ncmd->list, &pbl_pool->list);
+			qp->put_io_bufs--;
+			pbl_pool->count++;
+		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"1235 Moved %d buffers from PUT list over to pbl_pool[%d]\n",
+				pbl_pool->count, i);
+		spin_unlock(&pbl_pool->lock);
+		spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
+
+		/* Deal with private free xri pool */
+		pvt_pool = &multixri_pool->pvt_pool;
+		pvt_pool->high_watermark = multixri_pool->xri_limit / 2;
+		pvt_pool->low_watermark = XRI_BATCH;
+		spin_lock_init(&pvt_pool->lock);
+		spin_lock_irqsave(&pvt_pool->lock, iflag);
+		INIT_LIST_HEAD(&pvt_pool->list);
+		pvt_pool->count = 0;
+		spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+	}
+}
+
+/**
+ * lpfc_destroy_multixri_pools - destroy multi-XRI pools
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine returns XRIs from public/private to lpfc_io_buf_list_put.
+ **/
+void lpfc_destroy_multixri_pools(struct lpfc_hba *phba)
+{
+	u32 i;
+	u32 hwq_count;
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+		lpfc_destroy_expedite_pool(phba);
+
+	hwq_count = phba->cfg_hdw_queue;
+
+	for (i = 0; i < hwq_count; i++) {
+		qp = &phba->sli4_hba.hdwq[i];
+		multixri_pool = qp->p_multixri_pool;
+		if (!multixri_pool)
+			continue;
+
+		qp->p_multixri_pool = NULL;
+
+		spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+
+		/* Deal with public free xri pool */
+		pbl_pool = &multixri_pool->pbl_pool;
+		spin_lock(&pbl_pool->lock);
+
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"1236 Moving %d buffers from pbl_pool[%d] TO PUT list\n",
+				pbl_pool->count, i);
+
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &pbl_pool->list, list) {
+			list_move_tail(&lpfc_ncmd->list,
+				       &qp->lpfc_io_buf_list_put);
+			qp->put_io_bufs++;
+			pbl_pool->count--;
+		}
+
+		INIT_LIST_HEAD(&pbl_pool->list);
+		pbl_pool->count = 0;
+
+		spin_unlock(&pbl_pool->lock);
+
+		/* Deal with private free xri pool */
+		pvt_pool = &multixri_pool->pvt_pool;
+		spin_lock(&pvt_pool->lock);
+
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"1237 Moving %d buffers from pvt_pool[%d] TO PUT list\n",
+				pvt_pool->count, i);
+
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &pvt_pool->list, list) {
+			list_move_tail(&lpfc_ncmd->list,
+				       &qp->lpfc_io_buf_list_put);
+			qp->put_io_bufs++;
+			pvt_pool->count--;
+		}
+
+		INIT_LIST_HEAD(&pvt_pool->list);
+		pvt_pool->count = 0;
+
+		spin_unlock(&pvt_pool->lock);
+		spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
+
+		kfree(multixri_pool);
+	}
+}
+
+/**
  * lpfc_online - Initialize and bring a HBA online
  * @phba: pointer to lpfc hba data structure.
  *
@@ -3152,6 +3386,9 @@ lpfc_online(struct lpfc_hba *phba)
 	}
 	lpfc_destroy_vport_work_array(phba, vports);
 
+	if (phba->cfg_xri_rebalancing)
+		lpfc_create_multixri_pools(phba);
+
 	lpfc_unblock_mgmt_io(phba);
 	return 0;
 }
@@ -3310,6 +3547,9 @@ lpfc_offline(struct lpfc_hba *phba)
 			spin_unlock_irq(shost->host_lock);
 		}
 	lpfc_destroy_vport_work_array(phba, vports);
+
+	if (phba->cfg_xri_rebalancing)
+		lpfc_destroy_multixri_pools(phba);
 }
 
 /**
@@ -3323,7 +3563,7 @@ lpfc_offline(struct lpfc_hba *phba)
 static void
 lpfc_scsi_free(struct lpfc_hba *phba)
 {
-	struct lpfc_scsi_buf *sb, *sb_next;
+	struct lpfc_io_buf *sb, *sb_next;
 
 	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
 		return;
@@ -3355,50 +3595,57 @@ lpfc_scsi_free(struct lpfc_hba *phba)
 	spin_unlock(&phba->scsi_buf_list_get_lock);
 	spin_unlock_irq(&phba->hbalock);
 }
+
 /**
- * lpfc_nvme_free - Free all the NVME buffers and IOCBs from driver lists
+ * lpfc_io_free - Free all the IO buffers and IOCBs from driver lists
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is to free all the NVME buffers and IOCBs from the driver
+ * This routine is to free all the IO buffers and IOCBs from the driver
  * list back to kernel. It is called from lpfc_pci_remove_one to free
  * the internal resources before the device is removed from the system.
  **/
-static void
-lpfc_nvme_free(struct lpfc_hba *phba)
+void
+lpfc_io_free(struct lpfc_hba *phba)
 {
-	struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next;
-
-	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
-		return;
+	struct lpfc_io_buf *lpfc_ncmd, *lpfc_ncmd_next;
+	struct lpfc_sli4_hdw_queue *qp;
+	int idx;
 
 	spin_lock_irq(&phba->hbalock);
 
-	/* Release all the lpfc_nvme_bufs maintained by this host. */
-	spin_lock(&phba->nvme_buf_list_put_lock);
-	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
-				 &phba->lpfc_nvme_buf_list_put, list) {
-		list_del(&lpfc_ncmd->list);
-		phba->put_nvme_bufs--;
-		dma_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data,
-			      lpfc_ncmd->dma_handle);
-		kfree(lpfc_ncmd);
-		phba->total_nvme_bufs--;
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		qp = &phba->sli4_hba.hdwq[idx];
+		/* Release all the lpfc_nvme_bufs maintained by this host. */
+		spin_lock(&qp->io_buf_list_put_lock);
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &qp->lpfc_io_buf_list_put,
+					 list) {
+			list_del(&lpfc_ncmd->list);
+			qp->put_io_bufs--;
+			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+				      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+			kfree(lpfc_ncmd);
+			qp->total_io_bufs--;
+		}
+		spin_unlock(&qp->io_buf_list_put_lock);
+
+		spin_lock(&qp->io_buf_list_get_lock);
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &qp->lpfc_io_buf_list_get,
+					 list) {
+			list_del(&lpfc_ncmd->list);
+			qp->get_io_bufs--;
+			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+				      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+			kfree(lpfc_ncmd);
+			qp->total_io_bufs--;
+		}
+		spin_unlock(&qp->io_buf_list_get_lock);
 	}
-	spin_unlock(&phba->nvme_buf_list_put_lock);
 
-	spin_lock(&phba->nvme_buf_list_get_lock);
-	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
-				 &phba->lpfc_nvme_buf_list_get, list) {
-		list_del(&lpfc_ncmd->list);
-		phba->get_nvme_bufs--;
-		dma_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data,
-			      lpfc_ncmd->dma_handle);
-		kfree(lpfc_ncmd);
-		phba->total_nvme_bufs--;
-	}
-	spin_unlock(&phba->nvme_buf_list_get_lock);
 	spin_unlock_irq(&phba->hbalock);
 }
+
 /**
  * lpfc_sli4_els_sgl_update - update ELS xri-sgl sizing and mapping
  * @phba: pointer to lpfc hba data structure.
@@ -3640,8 +3887,102 @@ out_free_mem:
 	return rc;
 }
 
+int
+lpfc_io_buf_flush(struct lpfc_hba *phba, struct list_head *cbuf)
+{
+	LIST_HEAD(blist);
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_io_buf *lpfc_cmd;
+	struct lpfc_io_buf *iobufp, *prev_iobufp;
+	int idx, cnt, xri, inserted;
+
+	cnt = 0;
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		qp = &phba->sli4_hba.hdwq[idx];
+		spin_lock_irq(&qp->io_buf_list_get_lock);
+		spin_lock(&qp->io_buf_list_put_lock);
+
+		/* Take everything off the get and put lists */
+		list_splice_init(&qp->lpfc_io_buf_list_get, &blist);
+		list_splice(&qp->lpfc_io_buf_list_put, &blist);
+		INIT_LIST_HEAD(&qp->lpfc_io_buf_list_get);
+		INIT_LIST_HEAD(&qp->lpfc_io_buf_list_put);
+		cnt += qp->get_io_bufs + qp->put_io_bufs;
+		qp->get_io_bufs = 0;
+		qp->put_io_bufs = 0;
+		qp->total_io_bufs = 0;
+		spin_unlock(&qp->io_buf_list_put_lock);
+		spin_unlock_irq(&qp->io_buf_list_get_lock);
+	}
+
+	/*
+	 * Take IO buffers off blist and put on cbuf sorted by XRI.
+	 * This is because POST_SGL takes a sequential range of XRIs
+	 * to post to the firmware.
+	 */
+	for (idx = 0; idx < cnt; idx++) {
+		list_remove_head(&blist, lpfc_cmd, struct lpfc_io_buf, list);
+		if (!lpfc_cmd)
+			return cnt;
+		if (idx == 0) {
+			list_add_tail(&lpfc_cmd->list, cbuf);
+			continue;
+		}
+		xri = lpfc_cmd->cur_iocbq.sli4_xritag;
+		inserted = 0;
+		prev_iobufp = NULL;
+		list_for_each_entry(iobufp, cbuf, list) {
+			if (xri < iobufp->cur_iocbq.sli4_xritag) {
+				if (prev_iobufp)
+					list_add(&lpfc_cmd->list,
+						 &prev_iobufp->list);
+				else
+					list_add(&lpfc_cmd->list, cbuf);
+				inserted = 1;
+				break;
+			}
+			prev_iobufp = iobufp;
+		}
+		if (!inserted)
+			list_add_tail(&lpfc_cmd->list, cbuf);
+	}
+	return cnt;
+}
+
+int
+lpfc_io_buf_replenish(struct lpfc_hba *phba, struct list_head *cbuf)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_io_buf *lpfc_cmd;
+	int idx, cnt;
+
+	qp = phba->sli4_hba.hdwq;
+	cnt = 0;
+	while (!list_empty(cbuf)) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			list_remove_head(cbuf, lpfc_cmd,
+					 struct lpfc_io_buf, list);
+			if (!lpfc_cmd)
+				return cnt;
+			cnt++;
+			qp = &phba->sli4_hba.hdwq[idx];
+			lpfc_cmd->hdwq_no = idx;
+			lpfc_cmd->hdwq = qp;
+			lpfc_cmd->cur_iocbq.wqe_cmpl = NULL;
+			lpfc_cmd->cur_iocbq.iocb_cmpl = NULL;
+			spin_lock(&qp->io_buf_list_put_lock);
+			list_add_tail(&lpfc_cmd->list,
+				      &qp->lpfc_io_buf_list_put);
+			qp->put_io_bufs++;
+			qp->total_io_bufs++;
+			spin_unlock(&qp->io_buf_list_put_lock);
+		}
+	}
+	return cnt;
+}
+
 /**
- * lpfc_sli4_scsi_sgl_update - update xri-sgl sizing and mapping
+ * lpfc_sli4_io_sgl_update - update xri-sgl sizing and mapping
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine first calculates the sizes of the current els and allocated
@@ -3653,94 +3994,192 @@ out_free_mem:
  *   0 - successful (for now, it always returns 0)
  **/
 int
-lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba)
+lpfc_sli4_io_sgl_update(struct lpfc_hba *phba)
 {
-	struct lpfc_scsi_buf *psb, *psb_next;
-	uint16_t i, lxri, els_xri_cnt, scsi_xri_cnt;
-	LIST_HEAD(scsi_sgl_list);
-	int rc;
-
-	/*
-	 * update on pci function's els xri-sgl list
-	 */
-	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
-	phba->total_scsi_bufs = 0;
+	struct lpfc_io_buf *lpfc_ncmd = NULL, *lpfc_ncmd_next = NULL;
+	uint16_t i, lxri, els_xri_cnt;
+	uint16_t io_xri_cnt, io_xri_max;
+	LIST_HEAD(io_sgl_list);
+	int rc, cnt;
 
 	/*
-	 * update on pci function's allocated scsi xri-sgl list
+	 * update on pci function's allocated nvme xri-sgl list
 	 */
-	/* maximum number of xris available for scsi buffers */
-	phba->sli4_hba.scsi_xri_max = phba->sli4_hba.max_cfg_param.max_xri -
-				      els_xri_cnt;
 
-	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
-		return 0;
+	/* maximum number of xris available for nvme buffers */
+	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
+	io_xri_max = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
+	phba->sli4_hba.io_xri_max = io_xri_max;
 
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
-		phba->sli4_hba.scsi_xri_max =  /* Split them up */
-			(phba->sli4_hba.scsi_xri_max *
-			 phba->cfg_xri_split) / 100;
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"6074 Current allocated XRI sgl count:%d, "
+			"maximum XRI count:%d\n",
+			phba->sli4_hba.io_xri_cnt,
+			phba->sli4_hba.io_xri_max);
 
-	spin_lock_irq(&phba->scsi_buf_list_get_lock);
-	spin_lock(&phba->scsi_buf_list_put_lock);
-	list_splice_init(&phba->lpfc_scsi_buf_list_get, &scsi_sgl_list);
-	list_splice(&phba->lpfc_scsi_buf_list_put, &scsi_sgl_list);
-	spin_unlock(&phba->scsi_buf_list_put_lock);
-	spin_unlock_irq(&phba->scsi_buf_list_get_lock);
+	cnt = lpfc_io_buf_flush(phba, &io_sgl_list);
 
-	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-			"6060 Current allocated SCSI xri-sgl count:%d, "
-			"maximum  SCSI xri count:%d (split:%d)\n",
-			phba->sli4_hba.scsi_xri_cnt,
-			phba->sli4_hba.scsi_xri_max, phba->cfg_xri_split);
-
-	if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) {
-		/* max scsi xri shrinked below the allocated scsi buffers */
-		scsi_xri_cnt = phba->sli4_hba.scsi_xri_cnt -
-					phba->sli4_hba.scsi_xri_max;
-		/* release the extra allocated scsi buffers */
-		for (i = 0; i < scsi_xri_cnt; i++) {
-			list_remove_head(&scsi_sgl_list, psb,
-					 struct lpfc_scsi_buf, list);
-			if (psb) {
+	if (phba->sli4_hba.io_xri_cnt > phba->sli4_hba.io_xri_max) {
+		/* max nvme xri shrunk below the allocated nvme buffers */
+		io_xri_cnt = phba->sli4_hba.io_xri_cnt -
+					phba->sli4_hba.io_xri_max;
+		/* release the extra allocated nvme buffers */
+		for (i = 0; i < io_xri_cnt; i++) {
+			list_remove_head(&io_sgl_list, lpfc_ncmd,
+					 struct lpfc_io_buf, list);
+			if (lpfc_ncmd) {
 				dma_pool_free(phba->lpfc_sg_dma_buf_pool,
-					      psb->data, psb->dma_handle);
-				kfree(psb);
+					      lpfc_ncmd->data,
+					      lpfc_ncmd->dma_handle);
+				kfree(lpfc_ncmd);
 			}
 		}
-		spin_lock_irq(&phba->scsi_buf_list_get_lock);
-		phba->sli4_hba.scsi_xri_cnt -= scsi_xri_cnt;
-		spin_unlock_irq(&phba->scsi_buf_list_get_lock);
+		phba->sli4_hba.io_xri_cnt -= io_xri_cnt;
 	}
 
-	/* update xris associated to remaining allocated scsi buffers */
-	psb = NULL;
-	psb_next = NULL;
-	list_for_each_entry_safe(psb, psb_next, &scsi_sgl_list, list) {
+	/* update xris associated to remaining allocated nvme buffers */
+	lpfc_ncmd = NULL;
+	lpfc_ncmd_next = NULL;
+	phba->sli4_hba.io_xri_cnt = cnt;
+	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+				 &io_sgl_list, list) {
 		lxri = lpfc_sli4_next_xritag(phba);
 		if (lxri == NO_XRI) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-					"2560 Failed to allocate xri for "
-					"scsi buffer\n");
+					"6075 Failed to allocate xri for "
+					"nvme buffer\n");
 			rc = -ENOMEM;
 			goto out_free_mem;
 		}
-		psb->cur_iocbq.sli4_lxritag = lxri;
-		psb->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
+		lpfc_ncmd->cur_iocbq.sli4_lxritag = lxri;
+		lpfc_ncmd->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
 	}
-	spin_lock_irq(&phba->scsi_buf_list_get_lock);
-	spin_lock(&phba->scsi_buf_list_put_lock);
-	list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list_get);
-	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
-	spin_unlock(&phba->scsi_buf_list_put_lock);
-	spin_unlock_irq(&phba->scsi_buf_list_get_lock);
+	cnt = lpfc_io_buf_replenish(phba, &io_sgl_list);
 	return 0;
 
 out_free_mem:
-	lpfc_scsi_free(phba);
+	lpfc_io_free(phba);
 	return rc;
 }
 
+/**
+ * lpfc_new_io_buf - IO buffer allocator for HBA with SLI4 IF spec
+ * @vport: The virtual port for which this call being executed.
+ * @num_to_allocate: The requested number of buffers to allocate.
+ *
+ * This routine allocates nvme buffers for device with SLI-4 interface spec,
+ * the nvme buffer contains all the necessary information needed to initiate
+ * an I/O. After allocating up to @num_to_allocate IO buffers and put
+ * them on a list, it post them to the port by using SGL block post.
+ *
+ * Return codes:
+ *   int - number of IO buffers that were allocated and posted.
+ *   0 = failure, less than num_to_alloc is a partial failure.
+ **/
+int
+lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc)
+{
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_iocbq *pwqeq;
+	uint16_t iotag, lxri = 0;
+	int bcnt, num_posted;
+	LIST_HEAD(prep_nblist);
+	LIST_HEAD(post_nblist);
+	LIST_HEAD(nvme_nblist);
+
+	/* Sanity check to ensure our sizing is right for both SCSI and NVME */
+	if (sizeof(struct lpfc_io_buf) > LPFC_COMMON_IO_BUF_SZ) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+				"6426 Common buffer size %ld exceeds %d\n",
+				sizeof(struct lpfc_io_buf),
+				LPFC_COMMON_IO_BUF_SZ);
+		return 0;
+	}
+
+	phba->sli4_hba.io_xri_cnt = 0;
+	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
+		lpfc_ncmd = kzalloc(LPFC_COMMON_IO_BUF_SZ, GFP_KERNEL);
+		if (!lpfc_ncmd)
+			break;
+		/*
+		 * Get memory from the pci pool to map the virt space to
+		 * pci bus space for an I/O. The DMA buffer includes the
+		 * number of SGE's necessary to support the sg_tablesize.
+		 */
+		lpfc_ncmd->data = dma_pool_alloc(phba->lpfc_sg_dma_buf_pool,
+				GFP_KERNEL,
+				&lpfc_ncmd->dma_handle);
+		if (!lpfc_ncmd->data) {
+			kfree(lpfc_ncmd);
+			break;
+		}
+		memset(lpfc_ncmd->data, 0, phba->cfg_sg_dma_buf_size);
+
+		/*
+		 * 4K Page alignment is CRITICAL to BlockGuard, double check
+		 * to be sure.
+		 */
+		if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) &&
+		    (((unsigned long)(lpfc_ncmd->data) &
+		    (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+					"3369 Memory alignment err: addr=%lx\n",
+					(unsigned long)lpfc_ncmd->data);
+			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+				      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+			kfree(lpfc_ncmd);
+			break;
+		}
+
+		lxri = lpfc_sli4_next_xritag(phba);
+		if (lxri == NO_XRI) {
+			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+				      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+			kfree(lpfc_ncmd);
+			break;
+		}
+		pwqeq = &lpfc_ncmd->cur_iocbq;
+
+		/* Allocate iotag for lpfc_ncmd->cur_iocbq. */
+		iotag = lpfc_sli_next_iotag(phba, pwqeq);
+		if (iotag == 0) {
+			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+				      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+			kfree(lpfc_ncmd);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+					"6121 Failed to allocate IOTAG for"
+					" XRI:0x%x\n", lxri);
+			lpfc_sli4_free_xri(phba, lxri);
+			break;
+		}
+		pwqeq->sli4_lxritag = lxri;
+		pwqeq->sli4_xritag = phba->sli4_hba.xri_ids[lxri];
+		pwqeq->context1 = lpfc_ncmd;
+
+		/* Initialize local short-hand pointers. */
+		lpfc_ncmd->dma_sgl = lpfc_ncmd->data;
+		lpfc_ncmd->dma_phys_sgl = lpfc_ncmd->dma_handle;
+		lpfc_ncmd->cur_iocbq.context1 = lpfc_ncmd;
+		spin_lock_init(&lpfc_ncmd->buf_lock);
+
+		/* add the nvme buffer to a post list */
+		list_add_tail(&lpfc_ncmd->list, &post_nblist);
+		phba->sli4_hba.io_xri_cnt++;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
+			"6114 Allocate %d out of %d requested new NVME "
+			"buffers\n", bcnt, num_to_alloc);
+
+	/* post the list of nvme buffer sgls to port if available */
+	if (!list_empty(&post_nblist))
+		num_posted = lpfc_sli4_post_io_sgl_list(
+				phba, &post_nblist, bcnt);
+	else
+		num_posted = 0;
+
+	return num_posted;
+}
+
 static uint64_t
 lpfc_get_wwpn(struct lpfc_hba *phba)
 {
@@ -3777,111 +4216,6 @@ lpfc_get_wwpn(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine first calculates the sizes of the current els and allocated
- * scsi sgl lists, and then goes through all sgls to updates the physical
- * XRIs assigned due to port function reset. During port initialization, the
- * current els and allocated scsi sgl lists are 0s.
- *
- * Return codes
- *   0 - successful (for now, it always returns 0)
- **/
-int
-lpfc_sli4_nvme_sgl_update(struct lpfc_hba *phba)
-{
-	struct lpfc_nvme_buf *lpfc_ncmd = NULL, *lpfc_ncmd_next = NULL;
-	uint16_t i, lxri, els_xri_cnt;
-	uint16_t nvme_xri_cnt, nvme_xri_max;
-	LIST_HEAD(nvme_sgl_list);
-	int rc, cnt;
-
-	phba->total_nvme_bufs = 0;
-	phba->get_nvme_bufs = 0;
-	phba->put_nvme_bufs = 0;
-
-	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
-		return 0;
-	/*
-	 * update on pci function's allocated nvme xri-sgl list
-	 */
-
-	/* maximum number of xris available for nvme buffers */
-	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
-	nvme_xri_max = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
-	phba->sli4_hba.nvme_xri_max = nvme_xri_max;
-	phba->sli4_hba.nvme_xri_max -= phba->sli4_hba.scsi_xri_max;
-
-	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-			"6074 Current allocated NVME xri-sgl count:%d, "
-			"maximum  NVME xri count:%d\n",
-			phba->sli4_hba.nvme_xri_cnt,
-			phba->sli4_hba.nvme_xri_max);
-
-	spin_lock_irq(&phba->nvme_buf_list_get_lock);
-	spin_lock(&phba->nvme_buf_list_put_lock);
-	list_splice_init(&phba->lpfc_nvme_buf_list_get, &nvme_sgl_list);
-	list_splice(&phba->lpfc_nvme_buf_list_put, &nvme_sgl_list);
-	cnt = phba->get_nvme_bufs + phba->put_nvme_bufs;
-	phba->get_nvme_bufs = 0;
-	phba->put_nvme_bufs = 0;
-	spin_unlock(&phba->nvme_buf_list_put_lock);
-	spin_unlock_irq(&phba->nvme_buf_list_get_lock);
-
-	if (phba->sli4_hba.nvme_xri_cnt > phba->sli4_hba.nvme_xri_max) {
-		/* max nvme xri shrunk below the allocated nvme buffers */
-		spin_lock_irq(&phba->nvme_buf_list_get_lock);
-		nvme_xri_cnt = phba->sli4_hba.nvme_xri_cnt -
-					phba->sli4_hba.nvme_xri_max;
-		spin_unlock_irq(&phba->nvme_buf_list_get_lock);
-		/* release the extra allocated nvme buffers */
-		for (i = 0; i < nvme_xri_cnt; i++) {
-			list_remove_head(&nvme_sgl_list, lpfc_ncmd,
-					 struct lpfc_nvme_buf, list);
-			if (lpfc_ncmd) {
-				dma_pool_free(phba->lpfc_sg_dma_buf_pool,
-					      lpfc_ncmd->data,
-					      lpfc_ncmd->dma_handle);
-				kfree(lpfc_ncmd);
-			}
-		}
-		spin_lock_irq(&phba->nvme_buf_list_get_lock);
-		phba->sli4_hba.nvme_xri_cnt -= nvme_xri_cnt;
-		spin_unlock_irq(&phba->nvme_buf_list_get_lock);
-	}
-
-	/* update xris associated to remaining allocated nvme buffers */
-	lpfc_ncmd = NULL;
-	lpfc_ncmd_next = NULL;
-	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
-				 &nvme_sgl_list, list) {
-		lxri = lpfc_sli4_next_xritag(phba);
-		if (lxri == NO_XRI) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-					"6075 Failed to allocate xri for "
-					"nvme buffer\n");
-			rc = -ENOMEM;
-			goto out_free_mem;
-		}
-		lpfc_ncmd->cur_iocbq.sli4_lxritag = lxri;
-		lpfc_ncmd->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
-	}
-	spin_lock_irq(&phba->nvme_buf_list_get_lock);
-	spin_lock(&phba->nvme_buf_list_put_lock);
-	list_splice_init(&nvme_sgl_list, &phba->lpfc_nvme_buf_list_get);
-	phba->get_nvme_bufs = cnt;
-	INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put);
-	spin_unlock(&phba->nvme_buf_list_put_lock);
-	spin_unlock_irq(&phba->nvme_buf_list_get_lock);
-	return 0;
-
-out_free_mem:
-	lpfc_nvme_free(phba);
-	return rc;
-}
-
-/**
  * lpfc_create_port - Create an FC port
  * @phba: pointer to lpfc hba data structure.
  * @instance: a unique integer ID to this FC port.
@@ -3956,17 +4290,29 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 	vport->fc_rscn_flush = 0;
 	lpfc_get_vport_cfgparam(vport);
 
+	/* Adjust value in vport */
+	vport->cfg_enable_fc4_type = phba->cfg_enable_fc4_type;
+
 	shost->unique_id = instance;
 	shost->max_id = LPFC_MAX_TARGET;
 	shost->max_lun = vport->cfg_max_luns;
 	shost->this_id = -1;
 	shost->max_cmd_len = 16;
-	shost->nr_hw_queues = phba->cfg_fcp_io_channel;
+
 	if (phba->sli_rev == LPFC_SLI_REV4) {
+		if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)
+			shost->nr_hw_queues = phba->cfg_hdw_queue;
+		else
+			shost->nr_hw_queues = phba->sli4_hba.num_present_cpu;
+
 		shost->dma_boundary =
 			phba->sli4_hba.pc_sli4_params.sge_supp_len-1;
 		shost->sg_tablesize = phba->cfg_scsi_seg_cnt;
-	}
+	} else
+		/* SLI-3 has a limited number of hardware queues (3),
+		 * thus there is only one for FCP processing.
+		 */
+		shost->nr_hw_queues = 1;
 
 	/*
 	 * Set initial can_queue value since 0 is no longer supported and
@@ -4220,7 +4566,8 @@ lpfc_stop_port_s4(struct lpfc_hba *phba)
 {
 	/* Reset some HBA SLI4 setup states */
 	lpfc_stop_hba_timers(phba);
-	phba->pport->work_port_events = 0;
+	if (phba->pport)
+		phba->pport->work_port_events = 0;
 	phba->sli4_hba.intr_enable = 0;
 }
 
@@ -5819,24 +6166,11 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
 				"NVME" : " "),
 			(phba->nvmet_support ? "NVMET" : " "));
 
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
-		/* Initialize the scsi buffer list used by driver for scsi IO */
-		spin_lock_init(&phba->scsi_buf_list_get_lock);
-		INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get);
-		spin_lock_init(&phba->scsi_buf_list_put_lock);
-		INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
-	}
-
-	if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
-		(phba->nvmet_support == 0)) {
-		/* Initialize the NVME buffer list used by driver for NVME IO */
-		spin_lock_init(&phba->nvme_buf_list_get_lock);
-		INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_get);
-		phba->get_nvme_bufs = 0;
-		spin_lock_init(&phba->nvme_buf_list_put_lock);
-		INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put);
-		phba->put_nvme_bufs = 0;
-	}
+	/* Initialize the IO buffer list used by driver for SLI3 SCSI */
+	spin_lock_init(&phba->scsi_buf_list_get_lock);
+	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get);
+	spin_lock_init(&phba->scsi_buf_list_put_lock);
+	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
 
 	/* Initialize the fabric iocb list */
 	INIT_LIST_HEAD(&phba->fabric_iocb_list);
@@ -5860,6 +6194,8 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
 	/* Heartbeat timer */
 	timer_setup(&phba->hb_tmofunc, lpfc_hb_timeout, 0);
 
+	INIT_DELAYED_WORK(&phba->eq_delay_work, lpfc_hb_eq_delay_work);
+
 	return 0;
 }
 
@@ -5877,7 +6213,7 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
 static int
 lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
 {
-	int rc;
+	int rc, entry_sz;
 
 	/*
 	 * Initialize timers used by driver
@@ -5922,6 +6258,11 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
 	lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
 	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
 
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		entry_sz = sizeof(struct sli4_sge);
+	else
+		entry_sz = sizeof(struct ulp_bde64);
+
 	/* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */
 	if (phba->cfg_enable_bg) {
 		/*
@@ -5935,7 +6276,7 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
 		 */
 		phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
 			sizeof(struct fcp_rsp) +
-			(LPFC_MAX_SG_SEG_CNT * sizeof(struct ulp_bde64));
+			(LPFC_MAX_SG_SEG_CNT * entry_sz);
 
 		if (phba->cfg_sg_seg_cnt > LPFC_MAX_SG_SEG_CNT_DIF)
 			phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SEG_CNT_DIF;
@@ -5950,7 +6291,7 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
 		 */
 		phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
 			sizeof(struct fcp_rsp) +
-			((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
+			((phba->cfg_sg_seg_cnt + 2) * entry_sz);
 
 		/* Total BDEs in BPL for scsi_sg_list */
 		phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + 2;
@@ -6031,14 +6372,13 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	uint8_t pn_page[LPFC_MAX_SUPPORTED_PAGES] = {0};
 	struct lpfc_mqe *mqe;
 	int longs;
-	int fof_vectors = 0;
 	int extra;
 	uint64_t wwn;
 	u32 if_type;
 	u32 if_fam;
 
-	phba->sli4_hba.num_online_cpu = num_online_cpus();
 	phba->sli4_hba.num_present_cpu = lpfc_present_cpu;
+	phba->sli4_hba.num_possible_cpu = num_possible_cpus();
 	phba->sli4_hba.curr_disp_cpu = 0;
 
 	/* Get all the module params for configuring this host */
@@ -6200,8 +6540,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 
 	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
 		/* Initialize the Abort nvme buffer list used by driver */
-		spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
-		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+		spin_lock_init(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list);
 	}
@@ -6337,6 +6676,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 						" NVME_TARGET_FC infrastructure"
 						" is not in kernel\n");
 #endif
+				/* Not supported for NVMET */
+				phba->cfg_xri_rebalancing = 0;
 				break;
 			}
 		}
@@ -6405,8 +6746,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 
 	/* Verify OAS is supported */
 	lpfc_sli4_oas_verify(phba);
-	if (phba->cfg_fof)
-		fof_vectors = 1;
 
 	/* Verify RAS support on adapter */
 	lpfc_sli4_ras_init(phba);
@@ -6450,9 +6789,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		goto out_remove_rpi_hdrs;
 	}
 
-	phba->sli4_hba.hba_eq_hdl = kcalloc(fof_vectors + phba->io_channel_irqs,
-						sizeof(struct lpfc_hba_eq_hdl),
-						GFP_KERNEL);
+	phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_irq_chann,
+					    sizeof(struct lpfc_hba_eq_hdl),
+					    GFP_KERNEL);
 	if (!phba->sli4_hba.hba_eq_hdl) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"2572 Failed allocate memory for "
@@ -6461,7 +6800,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		goto out_free_fcf_rr_bmask;
 	}
 
-	phba->sli4_hba.cpu_map = kcalloc(phba->sli4_hba.num_present_cpu,
+	phba->sli4_hba.cpu_map = kcalloc(phba->sli4_hba.num_possible_cpu,
 					sizeof(struct lpfc_vector_map_info),
 					GFP_KERNEL);
 	if (!phba->sli4_hba.cpu_map) {
@@ -6471,21 +6810,14 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		rc = -ENOMEM;
 		goto out_free_hba_eq_hdl;
 	}
-	if (lpfc_used_cpu == NULL) {
-		lpfc_used_cpu = kcalloc(lpfc_present_cpu, sizeof(uint16_t),
-						GFP_KERNEL);
-		if (!lpfc_used_cpu) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"3335 Failed allocate memory for msi-x "
-					"interrupt vector mapping\n");
-			kfree(phba->sli4_hba.cpu_map);
-			rc = -ENOMEM;
-			goto out_free_hba_eq_hdl;
-		}
-		for (i = 0; i < lpfc_present_cpu; i++)
-			lpfc_used_cpu[i] = LPFC_VECTOR_MAP_EMPTY;
-	}
 
+	phba->sli4_hba.eq_info = alloc_percpu(struct lpfc_eq_intr_info);
+	if (!phba->sli4_hba.eq_info) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"3321 Failed allocation for per_cpu stats\n");
+		rc = -ENOMEM;
+		goto out_free_hba_cpu_map;
+	}
 	/*
 	 * Enable sr-iov virtual functions if supported and configured
 	 * through the module parameter.
@@ -6505,6 +6837,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 
 	return 0;
 
+out_free_hba_cpu_map:
+	kfree(phba->sli4_hba.cpu_map);
 out_free_hba_eq_hdl:
 	kfree(phba->sli4_hba.hba_eq_hdl);
 out_free_fcf_rr_bmask:
@@ -6534,10 +6868,12 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
 {
 	struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
 
+	free_percpu(phba->sli4_hba.eq_info);
+
 	/* Free memory allocated for msi-x interrupt vector to CPU mapping */
 	kfree(phba->sli4_hba.cpu_map);
+	phba->sli4_hba.num_possible_cpu = 0;
 	phba->sli4_hba.num_present_cpu = 0;
-	phba->sli4_hba.num_online_cpu = 0;
 	phba->sli4_hba.curr_disp_cpu = 0;
 
 	/* Free memory allocated for fast-path work queue handles */
@@ -6875,11 +7211,8 @@ lpfc_init_sgl_list(struct lpfc_hba *phba)
 	/* els xri-sgl book keeping */
 	phba->sli4_hba.els_xri_cnt = 0;
 
-	/* scsi xri-buffer book keeping */
-	phba->sli4_hba.scsi_xri_cnt = 0;
-
 	/* nvme xri-buffer book keeping */
-	phba->sli4_hba.nvme_xri_cnt = 0;
+	phba->sli4_hba.io_xri_cnt = 0;
 }
 
 /**
@@ -7093,6 +7426,9 @@ lpfc_hba_alloc(struct pci_dev *pdev)
 static void
 lpfc_hba_free(struct lpfc_hba *phba)
 {
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		kfree(phba->sli4_hba.hdwq);
+
 	/* Release the driver assigned board number */
 	idr_remove(&lpfc_hba_index, phba->brd_no);
 
@@ -7128,10 +7464,6 @@ lpfc_create_shost(struct lpfc_hba *phba)
 	phba->fc_arbtov = FF_DEF_ARBTOV;
 
 	atomic_set(&phba->sdev_cnt, 0);
-	atomic_set(&phba->fc4ScsiInputRequests, 0);
-	atomic_set(&phba->fc4ScsiOutputRequests, 0);
-	atomic_set(&phba->fc4ScsiControlRequests, 0);
-	atomic_set(&phba->fc4ScsiIoCmpls, 0);
 	vport = lpfc_create_port(phba, phba->brd_no, &phba->pcidev->dev);
 	if (!vport)
 		return -ENODEV;
@@ -7361,15 +7693,18 @@ lpfc_sli_pci_mem_setup(struct lpfc_hba *phba)
 	unsigned long bar0map_len, bar2map_len;
 	int i, hbq_count;
 	void *ptr;
-	int error = -ENODEV;
+	int error;
 
 	if (!pdev)
-		return error;
+		return -ENODEV;
 
 	/* Set the device DMA mask size */
-	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) ||
-	    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+	error = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (error)
+		error = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (error)
 		return error;
+	error = -ENODEV;
 
 	/* Get the bus address of Bar0 and Bar2 and the number of bytes
 	 * required by each mapping.
@@ -7906,7 +8241,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
 	struct lpfc_rsrc_desc_fcfcoe *desc;
 	char *pdesc_0;
 	uint16_t forced_link_speed;
-	uint32_t if_type;
+	uint32_t if_type, qmin;
 	int length, i, rc = 0, rc2;
 
 	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -8011,38 +8346,44 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
 				phba->sli4_hba.max_cfg_param.max_rq);
 
 		/*
-		 * Calculate NVME queue resources based on how
-		 * many WQ/CQs are available.
+		 * Calculate queue resources based on how
+		 * many WQ/CQ/EQs are available.
 		 */
-		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-			length = phba->sli4_hba.max_cfg_param.max_wq;
-			if (phba->sli4_hba.max_cfg_param.max_cq <
-			    phba->sli4_hba.max_cfg_param.max_wq)
-				length = phba->sli4_hba.max_cfg_param.max_cq;
+		qmin = phba->sli4_hba.max_cfg_param.max_wq;
+		if (phba->sli4_hba.max_cfg_param.max_cq < qmin)
+			qmin = phba->sli4_hba.max_cfg_param.max_cq;
+		if (phba->sli4_hba.max_cfg_param.max_eq < qmin)
+			qmin = phba->sli4_hba.max_cfg_param.max_eq;
+		/*
+		 * Whats left after this can go toward NVME / FCP.
+		 * The minus 4 accounts for ELS, NVME LS, MBOX
+		 * plus one extra. When configured for
+		 * NVMET, FCP io channel WQs are not created.
+		 */
+		qmin -= 4;
 
-			/*
-			 * Whats left after this can go toward NVME.
-			 * The minus 6 accounts for ELS, NVME LS, MBOX
-			 * fof plus a couple extra. When configured for
-			 * NVMET, FCP io channel WQs are not created.
-			 */
-			length -= 6;
-			if (!phba->nvmet_support)
-				length -= phba->cfg_fcp_io_channel;
-
-			if (phba->cfg_nvme_io_channel > length) {
-				lpfc_printf_log(
-					phba, KERN_ERR, LOG_SLI,
-					"2005 Reducing NVME IO channel to %d: "
-					"WQ %d CQ %d NVMEIO %d FCPIO %d\n",
-					length,
+		/* If NVME is configured, double the number of CQ/WQs needed */
+		if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
+		    !phba->nvmet_support)
+			qmin /= 2;
+
+		/* Check to see if there is enough for NVME */
+		if ((phba->cfg_irq_chann > qmin) ||
+		    (phba->cfg_hdw_queue > qmin)) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"2005 Reducing Queues: "
+					"WQ %d CQ %d EQ %d: min %d: "
+					"IRQ %d HDWQ %d\n",
 					phba->sli4_hba.max_cfg_param.max_wq,
 					phba->sli4_hba.max_cfg_param.max_cq,
-					phba->cfg_nvme_io_channel,
-					phba->cfg_fcp_io_channel);
+					phba->sli4_hba.max_cfg_param.max_eq,
+					qmin, phba->cfg_irq_chann,
+					phba->cfg_hdw_queue);
 
-				phba->cfg_nvme_io_channel = length;
-			}
+			if (phba->cfg_irq_chann > qmin)
+				phba->cfg_irq_chann = qmin;
+			if (phba->cfg_hdw_queue > qmin)
+				phba->cfg_hdw_queue = qmin;
 		}
 	}
 
@@ -8254,53 +8595,22 @@ lpfc_setup_endian_order(struct lpfc_hba *phba)
 static int
 lpfc_sli4_queue_verify(struct lpfc_hba *phba)
 {
-	int io_channel;
-	int fof_vectors = phba->cfg_fof ? 1 : 0;
-
 	/*
 	 * Sanity check for configured queue parameters against the run-time
 	 * device parameters
 	 */
 
-	/* Sanity check on HBA EQ parameters */
-	io_channel = phba->io_channel_irqs;
-
-	if (phba->sli4_hba.num_online_cpu < io_channel) {
-		lpfc_printf_log(phba,
-				KERN_ERR, LOG_INIT,
-				"3188 Reducing IO channels to match number of "
-				"online CPUs: from %d to %d\n",
-				io_channel, phba->sli4_hba.num_online_cpu);
-		io_channel = phba->sli4_hba.num_online_cpu;
-	}
-
-	if (io_channel + fof_vectors > phba->sli4_hba.max_cfg_param.max_eq) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"2575 Reducing IO channels to match number of "
-				"available EQs: from %d to %d\n",
-				io_channel,
-				phba->sli4_hba.max_cfg_param.max_eq);
-		io_channel = phba->sli4_hba.max_cfg_param.max_eq - fof_vectors;
-	}
-
-	/* The actual number of FCP / NVME event queues adopted */
-	if (io_channel != phba->io_channel_irqs)
-		phba->io_channel_irqs = io_channel;
-	if (phba->cfg_fcp_io_channel > io_channel)
-		phba->cfg_fcp_io_channel = io_channel;
-	if (phba->cfg_nvme_io_channel > io_channel)
-		phba->cfg_nvme_io_channel = io_channel;
 	if (phba->nvmet_support) {
-		if (phba->cfg_nvme_io_channel < phba->cfg_nvmet_mrq)
-			phba->cfg_nvmet_mrq = phba->cfg_nvme_io_channel;
+		if (phba->cfg_irq_chann < phba->cfg_nvmet_mrq)
+			phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
 	}
 	if (phba->cfg_nvmet_mrq > LPFC_NVMET_MRQ_MAX)
 		phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_MAX;
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-			"2574 IO channels: irqs %d fcp %d nvme %d MRQ: %d\n",
-			phba->io_channel_irqs, phba->cfg_fcp_io_channel,
-			phba->cfg_nvme_io_channel, phba->cfg_nvmet_mrq);
+			"2574 IO channels: hdwQ %d IRQ %d MRQ: %d\n",
+			phba->cfg_hdw_queue, phba->cfg_irq_chann,
+			phba->cfg_nvmet_mrq);
 
 	/* Get EQ depth from module parameter, fake the default for now */
 	phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B;
@@ -8327,7 +8637,9 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
 		return 1;
 	}
 	qdesc->qe_valid = 1;
-	phba->sli4_hba.nvme_cq[wqidx] = qdesc;
+	qdesc->hdwq = wqidx;
+	qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
+	phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
 
 	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
 				      LPFC_WQE128_SIZE, LPFC_WQE_EXP_COUNT);
@@ -8337,7 +8649,9 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
 				wqidx);
 		return 1;
 	}
-	phba->sli4_hba.nvme_wq[wqidx] = qdesc;
+	qdesc->hdwq = wqidx;
+	qdesc->chann = wqidx;
+	phba->sli4_hba.hdwq[wqidx].nvme_wq = qdesc;
 	list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
 	return 0;
 }
@@ -8365,7 +8679,9 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
 		return 1;
 	}
 	qdesc->qe_valid = 1;
-	phba->sli4_hba.fcp_cq[wqidx] = qdesc;
+	qdesc->hdwq = wqidx;
+	qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
+	phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
 
 	/* Create Fast Path FCP WQs */
 	if (phba->enab_exp_wqcq_pages) {
@@ -8386,7 +8702,9 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
 				wqidx);
 		return 1;
 	}
-	phba->sli4_hba.fcp_wq[wqidx] = qdesc;
+	qdesc->hdwq = wqidx;
+	qdesc->chann = wqidx;
+	phba->sli4_hba.hdwq[wqidx].fcp_wq = qdesc;
 	list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
 	return 0;
 }
@@ -8409,16 +8727,14 @@ int
 lpfc_sli4_queue_create(struct lpfc_hba *phba)
 {
 	struct lpfc_queue *qdesc;
-	int idx, io_channel;
+	int idx, eqidx;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_eq_intr_info *eqi;
 
 	/*
 	 * Create HBA Record arrays.
 	 * Both NVME and FCP will share that same vectors / EQs
 	 */
-	io_channel = phba->io_channel_irqs;
-	if (!io_channel)
-		return -ERANGE;
-
 	phba->sli4_hba.mq_esize = LPFC_MQE_SIZE;
 	phba->sli4_hba.mq_ecount = LPFC_MQE_DEF_COUNT;
 	phba->sli4_hba.wq_esize = LPFC_WQE_SIZE;
@@ -8430,87 +8746,36 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	phba->sli4_hba.cq_esize = LPFC_CQE_SIZE;
 	phba->sli4_hba.cq_ecount = LPFC_CQE_DEF_COUNT;
 
-	phba->sli4_hba.hba_eq =  kcalloc(io_channel,
-					sizeof(struct lpfc_queue *),
-					GFP_KERNEL);
-	if (!phba->sli4_hba.hba_eq) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-			"2576 Failed allocate memory for "
-			"fast-path EQ record array\n");
-		goto out_error;
-	}
-
-	if (phba->cfg_fcp_io_channel) {
-		phba->sli4_hba.fcp_cq = kcalloc(phba->cfg_fcp_io_channel,
-						sizeof(struct lpfc_queue *),
-						GFP_KERNEL);
-		if (!phba->sli4_hba.fcp_cq) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"2577 Failed allocate memory for "
-					"fast-path CQ record array\n");
-			goto out_error;
-		}
-		phba->sli4_hba.fcp_wq = kcalloc(phba->cfg_fcp_io_channel,
-						sizeof(struct lpfc_queue *),
-						GFP_KERNEL);
-		if (!phba->sli4_hba.fcp_wq) {
+	if (!phba->sli4_hba.hdwq) {
+		phba->sli4_hba.hdwq = kcalloc(
+			phba->cfg_hdw_queue, sizeof(struct lpfc_sli4_hdw_queue),
+			GFP_KERNEL);
+		if (!phba->sli4_hba.hdwq) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"2578 Failed allocate memory for "
-					"fast-path FCP WQ record array\n");
+					"6427 Failed allocate memory for "
+					"fast-path Hardware Queue array\n");
 			goto out_error;
 		}
-		/*
-		 * Since the first EQ can have multiple CQs associated with it,
-		 * this array is used to quickly see if we have a FCP fast-path
-		 * CQ match.
-		 */
-		phba->sli4_hba.fcp_cq_map = kcalloc(phba->cfg_fcp_io_channel,
-							sizeof(uint16_t),
-							GFP_KERNEL);
-		if (!phba->sli4_hba.fcp_cq_map) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"2545 Failed allocate memory for "
-					"fast-path CQ map\n");
-			goto out_error;
+		/* Prepare hardware queues to take IO buffers */
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			qp = &phba->sli4_hba.hdwq[idx];
+			spin_lock_init(&qp->io_buf_list_get_lock);
+			spin_lock_init(&qp->io_buf_list_put_lock);
+			INIT_LIST_HEAD(&qp->lpfc_io_buf_list_get);
+			INIT_LIST_HEAD(&qp->lpfc_io_buf_list_put);
+			qp->get_io_bufs = 0;
+			qp->put_io_bufs = 0;
+			qp->total_io_bufs = 0;
+			spin_lock_init(&qp->abts_scsi_buf_list_lock);
+			INIT_LIST_HEAD(&qp->lpfc_abts_scsi_buf_list);
+			qp->abts_scsi_io_bufs = 0;
+			spin_lock_init(&qp->abts_nvme_buf_list_lock);
+			INIT_LIST_HEAD(&qp->lpfc_abts_nvme_buf_list);
+			qp->abts_nvme_io_bufs = 0;
 		}
 	}
 
-	if (phba->cfg_nvme_io_channel) {
-		phba->sli4_hba.nvme_cq = kcalloc(phba->cfg_nvme_io_channel,
-						sizeof(struct lpfc_queue *),
-						GFP_KERNEL);
-		if (!phba->sli4_hba.nvme_cq) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"6077 Failed allocate memory for "
-					"fast-path CQ record array\n");
-			goto out_error;
-		}
-
-		phba->sli4_hba.nvme_wq = kcalloc(phba->cfg_nvme_io_channel,
-						sizeof(struct lpfc_queue *),
-						GFP_KERNEL);
-		if (!phba->sli4_hba.nvme_wq) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"2581 Failed allocate memory for "
-					"fast-path NVME WQ record array\n");
-			goto out_error;
-		}
-
-		/*
-		 * Since the first EQ can have multiple CQs associated with it,
-		 * this array is used to quickly see if we have a NVME fast-path
-		 * CQ match.
-		 */
-		phba->sli4_hba.nvme_cq_map = kcalloc(phba->cfg_nvme_io_channel,
-							sizeof(uint16_t),
-							GFP_KERNEL);
-		if (!phba->sli4_hba.nvme_cq_map) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"6078 Failed allocate memory for "
-					"fast-path CQ map\n");
-			goto out_error;
-		}
-
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
 		if (phba->nvmet_support) {
 			phba->sli4_hba.nvmet_cqset = kcalloc(
 					phba->cfg_nvmet_mrq,
@@ -8548,8 +8813,19 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list);
 
 	/* Create HBA Event Queues (EQs) */
-	for (idx = 0; idx < io_channel; idx++) {
-		/* Create EQs */
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		/*
+		 * If there are more Hardware Queues than available
+		 * CQs, multiple Hardware Queues may share a common EQ.
+		 */
+		if (idx >= phba->cfg_irq_chann) {
+			/* Share an existing EQ */
+			eqidx = lpfc_find_eq_handle(phba, idx);
+			phba->sli4_hba.hdwq[idx].hba_eq =
+				phba->sli4_hba.hdwq[eqidx].hba_eq;
+			continue;
+		}
+		/* Create an EQ */
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 					      phba->sli4_hba.eq_esize,
 					      phba->sli4_hba.eq_ecount);
@@ -8559,33 +8835,51 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			goto out_error;
 		}
 		qdesc->qe_valid = 1;
-		phba->sli4_hba.hba_eq[idx] = qdesc;
+		qdesc->hdwq = idx;
+
+		/* Save the CPU this EQ is affinitised to */
+		eqidx = lpfc_find_eq_handle(phba, idx);
+		qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
+						    LPFC_FIND_BY_EQ);
+		phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
+		qdesc->last_cpu = qdesc->chann;
+		eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
+		list_add(&qdesc->cpu_list, &eqi->list);
 	}
 
-	/* FCP and NVME io channels are not required to be balanced */
 
-	for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++)
+	/* Allocate SCSI SLI4 CQ/WQs */
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
 		if (lpfc_alloc_fcp_wq_cq(phba, idx))
 			goto out_error;
+	}
 
-	for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++)
-		if (lpfc_alloc_nvme_wq_cq(phba, idx))
-			goto out_error;
+	/* Allocate NVME SLI4 CQ/WQs */
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			if (lpfc_alloc_nvme_wq_cq(phba, idx))
+				goto out_error;
+		}
 
-	if (phba->nvmet_support) {
-		for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
-			qdesc = lpfc_sli4_queue_alloc(phba,
+		if (phba->nvmet_support) {
+			for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
+				qdesc = lpfc_sli4_queue_alloc(
+						      phba,
 						      LPFC_DEFAULT_PAGE_SIZE,
 						      phba->sli4_hba.cq_esize,
 						      phba->sli4_hba.cq_ecount);
-			if (!qdesc) {
-				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"3142 Failed allocate NVME "
-					"CQ Set (%d)\n", idx);
-				goto out_error;
+				if (!qdesc) {
+					lpfc_printf_log(
+						phba, KERN_ERR, LOG_INIT,
+						"3142 Failed allocate NVME "
+						"CQ Set (%d)\n", idx);
+					goto out_error;
+				}
+				qdesc->qe_valid = 1;
+				qdesc->hdwq = idx;
+				qdesc->chann = idx;
+				phba->sli4_hba.nvmet_cqset[idx] = qdesc;
 			}
-			qdesc->qe_valid = 1;
-			phba->sli4_hba.nvmet_cqset[idx] = qdesc;
 		}
 	}
 
@@ -8615,6 +8909,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 		goto out_error;
 	}
 	qdesc->qe_valid = 1;
+	qdesc->chann = 0;
 	phba->sli4_hba.els_cq = qdesc;
 
 
@@ -8632,6 +8927,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 				"0505 Failed allocate slow-path MQ\n");
 		goto out_error;
 	}
+	qdesc->chann = 0;
 	phba->sli4_hba.mbx_wq = qdesc;
 
 	/*
@@ -8647,6 +8943,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 				"0504 Failed allocate slow-path ELS WQ\n");
 		goto out_error;
 	}
+	qdesc->chann = 0;
 	phba->sli4_hba.els_wq = qdesc;
 	list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
 
@@ -8660,6 +8957,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 					"6079 Failed allocate NVME LS CQ\n");
 			goto out_error;
 		}
+		qdesc->chann = 0;
 		qdesc->qe_valid = 1;
 		phba->sli4_hba.nvmels_cq = qdesc;
 
@@ -8672,6 +8970,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 					"6080 Failed allocate NVME LS WQ\n");
 			goto out_error;
 		}
+		qdesc->chann = 0;
 		phba->sli4_hba.nvmels_wq = qdesc;
 		list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
 	}
@@ -8702,7 +9001,8 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	}
 	phba->sli4_hba.dat_rq = qdesc;
 
-	if (phba->nvmet_support) {
+	if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
+	    phba->nvmet_support) {
 		for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
 			/* Create NVMET Receive Queue for header */
 			qdesc = lpfc_sli4_queue_alloc(phba,
@@ -8715,6 +9015,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 						"receive HRQ\n");
 				goto out_error;
 			}
+			qdesc->hdwq = idx;
 			phba->sli4_hba.nvmet_mrq_hdr[idx] = qdesc;
 
 			/* Only needed for header of RQ pair */
@@ -8741,13 +9042,29 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 						"receive DRQ\n");
 				goto out_error;
 			}
+			qdesc->hdwq = idx;
 			phba->sli4_hba.nvmet_mrq_data[idx] = qdesc;
 		}
 	}
 
-	/* Create the Queues needed for Flash Optimized Fabric operations */
-	if (phba->cfg_fof)
-		lpfc_fof_queue_create(phba);
+#if defined(BUILD_NVME)
+	/* Clear NVME stats */
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			memset(&phba->sli4_hba.hdwq[idx].nvme_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat));
+		}
+	}
+#endif
+
+	/* Clear SCSI stats */
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			memset(&phba->sli4_hba.hdwq[idx].scsi_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[idx].scsi_cstat));
+		}
+	}
+
 	return 0;
 
 out_error:
@@ -8780,11 +9097,25 @@ lpfc_sli4_release_queues(struct lpfc_queue ***qs, int max)
 }
 
 static inline void
-lpfc_sli4_release_queue_map(uint16_t **qmap)
+lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
 {
-	if (*qmap != NULL) {
-		kfree(*qmap);
-		*qmap = NULL;
+	struct lpfc_sli4_hdw_queue *hdwq;
+	uint32_t idx;
+
+	hdwq = phba->sli4_hba.hdwq;
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		if (idx < phba->cfg_irq_chann)
+			lpfc_sli4_queue_free(hdwq[idx].hba_eq);
+		hdwq[idx].hba_eq = NULL;
+
+		lpfc_sli4_queue_free(hdwq[idx].fcp_cq);
+		lpfc_sli4_queue_free(hdwq[idx].nvme_cq);
+		lpfc_sli4_queue_free(hdwq[idx].fcp_wq);
+		lpfc_sli4_queue_free(hdwq[idx].nvme_wq);
+		hdwq[idx].fcp_cq = NULL;
+		hdwq[idx].nvme_cq = NULL;
+		hdwq[idx].fcp_wq = NULL;
+		hdwq[idx].nvme_wq = NULL;
 	}
 }
 
@@ -8803,33 +9134,9 @@ lpfc_sli4_release_queue_map(uint16_t **qmap)
 void
 lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 {
-	if (phba->cfg_fof)
-		lpfc_fof_queue_destroy(phba);
-
 	/* Release HBA eqs */
-	lpfc_sli4_release_queues(&phba->sli4_hba.hba_eq, phba->io_channel_irqs);
-
-	/* Release FCP cqs */
-	lpfc_sli4_release_queues(&phba->sli4_hba.fcp_cq,
-				 phba->cfg_fcp_io_channel);
-
-	/* Release FCP wqs */
-	lpfc_sli4_release_queues(&phba->sli4_hba.fcp_wq,
-				 phba->cfg_fcp_io_channel);
-
-	/* Release FCP CQ mapping array */
-	lpfc_sli4_release_queue_map(&phba->sli4_hba.fcp_cq_map);
-
-	/* Release NVME cqs */
-	lpfc_sli4_release_queues(&phba->sli4_hba.nvme_cq,
-					phba->cfg_nvme_io_channel);
-
-	/* Release NVME wqs */
-	lpfc_sli4_release_queues(&phba->sli4_hba.nvme_wq,
-					phba->cfg_nvme_io_channel);
-
-	/* Release NVME CQ mapping array */
-	lpfc_sli4_release_queue_map(&phba->sli4_hba.nvme_cq_map);
+	if (phba->sli4_hba.hdwq)
+		lpfc_sli4_release_hdwq(phba);
 
 	if (phba->nvmet_support) {
 		lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_cqset,
@@ -8910,10 +9217,9 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
 			qidx, (uint32_t)rc);
 		return rc;
 	}
-	cq->chann = qidx;
 
 	if (qtype != LPFC_MBOX) {
-		/* Setup nvme_cq_map for fast lookup */
+		/* Setup cq_map for fast lookup */
 		if (cq_map)
 			*cq_map = cq->queue_id;
 
@@ -8930,7 +9236,6 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
 			/* no need to tear down cq - caller will do so */
 			return rc;
 		}
-		wq->chann = qidx;
 
 		/* Bind this CQ/WQ to the NVME ring */
 		pring = wq->pring;
@@ -8960,6 +9265,38 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
 }
 
 /**
+ * lpfc_setup_cq_lookup - Setup the CQ lookup table
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine will populate the cq_lookup table by all
+ * available CQ queue_id's.
+ **/
+void
+lpfc_setup_cq_lookup(struct lpfc_hba *phba)
+{
+	struct lpfc_queue *eq, *childq;
+	struct lpfc_sli4_hdw_queue *qp;
+	int qidx;
+
+	qp = phba->sli4_hba.hdwq;
+	memset(phba->sli4_hba.cq_lookup, 0,
+	       (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1)));
+	for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+		eq = qp[qidx].hba_eq;
+		if (!eq)
+			continue;
+		list_for_each_entry(childq, &eq->child_list, list) {
+			if (childq->queue_id > phba->sli4_hba.cq_max)
+				continue;
+			if ((childq->subtype == LPFC_FCP) ||
+			    (childq->subtype == LPFC_NVME))
+				phba->sli4_hba.cq_lookup[childq->queue_id] =
+					childq;
+		}
+	}
+}
+
+/**
  * lpfc_sli4_queue_setup - Set up all the SLI4 queues
  * @phba: pointer to lpfc hba data structure.
  *
@@ -8976,9 +9313,10 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 {
 	uint32_t shdr_status, shdr_add_status;
 	union lpfc_sli4_cfg_shdr *shdr;
+	struct lpfc_sli4_hdw_queue *qp;
 	LPFC_MBOXQ_t *mboxq;
 	int qidx;
-	uint32_t length, io_channel;
+	uint32_t length, usdelay;
 	int rc = -ENOMEM;
 
 	/* Check for dual-ULP support */
@@ -9029,25 +9367,25 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 	/*
 	 * Set up HBA Event Queues (EQs)
 	 */
-	io_channel = phba->io_channel_irqs;
+	qp = phba->sli4_hba.hdwq;
 
 	/* Set up HBA event queue */
-	if (io_channel && !phba->sli4_hba.hba_eq) {
+	if (!qp) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"3147 Fast-path EQs not allocated\n");
 		rc = -ENOMEM;
 		goto out_error;
 	}
-	for (qidx = 0; qidx < io_channel; qidx++) {
-		if (!phba->sli4_hba.hba_eq[qidx]) {
+	for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+		if (!qp[qidx].hba_eq) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"0522 Fast-path EQ (%d) not "
 					"allocated\n", qidx);
 			rc = -ENOMEM;
 			goto out_destroy;
 		}
-		rc = lpfc_eq_create(phba, phba->sli4_hba.hba_eq[qidx],
-						phba->cfg_fcp_imax);
+		rc = lpfc_eq_create(phba, qp[qidx].hba_eq,
+				    phba->cfg_fcp_imax);
 		if (rc) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"0523 Failed setup of fast-path EQ "
@@ -9056,26 +9394,17 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 			goto out_destroy;
 		}
 		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"2584 HBA EQ setup: queue[%d]-id=%d\n",
-				qidx, phba->sli4_hba.hba_eq[qidx]->queue_id);
+				"2584 HBA EQ setup: queue[%d]-id=%d\n", qidx,
+				qp[qidx].hba_eq->queue_id);
 	}
 
-	if (phba->cfg_nvme_io_channel) {
-		if (!phba->sli4_hba.nvme_cq || !phba->sli4_hba.nvme_wq) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"6084 Fast-path NVME %s array not allocated\n",
-				(phba->sli4_hba.nvme_cq) ? "CQ" : "WQ");
-			rc = -ENOMEM;
-			goto out_destroy;
-		}
-
-		for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) {
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
 			rc = lpfc_create_wq_cq(phba,
-					phba->sli4_hba.hba_eq[
-						qidx % io_channel],
-					phba->sli4_hba.nvme_cq[qidx],
-					phba->sli4_hba.nvme_wq[qidx],
-					&phba->sli4_hba.nvme_cq_map[qidx],
+					qp[qidx].hba_eq,
+					qp[qidx].nvme_cq,
+					qp[qidx].nvme_wq,
+					&phba->sli4_hba.hdwq[qidx].nvme_cq_map,
 					qidx, LPFC_NVME);
 			if (rc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -9087,31 +9416,19 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		}
 	}
 
-	if (phba->cfg_fcp_io_channel) {
-		/* Set up fast-path FCP Response Complete Queue */
-		if (!phba->sli4_hba.fcp_cq || !phba->sli4_hba.fcp_wq) {
+	for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+		rc = lpfc_create_wq_cq(phba,
+				       qp[qidx].hba_eq,
+				       qp[qidx].fcp_cq,
+				       qp[qidx].fcp_wq,
+				       &phba->sli4_hba.hdwq[qidx].fcp_cq_map,
+				       qidx, LPFC_FCP);
+		if (rc) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"3148 Fast-path FCP %s array not allocated\n",
-				phba->sli4_hba.fcp_cq ? "WQ" : "CQ");
-			rc = -ENOMEM;
-			goto out_destroy;
-		}
-
-		for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) {
-			rc = lpfc_create_wq_cq(phba,
-					phba->sli4_hba.hba_eq[
-						qidx % io_channel],
-					phba->sli4_hba.fcp_cq[qidx],
-					phba->sli4_hba.fcp_wq[qidx],
-					&phba->sli4_hba.fcp_cq_map[qidx],
-					qidx, LPFC_FCP);
-			if (rc) {
-				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"0535 Failed to setup fastpath "
 					"FCP WQ/CQ (%d), rc = 0x%x\n",
 					qidx, (uint32_t)rc);
-				goto out_destroy;
-			}
+			goto out_destroy;
 		}
 	}
 
@@ -9130,7 +9447,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		goto out_destroy;
 	}
 
-	rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0],
+	rc = lpfc_create_wq_cq(phba, qp[0].hba_eq,
 			       phba->sli4_hba.mbx_cq,
 			       phba->sli4_hba.mbx_wq,
 			       NULL, 0, LPFC_MBOX);
@@ -9151,7 +9468,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		if (phba->cfg_nvmet_mrq > 1) {
 			rc = lpfc_cq_create_set(phba,
 					phba->sli4_hba.nvmet_cqset,
-					phba->sli4_hba.hba_eq,
+					qp,
 					LPFC_WCQ, LPFC_NVMET);
 			if (rc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -9163,7 +9480,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		} else {
 			/* Set up NVMET Receive Complete Queue */
 			rc = lpfc_cq_create(phba, phba->sli4_hba.nvmet_cqset[0],
-					    phba->sli4_hba.hba_eq[0],
+					    qp[0].hba_eq,
 					    LPFC_WCQ, LPFC_NVMET);
 			if (rc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -9177,7 +9494,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 					"6090 NVMET CQ setup: cq-id=%d, "
 					"parent eq-id=%d\n",
 					phba->sli4_hba.nvmet_cqset[0]->queue_id,
-					phba->sli4_hba.hba_eq[0]->queue_id);
+					qp[0].hba_eq->queue_id);
 		}
 	}
 
@@ -9189,14 +9506,14 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		rc = -ENOMEM;
 		goto out_destroy;
 	}
-	rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0],
-					phba->sli4_hba.els_cq,
-					phba->sli4_hba.els_wq,
-					NULL, 0, LPFC_ELS);
+	rc = lpfc_create_wq_cq(phba, qp[0].hba_eq,
+			       phba->sli4_hba.els_cq,
+			       phba->sli4_hba.els_wq,
+			       NULL, 0, LPFC_ELS);
 	if (rc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-			"0529 Failed setup of ELS WQ/CQ: rc = 0x%x\n",
-			(uint32_t)rc);
+				"0525 Failed setup of ELS WQ/CQ: rc = 0x%x\n",
+				(uint32_t)rc);
 		goto out_destroy;
 	}
 	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
@@ -9204,7 +9521,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 			phba->sli4_hba.els_wq->queue_id,
 			phba->sli4_hba.els_cq->queue_id);
 
-	if (phba->cfg_nvme_io_channel) {
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
 		/* Set up NVME LS Complete Queue */
 		if (!phba->sli4_hba.nvmels_cq || !phba->sli4_hba.nvmels_wq) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -9213,14 +9530,14 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 			rc = -ENOMEM;
 			goto out_destroy;
 		}
-		rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0],
-					phba->sli4_hba.nvmels_cq,
-					phba->sli4_hba.nvmels_wq,
-					NULL, 0, LPFC_NVME_LS);
+		rc = lpfc_create_wq_cq(phba, qp[0].hba_eq,
+				       phba->sli4_hba.nvmels_cq,
+				       phba->sli4_hba.nvmels_wq,
+				       NULL, 0, LPFC_NVME_LS);
 		if (rc) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0529 Failed setup of NVVME LS WQ/CQ: "
-				"rc = 0x%x\n", (uint32_t)rc);
+					"0526 Failed setup of NVVME LS WQ/CQ: "
+					"rc = 0x%x\n", (uint32_t)rc);
 			goto out_destroy;
 		}
 
@@ -9306,20 +9623,29 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 			phba->sli4_hba.dat_rq->queue_id,
 			phba->sli4_hba.els_cq->queue_id);
 
-	if (phba->cfg_fof) {
-		rc = lpfc_fof_queue_setup(phba);
-		if (rc) {
+	if (phba->cfg_fcp_imax)
+		usdelay = LPFC_SEC_TO_USEC / phba->cfg_fcp_imax;
+	else
+		usdelay = 0;
+
+	for (qidx = 0; qidx < phba->cfg_irq_chann;
+	     qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
+		lpfc_modify_hba_eq_delay(phba, qidx, LPFC_MAX_EQ_DELAY_EQID_CNT,
+					 usdelay);
+
+	if (phba->sli4_hba.cq_max) {
+		kfree(phba->sli4_hba.cq_lookup);
+		phba->sli4_hba.cq_lookup = kcalloc((phba->sli4_hba.cq_max + 1),
+			sizeof(struct lpfc_queue *), GFP_KERNEL);
+		if (!phba->sli4_hba.cq_lookup) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"0549 Failed setup of FOF Queues: "
-					"rc = 0x%x\n", rc);
+					"0549 Failed setup of CQ Lookup table: "
+					"size 0x%x\n", phba->sli4_hba.cq_max);
+			rc = -ENOMEM;
 			goto out_destroy;
 		}
+		lpfc_setup_cq_lookup(phba);
 	}
-
-	for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
-		lpfc_modify_hba_eq_delay(phba, qidx, LPFC_MAX_EQ_DELAY_EQID_CNT,
-					 phba->cfg_fcp_imax);
-
 	return 0;
 
 out_destroy:
@@ -9343,12 +9669,9 @@ out_error:
 void
 lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 {
+	struct lpfc_sli4_hdw_queue *qp;
 	int qidx;
 
-	/* Unset the queues created for Flash Optimized Fabric operations */
-	if (phba->cfg_fof)
-		lpfc_fof_queue_destroy(phba);
-
 	/* Unset mailbox command work queue */
 	if (phba->sli4_hba.mbx_wq)
 		lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq);
@@ -9366,17 +9689,6 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 		lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq,
 				phba->sli4_hba.dat_rq);
 
-	/* Unset FCP work queue */
-	if (phba->sli4_hba.fcp_wq)
-		for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++)
-			lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[qidx]);
-
-	/* Unset NVME work queue */
-	if (phba->sli4_hba.nvme_wq) {
-		for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++)
-			lpfc_wq_destroy(phba, phba->sli4_hba.nvme_wq[qidx]);
-	}
-
 	/* Unset mailbox command complete queue */
 	if (phba->sli4_hba.mbx_cq)
 		lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq);
@@ -9389,11 +9701,6 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 	if (phba->sli4_hba.nvmels_cq)
 		lpfc_cq_destroy(phba, phba->sli4_hba.nvmels_cq);
 
-	/* Unset NVME response complete queue */
-	if (phba->sli4_hba.nvme_cq)
-		for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++)
-			lpfc_cq_destroy(phba, phba->sli4_hba.nvme_cq[qidx]);
-
 	if (phba->nvmet_support) {
 		/* Unset NVMET MRQ queue */
 		if (phba->sli4_hba.nvmet_mrq_hdr) {
@@ -9412,15 +9719,22 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 		}
 	}
 
-	/* Unset FCP response complete queue */
-	if (phba->sli4_hba.fcp_cq)
-		for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++)
-			lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[qidx]);
+	/* Unset fast-path SLI4 queues */
+	if (phba->sli4_hba.hdwq) {
+		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+			qp = &phba->sli4_hba.hdwq[qidx];
+			lpfc_wq_destroy(phba, qp->fcp_wq);
+			lpfc_wq_destroy(phba, qp->nvme_wq);
+			lpfc_cq_destroy(phba, qp->fcp_cq);
+			lpfc_cq_destroy(phba, qp->nvme_cq);
+			if (qidx < phba->cfg_irq_chann)
+				lpfc_eq_destroy(phba, qp->hba_eq);
+		}
+	}
 
-	/* Unset fast-path event queue */
-	if (phba->sli4_hba.hba_eq)
-		for (qidx = 0; qidx < phba->io_channel_irqs; qidx++)
-			lpfc_eq_destroy(phba, phba->sli4_hba.hba_eq[qidx]);
+	kfree(phba->sli4_hba.cq_lookup);
+	phba->sli4_hba.cq_lookup = NULL;
+	phba->sli4_hba.cq_max = 0;
 }
 
 /**
@@ -9742,11 +10056,13 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
 	uint32_t if_type;
 
 	if (!pdev)
-		return error;
+		return -ENODEV;
 
 	/* Set the device DMA mask size */
-	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) ||
-	    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+	error = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (error)
+		error = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (error)
 		return error;
 
 	/*
@@ -9808,7 +10124,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
 			dev_printk(KERN_ERR, &pdev->dev,
 				"ioremap failed for SLI4 PCI config "
 				"registers.\n");
-				goto out;
+			goto out;
 		}
 		lpfc_sli4_bar0_register_memmap(phba, if_type);
 	}
@@ -9913,13 +10229,13 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
 	case LPFC_SLI_INTF_IF_TYPE_0:
 	case LPFC_SLI_INTF_IF_TYPE_2:
 		phba->sli4_hba.sli4_eq_clr_intr = lpfc_sli4_eq_clr_intr;
-		phba->sli4_hba.sli4_eq_release = lpfc_sli4_eq_release;
-		phba->sli4_hba.sli4_cq_release = lpfc_sli4_cq_release;
+		phba->sli4_hba.sli4_write_eq_db = lpfc_sli4_write_eq_db;
+		phba->sli4_hba.sli4_write_cq_db = lpfc_sli4_write_cq_db;
 		break;
 	case LPFC_SLI_INTF_IF_TYPE_6:
 		phba->sli4_hba.sli4_eq_clr_intr = lpfc_sli4_if6_eq_clr_intr;
-		phba->sli4_hba.sli4_eq_release = lpfc_sli4_if6_eq_release;
-		phba->sli4_hba.sli4_cq_release = lpfc_sli4_if6_cq_release;
+		phba->sli4_hba.sli4_write_eq_db = lpfc_sli4_if6_write_eq_db;
+		phba->sli4_hba.sli4_write_cq_db = lpfc_sli4_if6_write_cq_db;
 		break;
 	default:
 		break;
@@ -10200,25 +10516,97 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ
+ * @phba: pointer to lpfc hba data structure.
+ * @id: EQ vector index or Hardware Queue index
+ * @match: LPFC_FIND_BY_EQ = match by EQ
+ *         LPFC_FIND_BY_HDWQ = match by Hardware Queue
+ */
+static uint16_t
+lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
+{
+	struct lpfc_vector_map_info *cpup;
+	int cpu;
+
+	/* Find the desired phys_id for the specified EQ */
+	for_each_present_cpu(cpu) {
+		cpup = &phba->sli4_hba.cpu_map[cpu];
+		if ((match == LPFC_FIND_BY_EQ) &&
+		    (cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+		    (cpup->eq == id))
+			return cpu;
+		if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id))
+			return cpu;
+	}
+	return 0;
+}
+
+/**
+ * lpfc_find_eq_handle - Find the EQ that corresponds to the specified
+ *                       Hardware Queue
+ * @phba: pointer to lpfc hba data structure.
+ * @hdwq: Hardware Queue index
+ */
+static uint16_t
+lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq)
+{
+	struct lpfc_vector_map_info *cpup;
+	int cpu;
+
+	/* Find the desired phys_id for the specified EQ */
+	for_each_present_cpu(cpu) {
+		cpup = &phba->sli4_hba.cpu_map[cpu];
+		if (cpup->hdwq == hdwq)
+			return cpup->eq;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_X86
+/**
+ * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded
+ * @phba: pointer to lpfc hba data structure.
+ * @cpu: CPU map index
+ * @phys_id: CPU package physical id
+ * @core_id: CPU core id
+ */
+static int
+lpfc_find_hyper(struct lpfc_hba *phba, int cpu,
+		uint16_t phys_id, uint16_t core_id)
+{
+	struct lpfc_vector_map_info *cpup;
+	int idx;
+
+	for_each_present_cpu(idx) {
+		cpup = &phba->sli4_hba.cpu_map[idx];
+		/* Does the cpup match the one we are looking for */
+		if ((cpup->phys_id == phys_id) &&
+		    (cpup->core_id == core_id) &&
+		    (cpu != idx))
+			return 1;
+	}
+	return 0;
+}
+#endif
+
+/**
  * lpfc_cpu_affinity_check - Check vector CPU affinity mappings
  * @phba: pointer to lpfc hba data structure.
  * @vectors: number of msix vectors allocated.
  *
  * The routine will figure out the CPU affinity assignment for every
- * MSI-X vector allocated for the HBA.  The hba_eq_hdl will be updated
- * with a pointer to the CPU mask that defines ALL the CPUs this vector
- * can be associated with. If the vector can be unquely associated with
- * a single CPU, that CPU will be recorded in hba_eq_hdl[index].cpu.
+ * MSI-X vector allocated for the HBA.
  * In addition, the CPU to IO channel mapping will be calculated
  * and the phba->sli4_hba.cpu_map array will reflect this.
  */
 static void
 lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 {
+	int i, cpu, idx;
+	int max_phys_id, min_phys_id;
+	int max_core_id, min_core_id;
 	struct lpfc_vector_map_info *cpup;
-	int index = 0;
-	int vec = 0;
-	int cpu;
+	const struct cpumask *maskp;
 #ifdef CONFIG_X86
 	struct cpuinfo_x86 *cpuinfo;
 #endif
@@ -10226,32 +10614,71 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 	/* Init cpu_map array */
 	memset(phba->sli4_hba.cpu_map, 0xff,
 	       (sizeof(struct lpfc_vector_map_info) *
-	       phba->sli4_hba.num_present_cpu));
+	       phba->sli4_hba.num_possible_cpu));
+
+	max_phys_id = 0;
+	min_phys_id = 0xffff;
+	max_core_id = 0;
+	min_core_id = 0xffff;
 
 	/* Update CPU map with physical id and core id of each CPU */
-	cpup = phba->sli4_hba.cpu_map;
-	for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+	for_each_present_cpu(cpu) {
+		cpup = &phba->sli4_hba.cpu_map[cpu];
 #ifdef CONFIG_X86
 		cpuinfo = &cpu_data(cpu);
 		cpup->phys_id = cpuinfo->phys_proc_id;
 		cpup->core_id = cpuinfo->cpu_core_id;
+		cpup->hyper = lpfc_find_hyper(phba, cpu,
+					      cpup->phys_id, cpup->core_id);
 #else
 		/* No distinction between CPUs for other platforms */
 		cpup->phys_id = 0;
-		cpup->core_id = 0;
+		cpup->core_id = cpu;
+		cpup->hyper = 0;
 #endif
-		cpup->channel_id = index;  /* For now round robin */
-		cpup->irq = pci_irq_vector(phba->pcidev, vec);
-		vec++;
-		if (vec >= vectors)
-			vec = 0;
-		index++;
-		if (index >= phba->cfg_fcp_io_channel)
-			index = 0;
-		cpup++;
+
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"3328 CPU physid %d coreid %d\n",
+				cpup->phys_id, cpup->core_id);
+
+		if (cpup->phys_id > max_phys_id)
+			max_phys_id = cpup->phys_id;
+		if (cpup->phys_id < min_phys_id)
+			min_phys_id = cpup->phys_id;
+
+		if (cpup->core_id > max_core_id)
+			max_core_id = cpup->core_id;
+		if (cpup->core_id < min_core_id)
+			min_core_id = cpup->core_id;
 	}
-}
 
+	for_each_possible_cpu(i) {
+		struct lpfc_eq_intr_info *eqi =
+			per_cpu_ptr(phba->sli4_hba.eq_info, i);
+
+		INIT_LIST_HEAD(&eqi->list);
+		eqi->icnt = 0;
+	}
+
+	for (idx = 0; idx <  phba->cfg_irq_chann; idx++) {
+		maskp = pci_irq_get_affinity(phba->pcidev, idx);
+		if (!maskp)
+			continue;
+
+		for_each_cpu_and(cpu, maskp, cpu_present_mask) {
+			cpup = &phba->sli4_hba.cpu_map[cpu];
+			cpup->eq = idx;
+			cpup->hdwq = idx;
+			cpup->irq = pci_irq_vector(phba->pcidev, idx);
+
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"3336 Set Affinity: CPU %d "
+					"hdwq %d irq %d\n",
+					cpu, cpup->hdwq, cpup->irq);
+		}
+	}
+	return;
+}
 
 /**
  * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device
@@ -10271,12 +10698,10 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
 	char *name;
 
 	/* Set up MSI-X multi-message vectors */
-	vectors = phba->io_channel_irqs;
-	if (phba->cfg_fof)
-		vectors++;
+	vectors = phba->cfg_irq_chann;
 
 	rc = pci_alloc_irq_vectors(phba->pcidev,
-				(phba->nvmet_support) ? 1 : 2,
+				1,
 				vectors, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
 	if (rc < 0) {
 		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
@@ -10294,17 +10719,10 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
 
 		phba->sli4_hba.hba_eq_hdl[index].idx = index;
 		phba->sli4_hba.hba_eq_hdl[index].phba = phba;
-		atomic_set(&phba->sli4_hba.hba_eq_hdl[index].hba_eq_in_use, 1);
-		if (phba->cfg_fof && (index == (vectors - 1)))
-			rc = request_irq(pci_irq_vector(phba->pcidev, index),
-				 &lpfc_sli4_fof_intr_handler, 0,
-				 name,
-				 &phba->sli4_hba.hba_eq_hdl[index]);
-		else
-			rc = request_irq(pci_irq_vector(phba->pcidev, index),
-				 &lpfc_sli4_hba_intr_handler, 0,
-				 name,
-				 &phba->sli4_hba.hba_eq_hdl[index]);
+		rc = request_irq(pci_irq_vector(phba->pcidev, index),
+			 &lpfc_sli4_hba_intr_handler, 0,
+			 name,
+			 &phba->sli4_hba.hba_eq_hdl[index]);
 		if (rc) {
 			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
 					"0486 MSI-X fast-path (%d) "
@@ -10313,24 +10731,16 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
 		}
 	}
 
-	if (phba->cfg_fof)
-		vectors--;
-
-	if (vectors != phba->io_channel_irqs) {
+	if (vectors != phba->cfg_irq_chann) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"3238 Reducing IO channels to match number of "
 				"MSI-X vectors, requested %d got %d\n",
-				phba->io_channel_irqs, vectors);
-		if (phba->cfg_fcp_io_channel > vectors)
-			phba->cfg_fcp_io_channel = vectors;
-		if (phba->cfg_nvme_io_channel > vectors)
-			phba->cfg_nvme_io_channel = vectors;
-		if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel)
-			phba->io_channel_irqs = phba->cfg_fcp_io_channel;
-		else
-			phba->io_channel_irqs = phba->cfg_nvme_io_channel;
+				phba->cfg_irq_chann, vectors);
+		if (phba->cfg_irq_chann > vectors)
+			phba->cfg_irq_chann = vectors;
+		if (phba->cfg_nvmet_mrq > vectors)
+			phba->cfg_nvmet_mrq = vectors;
 	}
-	lpfc_cpu_affinity_check(phba, vectors);
 
 	return rc;
 
@@ -10385,15 +10795,11 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba)
 		return rc;
 	}
 
-	for (index = 0; index < phba->io_channel_irqs; index++) {
+	for (index = 0; index < phba->cfg_irq_chann; index++) {
 		phba->sli4_hba.hba_eq_hdl[index].idx = index;
 		phba->sli4_hba.hba_eq_hdl[index].phba = phba;
 	}
 
-	if (phba->cfg_fof) {
-		phba->sli4_hba.hba_eq_hdl[index].idx = index;
-		phba->sli4_hba.hba_eq_hdl[index].phba = phba;
-	}
 	return 0;
 }
 
@@ -10454,17 +10860,10 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
 			phba->intr_type = INTx;
 			intr_mode = 0;
 
-			for (idx = 0; idx < phba->io_channel_irqs; idx++) {
-				eqhdl = &phba->sli4_hba.hba_eq_hdl[idx];
-				eqhdl->idx = idx;
-				eqhdl->phba = phba;
-				atomic_set(&eqhdl->hba_eq_in_use, 1);
-			}
-			if (phba->cfg_fof) {
+			for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
 				eqhdl = &phba->sli4_hba.hba_eq_hdl[idx];
 				eqhdl->idx = idx;
 				eqhdl->phba = phba;
-				atomic_set(&eqhdl->hba_eq_in_use, 1);
 			}
 		}
 	}
@@ -10488,13 +10887,13 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba)
 		int index;
 
 		/* Free up MSI-X multi-message vectors */
-		for (index = 0; index < phba->io_channel_irqs; index++)
-			free_irq(pci_irq_vector(phba->pcidev, index),
-					&phba->sli4_hba.hba_eq_hdl[index]);
-
-		if (phba->cfg_fof)
+		for (index = 0; index < phba->cfg_irq_chann; index++) {
+			irq_set_affinity_hint(
+				pci_irq_vector(phba->pcidev, index),
+				NULL);
 			free_irq(pci_irq_vector(phba->pcidev, index),
 					&phba->sli4_hba.hba_eq_hdl[index]);
+		}
 	} else {
 		free_irq(phba->pcidev->irq, phba);
 	}
@@ -10555,8 +10954,10 @@ lpfc_unset_hba(struct lpfc_hba *phba)
 static void
 lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
 {
+	struct lpfc_sli4_hdw_queue *qp;
+	int idx, ccnt, fcnt;
 	int wait_time = 0;
-	int nvme_xri_cmpl = 1;
+	int io_xri_cmpl = 1;
 	int nvmet_xri_cmpl = 1;
 	int fcp_xri_cmpl = 1;
 	int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list);
@@ -10571,17 +10972,32 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
 	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
 		lpfc_nvme_wait_for_io_drain(phba);
 
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)
-		fcp_xri_cmpl =
-			list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list);
+	ccnt = 0;
+	fcnt = 0;
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		qp = &phba->sli4_hba.hdwq[idx];
+		fcp_xri_cmpl = list_empty(
+			&qp->lpfc_abts_scsi_buf_list);
+		if (!fcp_xri_cmpl) /* if list is NOT empty */
+			fcnt++;
+		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+			io_xri_cmpl = list_empty(
+				&qp->lpfc_abts_nvme_buf_list);
+			if (!io_xri_cmpl) /* if list is NOT empty */
+				ccnt++;
+		}
+	}
+	if (ccnt)
+		io_xri_cmpl = 0;
+	if (fcnt)
+		fcp_xri_cmpl = 0;
+
 	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-		nvme_xri_cmpl =
-			list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
 		nvmet_xri_cmpl =
 			list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
 	}
 
-	while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl ||
+	while (!fcp_xri_cmpl || !els_xri_cmpl || !io_xri_cmpl ||
 	       !nvmet_xri_cmpl) {
 		if (wait_time > LPFC_XRI_EXCH_BUSY_WAIT_TMO) {
 			if (!nvmet_xri_cmpl)
@@ -10589,7 +11005,7 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
 						"6424 NVMET XRI exchange busy "
 						"wait time: %d seconds.\n",
 						wait_time/1000);
-			if (!nvme_xri_cmpl)
+			if (!io_xri_cmpl)
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6100 NVME XRI exchange busy "
 						"wait time: %d seconds.\n",
@@ -10610,17 +11026,31 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
 			msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1);
 			wait_time += LPFC_XRI_EXCH_BUSY_WAIT_T1;
 		}
+
+		ccnt = 0;
+		fcnt = 0;
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			qp = &phba->sli4_hba.hdwq[idx];
+			fcp_xri_cmpl = list_empty(
+				&qp->lpfc_abts_scsi_buf_list);
+			if (!fcp_xri_cmpl) /* if list is NOT empty */
+				fcnt++;
+			if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+				io_xri_cmpl = list_empty(
+				    &qp->lpfc_abts_nvme_buf_list);
+				if (!io_xri_cmpl) /* if list is NOT empty */
+					ccnt++;
+			}
+		}
+		if (ccnt)
+			io_xri_cmpl = 0;
+		if (fcnt)
+			fcp_xri_cmpl = 0;
+
 		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-			nvme_xri_cmpl = list_empty(
-				&phba->sli4_hba.lpfc_abts_nvme_buf_list);
 			nvmet_xri_cmpl = list_empty(
 				&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
 		}
-
-		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)
-			fcp_xri_cmpl = list_empty(
-				&phba->sli4_hba.lpfc_abts_scsi_buf_list);
-
 		els_xri_cmpl =
 			list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list);
 
@@ -10645,7 +11075,8 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
 	struct pci_dev *pdev = phba->pcidev;
 
 	lpfc_stop_hba_timers(phba);
-	phba->sli4_hba.intr_enable = 0;
+	if (phba->pport)
+		phba->sli4_hba.intr_enable = 0;
 
 	/*
 	 * Gracefully wait out the potential current outstanding asynchronous
@@ -10864,8 +11295,6 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
 		phba->nvme_support = 0;
 		phba->nvmet_support = 0;
 		phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_OFF;
-		phba->cfg_nvme_io_channel = 0;
-		phba->io_channel_irqs = phba->cfg_fcp_io_channel;
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME,
 				"6101 Disabling NVME support: "
 				"Not supported by firmware: %d %d\n",
@@ -11190,6 +11619,8 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)
 	 * corresponding pools here.
 	 */
 	lpfc_scsi_free(phba);
+	lpfc_free_iocb_list(phba);
+
 	lpfc_mem_free_all(phba);
 
 	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
@@ -11815,28 +12246,11 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	/* Get the default values for Model Name and Description */
 	lpfc_get_hba_model_desc(phba, phba->ModelName, phba->ModelDesc);
 
-	/* Create SCSI host to the physical port */
-	error = lpfc_create_shost(phba);
-	if (error) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1415 Failed to create scsi host.\n");
-		goto out_unset_driver_resource;
-	}
-
-	/* Configure sysfs attributes */
-	vport = phba->pport;
-	error = lpfc_alloc_sysfs_attr(vport);
-	if (error) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1416 Failed to allocate sysfs attr\n");
-		goto out_destroy_shost;
-	}
-
-	shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */
 	/* Now, trying to enable interrupt and bring up the device */
 	cfg_mode = phba->cfg_use_msi;
 
 	/* Put device to a known state before enabling interrupt */
+	phba->pport = NULL;
 	lpfc_stop_port(phba);
 
 	/* Configure and enable interrupt */
@@ -11845,18 +12259,34 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0426 Failed to enable interrupt.\n");
 		error = -ENODEV;
-		goto out_free_sysfs_attr;
+		goto out_unset_driver_resource;
 	}
 	/* Default to single EQ for non-MSI-X */
 	if (phba->intr_type != MSIX) {
-		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)
-			phba->cfg_fcp_io_channel = 1;
+		phba->cfg_irq_chann = 1;
 		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-			phba->cfg_nvme_io_channel = 1;
 			if (phba->nvmet_support)
 				phba->cfg_nvmet_mrq = 1;
 		}
-		phba->io_channel_irqs = 1;
+	}
+	lpfc_cpu_affinity_check(phba, phba->cfg_irq_chann);
+
+	/* Create SCSI host to the physical port */
+	error = lpfc_create_shost(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1415 Failed to create scsi host.\n");
+		goto out_disable_intr;
+	}
+	vport = phba->pport;
+	shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */
+
+	/* Configure sysfs attributes */
+	error = lpfc_alloc_sysfs_attr(vport);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1416 Failed to allocate sysfs attr\n");
+		goto out_destroy_shost;
 	}
 
 	/* Set up SLI-4 HBA */
@@ -11864,7 +12294,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1421 Failed to set up hba\n");
 		error = -ENODEV;
-		goto out_disable_intr;
+		goto out_free_sysfs_attr;
 	}
 
 	/* Log the current active interrupt mode */
@@ -11877,19 +12307,20 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	/* NVME support in FW earlier in the driver load corrects the
 	 * FC4 type making a check for nvme_support unnecessary.
 	 */
-	if ((phba->nvmet_support == 0) &&
-	    (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
-		/* Create NVME binding with nvme_fc_transport. This
-		 * ensures the vport is initialized.  If the localport
-		 * create fails, it should not unload the driver to
-		 * support field issues.
-		 */
-		error = lpfc_nvme_create_localport(vport);
-		if (error) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"6004 NVME registration failed, "
-					"error x%x\n",
-					error);
+	if (phba->nvmet_support == 0) {
+		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+			/* Create NVME binding with nvme_fc_transport. This
+			 * ensures the vport is initialized.  If the localport
+			 * create fails, it should not unload the driver to
+			 * support field issues.
+			 */
+			error = lpfc_nvme_create_localport(vport);
+			if (error) {
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"6004 NVME registration "
+						"failed, error x%x\n",
+						error);
+			}
 		}
 	}
 
@@ -11905,12 +12336,12 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 
 	return 0;
 
-out_disable_intr:
-	lpfc_sli4_disable_intr(phba);
 out_free_sysfs_attr:
 	lpfc_free_sysfs_attr(vport);
 out_destroy_shost:
 	lpfc_destroy_shost(phba);
+out_disable_intr:
+	lpfc_sli4_disable_intr(phba);
 out_unset_driver_resource:
 	lpfc_unset_driver_resource_phase2(phba);
 out_unset_driver_resource_s4:
@@ -11973,13 +12404,16 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev)
 	lpfc_nvmet_destroy_targetport(phba);
 	lpfc_nvme_destroy_localport(vport);
 
+	/* De-allocate multi-XRI pools */
+	if (phba->cfg_xri_rebalancing)
+		lpfc_destroy_multixri_pools(phba);
+
 	/*
 	 * Bring down the SLI Layer. This step disables all interrupts,
 	 * clears the rings, discards all mailbox commands, and resets
 	 * the HBA FCoE function.
 	 */
 	lpfc_debugfs_terminate(vport);
-	lpfc_sli4_hba_unset(phba);
 
 	lpfc_stop_hba_timers(phba);
 	spin_lock_irq(&phba->port_list_lock);
@@ -11989,9 +12423,9 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev)
 	/* Perform scsi free before driver resource_unset since scsi
 	 * buffers are released to their corresponding pools here.
 	 */
-	lpfc_scsi_free(phba);
-	lpfc_nvme_free(phba);
+	lpfc_io_free(phba);
 	lpfc_free_iocb_list(phba);
+	lpfc_sli4_hba_unset(phba);
 
 	lpfc_unset_driver_resource_phase2(phba);
 	lpfc_sli4_driver_resource_unset(phba);
@@ -12653,165 +13087,6 @@ lpfc_sli4_ras_init(struct lpfc_hba *phba)
 	}
 }
 
-/**
- * lpfc_fof_queue_setup - Set up all the fof queues
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked to set up all the fof queues for the FC HBA
- * operation.
- *
- * Return codes
- *      0 - successful
- *      -ENOMEM - No available memory
- **/
-int
-lpfc_fof_queue_setup(struct lpfc_hba *phba)
-{
-	struct lpfc_sli_ring *pring;
-	int rc;
-
-	rc = lpfc_eq_create(phba, phba->sli4_hba.fof_eq, LPFC_MAX_IMAX);
-	if (rc)
-		return -ENOMEM;
-
-	if (phba->cfg_fof) {
-
-		rc = lpfc_cq_create(phba, phba->sli4_hba.oas_cq,
-				    phba->sli4_hba.fof_eq, LPFC_WCQ, LPFC_FCP);
-		if (rc)
-			goto out_oas_cq;
-
-		rc = lpfc_wq_create(phba, phba->sli4_hba.oas_wq,
-				    phba->sli4_hba.oas_cq, LPFC_FCP);
-		if (rc)
-			goto out_oas_wq;
-
-		/* Bind this CQ/WQ to the NVME ring */
-		pring = phba->sli4_hba.oas_wq->pring;
-		pring->sli.sli4.wqp =
-			(void *)phba->sli4_hba.oas_wq;
-		phba->sli4_hba.oas_cq->pring = pring;
-	}
-
-	return 0;
-
-out_oas_wq:
-	lpfc_cq_destroy(phba, phba->sli4_hba.oas_cq);
-out_oas_cq:
-	lpfc_eq_destroy(phba, phba->sli4_hba.fof_eq);
-	return rc;
-
-}
-
-/**
- * lpfc_fof_queue_create - Create all the fof queues
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked to allocate all the fof queues for the FC HBA
- * operation. For each SLI4 queue type, the parameters such as queue entry
- * count (queue depth) shall be taken from the module parameter. For now,
- * we just use some constant number as place holder.
- *
- * Return codes
- *      0 - successful
- *      -ENOMEM - No availble memory
- *      -EIO - The mailbox failed to complete successfully.
- **/
-int
-lpfc_fof_queue_create(struct lpfc_hba *phba)
-{
-	struct lpfc_queue *qdesc;
-	uint32_t wqesize;
-
-	/* Create FOF EQ */
-	qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
-				      phba->sli4_hba.eq_esize,
-				      phba->sli4_hba.eq_ecount);
-	if (!qdesc)
-		goto out_error;
-
-	qdesc->qe_valid = 1;
-	phba->sli4_hba.fof_eq = qdesc;
-
-	if (phba->cfg_fof) {
-
-		/* Create OAS CQ */
-		if (phba->enab_exp_wqcq_pages)
-			qdesc = lpfc_sli4_queue_alloc(phba,
-						      LPFC_EXPANDED_PAGE_SIZE,
-						      phba->sli4_hba.cq_esize,
-						      LPFC_CQE_EXP_COUNT);
-		else
-			qdesc = lpfc_sli4_queue_alloc(phba,
-						      LPFC_DEFAULT_PAGE_SIZE,
-						      phba->sli4_hba.cq_esize,
-						      phba->sli4_hba.cq_ecount);
-		if (!qdesc)
-			goto out_error;
-
-		qdesc->qe_valid = 1;
-		phba->sli4_hba.oas_cq = qdesc;
-
-		/* Create OAS WQ */
-		if (phba->enab_exp_wqcq_pages) {
-			wqesize = (phba->fcp_embed_io) ?
-				LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
-			qdesc = lpfc_sli4_queue_alloc(phba,
-						      LPFC_EXPANDED_PAGE_SIZE,
-						      wqesize,
-						      LPFC_WQE_EXP_COUNT);
-		} else
-			qdesc = lpfc_sli4_queue_alloc(phba,
-						      LPFC_DEFAULT_PAGE_SIZE,
-						      phba->sli4_hba.wq_esize,
-						      phba->sli4_hba.wq_ecount);
-
-		if (!qdesc)
-			goto out_error;
-
-		phba->sli4_hba.oas_wq = qdesc;
-		list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
-
-	}
-	return 0;
-
-out_error:
-	lpfc_fof_queue_destroy(phba);
-	return -ENOMEM;
-}
-
-/**
- * lpfc_fof_queue_destroy - Destroy all the fof queues
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked to release all the SLI4 queues with the FC HBA
- * operation.
- *
- * Return codes
- *      0 - successful
- **/
-int
-lpfc_fof_queue_destroy(struct lpfc_hba *phba)
-{
-	/* Release FOF Event queue */
-	if (phba->sli4_hba.fof_eq != NULL) {
-		lpfc_sli4_queue_free(phba->sli4_hba.fof_eq);
-		phba->sli4_hba.fof_eq = NULL;
-	}
-
-	/* Release OAS Completion queue */
-	if (phba->sli4_hba.oas_cq != NULL) {
-		lpfc_sli4_queue_free(phba->sli4_hba.oas_cq);
-		phba->sli4_hba.oas_cq = NULL;
-	}
-
-	/* Release OAS Work queue */
-	if (phba->sli4_hba.oas_wq != NULL) {
-		lpfc_sli4_queue_free(phba->sli4_hba.oas_wq);
-		phba->sli4_hba.oas_wq = NULL;
-	}
-	return 0;
-}
 
 MODULE_DEVICE_TABLE(pci, lpfc_id_table);
 
@@ -12883,7 +13158,6 @@ lpfc_init(void)
 	lpfc_nvmet_cmd_template();
 
 	/* Initialize in case vector mapping is needed */
-	lpfc_used_cpu = NULL;
 	lpfc_present_cpu = num_present_cpus();
 
 	error = pci_register_driver(&lpfc_driver);
@@ -12922,7 +13196,6 @@ lpfc_exit(void)
 				(1L << _dump_buf_dif_order), _dump_buf_dif);
 		free_pages((unsigned long)_dump_buf_dif, _dump_buf_dif_order);
 	}
-	kfree(lpfc_used_cpu);
 	idr_destroy(&lpfc_hba_index);
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index 4d3b94317515..8abe933bad09 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -2095,8 +2095,8 @@ lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq)
 	if (phba->nvmet_support) {
 		bf_set(lpfc_mbx_rq_ftr_rq_mrqp, &mboxq->u.mqe.un.req_ftrs, 1);
 		/* iaab/iaar NOT set for now */
-		 bf_set(lpfc_mbx_rq_ftr_rq_iaab, &mboxq->u.mqe.un.req_ftrs, 0);
-		 bf_set(lpfc_mbx_rq_ftr_rq_iaar, &mboxq->u.mqe.un.req_ftrs, 0);
+		bf_set(lpfc_mbx_rq_ftr_rq_iaab, &mboxq->u.mqe.un.req_ftrs, 0);
+		bf_set(lpfc_mbx_rq_ftr_rq_iaar, &mboxq->u.mqe.un.req_ftrs, 0);
 	}
 	return;
 }
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 96bc3789a166..6172682a24ba 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -825,7 +825,7 @@ lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 			"rport rolechg:   role:x%x did:x%x flg:x%x",
 			roles, ndlp->nlp_DID, ndlp->nlp_flag);
 
-		if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME)
+		if (vport->cfg_enable_fc4_type != LPFC_ENABLE_NVME)
 			fc_remote_port_rolechg(rport, roles);
 	}
 }
@@ -1789,8 +1789,8 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
 			 * is configured try it.
 			 */
 			ndlp->nlp_fc4_type |= NLP_FC4_FCP;
-			if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-			     (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
+			if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+			    (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
 				ndlp->nlp_fc4_type |= NLP_FC4_NVME;
 				/* We need to update the localport also */
 				lpfc_nvme_update_localport(vport);
@@ -1804,7 +1804,7 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
 			 * should just issue PRLI for FCP. Otherwise issue
 			 * GFT_ID to determine if remote port supports NVME.
 			 */
-			if (phba->cfg_enable_fc4_type != LPFC_ENABLE_FCP) {
+			if (vport->cfg_enable_fc4_type != LPFC_ENABLE_FCP) {
 				rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID,
 						 0, ndlp->nlp_DID);
 				return ndlp->nlp_state;
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 8c9f79042228..55ab9d3ee4ba 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -56,12 +56,12 @@
 
 /* NVME initiator-based functions */
 
-static struct lpfc_nvme_buf *
+static struct lpfc_io_buf *
 lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
-		  int expedite);
+		  int idx, int expedite);
 
 static void
-lpfc_release_nvme_buf(struct lpfc_hba *, struct lpfc_nvme_buf *);
+lpfc_release_nvme_buf(struct lpfc_hba *, struct lpfc_io_buf *);
 
 static struct nvme_fc_port_template lpfc_nvme_template;
 
@@ -239,7 +239,7 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
 	if (qidx) {
 		str = "IO ";  /* IO queue */
 		qhandle->index = ((qidx - 1) %
-			vport->phba->cfg_nvme_io_channel);
+			lpfc_nvme_template.max_hw_queues);
 	} else {
 		str = "ADM";  /* Admin queue */
 		qhandle->index = qidx;
@@ -247,7 +247,7 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
 
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
 			 "6073 Binding %s HdwQueue %d  (cpu %d) to "
-			 "io_channel %d qhandle %p\n", str,
+			 "hdw_queue %d qhandle %p\n", str,
 			 qidx, qhandle->cpu_id, qhandle->index, qhandle);
 	*handle = (void *)qhandle;
 	return 0;
@@ -529,7 +529,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
 	lpfc_nvmeio_data(phba, "NVME LS  XMIT: xri x%x iotag x%x to x%06x\n",
 			 genwqe->sli4_xritag, genwqe->iotag, ndlp->nlp_DID);
 
-	rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, genwqe);
+	rc = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], genwqe);
 	if (rc) {
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
 				 "6045 Issue GEN REQ WQE to NPORT x%x "
@@ -761,7 +761,7 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
 /* Fix up the existing sgls for NVME IO. */
 static inline void
 lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
-		       struct lpfc_nvme_buf *lpfc_ncmd,
+		       struct lpfc_io_buf *lpfc_ncmd,
 		       struct nvmefc_fcp_req *nCmd)
 {
 	struct lpfc_hba  *phba = vport->phba;
@@ -784,7 +784,7 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
 	 * rather than the virtual memory to ease the restore
 	 * operation.
 	 */
-	sgl = lpfc_ncmd->nvme_sgl;
+	sgl = lpfc_ncmd->dma_sgl;
 	sgl->sge_len = cpu_to_le32(nCmd->cmdlen);
 	if (phba->cfg_nvme_embed_cmd) {
 		sgl->addr_hi = 0;
@@ -858,7 +858,7 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 static void
 lpfc_nvme_ktime(struct lpfc_hba *phba,
-		struct lpfc_nvme_buf *lpfc_ncmd)
+		struct lpfc_io_buf *lpfc_ncmd)
 {
 	uint64_t seg1, seg2, seg3, seg4;
 	uint64_t segsum;
@@ -956,57 +956,54 @@ static void
 lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 			  struct lpfc_wcqe_complete *wcqe)
 {
-	struct lpfc_nvme_buf *lpfc_ncmd =
-		(struct lpfc_nvme_buf *)pwqeIn->context1;
+	struct lpfc_io_buf *lpfc_ncmd =
+		(struct lpfc_io_buf *)pwqeIn->context1;
 	struct lpfc_vport *vport = pwqeIn->vport;
 	struct nvmefc_fcp_req *nCmd;
 	struct nvme_fc_ersp_iu *ep;
 	struct nvme_fc_cmd_iu *cp;
-	struct lpfc_nvme_rport *rport;
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_nvme_fcpreq_priv *freqpriv;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
-	unsigned long flags;
-	uint32_t code, status, idx;
+	uint32_t code, status, idx, cpu;
 	uint16_t cid, sqhd, data;
 	uint32_t *ptr;
 
 	/* Sanity check on return of outstanding command */
-	if (!lpfc_ncmd || !lpfc_ncmd->nvmeCmd || !lpfc_ncmd->nrport) {
-		if (!lpfc_ncmd) {
-			lpfc_printf_vlog(vport, KERN_ERR,
-					 LOG_NODE | LOG_NVME_IOERR,
-					 "6071 Null lpfc_ncmd pointer. No "
-					 "release, skip completion\n");
-			return;
-		}
+	if (!lpfc_ncmd) {
+		lpfc_printf_vlog(vport, KERN_ERR,
+				 LOG_NODE | LOG_NVME_IOERR,
+				 "6071 Null lpfc_ncmd pointer. No "
+				 "release, skip completion\n");
+		return;
+	}
+
+	/* Guard against abort handler being called at same time */
+	spin_lock(&lpfc_ncmd->buf_lock);
 
+	if (!lpfc_ncmd->nvmeCmd) {
+		spin_unlock(&lpfc_ncmd->buf_lock);
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
 				 "6066 Missing cmpl ptrs: lpfc_ncmd %p, "
-				 "nvmeCmd %p nrport %p\n",
-				 lpfc_ncmd, lpfc_ncmd->nvmeCmd,
-				 lpfc_ncmd->nrport);
+				 "nvmeCmd %p\n",
+				 lpfc_ncmd, lpfc_ncmd->nvmeCmd);
 
 		/* Release the lpfc_ncmd regardless of the missing elements. */
 		lpfc_release_nvme_buf(phba, lpfc_ncmd);
 		return;
 	}
 	nCmd = lpfc_ncmd->nvmeCmd;
-	rport = lpfc_ncmd->nrport;
 	status = bf_get(lpfc_wcqe_c_status, wcqe);
 
+	idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
+	phba->sli4_hba.hdwq[idx].nvme_cstat.io_cmpls++;
+
 	if (vport->localport) {
 		lport = (struct lpfc_nvme_lport *)vport->localport->private;
-		if (lport) {
-			idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
-			cstat = &lport->cstat[idx];
-			atomic_inc(&cstat->fc4NvmeIoCmpls);
-			if (status) {
-				if (bf_get(lpfc_wcqe_c_xb, wcqe))
-					atomic_inc(&lport->cmpl_fcp_xb);
-				atomic_inc(&lport->cmpl_fcp_err);
-			}
+		if (lport && status) {
+			if (bf_get(lpfc_wcqe_c_xb, wcqe))
+				atomic_inc(&lport->cmpl_fcp_xb);
+			atomic_inc(&lport->cmpl_fcp_err);
 		}
 	}
 
@@ -1017,18 +1014,11 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	 * Catch race where our node has transitioned, but the
 	 * transport is still transitioning.
 	 */
-	ndlp = rport->ndlp;
+	ndlp = lpfc_ncmd->ndlp;
 	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
-				 "6061 rport %p,  DID x%06x node not ready.\n",
-				 rport, rport->remoteport->port_id);
-
-		ndlp = lpfc_findnode_did(vport, rport->remoteport->port_id);
-		if (!ndlp) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
-					 "6062 Ignoring NVME cmpl.  No ndlp\n");
-			goto out_err;
-		}
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
+				 "6062 Ignoring NVME cmpl.  No ndlp\n");
+		goto out_err;
 	}
 
 	code = bf_get(lpfc_wcqe_c_code, wcqe);
@@ -1148,13 +1138,17 @@ out_err:
 		lpfc_nvme_ktime(phba, lpfc_ncmd);
 	}
 	if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) {
-		if (lpfc_ncmd->cpu != smp_processor_id())
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
-					 "6701 CPU Check cmpl: "
-					 "cpu %d expect %d\n",
-					 smp_processor_id(), lpfc_ncmd->cpu);
-		if (lpfc_ncmd->cpu < LPFC_CHECK_CPU_CNT)
-			phba->cpucheck_cmpl_io[lpfc_ncmd->cpu]++;
+		idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
+		cpu = smp_processor_id();
+		if (cpu < LPFC_CHECK_CPU_CNT) {
+			if (lpfc_ncmd->cpu != cpu)
+				lpfc_printf_vlog(vport,
+						 KERN_INFO, LOG_NVME_IOERR,
+						 "6701 CPU Check cmpl: "
+						 "cpu %d expect %d\n",
+						 cpu, lpfc_ncmd->cpu);
+			phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++;
+		}
 	}
 #endif
 
@@ -1165,13 +1159,11 @@ out_err:
 	if (!(lpfc_ncmd->flags & LPFC_SBUF_XBUSY)) {
 		freqpriv = nCmd->private;
 		freqpriv->nvme_buf = NULL;
-		nCmd->done(nCmd);
 		lpfc_ncmd->nvmeCmd = NULL;
-	}
-
-	spin_lock_irqsave(&phba->hbalock, flags);
-	lpfc_ncmd->nrport = NULL;
-	spin_unlock_irqrestore(&phba->hbalock, flags);
+		spin_unlock(&lpfc_ncmd->buf_lock);
+		nCmd->done(nCmd);
+	} else
+		spin_unlock(&lpfc_ncmd->buf_lock);
 
 	/* Call release with XB=1 to queue the IO into the abort list. */
 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
@@ -1196,9 +1188,9 @@ out_err:
  **/
 static int
 lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
-		      struct lpfc_nvme_buf *lpfc_ncmd,
+		      struct lpfc_io_buf *lpfc_ncmd,
 		      struct lpfc_nodelist *pnode,
-		      struct lpfc_nvme_ctrl_stat *cstat)
+		      struct lpfc_fc4_ctrl_stat *cstat)
 {
 	struct lpfc_hba *phba = vport->phba;
 	struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
@@ -1206,7 +1198,7 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 	union lpfc_wqe128 *wqe = &pwqeq->wqe;
 	uint32_t req_len;
 
-	if (!pnode || !NLP_CHK_NODE_ACT(pnode))
+	if (!NLP_CHK_NODE_ACT(pnode))
 		return -EINVAL;
 
 	/*
@@ -1236,7 +1228,7 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			} else {
 				wqe->fcp_iwrite.initial_xfer_len = 0;
 			}
-			atomic_inc(&cstat->fc4NvmeOutputRequests);
+			cstat->output_requests++;
 		} else {
 			/* From the iread template, initialize words 7 - 11 */
 			memcpy(&wqe->words[7],
@@ -1249,13 +1241,13 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			/* Word 5 */
 			wqe->fcp_iread.rsrvd5 = 0;
 
-			atomic_inc(&cstat->fc4NvmeInputRequests);
+			cstat->input_requests++;
 		}
 	} else {
 		/* From the icmnd template, initialize words 4 - 11 */
 		memcpy(&wqe->words[4], &lpfc_icmnd_cmd_template.words[4],
 		       sizeof(uint32_t) * 8);
-		atomic_inc(&cstat->fc4NvmeControlRequests);
+		cstat->control_requests++;
 	}
 	/*
 	 * Finish initializing those WQE fields that are independent
@@ -1302,12 +1294,12 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
  **/
 static int
 lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
-		      struct lpfc_nvme_buf *lpfc_ncmd)
+		      struct lpfc_io_buf *lpfc_ncmd)
 {
 	struct lpfc_hba *phba = vport->phba;
 	struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
 	union lpfc_wqe128 *wqe = &lpfc_ncmd->cur_iocbq.wqe;
-	struct sli4_sge *sgl = lpfc_ncmd->nvme_sgl;
+	struct sli4_sge *sgl = lpfc_ncmd->dma_sgl;
 	struct scatterlist *data_sg;
 	struct sli4_sge *first_data_sgl;
 	struct ulp_bde64 *bde;
@@ -1396,6 +1388,8 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
 		}
 
 	} else {
+		lpfc_ncmd->seg_cnt = 0;
+
 		/* For this clause to be valid, the payload_length
 		 * and sg_cnt must zero.
 		 */
@@ -1435,13 +1429,13 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 {
 	int ret = 0;
 	int expedite = 0;
-	int idx;
+	int idx, cpu;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	struct lpfc_vport *vport;
 	struct lpfc_hba *phba;
 	struct lpfc_nodelist *ndlp;
-	struct lpfc_nvme_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd;
 	struct lpfc_nvme_rport *rport;
 	struct lpfc_nvme_qhandle *lpfc_queue_info;
 	struct lpfc_nvme_fcpreq_priv *freqpriv;
@@ -1559,7 +1553,15 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 		}
 	}
 
-	lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, expedite);
+	/* Lookup Hardware Queue index based on fcp_io_sched module parameter */
+	if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
+		idx = lpfc_queue_info->index;
+	} else {
+		cpu = smp_processor_id();
+		idx = phba->sli4_hba.cpu_map[cpu].hdwq;
+	}
+
+	lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, idx, expedite);
 	if (lpfc_ncmd == NULL) {
 		atomic_inc(&lport->xmt_fcp_noxri);
 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
@@ -1586,9 +1588,8 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	 */
 	freqpriv->nvme_buf = lpfc_ncmd;
 	lpfc_ncmd->nvmeCmd = pnvme_fcreq;
-	lpfc_ncmd->nrport = rport;
 	lpfc_ncmd->ndlp = ndlp;
-	lpfc_ncmd->start_time = jiffies;
+	lpfc_ncmd->qidx = lpfc_queue_info->qidx;
 
 	/*
 	 * Issue the IO on the WQ indicated by index in the hw_queue_handle.
@@ -1598,9 +1599,8 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	 * index to use and that they have affinitized a CPU to this hardware
 	 * queue. A hardware queue maps to a driver MSI-X vector/EQ/CQ/WQ.
 	 */
-	idx = lpfc_queue_info->index;
 	lpfc_ncmd->cur_iocbq.hba_wqidx = idx;
-	cstat = &lport->cstat[idx];
+	cstat = &phba->sli4_hba.hdwq[idx].nvme_cstat;
 
 	lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp, cstat);
 	ret = lpfc_nvme_prep_io_dma(vport, lpfc_ncmd);
@@ -1618,7 +1618,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 			 lpfc_ncmd->cur_iocbq.sli4_xritag,
 			 lpfc_queue_info->index, ndlp->nlp_DID);
 
-	ret = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, &lpfc_ncmd->cur_iocbq);
+	ret = lpfc_sli4_issue_wqe(phba, lpfc_ncmd->hdwq, &lpfc_ncmd->cur_iocbq);
 	if (ret) {
 		atomic_inc(&lport->xmt_fcp_wqerr);
 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
@@ -1629,26 +1629,26 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 		goto out_free_nvme_buf;
 	}
 
+	if (phba->cfg_xri_rebalancing)
+		lpfc_keep_pvt_pool_above_lowwm(phba, lpfc_ncmd->hdwq_no);
+
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	if (lpfc_ncmd->ts_cmd_start)
 		lpfc_ncmd->ts_cmd_wqput = ktime_get_ns();
 
 	if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) {
-		lpfc_ncmd->cpu = smp_processor_id();
-		if (lpfc_ncmd->cpu != lpfc_queue_info->index) {
-			/* Check for admin queue */
-			if (lpfc_queue_info->qidx) {
+		cpu = smp_processor_id();
+		if (cpu < LPFC_CHECK_CPU_CNT) {
+			lpfc_ncmd->cpu = cpu;
+			if (idx != cpu)
 				lpfc_printf_vlog(vport,
-						 KERN_ERR, LOG_NVME_IOERR,
+						 KERN_INFO, LOG_NVME_IOERR,
 						"6702 CPU Check cmd: "
 						"cpu %d wq %d\n",
 						lpfc_ncmd->cpu,
 						lpfc_queue_info->index);
-			}
-			lpfc_ncmd->cpu = lpfc_queue_info->index;
+			phba->sli4_hba.hdwq[idx].cpucheck_xmt_io[cpu]++;
 		}
-		if (lpfc_ncmd->cpu < LPFC_CHECK_CPU_CNT)
-			phba->cpucheck_xmt_io[lpfc_ncmd->cpu]++;
 	}
 #endif
 	return 0;
@@ -1656,11 +1656,11 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
  out_free_nvme_buf:
 	if (lpfc_ncmd->nvmeCmd->sg_cnt) {
 		if (lpfc_ncmd->nvmeCmd->io_dir == NVMEFC_FCP_WRITE)
-			atomic_dec(&cstat->fc4NvmeOutputRequests);
+			cstat->output_requests--;
 		else
-			atomic_dec(&cstat->fc4NvmeInputRequests);
+			cstat->input_requests--;
 	} else
-		atomic_dec(&cstat->fc4NvmeControlRequests);
+		cstat->control_requests--;
 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
  out_fail:
 	return ret;
@@ -1720,7 +1720,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	struct lpfc_nvme_lport *lport;
 	struct lpfc_vport *vport;
 	struct lpfc_hba *phba;
-	struct lpfc_nvme_buf *lpfc_nbuf;
+	struct lpfc_io_buf *lpfc_nbuf;
 	struct lpfc_iocbq *abts_buf;
 	struct lpfc_iocbq *nvmereq_wqe;
 	struct lpfc_nvme_fcpreq_priv *freqpriv;
@@ -1788,6 +1788,9 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	}
 	nvmereq_wqe = &lpfc_nbuf->cur_iocbq;
 
+	/* Guard against IO completion being called at same time */
+	spin_lock(&lpfc_nbuf->buf_lock);
+
 	/*
 	 * The lpfc_nbuf and the mapped nvme_fcreq in the driver's
 	 * state must match the nvme_fcreq passed by the nvme
@@ -1796,24 +1799,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	 * has not seen it yet.
 	 */
 	if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) {
-		spin_unlock_irqrestore(&phba->hbalock, flags);
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6143 NVME req mismatch: "
 				 "lpfc_nbuf %p nvmeCmd %p, "
 				 "pnvme_fcreq %p.  Skipping Abort xri x%x\n",
 				 lpfc_nbuf, lpfc_nbuf->nvmeCmd,
 				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
-		return;
+		goto out_unlock;
 	}
 
 	/* Don't abort IOs no longer on the pending queue. */
 	if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
-		spin_unlock_irqrestore(&phba->hbalock, flags);
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6142 NVME IO req %p not queued - skipping "
 				 "abort req xri x%x\n",
 				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
-		return;
+		goto out_unlock;
 	}
 
 	atomic_inc(&lport->xmt_fcp_abort);
@@ -1823,24 +1824,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 
 	/* Outstanding abort is in progress */
 	if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) {
-		spin_unlock_irqrestore(&phba->hbalock, flags);
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6144 Outstanding NVME I/O Abort Request "
 				 "still pending on nvme_fcreq %p, "
 				 "lpfc_ncmd %p xri x%x\n",
 				 pnvme_fcreq, lpfc_nbuf,
 				 nvmereq_wqe->sli4_xritag);
-		return;
+		goto out_unlock;
 	}
 
 	abts_buf = __lpfc_sli_get_iocbq(phba);
 	if (!abts_buf) {
-		spin_unlock_irqrestore(&phba->hbalock, flags);
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6136 No available abort wqes. Skipping "
 				 "Abts req for nvme_fcreq %p xri x%x\n",
 				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
-		return;
+		goto out_unlock;
 	}
 
 	/* Ready - mark outstanding as aborted by driver. */
@@ -1883,7 +1882,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	abts_buf->hba_wqidx = nvmereq_wqe->hba_wqidx;
 	abts_buf->vport = vport;
 	abts_buf->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl;
-	ret_val = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_buf);
+	ret_val = lpfc_sli4_issue_wqe(phba, lpfc_nbuf->hdwq, abts_buf);
+	spin_unlock(&lpfc_nbuf->buf_lock);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (ret_val) {
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
@@ -1899,6 +1899,12 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 			 "ox_id x%x on reqtag x%x\n",
 			 nvmereq_wqe->sli4_xritag,
 			 abts_buf->iotag);
+	return;
+
+out_unlock:
+	spin_unlock(&lpfc_nbuf->buf_lock);
+	spin_unlock_irqrestore(&phba->hbalock, flags);
+	return;
 }
 
 /* Declare and initialization an instance of the FC NVME template. */
@@ -1928,454 +1934,63 @@ static struct nvme_fc_port_template lpfc_nvme_template = {
 };
 
 /**
- * lpfc_sli4_post_nvme_sgl_block - post a block of nvme sgl list to firmware
- * @phba: pointer to lpfc hba data structure.
- * @nblist: pointer to nvme buffer list.
- * @count: number of scsi buffers on the list.
- *
- * This routine is invoked to post a block of @count scsi sgl pages from a
- * SCSI buffer list @nblist to the HBA using non-embedded mailbox command.
- * No Lock is held.
- *
- **/
-static int
-lpfc_sli4_post_nvme_sgl_block(struct lpfc_hba *phba,
-			      struct list_head *nblist,
-			      int count)
-{
-	struct lpfc_nvme_buf *lpfc_ncmd;
-	struct lpfc_mbx_post_uembed_sgl_page1 *sgl;
-	struct sgl_page_pairs *sgl_pg_pairs;
-	void *viraddr;
-	LPFC_MBOXQ_t *mbox;
-	uint32_t reqlen, alloclen, pg_pairs;
-	uint32_t mbox_tmo;
-	uint16_t xritag_start = 0;
-	int rc = 0;
-	uint32_t shdr_status, shdr_add_status;
-	dma_addr_t pdma_phys_bpl1;
-	union lpfc_sli4_cfg_shdr *shdr;
-
-	/* Calculate the requested length of the dma memory */
-	reqlen = count * sizeof(struct sgl_page_pairs) +
-		 sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t);
-	if (reqlen > SLI4_PAGE_SIZE) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"6118 Block sgl registration required DMA "
-				"size (%d) great than a page\n", reqlen);
-		return -ENOMEM;
-	}
-	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
-	if (!mbox) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"6119 Failed to allocate mbox cmd memory\n");
-		return -ENOMEM;
-	}
-
-	/* Allocate DMA memory and set up the non-embedded mailbox command */
-	alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
-				LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen,
-				LPFC_SLI4_MBX_NEMBED);
-
-	if (alloclen < reqlen) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"6120 Allocated DMA memory size (%d) is "
-				"less than the requested DMA memory "
-				"size (%d)\n", alloclen, reqlen);
-		lpfc_sli4_mbox_cmd_free(phba, mbox);
-		return -ENOMEM;
-	}
-
-	/* Get the first SGE entry from the non-embedded DMA memory */
-	viraddr = mbox->sge_array->addr[0];
-
-	/* Set up the SGL pages in the non-embedded DMA pages */
-	sgl = (struct lpfc_mbx_post_uembed_sgl_page1 *)viraddr;
-	sgl_pg_pairs = &sgl->sgl_pg_pairs;
-
-	pg_pairs = 0;
-	list_for_each_entry(lpfc_ncmd, nblist, list) {
-		/* Set up the sge entry */
-		sgl_pg_pairs->sgl_pg0_addr_lo =
-			cpu_to_le32(putPaddrLow(lpfc_ncmd->dma_phys_sgl));
-		sgl_pg_pairs->sgl_pg0_addr_hi =
-			cpu_to_le32(putPaddrHigh(lpfc_ncmd->dma_phys_sgl));
-		if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
-			pdma_phys_bpl1 = lpfc_ncmd->dma_phys_sgl +
-						SGL_PAGE_SIZE;
-		else
-			pdma_phys_bpl1 = 0;
-		sgl_pg_pairs->sgl_pg1_addr_lo =
-			cpu_to_le32(putPaddrLow(pdma_phys_bpl1));
-		sgl_pg_pairs->sgl_pg1_addr_hi =
-			cpu_to_le32(putPaddrHigh(pdma_phys_bpl1));
-		/* Keep the first xritag on the list */
-		if (pg_pairs == 0)
-			xritag_start = lpfc_ncmd->cur_iocbq.sli4_xritag;
-		sgl_pg_pairs++;
-		pg_pairs++;
-	}
-	bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start);
-	bf_set(lpfc_post_sgl_pages_xricnt, sgl, pg_pairs);
-	/* Perform endian conversion if necessary */
-	sgl->word0 = cpu_to_le32(sgl->word0);
-
-	if (!phba->sli4_hba.intr_enable)
-		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
-	else {
-		mbox_tmo = lpfc_mbox_tmo_val(phba, mbox);
-		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
-	}
-	shdr = (union lpfc_sli4_cfg_shdr *)&sgl->cfg_shdr;
-	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
-	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
-	if (rc != MBX_TIMEOUT)
-		lpfc_sli4_mbox_cmd_free(phba, mbox);
-	if (shdr_status || shdr_add_status || rc) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"6125 POST_SGL_BLOCK mailbox command failed "
-				"status x%x add_status x%x mbx status x%x\n",
-				shdr_status, shdr_add_status, rc);
-		rc = -ENXIO;
-	}
-	return rc;
-}
-
-/**
- * lpfc_post_nvme_sgl_list - Post blocks of nvme buffer sgls from a list
- * @phba: pointer to lpfc hba data structure.
- * @post_nblist: pointer to the nvme buffer list.
- *
- * This routine walks a list of nvme buffers that was passed in. It attempts
- * to construct blocks of nvme buffer sgls which contains contiguous xris and
- * uses the non-embedded SGL block post mailbox commands to post to the port.
- * For single NVME buffer sgl with non-contiguous xri, if any, it shall use
- * embedded SGL post mailbox command for posting. The @post_nblist passed in
- * must be local list, thus no lock is needed when manipulate the list.
- *
- * Returns: 0 = failure, non-zero number of successfully posted buffers.
- **/
-static int
-lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
-			     struct list_head *post_nblist, int sb_count)
-{
-	struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next;
-	int status, sgl_size;
-	int post_cnt = 0, block_cnt = 0, num_posting = 0, num_posted = 0;
-	dma_addr_t pdma_phys_sgl1;
-	int last_xritag = NO_XRI;
-	int cur_xritag;
-	LIST_HEAD(prep_nblist);
-	LIST_HEAD(blck_nblist);
-	LIST_HEAD(nvme_nblist);
-
-	/* sanity check */
-	if (sb_count <= 0)
-		return -EINVAL;
-
-	sgl_size = phba->cfg_sg_dma_buf_size;
-
-	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, post_nblist, list) {
-		list_del_init(&lpfc_ncmd->list);
-		block_cnt++;
-		if ((last_xritag != NO_XRI) &&
-		    (lpfc_ncmd->cur_iocbq.sli4_xritag != last_xritag + 1)) {
-			/* a hole in xri block, form a sgl posting block */
-			list_splice_init(&prep_nblist, &blck_nblist);
-			post_cnt = block_cnt - 1;
-			/* prepare list for next posting block */
-			list_add_tail(&lpfc_ncmd->list, &prep_nblist);
-			block_cnt = 1;
-		} else {
-			/* prepare list for next posting block */
-			list_add_tail(&lpfc_ncmd->list, &prep_nblist);
-			/* enough sgls for non-embed sgl mbox command */
-			if (block_cnt == LPFC_NEMBED_MBOX_SGL_CNT) {
-				list_splice_init(&prep_nblist, &blck_nblist);
-				post_cnt = block_cnt;
-				block_cnt = 0;
-			}
-		}
-		num_posting++;
-		last_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag;
-
-		/* end of repost sgl list condition for NVME buffers */
-		if (num_posting == sb_count) {
-			if (post_cnt == 0) {
-				/* last sgl posting block */
-				list_splice_init(&prep_nblist, &blck_nblist);
-				post_cnt = block_cnt;
-			} else if (block_cnt == 1) {
-				/* last single sgl with non-contiguous xri */
-				if (sgl_size > SGL_PAGE_SIZE)
-					pdma_phys_sgl1 =
-						lpfc_ncmd->dma_phys_sgl +
-						SGL_PAGE_SIZE;
-				else
-					pdma_phys_sgl1 = 0;
-				cur_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag;
-				status = lpfc_sli4_post_sgl(phba,
-						lpfc_ncmd->dma_phys_sgl,
-						pdma_phys_sgl1, cur_xritag);
-				if (status) {
-					/* failure, put on abort nvme list */
-					lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
-				} else {
-					/* success, put on NVME buffer list */
-					lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
-					lpfc_ncmd->status = IOSTAT_SUCCESS;
-					num_posted++;
-				}
-				/* success, put on NVME buffer sgl list */
-				list_add_tail(&lpfc_ncmd->list, &nvme_nblist);
-			}
-		}
-
-		/* continue until a nembed page worth of sgls */
-		if (post_cnt == 0)
-			continue;
-
-		/* post block of NVME buffer list sgls */
-		status = lpfc_sli4_post_nvme_sgl_block(phba, &blck_nblist,
-						       post_cnt);
-
-		/* don't reset xirtag due to hole in xri block */
-		if (block_cnt == 0)
-			last_xritag = NO_XRI;
-
-		/* reset NVME buffer post count for next round of posting */
-		post_cnt = 0;
-
-		/* put posted NVME buffer-sgl posted on NVME buffer sgl list */
-		while (!list_empty(&blck_nblist)) {
-			list_remove_head(&blck_nblist, lpfc_ncmd,
-					 struct lpfc_nvme_buf, list);
-			if (status) {
-				/* failure, put on abort nvme list */
-				lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
-			} else {
-				/* success, put on NVME buffer list */
-				lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
-				lpfc_ncmd->status = IOSTAT_SUCCESS;
-				num_posted++;
-			}
-			list_add_tail(&lpfc_ncmd->list, &nvme_nblist);
-		}
-	}
-	/* Push NVME buffers with sgl posted to the available list */
-	while (!list_empty(&nvme_nblist)) {
-		list_remove_head(&nvme_nblist, lpfc_ncmd,
-				 struct lpfc_nvme_buf, list);
-		lpfc_release_nvme_buf(phba, lpfc_ncmd);
-	}
-	return num_posted;
-}
-
-/**
- * lpfc_repost_nvme_sgl_list - Repost all the allocated nvme buffer sgls
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine walks the list of nvme buffers that have been allocated and
- * repost them to the port by using SGL block post. This is needed after a
- * pci_function_reset/warm_start or start. The lpfc_hba_down_post_s4 routine
- * is responsible for moving all nvme buffers on the lpfc_abts_nvme_sgl_list
- * to the lpfc_nvme_buf_list. If the repost fails, reject all nvme buffers.
- *
- * Returns: 0 = success, non-zero failure.
- **/
-int
-lpfc_repost_nvme_sgl_list(struct lpfc_hba *phba)
-{
-	LIST_HEAD(post_nblist);
-	int num_posted, rc = 0;
-
-	/* get all NVME buffers need to repost to a local list */
-	spin_lock_irq(&phba->nvme_buf_list_get_lock);
-	spin_lock(&phba->nvme_buf_list_put_lock);
-	list_splice_init(&phba->lpfc_nvme_buf_list_get, &post_nblist);
-	list_splice(&phba->lpfc_nvme_buf_list_put, &post_nblist);
-	phba->get_nvme_bufs = 0;
-	phba->put_nvme_bufs = 0;
-	spin_unlock(&phba->nvme_buf_list_put_lock);
-	spin_unlock_irq(&phba->nvme_buf_list_get_lock);
-
-	/* post the list of nvme buffer sgls to port if available */
-	if (!list_empty(&post_nblist)) {
-		num_posted = lpfc_post_nvme_sgl_list(phba, &post_nblist,
-						phba->sli4_hba.nvme_xri_cnt);
-		/* failed to post any nvme buffer, return error */
-		if (num_posted == 0)
-			rc = -EIO;
-	}
-	return rc;
-}
-
-/**
- * lpfc_new_nvme_buf - Scsi buffer allocator for HBA with SLI4 IF spec
- * @vport: The virtual port for which this call being executed.
- * @num_to_allocate: The requested number of buffers to allocate.
+ * lpfc_get_nvme_buf - Get a nvme buffer from io_buf_list of the HBA
+ * @phba: The HBA for which this call is being executed.
  *
- * This routine allocates nvme buffers for device with SLI-4 interface spec,
- * the nvme buffer contains all the necessary information needed to initiate
- * a NVME I/O. After allocating up to @num_to_allocate NVME buffers and put
- * them on a list, it post them to the port by using SGL block post.
+ * This routine removes a nvme buffer from head of @hdwq io_buf_list
+ * and returns to caller.
  *
  * Return codes:
- *   int - number of nvme buffers that were allocated and posted.
- *   0 = failure, less than num_to_alloc is a partial failure.
+ *   NULL - Error
+ *   Pointer to lpfc_nvme_buf - Success
  **/
-static int
-lpfc_new_nvme_buf(struct lpfc_vport *vport, int num_to_alloc)
+static struct lpfc_io_buf *
+lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		  int idx, int expedite)
 {
-	struct lpfc_hba *phba = vport->phba;
-	struct lpfc_nvme_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct sli4_sge *sgl;
 	struct lpfc_iocbq *pwqeq;
 	union lpfc_wqe128 *wqe;
-	struct sli4_sge *sgl;
-	dma_addr_t pdma_phys_sgl;
-	uint16_t iotag, lxri = 0;
-	int bcnt, num_posted;
-	LIST_HEAD(prep_nblist);
-	LIST_HEAD(post_nblist);
-	LIST_HEAD(nvme_nblist);
-
-	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
-		lpfc_ncmd = kzalloc(sizeof(struct lpfc_nvme_buf), GFP_KERNEL);
-		if (!lpfc_ncmd)
-			break;
-		/*
-		 * Get memory from the pci pool to map the virt space to
-		 * pci bus space for an I/O. The DMA buffer includes the
-		 * number of SGE's necessary to support the sg_tablesize.
-		 */
-		lpfc_ncmd->data = dma_pool_zalloc(phba->lpfc_sg_dma_buf_pool,
-						  GFP_KERNEL,
-						  &lpfc_ncmd->dma_handle);
-		if (!lpfc_ncmd->data) {
-			kfree(lpfc_ncmd);
-			break;
-		}
 
-		lxri = lpfc_sli4_next_xritag(phba);
-		if (lxri == NO_XRI) {
-			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
-				      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
-			kfree(lpfc_ncmd);
-			break;
-		}
+	lpfc_ncmd = lpfc_get_io_buf(phba, NULL, idx, expedite);
+
+	if (lpfc_ncmd) {
 		pwqeq = &(lpfc_ncmd->cur_iocbq);
 		wqe = &pwqeq->wqe;
 
-		/* Allocate iotag for lpfc_ncmd->cur_iocbq. */
-		iotag = lpfc_sli_next_iotag(phba, pwqeq);
-		if (iotag == 0) {
-			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
-				      lpfc_ncmd->data, lpfc_ncmd->dma_handle);
-			kfree(lpfc_ncmd);
-			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-					"6121 Failed to allocated IOTAG for"
-					" XRI:0x%x\n", lxri);
-			lpfc_sli4_free_xri(phba, lxri);
-			break;
-		}
-		pwqeq->sli4_lxritag = lxri;
-		pwqeq->sli4_xritag = phba->sli4_hba.xri_ids[lxri];
-		pwqeq->iocb_flag |= LPFC_IO_NVME;
-		pwqeq->context1 = lpfc_ncmd;
+		/* Setup key fields in buffer that may have been changed
+		 * if other protocols used this buffer.
+		 */
+		pwqeq->iocb_flag = LPFC_IO_NVME;
 		pwqeq->wqe_cmpl = lpfc_nvme_io_cmd_wqe_cmpl;
-
-		/* Initialize local short-hand pointers. */
-		lpfc_ncmd->nvme_sgl = lpfc_ncmd->data;
-		sgl = lpfc_ncmd->nvme_sgl;
-		pdma_phys_sgl = lpfc_ncmd->dma_handle;
-		lpfc_ncmd->dma_phys_sgl = pdma_phys_sgl;
+		lpfc_ncmd->start_time = jiffies;
+		lpfc_ncmd->flags = 0;
 
 		/* Rsp SGE will be filled in when we rcv an IO
 		 * from the NVME Layer to be sent.
 		 * The cmd is going to be embedded so we need a SKIP SGE.
 		 */
+		sgl = lpfc_ncmd->dma_sgl;
 		bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP);
 		bf_set(lpfc_sli4_sge_last, sgl, 0);
 		sgl->word2 = cpu_to_le32(sgl->word2);
 		/* Fill in word 3 / sgl_len during cmd submission */
 
-		lpfc_ncmd->cur_iocbq.context1 = lpfc_ncmd;
-
 		/* Initialize WQE */
 		memset(wqe, 0, sizeof(union lpfc_wqe));
 
-		/* add the nvme buffer to a post list */
-		list_add_tail(&lpfc_ncmd->list, &post_nblist);
-		spin_lock_irq(&phba->nvme_buf_list_get_lock);
-		phba->sli4_hba.nvme_xri_cnt++;
-		spin_unlock_irq(&phba->nvme_buf_list_get_lock);
-	}
-	lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
-			"6114 Allocate %d out of %d requested new NVME "
-			"buffers\n", bcnt, num_to_alloc);
-
-	/* post the list of nvme buffer sgls to port if available */
-	if (!list_empty(&post_nblist))
-		num_posted = lpfc_post_nvme_sgl_list(phba,
-						     &post_nblist, bcnt);
-	else
-		num_posted = 0;
-
-	return num_posted;
-}
-
-static inline struct lpfc_nvme_buf *
-lpfc_nvme_buf(struct lpfc_hba *phba)
-{
-	struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next;
-
-	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
-				 &phba->lpfc_nvme_buf_list_get, list) {
-		list_del_init(&lpfc_ncmd->list);
-		phba->get_nvme_bufs--;
-		return lpfc_ncmd;
-	}
-	return NULL;
-}
-
-/**
- * lpfc_get_nvme_buf - Get a nvme buffer from lpfc_nvme_buf_list of the HBA
- * @phba: The HBA for which this call is being executed.
- *
- * This routine removes a nvme buffer from head of @phba lpfc_nvme_buf_list list
- * and returns to caller.
- *
- * Return codes:
- *   NULL - Error
- *   Pointer to lpfc_nvme_buf - Success
- **/
-static struct lpfc_nvme_buf *
-lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
-		  int expedite)
-{
-	struct lpfc_nvme_buf *lpfc_ncmd = NULL;
-	unsigned long iflag = 0;
+		if (lpfc_ndlp_check_qdepth(phba, ndlp)) {
+			atomic_inc(&ndlp->cmd_pending);
+			lpfc_ncmd->flags |= LPFC_SBUF_BUMP_QDEPTH;
+		}
 
-	spin_lock_irqsave(&phba->nvme_buf_list_get_lock, iflag);
-	if (phba->get_nvme_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
-		lpfc_ncmd = lpfc_nvme_buf(phba);
-	if (!lpfc_ncmd) {
-		spin_lock(&phba->nvme_buf_list_put_lock);
-		list_splice(&phba->lpfc_nvme_buf_list_put,
-			    &phba->lpfc_nvme_buf_list_get);
-		phba->get_nvme_bufs += phba->put_nvme_bufs;
-		INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put);
-		phba->put_nvme_bufs = 0;
-		spin_unlock(&phba->nvme_buf_list_put_lock);
-		if (phba->get_nvme_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
-			lpfc_ncmd = lpfc_nvme_buf(phba);
+	} else {
+		qp = &phba->sli4_hba.hdwq[idx];
+		qp->empty_io_bufs++;
 	}
-	spin_unlock_irqrestore(&phba->nvme_buf_list_get_lock, iflag);
 
-	if (lpfc_ndlp_check_qdepth(phba, ndlp) && lpfc_ncmd) {
-		atomic_inc(&ndlp->cmd_pending);
-		lpfc_ncmd->flags |= LPFC_BUMP_QDEPTH;
-	}
 	return  lpfc_ncmd;
 }
 
@@ -2385,22 +2000,23 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
  * @lpfc_ncmd: The nvme buffer which is being released.
  *
  * This routine releases @lpfc_ncmd nvme buffer by adding it to tail of @phba
- * lpfc_nvme_buf_list list. For SLI4 XRI's are tied to the nvme buffer
+ * lpfc_io_buf_list list. For SLI4 XRI's are tied to the nvme buffer
  * and cannot be reused for at least RA_TOV amount of time if it was
  * aborted.
  **/
 static void
-lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
+lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd)
 {
+	struct lpfc_sli4_hdw_queue *qp;
 	unsigned long iflag = 0;
 
-	if ((lpfc_ncmd->flags & LPFC_BUMP_QDEPTH) && lpfc_ncmd->ndlp)
+	if ((lpfc_ncmd->flags & LPFC_SBUF_BUMP_QDEPTH) && lpfc_ncmd->ndlp)
 		atomic_dec(&lpfc_ncmd->ndlp->cmd_pending);
 
-	lpfc_ncmd->nonsg_phys = 0;
 	lpfc_ncmd->ndlp = NULL;
-	lpfc_ncmd->flags &= ~LPFC_BUMP_QDEPTH;
+	lpfc_ncmd->flags &= ~LPFC_SBUF_BUMP_QDEPTH;
 
+	qp = lpfc_ncmd->hdwq;
 	if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) {
 		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 				"6310 XB release deferred for "
@@ -2408,20 +2024,13 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
 				lpfc_ncmd->cur_iocbq.sli4_xritag,
 				lpfc_ncmd->cur_iocbq.iotag);
 
-		spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock,
-					iflag);
+		spin_lock_irqsave(&qp->abts_nvme_buf_list_lock, iflag);
 		list_add_tail(&lpfc_ncmd->list,
-			&phba->sli4_hba.lpfc_abts_nvme_buf_list);
-		spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock,
-					iflag);
-	} else {
-		lpfc_ncmd->nvmeCmd = NULL;
-		lpfc_ncmd->cur_iocbq.iocb_flag = LPFC_IO_NVME;
-		spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag);
-		list_add_tail(&lpfc_ncmd->list, &phba->lpfc_nvme_buf_list_put);
-		phba->put_nvme_bufs++;
-		spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag);
-	}
+			&qp->lpfc_abts_nvme_buf_list);
+		qp->abts_nvme_io_bufs++;
+		spin_unlock_irqrestore(&qp->abts_nvme_buf_list_lock, iflag);
+	} else
+		lpfc_release_io_buf(phba, (struct lpfc_io_buf *)lpfc_ncmd, qp);
 }
 
 /**
@@ -2448,8 +2057,6 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	struct nvme_fc_port_info nfcp_info;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
-	int len, i;
 
 	/* Initialize this localport instance.  The vport wwn usage ensures
 	 * that NPIV is accounted for.
@@ -2464,12 +2071,13 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	 * allocate + 3, one for cmd, one for rsp and one for this alignment
 	 */
 	lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
-	lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel;
 
-	cstat = kmalloc((sizeof(struct lpfc_nvme_ctrl_stat) *
-			phba->cfg_nvme_io_channel), GFP_KERNEL);
-	if (!cstat)
-		return -ENOMEM;
+	/* Advertise how many hw queues we support based on fcp_io_sched */
+	if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)
+		lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
+	else
+		lpfc_nvme_template.max_hw_queues =
+			phba->sli4_hba.num_present_cpu;
 
 	/* localport is allocated from the stack, but the registration
 	 * call allocates heap memory as well as the private area.
@@ -2493,7 +2101,6 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 		lport = (struct lpfc_nvme_lport *)localport->private;
 		vport->localport = localport;
 		lport->vport = vport;
-		lport->cstat = cstat;
 		vport->nvmei_support = 1;
 
 		atomic_set(&lport->xmt_fcp_noxri, 0);
@@ -2510,25 +2117,6 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 		atomic_set(&lport->cmpl_ls_err, 0);
 		atomic_set(&lport->fc4NvmeLsRequests, 0);
 		atomic_set(&lport->fc4NvmeLsCmpls, 0);
-
-		for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-			cstat = &lport->cstat[i];
-			atomic_set(&cstat->fc4NvmeInputRequests, 0);
-			atomic_set(&cstat->fc4NvmeOutputRequests, 0);
-			atomic_set(&cstat->fc4NvmeControlRequests, 0);
-			atomic_set(&cstat->fc4NvmeIoCmpls, 0);
-		}
-
-		/* Don't post more new bufs if repost already recovered
-		 * the nvme sgls.
-		 */
-		if (phba->sli4_hba.nvme_xri_cnt == 0) {
-			len  = lpfc_new_nvme_buf(vport,
-						 phba->sli4_hba.nvme_xri_max);
-			vport->phba->total_nvme_bufs += len;
-		}
-	} else {
-		kfree(cstat);
 	}
 
 	return ret;
@@ -2591,7 +2179,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 #if (IS_ENABLED(CONFIG_NVME_FC))
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
 	int ret;
 	DECLARE_COMPLETION_ONSTACK(lport_unreg_cmp);
 
@@ -2600,7 +2187,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 
 	localport = vport->localport;
 	lport = (struct lpfc_nvme_lport *)localport->private;
-	cstat = lport->cstat;
 
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
 			 "6011 Destroying NVME localport %p\n",
@@ -2617,7 +2203,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 	 */
 	lpfc_nvme_lport_unreg_wait(vport, lport, &lport_unreg_cmp);
 	vport->localport = NULL;
-	kfree(cstat);
 
 	/* Regardless of the unregister upcall response, clear
 	 * nvmei_support.  All rports are unregistered and the
@@ -2909,27 +2494,28 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
  **/
 void
 lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
-			   struct sli4_wcqe_xri_aborted *axri)
+			   struct sli4_wcqe_xri_aborted *axri, int idx)
 {
 	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
-	struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd, *next_lpfc_ncmd;
 	struct nvmefc_fcp_req *nvme_cmd = NULL;
 	struct lpfc_nodelist *ndlp;
+	struct lpfc_sli4_hdw_queue *qp;
 	unsigned long iflag = 0;
 
 	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 		return;
+	qp = &phba->sli4_hba.hdwq[idx];
 	spin_lock_irqsave(&phba->hbalock, iflag);
-	spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_lock(&qp->abts_nvme_buf_list_lock);
 	list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd,
-				 &phba->sli4_hba.lpfc_abts_nvme_buf_list,
-				 list) {
+				 &qp->lpfc_abts_nvme_buf_list, list) {
 		if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) {
 			list_del_init(&lpfc_ncmd->list);
+			qp->abts_nvme_io_bufs--;
 			lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
 			lpfc_ncmd->status = IOSTAT_SUCCESS;
-			spin_unlock(
-				&phba->sli4_hba.abts_nvme_buf_list_lock);
+			spin_unlock(&qp->abts_nvme_buf_list_lock);
 
 			spin_unlock_irqrestore(&phba->hbalock, iflag);
 			ndlp = lpfc_ncmd->ndlp;
@@ -2955,7 +2541,7 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
 			return;
 		}
 	}
-	spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_unlock(&qp->abts_nvme_buf_list_lock);
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
@@ -2979,14 +2565,16 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
 	struct lpfc_sli_ring  *pring;
 	u32 i, wait_cnt = 0;
 
-	if (phba->sli_rev < LPFC_SLI_REV4 || !phba->sli4_hba.nvme_wq)
+	if (phba->sli_rev < LPFC_SLI_REV4 || !phba->sli4_hba.hdwq)
 		return;
 
 	/* Cycle through all NVME rings and make sure all outstanding
 	 * WQEs have been removed from the txcmplqs.
 	 */
-	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-		pring = phba->sli4_hba.nvme_wq[i]->pring;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		if (!phba->sli4_hba.hdwq[i].nvme_wq)
+			continue;
+		pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
 
 		if (!pring)
 			continue;
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index b234d0298994..593c48ff634e 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -30,6 +30,9 @@
 #define LPFC_NVME_FB_SHIFT		9
 #define LPFC_NVME_MAX_FB		(1 << 20)	/* 1M */
 
+#define LPFC_MAX_NVME_INFO_TMP_LEN	100
+#define LPFC_NVME_INFO_MORE_STR		"\nCould be more info...\n"
+
 #define lpfc_ndlp_get_nrport(ndlp)					\
 	((!ndlp->nrport || (ndlp->upcall_flags & NLP_WAIT_FOR_UNREG))	\
 	? NULL : ndlp->nrport)
@@ -40,19 +43,11 @@ struct lpfc_nvme_qhandle {
 	uint32_t cpu_id;	/* current cpu id at time of create */
 };
 
-struct lpfc_nvme_ctrl_stat {
-	atomic_t fc4NvmeInputRequests;
-	atomic_t fc4NvmeOutputRequests;
-	atomic_t fc4NvmeControlRequests;
-	atomic_t fc4NvmeIoCmpls;
-};
-
 /* Declare nvme-based local and remote port definitions. */
 struct lpfc_nvme_lport {
 	struct lpfc_vport *vport;
 	struct completion *lport_unreg_cmp;
 	/* Add stats counters here */
-	struct lpfc_nvme_ctrl_stat *cstat;
 	atomic_t fc4NvmeLsRequests;
 	atomic_t fc4NvmeLsCmpls;
 	atomic_t xmt_fcp_noxri;
@@ -76,57 +71,6 @@ struct lpfc_nvme_rport {
 	struct completion rport_unreg_done;
 };
 
-struct lpfc_nvme_buf {
-	struct list_head list;
-	struct nvmefc_fcp_req *nvmeCmd;
-	struct lpfc_nvme_rport *nrport;
-	struct lpfc_nodelist *ndlp;
-
-	uint32_t timeout;
-
-	uint16_t flags;  /* TBD convert exch_busy to flags */
-#define LPFC_SBUF_XBUSY         0x1     /* SLI4 hba reported XB on WCQE cmpl */
-#define LPFC_BUMP_QDEPTH	0x2	/* bumped queue depth counter */
-	uint16_t exch_busy;     /* SLI4 hba reported XB on complete WCQE */
-	uint16_t status;	/* From IOCB Word 7- ulpStatus */
-	uint16_t cpu;
-	uint16_t qidx;
-	uint16_t sqid;
-	uint32_t result;	/* From IOCB Word 4. */
-
-	uint32_t   seg_cnt;	/* Number of scatter-gather segments returned by
-				 * dma_map_sg.  The driver needs this for calls
-				 * to dma_unmap_sg.
-				 */
-	dma_addr_t nonsg_phys;	/* Non scatter-gather physical address. */
-
-	/*
-	 * data and dma_handle are the kernel virtual and bus address of the
-	 * dma-able buffer containing the fcp_cmd, fcp_rsp and a scatter
-	 * gather bde list that supports the sg_tablesize value.
-	 */
-	void *data;
-	dma_addr_t dma_handle;
-
-	struct sli4_sge *nvme_sgl;
-	dma_addr_t dma_phys_sgl;
-
-	/* cur_iocbq has phys of the dma-able buffer.
-	 * Iotag is in here
-	 */
-	struct lpfc_iocbq cur_iocbq;
-
-	wait_queue_head_t *waitq;
-	unsigned long start_time;
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-	uint64_t ts_cmd_start;
-	uint64_t ts_last_cmd;
-	uint64_t ts_cmd_wqput;
-	uint64_t ts_isr_cmpl;
-	uint64_t ts_data_nvme;
-#endif
-};
-
 struct lpfc_nvme_fcpreq_priv {
-	struct lpfc_nvme_buf *nvme_buf;
+	struct lpfc_io_buf *nvme_buf;
 };
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 95fee83090eb..361e2b103648 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channsel Host Bus Adapters.                               *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -73,6 +73,9 @@ static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *,
 					   uint32_t, uint16_t);
 static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *,
 				    struct lpfc_nvmet_rcv_ctx *);
+static void lpfc_nvmet_fcp_rqst_defer_work(struct work_struct *);
+
+static void lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf);
 
 static union lpfc_wqe128 lpfc_tsend_cmd_template;
 static union lpfc_wqe128 lpfc_treceive_cmd_template;
@@ -220,21 +223,19 @@ lpfc_nvmet_cmd_template(void)
 void
 lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
 {
-	unsigned long iflag;
+	lockdep_assert_held(&ctxp->ctxlock);
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 			"6313 NVMET Defer ctx release xri x%x flg x%x\n",
 			ctxp->oxid, ctxp->flag);
 
-	spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag);
-	if (ctxp->flag & LPFC_NVMET_CTX_RLS) {
-		spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock,
-				       iflag);
+	if (ctxp->flag & LPFC_NVMET_CTX_RLS)
 		return;
-	}
+
 	ctxp->flag |= LPFC_NVMET_CTX_RLS;
+	spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 	list_add_tail(&ctxp->list, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
-	spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag);
+	spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 }
 
 /**
@@ -325,7 +326,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 	struct rqb_dmabuf *nvmebuf;
 	struct lpfc_nvmet_ctx_info *infop;
 	uint32_t *payload;
-	uint32_t size, oxid, sid, rc;
+	uint32_t size, oxid, sid;
 	int cpu;
 	unsigned long iflag;
 
@@ -341,6 +342,20 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 				"6411 NVMET free, already free IO x%x: %d %d\n",
 				ctxp->oxid, ctxp->state, ctxp->entry_cnt);
 	}
+
+	if (ctxp->rqb_buffer) {
+		nvmebuf = ctxp->rqb_buffer;
+		spin_lock_irqsave(&ctxp->ctxlock, iflag);
+		ctxp->rqb_buffer = NULL;
+		if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) {
+			ctxp->flag &= ~LPFC_NVMET_CTX_REUSE_WQ;
+			spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
+			nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
+		} else {
+			spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
+			lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
+		}
+	}
 	ctxp->state = LPFC_NVMET_STE_FREE;
 
 	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
@@ -388,46 +403,30 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 		}
 #endif
 		atomic_inc(&tgtp->rcv_fcp_cmd_in);
-		/*
-		 * The calling sequence should be:
-		 * nvmet_fc_rcv_fcp_req->lpfc_nvmet_xmt_fcp_op/cmp- req->done
-		 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
-		 * When we return from nvmet_fc_rcv_fcp_req, all relevant info
-		 * the NVME command / FC header is stored.
-		 * A buffer has already been reposted for this IO, so just free
-		 * the nvmebuf.
-		 */
-		rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
-					  payload, size);
 
-		/* Process FCP command */
-		if (rc == 0) {
-			ctxp->rqb_buffer = NULL;
-			atomic_inc(&tgtp->rcv_fcp_cmd_out);
-			nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
-			return;
-		}
+		/*  flag new work queued, replacement buffer has already
+		 *  been reposted
+		 */
+		spin_lock_irqsave(&ctxp->ctxlock, iflag);
+		ctxp->flag |= LPFC_NVMET_CTX_REUSE_WQ;
+		spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
 
-		/* Processing of FCP command is deferred */
-		if (rc == -EOVERFLOW) {
-			lpfc_nvmeio_data(phba,
-					 "NVMET RCV BUSY: xri x%x sz %d "
-					 "from %06x\n",
-					 oxid, size, sid);
-			atomic_inc(&tgtp->rcv_fcp_cmd_out);
-			return;
+		if (!queue_work(phba->wq, &ctx_buf->defer_work)) {
+			atomic_inc(&tgtp->rcv_fcp_cmd_drop);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6181 Unable to queue deferred work "
+					"for oxid x%x. "
+					"FCP Drop IO [x%x x%x x%x]\n",
+					ctxp->oxid,
+					atomic_read(&tgtp->rcv_fcp_cmd_in),
+					atomic_read(&tgtp->rcv_fcp_cmd_out),
+					atomic_read(&tgtp->xmt_fcp_release));
+
+			spin_lock_irqsave(&ctxp->ctxlock, iflag);
+			lpfc_nvmet_defer_release(phba, ctxp);
+			spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
+			lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
 		}
-		atomic_inc(&tgtp->rcv_fcp_cmd_drop);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
-				ctxp->oxid, rc,
-				atomic_read(&tgtp->rcv_fcp_cmd_in),
-				atomic_read(&tgtp->rcv_fcp_cmd_out),
-				atomic_read(&tgtp->xmt_fcp_release));
-
-		lpfc_nvmet_defer_release(phba, ctxp);
-		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
-		nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
 		return;
 	}
 	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
@@ -744,16 +743,6 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 					ktime_get_ns();
 			}
 		}
-		if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) {
-			id = smp_processor_id();
-			if (ctxp->cpu != id)
-				lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-						"6703 CPU Check cmpl: "
-						"cpu %d expect %d\n",
-						id, ctxp->cpu);
-			if (ctxp->cpu < LPFC_CHECK_CPU_CNT)
-				phba->cpucheck_cmpl_io[id]++;
-		}
 #endif
 		rsp->done(rsp);
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -771,19 +760,22 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 			ctxp->ts_isr_data = cmdwqe->isr_timestamp;
 			ctxp->ts_data_nvme = ktime_get_ns();
 		}
-		if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) {
-			id = smp_processor_id();
+#endif
+		rsp->done(rsp);
+	}
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) {
+		id = smp_processor_id();
+		if (id < LPFC_CHECK_CPU_CNT) {
 			if (ctxp->cpu != id)
-				lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+				lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
 						"6704 CPU Check cmdcmpl: "
 						"cpu %d expect %d\n",
 						id, ctxp->cpu);
-			if (ctxp->cpu < LPFC_CHECK_CPU_CNT)
-				phba->cpucheck_ccmpl_io[id]++;
+			phba->sli4_hba.hdwq[rsp->hwqid].cpucheck_cmpl_io[id]++;
 		}
-#endif
-		rsp->done(rsp);
 	}
+#endif
 }
 
 static int
@@ -852,7 +844,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
 	lpfc_nvmeio_data(phba, "NVMET LS  RESP: xri x%x wqidx x%x len x%x\n",
 			 ctxp->oxid, nvmewqeq->hba_wqidx, rsp->rsplen);
 
-	rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, nvmewqeq);
+	rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
 	if (rc == WQE_SUCCESS) {
 		/*
 		 * Okay to repost buffer here, but wait till cmpl
@@ -908,18 +900,22 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 		else
 			ctxp->ts_nvme_data = ktime_get_ns();
 	}
+
+	/* Setup the hdw queue if not already set */
+	if (!ctxp->hdwq)
+		ctxp->hdwq = &phba->sli4_hba.hdwq[rsp->hwqid];
+
 	if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) {
 		int id = smp_processor_id();
-		ctxp->cpu = id;
-		if (id < LPFC_CHECK_CPU_CNT)
-			phba->cpucheck_xmt_io[id]++;
-		if (rsp->hwqid != id) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-					"6705 CPU Check OP: "
-					"cpu %d expect %d\n",
-					id, rsp->hwqid);
-			ctxp->cpu = rsp->hwqid;
+		if (id < LPFC_CHECK_CPU_CNT) {
+			if (rsp->hwqid != id)
+				lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
+						"6705 CPU Check OP: "
+						"cpu %d expect %d\n",
+						id, rsp->hwqid);
+			phba->sli4_hba.hdwq[rsp->hwqid].cpucheck_xmt_io[id]++;
 		}
+		ctxp->cpu = id; /* Setup cpu for cmpl check */
 	}
 #endif
 
@@ -954,7 +950,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 			 ctxp->oxid, rsp->op, rsp->rsplen);
 
 	ctxp->flag |= LPFC_NVMET_IO_INP;
-	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
+	rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
 	if (rc == WQE_SUCCESS) {
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 		if (!ctxp->ts_cmd_nvme)
@@ -973,7 +969,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 		 * WQE release CQE
 		 */
 		ctxp->flag |= LPFC_NVMET_DEFER_WQFULL;
-		wq = phba->sli4_hba.nvme_wq[rsp->hwqid];
+		wq = ctxp->hdwq->nvme_wq;
 		pring = wq->pring;
 		spin_lock_irqsave(&pring->ring_lock, iflags);
 		list_add_tail(&nvmewqeq->list, &wq->wqfull_list);
@@ -1024,6 +1020,9 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	if (phba->pport->load_flag & FC_UNLOADING)
 		return;
 
+	if (!ctxp->hdwq)
+		ctxp->hdwq = &phba->sli4_hba.hdwq[0];
+
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 			"6103 NVMET Abort op: oxri x%x flg x%x ste %d\n",
 			ctxp->oxid, ctxp->flag, ctxp->state);
@@ -1034,7 +1033,6 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	atomic_inc(&lpfc_nvmep->xmt_fcp_abort);
 
 	spin_lock_irqsave(&ctxp->ctxlock, flags);
-	ctxp->state = LPFC_NVMET_STE_ABORT;
 
 	/* Since iaab/iaar are NOT set, we need to check
 	 * if the firmware is in process of aborting IO
@@ -1046,13 +1044,14 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	ctxp->flag |= LPFC_NVMET_ABORT_OP;
 
 	if (ctxp->flag & LPFC_NVMET_DEFER_WQFULL) {
+		spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
 						 ctxp->oxid);
-		wq = phba->sli4_hba.nvme_wq[ctxp->wqeq->hba_wqidx];
-		spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+		wq = ctxp->hdwq->nvme_wq;
 		lpfc_nvmet_wqfull_flush(phba, wq, ctxp);
 		return;
 	}
+	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 
 	/* An state of LPFC_NVMET_STE_RCV means we have just received
 	 * the NVME command and have not started processing it.
@@ -1064,7 +1063,6 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	else
 		lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid,
 					       ctxp->oxid);
-	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 }
 
 static void
@@ -1078,14 +1076,18 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 	unsigned long flags;
 	bool aborting = false;
 
-	if (ctxp->state != LPFC_NVMET_STE_DONE &&
-	    ctxp->state != LPFC_NVMET_STE_ABORT) {
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
+	if (ctxp->flag & LPFC_NVMET_XBUSY)
+		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
+				"6027 NVMET release with XBUSY flag x%x"
+				" oxid x%x\n",
+				ctxp->flag, ctxp->oxid);
+	else if (ctxp->state != LPFC_NVMET_STE_DONE &&
+		 ctxp->state != LPFC_NVMET_STE_ABORT)
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 				"6413 NVMET release bad state %d %d oxid x%x\n",
 				ctxp->state, ctxp->entry_cnt, ctxp->oxid);
-	}
 
-	spin_lock_irqsave(&ctxp->ctxlock, flags);
 	if ((ctxp->flag & LPFC_NVMET_ABORT_OP) ||
 	    (ctxp->flag & LPFC_NVMET_XBUSY)) {
 		aborting = true;
@@ -1114,6 +1116,8 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
 		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
 	struct lpfc_hba *phba = ctxp->phba;
+	unsigned long iflag;
+
 
 	lpfc_nvmeio_data(phba, "NVMET DEFERRCV: xri x%x sz %d CPU %02x\n",
 			 ctxp->oxid, ctxp->size, smp_processor_id());
@@ -1132,6 +1136,9 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
 
 	/* Free the nvmebuf since a new buffer already replaced it */
 	nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
+	spin_lock_irqsave(&ctxp->ctxlock, iflag);
+	ctxp->rqb_buffer = NULL;
+	spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
 }
 
 static struct nvmet_fc_target_template lpfc_tgttemplate = {
@@ -1163,9 +1170,9 @@ __lpfc_nvmet_clean_io_for_cpu(struct lpfc_hba *phba,
 	spin_lock_irqsave(&infop->nvmet_ctx_list_lock, flags);
 	list_for_each_entry_safe(ctx_buf, next_ctx_buf,
 				&infop->nvmet_ctx_list, list) {
-		spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		list_del_init(&ctx_buf->list);
-		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 
 		__lpfc_clear_active_sglq(phba, ctx_buf->sglq->sli4_lxritag);
 		ctx_buf->sglq->state = SGL_FREED;
@@ -1195,9 +1202,9 @@ lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
 
 	/* Cycle the the entire CPU context list for every MRQ */
 	for (i = 0; i < phba->cfg_nvmet_mrq; i++) {
-		for (j = 0; j < phba->sli4_hba.num_present_cpu; j++) {
+		for_each_present_cpu(j) {
+			infop = lpfc_get_ctx_list(phba, j, i);
 			__lpfc_nvmet_clean_io_for_cpu(phba, infop);
-			infop++; /* next */
 		}
 	}
 	kfree(phba->sli4_hba.nvmet_ctx_info);
@@ -1212,14 +1219,14 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
 	union lpfc_wqe128 *wqe;
 	struct lpfc_nvmet_ctx_info *last_infop;
 	struct lpfc_nvmet_ctx_info *infop;
-	int i, j, idx;
+	int i, j, idx, cpu;
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
 			"6403 Allocate NVMET resources for %d XRIs\n",
 			phba->sli4_hba.nvmet_xri_cnt);
 
 	phba->sli4_hba.nvmet_ctx_info = kcalloc(
-		phba->sli4_hba.num_present_cpu * phba->cfg_nvmet_mrq,
+		phba->sli4_hba.num_possible_cpu * phba->cfg_nvmet_mrq,
 		sizeof(struct lpfc_nvmet_ctx_info), GFP_KERNEL);
 	if (!phba->sli4_hba.nvmet_ctx_info) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -1247,13 +1254,12 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
 	 * of the IO completion. Thus a context that was allocated for MRQ A
 	 * whose IO completed on CPU B will be freed to cpuB/mrqA.
 	 */
-	infop = phba->sli4_hba.nvmet_ctx_info;
-	for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+	for_each_possible_cpu(i) {
 		for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
+			infop = lpfc_get_ctx_list(phba, i, j);
 			INIT_LIST_HEAD(&infop->nvmet_ctx_list);
 			spin_lock_init(&infop->nvmet_ctx_list_lock);
 			infop->nvmet_ctx_list_cnt = 0;
-			infop++;
 		}
 	}
 
@@ -1263,8 +1269,10 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
 	 * MRQ 1 cycling thru CPUs 0 - X, and so on.
 	 */
 	for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
-		last_infop = lpfc_get_ctx_list(phba, 0, j);
-		for (i = phba->sli4_hba.num_present_cpu - 1;  i >= 0; i--) {
+		last_infop = lpfc_get_ctx_list(phba,
+					       cpumask_first(cpu_present_mask),
+					       j);
+		for (i = phba->sli4_hba.num_possible_cpu - 1;  i >= 0; i--) {
 			infop = lpfc_get_ctx_list(phba, i, j);
 			infop->nvmet_ctx_next_cpu = last_infop;
 			last_infop = infop;
@@ -1275,6 +1283,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
 	 * received command on a per xri basis.
 	 */
 	idx = 0;
+	cpu = cpumask_first(cpu_present_mask);
 	for (i = 0; i < phba->sli4_hba.nvmet_xri_cnt; i++) {
 		ctx_buf = kzalloc(sizeof(*ctx_buf), GFP_KERNEL);
 		if (!ctx_buf) {
@@ -1322,13 +1331,14 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
 					"6407 Ran out of NVMET XRIs\n");
 			return -ENOMEM;
 		}
+		INIT_WORK(&ctx_buf->defer_work, lpfc_nvmet_fcp_rqst_defer_work);
 
 		/*
 		 * Add ctx to MRQidx context list. Our initial assumption
 		 * is MRQidx will be associated with CPUidx. This association
 		 * can change on the fly.
 		 */
-		infop = lpfc_get_ctx_list(phba, idx, idx);
+		infop = lpfc_get_ctx_list(phba, cpu, idx);
 		spin_lock(&infop->nvmet_ctx_list_lock);
 		list_add_tail(&ctx_buf->list, &infop->nvmet_ctx_list);
 		infop->nvmet_ctx_list_cnt++;
@@ -1336,11 +1346,18 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
 
 		/* Spread ctx structures evenly across all MRQs */
 		idx++;
-		if (idx >= phba->cfg_nvmet_mrq)
+		if (idx >= phba->cfg_nvmet_mrq) {
 			idx = 0;
+			cpu = cpumask_first(cpu_present_mask);
+			continue;
+		}
+		cpu = cpumask_next(cpu, cpu_present_mask);
+		if (cpu == nr_cpu_ids)
+			cpu = cpumask_first(cpu_present_mask);
+
 	}
 
-	for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+	for_each_present_cpu(i) {
 		for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
 			infop = lpfc_get_ctx_list(phba, i, j);
 			lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
@@ -1378,7 +1395,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 	 * allocate + 3, one for cmd, one for rsp and one for this alignment
 	 */
 	lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
-	lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel;
+	lpfc_tgttemplate.max_hw_queues = phba->cfg_hdw_queue;
 	lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP;
 
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
@@ -1503,13 +1520,14 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 	}
 
 	spin_lock_irqsave(&phba->hbalock, iflag);
-	spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 	list_for_each_entry_safe(ctxp, next_ctxp,
 				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				 list) {
 		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
+		spin_lock(&ctxp->ctxlock);
 		/* Check if we already received a free context call
 		 * and we have completed processing an abort situation.
 		 */
@@ -1519,7 +1537,8 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 			released = true;
 		}
 		ctxp->flag &= ~LPFC_NVMET_XBUSY;
-		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		spin_unlock(&ctxp->ctxlock);
+		spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 
 		rrq_empty = list_empty(&phba->active_rrq_list);
 		spin_unlock_irqrestore(&phba->hbalock, iflag);
@@ -1543,14 +1562,13 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 			lpfc_worker_wake_up(phba);
 		return;
 	}
-	spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
 }
 
 int
 lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 			   struct fc_frame_header *fc_hdr)
-
 {
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	struct lpfc_hba *phba = vport->phba;
@@ -1562,14 +1580,14 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 	xri = be16_to_cpu(fc_hdr->fh_ox_id);
 
 	spin_lock_irqsave(&phba->hbalock, iflag);
-	spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 	list_for_each_entry_safe(ctxp, next_ctxp,
 				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				 list) {
 		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
-		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		spin_unlock_irqrestore(&phba->hbalock, iflag);
 
 		spin_lock_irqsave(&ctxp->ctxlock, iflag);
@@ -1590,7 +1608,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 		lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1);
 		return 0;
 	}
-	spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
 
 	lpfc_nvmeio_data(phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n",
@@ -1658,6 +1676,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	struct lpfc_sli_ring *pring;
 	struct lpfc_iocbq *nvmewqeq;
+	struct lpfc_nvmet_rcv_ctx *ctxp;
 	unsigned long iflags;
 	int rc;
 
@@ -1671,7 +1690,8 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
 		list_remove_head(&wq->wqfull_list, nvmewqeq, struct lpfc_iocbq,
 				 list);
 		spin_unlock_irqrestore(&pring->ring_lock, iflags);
-		rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
+		ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmewqeq->context2;
+		rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
 		spin_lock_irqsave(&pring->ring_lock, iflags);
 		if (rc == -EBUSY) {
 			/* WQ was full again, so put it back on the list */
@@ -1699,8 +1719,8 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 		return;
 	if (phba->targetport) {
 		tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-		for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) {
-			wq = phba->sli4_hba.nvme_wq[qidx];
+		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+			wq = phba->sli4_hba.hdwq[qidx].nvme_wq;
 			lpfc_nvmet_wqfull_flush(phba, wq, NULL);
 		}
 		tgtp->tport_unreg_cmp = &tport_unreg_cmp;
@@ -1775,6 +1795,7 @@ dropit:
 	ctxp->state = LPFC_NVMET_STE_LS_RCV;
 	ctxp->entry_cnt = 1;
 	ctxp->rqb_buffer = (void *)nvmebuf;
+	ctxp->hdwq = &phba->sli4_hba.hdwq[0];
 
 	lpfc_nvmeio_data(phba, "NVMET LS   RCV: xri x%x sz %d from %06x\n",
 			 oxid, size, sid);
@@ -1814,6 +1835,86 @@ dropit:
 #endif
 }
 
+static void
+lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
+{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+	struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+	struct lpfc_hba *phba = ctxp->phba;
+	struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
+	struct lpfc_nvmet_tgtport *tgtp;
+	uint32_t *payload;
+	uint32_t rc;
+	unsigned long iflags;
+
+	if (!nvmebuf) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+			"6159 process_rcv_fcp_req, nvmebuf is NULL, "
+			"oxid: x%x flg: x%x state: x%x\n",
+			ctxp->oxid, ctxp->flag, ctxp->state);
+		spin_lock_irqsave(&ctxp->ctxlock, iflags);
+		lpfc_nvmet_defer_release(phba, ctxp);
+		spin_unlock_irqrestore(&ctxp->ctxlock, iflags);
+		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
+						 ctxp->oxid);
+		return;
+	}
+
+	payload = (uint32_t *)(nvmebuf->dbuf.virt);
+	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+	/*
+	 * The calling sequence should be:
+	 * nvmet_fc_rcv_fcp_req->lpfc_nvmet_xmt_fcp_op/cmp- req->done
+	 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
+	 * When we return from nvmet_fc_rcv_fcp_req, all relevant info
+	 * the NVME command / FC header is stored.
+	 * A buffer has already been reposted for this IO, so just free
+	 * the nvmebuf.
+	 */
+	rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
+				  payload, ctxp->size);
+	/* Process FCP command */
+	if (rc == 0) {
+		atomic_inc(&tgtp->rcv_fcp_cmd_out);
+		return;
+	}
+
+	/* Processing of FCP command is deferred */
+	if (rc == -EOVERFLOW) {
+		lpfc_nvmeio_data(phba, "NVMET RCV BUSY: xri x%x sz %d "
+				 "from %06x\n",
+				 ctxp->oxid, ctxp->size, ctxp->sid);
+		atomic_inc(&tgtp->rcv_fcp_cmd_out);
+		atomic_inc(&tgtp->defer_fod);
+		return;
+	}
+	atomic_inc(&tgtp->rcv_fcp_cmd_drop);
+	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+			"2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
+			ctxp->oxid, rc,
+			atomic_read(&tgtp->rcv_fcp_cmd_in),
+			atomic_read(&tgtp->rcv_fcp_cmd_out),
+			atomic_read(&tgtp->xmt_fcp_release));
+	lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
+			 ctxp->oxid, ctxp->size, ctxp->sid);
+	spin_lock_irqsave(&ctxp->ctxlock, iflags);
+	lpfc_nvmet_defer_release(phba, ctxp);
+	spin_unlock_irqrestore(&ctxp->ctxlock, iflags);
+	lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid);
+#endif
+}
+
+static void
+lpfc_nvmet_fcp_rqst_defer_work(struct work_struct *work)
+{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+	struct lpfc_nvmet_ctxbuf *ctx_buf =
+		container_of(work, struct lpfc_nvmet_ctxbuf, defer_work);
+
+	lpfc_nvmet_process_rcv_fcp_req(ctx_buf);
+#endif
+}
+
 static struct lpfc_nvmet_ctxbuf *
 lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
 			     struct lpfc_nvmet_ctx_info *current_infop)
@@ -1838,7 +1939,7 @@ lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
 	else
 		get_infop = current_infop->nvmet_ctx_next_cpu;
 
-	for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+	for (i = 0; i < phba->sli4_hba.num_possible_cpu; i++) {
 		if (get_infop == current_infop) {
 			get_infop = get_infop->nvmet_ctx_next_cpu;
 			continue;
@@ -1896,12 +1997,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	struct lpfc_nvmet_ctxbuf *ctx_buf;
 	struct lpfc_nvmet_ctx_info *current_infop;
 	uint32_t *payload;
-	uint32_t size, oxid, sid, rc, qno;
+	uint32_t size, oxid, sid, qno;
 	unsigned long iflag;
 	int current_cpu;
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-	uint32_t id;
-#endif
 
 	if (!IS_ENABLED(CONFIG_NVME_TARGET_FC))
 		return;
@@ -1910,11 +2008,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	if (!nvmebuf || !phba->targetport) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 				"6157 NVMET FCP Drop IO\n");
-		oxid = 0;
-		size = 0;
-		sid = 0;
-		ctxp = NULL;
-		goto dropit;
+		if (nvmebuf)
+			lpfc_rq_buf_free(phba, &nvmebuf->hbuf);
+		return;
 	}
 
 	/*
@@ -1942,9 +2038,14 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) {
-		id = smp_processor_id();
-		if (id < LPFC_CHECK_CPU_CNT)
-			phba->cpucheck_rcv_io[id]++;
+		if (current_cpu < LPFC_CHECK_CPU_CNT) {
+			if (idx != current_cpu)
+				lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
+						"6703 CPU Check rcv: "
+						"cpu %d expect %d\n",
+						current_cpu, idx);
+			phba->sli4_hba.hdwq[idx].cpucheck_rcv_io[current_cpu]++;
+		}
 	}
 #endif
 
@@ -1995,6 +2096,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	ctxp->flag = 0;
 	ctxp->ctxbuf = ctx_buf;
 	ctxp->rqb_buffer = (void *)nvmebuf;
+	ctxp->hdwq = NULL;
 	spin_lock_init(&ctxp->ctxlock);
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -2015,67 +2117,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 #endif
 
 	atomic_inc(&tgtp->rcv_fcp_cmd_in);
-	/*
-	 * The calling sequence should be:
-	 * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done
-	 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
-	 * When we return from nvmet_fc_rcv_fcp_req, all relevant info in
-	 * the NVME command / FC header is stored, so we are free to repost
-	 * the buffer.
-	 */
-	rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
-				  payload, size);
-
-	/* Process FCP command */
-	if (rc == 0) {
-		ctxp->rqb_buffer = NULL;
-		atomic_inc(&tgtp->rcv_fcp_cmd_out);
-		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
-		return;
-	}
-
-	/* Processing of FCP command is deferred */
-	if (rc == -EOVERFLOW) {
-		/*
-		 * Post a brand new DMA buffer to RQ and defer
-		 * freeing rcv buffer till .defer_rcv callback
-		 */
-		qno = nvmebuf->idx;
-		lpfc_post_rq_buffer(
-			phba, phba->sli4_hba.nvmet_mrq_hdr[qno],
-			phba->sli4_hba.nvmet_mrq_data[qno], 1, qno);
-
-		lpfc_nvmeio_data(phba,
-				 "NVMET RCV BUSY: xri x%x sz %d from %06x\n",
-				 oxid, size, sid);
-		atomic_inc(&tgtp->rcv_fcp_cmd_out);
-		atomic_inc(&tgtp->defer_fod);
-		return;
-	}
-	ctxp->rqb_buffer = nvmebuf;
-
-	atomic_inc(&tgtp->rcv_fcp_cmd_drop);
-	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-			"6159 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
-			ctxp->oxid, rc,
-			atomic_read(&tgtp->rcv_fcp_cmd_in),
-			atomic_read(&tgtp->rcv_fcp_cmd_out),
-			atomic_read(&tgtp->xmt_fcp_release));
-dropit:
-	lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
-			 oxid, size, sid);
-	if (oxid) {
-		lpfc_nvmet_defer_release(phba, ctxp);
-		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
-		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
-		return;
-	}
-
-	if (ctx_buf)
-		lpfc_nvmet_ctxbuf_post(phba, ctx_buf);
-
-	if (nvmebuf)
-		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
+	lpfc_nvmet_process_rcv_fcp_req(ctx_buf);
 }
 
 /**
@@ -2660,15 +2702,17 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	if (ctxp->flag & LPFC_NVMET_ABORT_OP)
 		atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
 
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
 	ctxp->state = LPFC_NVMET_STE_DONE;
 
 	/* Check if we already received a free context call
 	 * and we have completed processing an abort situation.
 	 */
-	spin_lock_irqsave(&ctxp->ctxlock, flags);
 	if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
 	    !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+		spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		list_del(&ctxp->list);
+		spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		released = true;
 	}
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
@@ -2734,6 +2778,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	}
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
 	if (ctxp->flag & LPFC_NVMET_ABORT_OP)
 		atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
 
@@ -2748,10 +2793,11 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	 * and we have completed processing an abort situation.
 	 */
 	ctxp->state = LPFC_NVMET_STE_DONE;
-	spin_lock_irqsave(&ctxp->ctxlock, flags);
 	if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
 	    !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+		spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		list_del(&ctxp->list);
+		spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		released = true;
 	}
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
@@ -2957,12 +3003,15 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 				(ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
 
 		/* No failure to an ABTS request. */
+		spin_lock_irqsave(&ctxp->ctxlock, flags);
 		ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+		spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 		return 0;
 	}
 
 	/* Issue ABTS for this WQE based on iotag */
 	ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba);
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
 	if (!ctxp->abort_wqeq) {
 		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
@@ -2970,11 +3019,13 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 				"xri: x%x\n", ctxp->oxid);
 		/* No failure to an ABTS request. */
 		ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+		spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 		return 0;
 	}
 	abts_wqeq = ctxp->abort_wqeq;
 	abts_wqe = &abts_wqeq->wqe;
 	ctxp->state = LPFC_NVMET_STE_ABORT;
+	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 
 	/* Announce entry to new IO submit field. */
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
@@ -2995,7 +3046,9 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 				"NVME Req now. hba_flag x%x oxid x%x\n",
 				phba->hba_flag, ctxp->oxid);
 		lpfc_sli_release_iocbq(phba, abts_wqeq);
+		spin_lock_irqsave(&ctxp->ctxlock, flags);
 		ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+		spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 		return 0;
 	}
 
@@ -3008,7 +3061,9 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 				"still pending on oxid x%x\n",
 				ctxp->oxid);
 		lpfc_sli_release_iocbq(phba, abts_wqeq);
+		spin_lock_irqsave(&ctxp->ctxlock, flags);
 		ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+		spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 		return 0;
 	}
 
@@ -3052,7 +3107,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	abts_wqeq->iocb_flag |= LPFC_IO_NVME;
 	abts_wqeq->context2 = ctxp;
 	abts_wqeq->vport = phba->pport;
-	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
+	if (!ctxp->hdwq)
+		ctxp->hdwq = &phba->sli4_hba.hdwq[abts_wqeq->hba_wqidx];
+
+	rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
 		atomic_inc(&tgtp->xmt_abort_sol);
@@ -3060,7 +3118,9 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	}
 
 	atomic_inc(&tgtp->xmt_abort_rsp_error);
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 	lpfc_sli_release_iocbq(phba, abts_wqeq);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
 			"6166 Failed ABORT issue_wqe with status x%x "
@@ -3069,7 +3129,6 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	return 1;
 }
 
-
 static int
 lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 				 struct lpfc_nvmet_rcv_ctx *ctxp,
@@ -3078,6 +3137,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct lpfc_iocbq *abts_wqeq;
 	unsigned long flags;
+	bool released = false;
 	int rc;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
@@ -3104,7 +3164,10 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 	abts_wqeq->wqe_cmpl = lpfc_nvmet_unsol_fcp_abort_cmp;
 	abts_wqeq->iocb_cmpl = NULL;
 	abts_wqeq->iocb_flag |= LPFC_IO_NVMET;
-	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
+	if (!ctxp->hdwq)
+		ctxp->hdwq = &phba->sli4_hba.hdwq[abts_wqeq->hba_wqidx];
+
+	rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
 		return 0;
@@ -3112,8 +3175,12 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 
 aerr:
 	spin_lock_irqsave(&ctxp->ctxlock, flags);
-	if (ctxp->flag & LPFC_NVMET_CTX_RLS)
+	if (ctxp->flag & LPFC_NVMET_CTX_RLS) {
+		spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 		list_del(&ctxp->list);
+		spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
+		released = true;
+	}
 	ctxp->flag &= ~(LPFC_NVMET_ABORT_OP | LPFC_NVMET_CTX_RLS);
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 
@@ -3121,7 +3188,8 @@ aerr:
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
 			"6135 Failed to Issue ABTS for oxid x%x. Status x%x\n",
 			ctxp->oxid, rc);
-	lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
+	if (released)
+		lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 	return 1;
 }
 
@@ -3173,7 +3241,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
 	abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_abort_cmp;
 	abts_wqeq->iocb_cmpl = 0;
 	abts_wqeq->iocb_flag |=  LPFC_IO_NVME_LS;
-	rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, abts_wqeq);
+	rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
 		atomic_inc(&tgtp->xmt_abort_unsol);
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index 0ec1082ce7ef..368deea2bcf8 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -137,9 +137,11 @@ struct lpfc_nvmet_rcv_ctx {
 #define LPFC_NVMET_XBUSY		0x4  /* XB bit set on IO cmpl */
 #define LPFC_NVMET_CTX_RLS		0x8  /* ctx free requested */
 #define LPFC_NVMET_ABTS_RCV		0x10  /* ABTS received on exchange */
+#define LPFC_NVMET_CTX_REUSE_WQ		0x20  /* ctx reused via WQ */
 #define LPFC_NVMET_DEFER_WQFULL		0x40  /* Waiting on a free WQE */
 	struct rqb_dmabuf *rqb_buffer;
 	struct lpfc_nvmet_ctxbuf *ctxbuf;
+	struct lpfc_sli4_hdw_queue *hdwq;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint64_t ts_isr_cmd;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index b4f1a840b3b4..c98f264f1d83 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -83,9 +83,9 @@ lpfc_rport_data_from_scsi_device(struct scsi_device *sdev)
 }
 
 static void
-lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb);
+lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *psb);
 static void
-lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb);
+lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_io_buf *psb);
 static int
 lpfc_prot_group_type(struct lpfc_hba *phba, struct scsi_cmnd *sc);
 
@@ -180,9 +180,9 @@ lpfc_cmd_guard_csum(struct scsi_cmnd *sc)
  **/
 static void
 lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba,
-				struct lpfc_scsi_buf *lpfc_cmd)
+				struct lpfc_io_buf *lpfc_cmd)
 {
-	struct sli4_sge *sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+	struct sli4_sge *sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
 	if (sgl) {
 		sgl += 1;
 		sgl->word2 = le32_to_cpu(sgl->word2);
@@ -200,7 +200,7 @@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba,
  * function updates the statistical data for the command completion.
  **/
 static void
-lpfc_update_stats(struct lpfc_hba *phba, struct  lpfc_scsi_buf *lpfc_cmd)
+lpfc_update_stats(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 {
 	struct lpfc_rport_data *rdata;
 	struct lpfc_nodelist *pnode;
@@ -389,12 +389,12 @@ static int
 lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 {
 	struct lpfc_hba *phba = vport->phba;
-	struct lpfc_scsi_buf *psb;
+	struct lpfc_io_buf *psb;
 	struct ulp_bde64 *bpl;
 	IOCB_t *iocb;
 	dma_addr_t pdma_phys_fcp_cmd;
 	dma_addr_t pdma_phys_fcp_rsp;
-	dma_addr_t pdma_phys_bpl;
+	dma_addr_t pdma_phys_sgl;
 	uint16_t iotag;
 	int bcnt, bpl_size;
 
@@ -408,7 +408,7 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 			 (int)sizeof(struct fcp_rsp), bpl_size);
 
 	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
-		psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
+		psb = kzalloc(sizeof(struct lpfc_io_buf), GFP_KERNEL);
 		if (!psb)
 			break;
 
@@ -438,14 +438,14 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 
 		psb->fcp_cmnd = psb->data;
 		psb->fcp_rsp = psb->data + sizeof(struct fcp_cmnd);
-		psb->fcp_bpl = psb->data + sizeof(struct fcp_cmnd) +
+		psb->dma_sgl = psb->data + sizeof(struct fcp_cmnd) +
 			sizeof(struct fcp_rsp);
 
 		/* Initialize local short-hand pointers. */
-		bpl = psb->fcp_bpl;
+		bpl = (struct ulp_bde64 *)psb->dma_sgl;
 		pdma_phys_fcp_cmd = psb->dma_handle;
 		pdma_phys_fcp_rsp = psb->dma_handle + sizeof(struct fcp_cmnd);
-		pdma_phys_bpl = psb->dma_handle + sizeof(struct fcp_cmnd) +
+		pdma_phys_sgl = psb->dma_handle + sizeof(struct fcp_cmnd) +
 			sizeof(struct fcp_rsp);
 
 		/*
@@ -496,9 +496,9 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 			iocb->un.fcpi64.bdl.bdeSize =
 					(2 * sizeof(struct ulp_bde64));
 			iocb->un.fcpi64.bdl.addrLow =
-					putPaddrLow(pdma_phys_bpl);
+					putPaddrLow(pdma_phys_sgl);
 			iocb->un.fcpi64.bdl.addrHigh =
-					putPaddrHigh(pdma_phys_bpl);
+					putPaddrHigh(pdma_phys_sgl);
 			iocb->ulpBdeCount = 1;
 			iocb->ulpLe = 1;
 		}
@@ -506,6 +506,7 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 		psb->status = IOSTAT_SUCCESS;
 		/* Put it back into the SCSI buffer list */
 		psb->cur_iocbq.context1  = psb;
+		spin_lock_init(&psb->buf_lock);
 		lpfc_release_scsi_buf_s3(phba, psb);
 
 	}
@@ -524,20 +525,27 @@ void
 lpfc_sli4_vport_delete_fcp_xri_aborted(struct lpfc_vport *vport)
 {
 	struct lpfc_hba *phba = vport->phba;
-	struct lpfc_scsi_buf *psb, *next_psb;
+	struct lpfc_io_buf *psb, *next_psb;
+	struct lpfc_sli4_hdw_queue *qp;
 	unsigned long iflag = 0;
+	int idx;
 
-	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
+	if (!(vport->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
 		return;
+
 	spin_lock_irqsave(&phba->hbalock, iflag);
-	spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock);
-	list_for_each_entry_safe(psb, next_psb,
-				&phba->sli4_hba.lpfc_abts_scsi_buf_list, list) {
-		if (psb->rdata && psb->rdata->pnode
-			&& psb->rdata->pnode->vport == vport)
-			psb->rdata = NULL;
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		qp = &phba->sli4_hba.hdwq[idx];
+
+		spin_lock(&qp->abts_scsi_buf_list_lock);
+		list_for_each_entry_safe(psb, next_psb,
+					 &qp->lpfc_abts_scsi_buf_list, list) {
+			if (psb->rdata && psb->rdata->pnode &&
+			    psb->rdata->pnode->vport == vport)
+				psb->rdata = NULL;
+		}
+		spin_unlock(&qp->abts_scsi_buf_list_lock);
 	}
-	spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock);
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
 }
 
@@ -551,11 +559,12 @@ lpfc_sli4_vport_delete_fcp_xri_aborted(struct lpfc_vport *vport)
  **/
 void
 lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
-			  struct sli4_wcqe_xri_aborted *axri)
+			  struct sli4_wcqe_xri_aborted *axri, int idx)
 {
 	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
 	uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
-	struct lpfc_scsi_buf *psb, *next_psb;
+	struct lpfc_io_buf *psb, *next_psb;
+	struct lpfc_sli4_hdw_queue *qp;
 	unsigned long iflag = 0;
 	struct lpfc_iocbq *iocbq;
 	int i;
@@ -565,16 +574,19 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
 
 	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
 		return;
+
+	qp = &phba->sli4_hba.hdwq[idx];
 	spin_lock_irqsave(&phba->hbalock, iflag);
-	spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock);
+	spin_lock(&qp->abts_scsi_buf_list_lock);
 	list_for_each_entry_safe(psb, next_psb,
-		&phba->sli4_hba.lpfc_abts_scsi_buf_list, list) {
+		&qp->lpfc_abts_scsi_buf_list, list) {
 		if (psb->cur_iocbq.sli4_xritag == xri) {
 			list_del(&psb->list);
+			qp->abts_scsi_io_bufs--;
 			psb->exch_busy = 0;
 			psb->status = IOSTAT_SUCCESS;
 			spin_unlock(
-				&phba->sli4_hba.abts_scsi_buf_list_lock);
+				&qp->abts_scsi_buf_list_lock);
 			if (psb->rdata && psb->rdata->pnode)
 				ndlp = psb->rdata->pnode;
 			else
@@ -593,7 +605,7 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
 			return;
 		}
 	}
-	spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock);
+	spin_unlock(&qp->abts_scsi_buf_list_lock);
 	for (i = 1; i <= phba->sli.last_iotag; i++) {
 		iocbq = phba->sli.iocbq_lookup[i];
 
@@ -602,7 +614,7 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
 			continue;
 		if (iocbq->sli4_xritag != xri)
 			continue;
-		psb = container_of(iocbq, struct lpfc_scsi_buf, cur_iocbq);
+		psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
 		psb->exch_busy = 0;
 		spin_unlock_irqrestore(&phba->hbalock, iflag);
 		if (!list_empty(&pring->txq))
@@ -614,359 +626,6 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
 }
 
 /**
- * lpfc_sli4_post_scsi_sgl_list - Post blocks of scsi buffer sgls from a list
- * @phba: pointer to lpfc hba data structure.
- * @post_sblist: pointer to the scsi buffer list.
- *
- * This routine walks a list of scsi buffers that was passed in. It attempts
- * to construct blocks of scsi buffer sgls which contains contiguous xris and
- * uses the non-embedded SGL block post mailbox commands to post to the port.
- * For single SCSI buffer sgl with non-contiguous xri, if any, it shall use
- * embedded SGL post mailbox command for posting. The @post_sblist passed in
- * must be local list, thus no lock is needed when manipulate the list.
- *
- * Returns: 0 = failure, non-zero number of successfully posted buffers.
- **/
-static int
-lpfc_sli4_post_scsi_sgl_list(struct lpfc_hba *phba,
-			     struct list_head *post_sblist, int sb_count)
-{
-	struct lpfc_scsi_buf *psb, *psb_next;
-	int status, sgl_size;
-	int post_cnt = 0, block_cnt = 0, num_posting = 0, num_posted = 0;
-	dma_addr_t pdma_phys_bpl1;
-	int last_xritag = NO_XRI;
-	LIST_HEAD(prep_sblist);
-	LIST_HEAD(blck_sblist);
-	LIST_HEAD(scsi_sblist);
-
-	/* sanity check */
-	if (sb_count <= 0)
-		return -EINVAL;
-
-	sgl_size = phba->cfg_sg_dma_buf_size -
-		(sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
-
-	list_for_each_entry_safe(psb, psb_next, post_sblist, list) {
-		list_del_init(&psb->list);
-		block_cnt++;
-		if ((last_xritag != NO_XRI) &&
-		    (psb->cur_iocbq.sli4_xritag != last_xritag + 1)) {
-			/* a hole in xri block, form a sgl posting block */
-			list_splice_init(&prep_sblist, &blck_sblist);
-			post_cnt = block_cnt - 1;
-			/* prepare list for next posting block */
-			list_add_tail(&psb->list, &prep_sblist);
-			block_cnt = 1;
-		} else {
-			/* prepare list for next posting block */
-			list_add_tail(&psb->list, &prep_sblist);
-			/* enough sgls for non-embed sgl mbox command */
-			if (block_cnt == LPFC_NEMBED_MBOX_SGL_CNT) {
-				list_splice_init(&prep_sblist, &blck_sblist);
-				post_cnt = block_cnt;
-				block_cnt = 0;
-			}
-		}
-		num_posting++;
-		last_xritag = psb->cur_iocbq.sli4_xritag;
-
-		/* end of repost sgl list condition for SCSI buffers */
-		if (num_posting == sb_count) {
-			if (post_cnt == 0) {
-				/* last sgl posting block */
-				list_splice_init(&prep_sblist, &blck_sblist);
-				post_cnt = block_cnt;
-			} else if (block_cnt == 1) {
-				/* last single sgl with non-contiguous xri */
-				if (sgl_size > SGL_PAGE_SIZE)
-					pdma_phys_bpl1 = psb->dma_phys_bpl +
-								SGL_PAGE_SIZE;
-				else
-					pdma_phys_bpl1 = 0;
-				status = lpfc_sli4_post_sgl(phba,
-						psb->dma_phys_bpl,
-						pdma_phys_bpl1,
-						psb->cur_iocbq.sli4_xritag);
-				if (status) {
-					/* failure, put on abort scsi list */
-					psb->exch_busy = 1;
-				} else {
-					/* success, put on SCSI buffer list */
-					psb->exch_busy = 0;
-					psb->status = IOSTAT_SUCCESS;
-					num_posted++;
-				}
-				/* success, put on SCSI buffer sgl list */
-				list_add_tail(&psb->list, &scsi_sblist);
-			}
-		}
-
-		/* continue until a nembed page worth of sgls */
-		if (post_cnt == 0)
-			continue;
-
-		/* post block of SCSI buffer list sgls */
-		status = lpfc_sli4_post_scsi_sgl_block(phba, &blck_sblist,
-						       post_cnt);
-
-		/* don't reset xirtag due to hole in xri block */
-		if (block_cnt == 0)
-			last_xritag = NO_XRI;
-
-		/* reset SCSI buffer post count for next round of posting */
-		post_cnt = 0;
-
-		/* put posted SCSI buffer-sgl posted on SCSI buffer sgl list */
-		while (!list_empty(&blck_sblist)) {
-			list_remove_head(&blck_sblist, psb,
-					 struct lpfc_scsi_buf, list);
-			if (status) {
-				/* failure, put on abort scsi list */
-				psb->exch_busy = 1;
-			} else {
-				/* success, put on SCSI buffer list */
-				psb->exch_busy = 0;
-				psb->status = IOSTAT_SUCCESS;
-				num_posted++;
-			}
-			list_add_tail(&psb->list, &scsi_sblist);
-		}
-	}
-	/* Push SCSI buffers with sgl posted to the availble list */
-	while (!list_empty(&scsi_sblist)) {
-		list_remove_head(&scsi_sblist, psb,
-				 struct lpfc_scsi_buf, list);
-		lpfc_release_scsi_buf_s4(phba, psb);
-	}
-	return num_posted;
-}
-
-/**
- * lpfc_sli4_repost_scsi_sgl_list - Repost all the allocated scsi buffer sgls
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine walks the list of scsi buffers that have been allocated and
- * repost them to the port by using SGL block post. This is needed after a
- * pci_function_reset/warm_start or start. The lpfc_hba_down_post_s4 routine
- * is responsible for moving all scsi buffers on the lpfc_abts_scsi_sgl_list
- * to the lpfc_scsi_buf_list. If the repost fails, reject all scsi buffers.
- *
- * Returns: 0 = success, non-zero failure.
- **/
-int
-lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *phba)
-{
-	LIST_HEAD(post_sblist);
-	int num_posted, rc = 0;
-
-	/* get all SCSI buffers need to repost to a local list */
-	spin_lock_irq(&phba->scsi_buf_list_get_lock);
-	spin_lock(&phba->scsi_buf_list_put_lock);
-	list_splice_init(&phba->lpfc_scsi_buf_list_get, &post_sblist);
-	list_splice(&phba->lpfc_scsi_buf_list_put, &post_sblist);
-	spin_unlock(&phba->scsi_buf_list_put_lock);
-	spin_unlock_irq(&phba->scsi_buf_list_get_lock);
-
-	/* post the list of scsi buffer sgls to port if available */
-	if (!list_empty(&post_sblist)) {
-		num_posted = lpfc_sli4_post_scsi_sgl_list(phba, &post_sblist,
-						phba->sli4_hba.scsi_xri_cnt);
-		/* failed to post any scsi buffer, return error */
-		if (num_posted == 0)
-			rc = -EIO;
-	}
-	return rc;
-}
-
-/**
- * lpfc_new_scsi_buf_s4 - Scsi buffer allocator for HBA with SLI4 IF spec
- * @vport: The virtual port for which this call being executed.
- * @num_to_allocate: The requested number of buffers to allocate.
- *
- * This routine allocates scsi buffers for device with SLI-4 interface spec,
- * the scsi buffer contains all the necessary information needed to initiate
- * a SCSI I/O. After allocating up to @num_to_allocate SCSI buffers and put
- * them on a list, it post them to the port by using SGL block post.
- *
- * Return codes:
- *   int - number of scsi buffers that were allocated and posted.
- *   0 = failure, less than num_to_alloc is a partial failure.
- **/
-static int
-lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc)
-{
-	struct lpfc_hba *phba = vport->phba;
-	struct lpfc_scsi_buf *psb;
-	struct sli4_sge *sgl;
-	IOCB_t *iocb;
-	dma_addr_t pdma_phys_fcp_cmd;
-	dma_addr_t pdma_phys_fcp_rsp;
-	dma_addr_t pdma_phys_bpl;
-	uint16_t iotag, lxri = 0;
-	int bcnt, num_posted, sgl_size;
-	LIST_HEAD(prep_sblist);
-	LIST_HEAD(post_sblist);
-	LIST_HEAD(scsi_sblist);
-
-	sgl_size = phba->cfg_sg_dma_buf_size -
-		(sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
-
-	lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
-			 "9068 ALLOC %d scsi_bufs: %d (%d + %d + %d)\n",
-			 num_to_alloc, phba->cfg_sg_dma_buf_size, sgl_size,
-			 (int)sizeof(struct fcp_cmnd),
-			 (int)sizeof(struct fcp_rsp));
-
-	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
-		psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
-		if (!psb)
-			break;
-		/*
-		 * Get memory from the pci pool to map the virt space to
-		 * pci bus space for an I/O. The DMA buffer includes space
-		 * for the struct fcp_cmnd, struct fcp_rsp and the number
-		 * of bde's necessary to support the sg_tablesize.
-		 */
-		psb->data = dma_pool_zalloc(phba->lpfc_sg_dma_buf_pool,
-						GFP_KERNEL, &psb->dma_handle);
-		if (!psb->data) {
-			kfree(psb);
-			break;
-		}
-
-		/*
-		 * 4K Page alignment is CRITICAL to BlockGuard, double check
-		 * to be sure.
-		 */
-		if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) &&
-		    (((unsigned long)(psb->data) &
-		    (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-					"3369 Memory alignment error "
-					"addr=%lx\n",
-					(unsigned long)psb->data);
-			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
-				      psb->data, psb->dma_handle);
-			kfree(psb);
-			break;
-		}
-
-
-		lxri = lpfc_sli4_next_xritag(phba);
-		if (lxri == NO_XRI) {
-			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
-				      psb->data, psb->dma_handle);
-			kfree(psb);
-			break;
-		}
-
-		/* Allocate iotag for psb->cur_iocbq. */
-		iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq);
-		if (iotag == 0) {
-			dma_pool_free(phba->lpfc_sg_dma_buf_pool,
-				      psb->data, psb->dma_handle);
-			kfree(psb);
-			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-					"3368 Failed to allocate IOTAG for"
-					" XRI:0x%x\n", lxri);
-			lpfc_sli4_free_xri(phba, lxri);
-			break;
-		}
-		psb->cur_iocbq.sli4_lxritag = lxri;
-		psb->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
-		psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP;
-		psb->fcp_bpl = psb->data;
-		psb->fcp_cmnd = (psb->data + sgl_size);
-		psb->fcp_rsp = (struct fcp_rsp *)((uint8_t *)psb->fcp_cmnd +
-					sizeof(struct fcp_cmnd));
-
-		/* Initialize local short-hand pointers. */
-		sgl = (struct sli4_sge *)psb->fcp_bpl;
-		pdma_phys_bpl = psb->dma_handle;
-		pdma_phys_fcp_cmd = (psb->dma_handle + sgl_size);
-		pdma_phys_fcp_rsp = pdma_phys_fcp_cmd + sizeof(struct fcp_cmnd);
-
-		/*
-		 * The first two bdes are the FCP_CMD and FCP_RSP.
-		 * The balance are sg list bdes. Initialize the
-		 * first two and leave the rest for queuecommand.
-		 */
-		sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_cmd));
-		sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_cmd));
-		sgl->word2 = le32_to_cpu(sgl->word2);
-		bf_set(lpfc_sli4_sge_last, sgl, 0);
-		sgl->word2 = cpu_to_le32(sgl->word2);
-		sgl->sge_len = cpu_to_le32(sizeof(struct fcp_cmnd));
-		sgl++;
-
-		/* Setup the physical region for the FCP RSP */
-		sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_rsp));
-		sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_rsp));
-		sgl->word2 = le32_to_cpu(sgl->word2);
-		bf_set(lpfc_sli4_sge_last, sgl, 1);
-		sgl->word2 = cpu_to_le32(sgl->word2);
-		sgl->sge_len = cpu_to_le32(sizeof(struct fcp_rsp));
-
-		/*
-		 * Since the IOCB for the FCP I/O is built into this
-		 * lpfc_scsi_buf, initialize it with all known data now.
-		 */
-		iocb = &psb->cur_iocbq.iocb;
-		iocb->un.fcpi64.bdl.ulpIoTag32 = 0;
-		iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDE_64;
-		/* setting the BLP size to 2 * sizeof BDE may not be correct.
-		 * We are setting the bpl to point to out sgl. An sgl's
-		 * entries are 16 bytes, a bpl entries are 12 bytes.
-		 */
-		iocb->un.fcpi64.bdl.bdeSize = sizeof(struct fcp_cmnd);
-		iocb->un.fcpi64.bdl.addrLow = putPaddrLow(pdma_phys_fcp_cmd);
-		iocb->un.fcpi64.bdl.addrHigh = putPaddrHigh(pdma_phys_fcp_cmd);
-		iocb->ulpBdeCount = 1;
-		iocb->ulpLe = 1;
-		iocb->ulpClass = CLASS3;
-		psb->cur_iocbq.context1 = psb;
-		psb->dma_phys_bpl = pdma_phys_bpl;
-
-		/* add the scsi buffer to a post list */
-		list_add_tail(&psb->list, &post_sblist);
-		spin_lock_irq(&phba->scsi_buf_list_get_lock);
-		phba->sli4_hba.scsi_xri_cnt++;
-		spin_unlock_irq(&phba->scsi_buf_list_get_lock);
-	}
-	lpfc_printf_log(phba, KERN_INFO, LOG_BG | LOG_FCP,
-			"3021 Allocate %d out of %d requested new SCSI "
-			"buffers\n", bcnt, num_to_alloc);
-
-	/* post the list of scsi buffer sgls to port if available */
-	if (!list_empty(&post_sblist))
-		num_posted = lpfc_sli4_post_scsi_sgl_list(phba,
-							  &post_sblist, bcnt);
-	else
-		num_posted = 0;
-
-	return num_posted;
-}
-
-/**
- * lpfc_new_scsi_buf - Wrapper funciton for scsi buffer allocator
- * @vport: The virtual port for which this call being executed.
- * @num_to_allocate: The requested number of buffers to allocate.
- *
- * This routine wraps the actual SCSI buffer allocator function pointer from
- * the lpfc_hba struct.
- *
- * Return codes:
- *   int - number of scsi buffers that were allocated.
- *   0 = failure, less than num_to_alloc is a partial failure.
- **/
-static inline int
-lpfc_new_scsi_buf(struct lpfc_vport *vport, int num_to_alloc)
-{
-	return vport->phba->lpfc_new_scsi_buf(vport, num_to_alloc);
-}
-
-/**
  * lpfc_get_scsi_buf_s3 - Get a scsi buffer from lpfc_scsi_buf_list of the HBA
  * @phba: The HBA for which this call is being executed.
  *
@@ -977,15 +636,16 @@ lpfc_new_scsi_buf(struct lpfc_vport *vport, int num_to_alloc)
  *   NULL - Error
  *   Pointer to lpfc_scsi_buf - Success
  **/
-static struct lpfc_scsi_buf*
-lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+static struct lpfc_io_buf *
+lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		     struct scsi_cmnd *cmnd)
 {
-	struct  lpfc_scsi_buf * lpfc_cmd = NULL;
+	struct lpfc_io_buf *lpfc_cmd = NULL;
 	struct list_head *scsi_buf_list_get = &phba->lpfc_scsi_buf_list_get;
 	unsigned long iflag = 0;
 
 	spin_lock_irqsave(&phba->scsi_buf_list_get_lock, iflag);
-	list_remove_head(scsi_buf_list_get, lpfc_cmd, struct lpfc_scsi_buf,
+	list_remove_head(scsi_buf_list_get, lpfc_cmd, struct lpfc_io_buf,
 			 list);
 	if (!lpfc_cmd) {
 		spin_lock(&phba->scsi_buf_list_put_lock);
@@ -993,7 +653,7 @@ lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 			    &phba->lpfc_scsi_buf_list_get);
 		INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
 		list_remove_head(scsi_buf_list_get, lpfc_cmd,
-				 struct lpfc_scsi_buf, list);
+				 struct lpfc_io_buf, list);
 		spin_unlock(&phba->scsi_buf_list_put_lock);
 	}
 	spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, iflag);
@@ -1005,52 +665,107 @@ lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 	return  lpfc_cmd;
 }
 /**
- * lpfc_get_scsi_buf_s4 - Get a scsi buffer from lpfc_scsi_buf_list of the HBA
+ * lpfc_get_scsi_buf_s4 - Get a scsi buffer from io_buf_list of the HBA
  * @phba: The HBA for which this call is being executed.
  *
- * This routine removes a scsi buffer from head of @phba lpfc_scsi_buf_list list
+ * This routine removes a scsi buffer from head of @hdwq io_buf_list
  * and returns to caller.
  *
  * Return codes:
  *   NULL - Error
  *   Pointer to lpfc_scsi_buf - Success
  **/
-static struct lpfc_scsi_buf*
-lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+static struct lpfc_io_buf *
+lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		     struct scsi_cmnd *cmnd)
 {
-	struct lpfc_scsi_buf *lpfc_cmd, *lpfc_cmd_next;
-	unsigned long iflag = 0;
-	int found = 0;
+	struct lpfc_io_buf *lpfc_cmd;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct sli4_sge *sgl;
+	IOCB_t *iocb;
+	dma_addr_t pdma_phys_fcp_rsp;
+	dma_addr_t pdma_phys_fcp_cmd;
+	uint32_t sgl_size, cpu, idx;
+	int tag;
 
-	spin_lock_irqsave(&phba->scsi_buf_list_get_lock, iflag);
-	list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next,
-				 &phba->lpfc_scsi_buf_list_get, list) {
-		if (lpfc_test_rrq_active(phba, ndlp,
-					 lpfc_cmd->cur_iocbq.sli4_lxritag))
-			continue;
-		list_del_init(&lpfc_cmd->list);
-		found = 1;
-		break;
-	}
-	if (!found) {
-		spin_lock(&phba->scsi_buf_list_put_lock);
-		list_splice(&phba->lpfc_scsi_buf_list_put,
-			    &phba->lpfc_scsi_buf_list_get);
-		INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
-		spin_unlock(&phba->scsi_buf_list_put_lock);
-		list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next,
-					 &phba->lpfc_scsi_buf_list_get, list) {
-			if (lpfc_test_rrq_active(
-				phba, ndlp, lpfc_cmd->cur_iocbq.sli4_lxritag))
-				continue;
-			list_del_init(&lpfc_cmd->list);
-			found = 1;
-			break;
-		}
+	cpu = smp_processor_id();
+	if (cmnd && phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
+		tag = blk_mq_unique_tag(cmnd->request);
+		idx = blk_mq_unique_tag_to_hwq(tag);
+	} else {
+		idx = phba->sli4_hba.cpu_map[cpu].hdwq;
 	}
-	spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, iflag);
-	if (!found)
+
+	lpfc_cmd = lpfc_get_io_buf(phba, ndlp, idx,
+				   !phba->cfg_xri_rebalancing);
+	if (!lpfc_cmd) {
+		qp = &phba->sli4_hba.hdwq[idx];
+		qp->empty_io_bufs++;
 		return NULL;
+	}
+
+	sgl_size = phba->cfg_sg_dma_buf_size -
+		(sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+
+	/* Setup key fields in buffer that may have been changed
+	 * if other protocols used this buffer.
+	 */
+	lpfc_cmd->cur_iocbq.iocb_flag = LPFC_IO_FCP;
+	lpfc_cmd->prot_seg_cnt = 0;
+	lpfc_cmd->seg_cnt = 0;
+	lpfc_cmd->timeout = 0;
+	lpfc_cmd->flags = 0;
+	lpfc_cmd->start_time = jiffies;
+	lpfc_cmd->waitq = NULL;
+	lpfc_cmd->cpu = cpu;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	lpfc_cmd->prot_data_type = 0;
+#endif
+	lpfc_cmd->fcp_cmnd = (lpfc_cmd->data + sgl_size);
+	lpfc_cmd->fcp_rsp = (struct fcp_rsp *)((uint8_t *)lpfc_cmd->fcp_cmnd +
+				sizeof(struct fcp_cmnd));
+
+	/*
+	 * The first two SGEs are the FCP_CMD and FCP_RSP.
+	 * The balance are sg list bdes. Initialize the
+	 * first two and leave the rest for queuecommand.
+	 */
+	sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
+	pdma_phys_fcp_cmd = (lpfc_cmd->dma_handle + sgl_size);
+	sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_cmd));
+	sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_cmd));
+	sgl->word2 = le32_to_cpu(sgl->word2);
+	bf_set(lpfc_sli4_sge_last, sgl, 0);
+	sgl->word2 = cpu_to_le32(sgl->word2);
+	sgl->sge_len = cpu_to_le32(sizeof(struct fcp_cmnd));
+	sgl++;
+
+	/* Setup the physical region for the FCP RSP */
+	pdma_phys_fcp_rsp = pdma_phys_fcp_cmd + sizeof(struct fcp_cmnd);
+	sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_rsp));
+	sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_rsp));
+	sgl->word2 = le32_to_cpu(sgl->word2);
+	bf_set(lpfc_sli4_sge_last, sgl, 1);
+	sgl->word2 = cpu_to_le32(sgl->word2);
+	sgl->sge_len = cpu_to_le32(sizeof(struct fcp_rsp));
+
+	/*
+	 * Since the IOCB for the FCP I/O is built into this
+	 * lpfc_io_buf, initialize it with all known data now.
+	 */
+	iocb = &lpfc_cmd->cur_iocbq.iocb;
+	iocb->un.fcpi64.bdl.ulpIoTag32 = 0;
+	iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDE_64;
+	/* setting the BLP size to 2 * sizeof BDE may not be correct.
+	 * We are setting the bpl to point to out sgl. An sgl's
+	 * entries are 16 bytes, a bpl entries are 12 bytes.
+	 */
+	iocb->un.fcpi64.bdl.bdeSize = sizeof(struct fcp_cmnd);
+	iocb->un.fcpi64.bdl.addrLow = putPaddrLow(pdma_phys_fcp_cmd);
+	iocb->un.fcpi64.bdl.addrHigh = putPaddrHigh(pdma_phys_fcp_cmd);
+	iocb->ulpBdeCount = 1;
+	iocb->ulpLe = 1;
+	iocb->ulpClass = CLASS3;
 
 	if (lpfc_ndlp_check_qdepth(phba, ndlp)) {
 		atomic_inc(&ndlp->cmd_pending);
@@ -1069,10 +784,11 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
  *   NULL - Error
  *   Pointer to lpfc_scsi_buf - Success
  **/
-static struct lpfc_scsi_buf*
-lpfc_get_scsi_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+static struct lpfc_io_buf*
+lpfc_get_scsi_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		  struct scsi_cmnd *cmnd)
 {
-	return  phba->lpfc_get_scsi_buf(phba, ndlp);
+	return  phba->lpfc_get_scsi_buf(phba, ndlp, cmnd);
 }
 
 /**
@@ -1084,12 +800,11 @@ lpfc_get_scsi_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
  * lpfc_scsi_buf_list list.
  **/
 static void
-lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_io_buf *psb)
 {
 	unsigned long iflag = 0;
 
 	psb->seg_cnt = 0;
-	psb->nonsg_phys = 0;
 	psb->prot_seg_cnt = 0;
 
 	spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
@@ -1104,34 +819,29 @@ lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
  * @phba: The Hba for which this call is being executed.
  * @psb: The scsi buffer which is being released.
  *
- * This routine releases @psb scsi buffer by adding it to tail of @phba
- * lpfc_scsi_buf_list list. For SLI4 XRI's are tied to the scsi buffer
+ * This routine releases @psb scsi buffer by adding it to tail of @hdwq
+ * io_buf_list list. For SLI4 XRI's are tied to the scsi buffer
  * and cannot be reused for at least RA_TOV amount of time if it was
  * aborted.
  **/
 static void
-lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *psb)
 {
+	struct lpfc_sli4_hdw_queue *qp;
 	unsigned long iflag = 0;
 
 	psb->seg_cnt = 0;
-	psb->nonsg_phys = 0;
 	psb->prot_seg_cnt = 0;
 
+	qp = psb->hdwq;
 	if (psb->exch_busy) {
-		spin_lock_irqsave(&phba->sli4_hba.abts_scsi_buf_list_lock,
-					iflag);
+		spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
 		psb->pCmd = NULL;
-		list_add_tail(&psb->list,
-			&phba->sli4_hba.lpfc_abts_scsi_buf_list);
-		spin_unlock_irqrestore(&phba->sli4_hba.abts_scsi_buf_list_lock,
-					iflag);
+		list_add_tail(&psb->list, &qp->lpfc_abts_scsi_buf_list);
+		qp->abts_scsi_io_bufs++;
+		spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
 	} else {
-		psb->pCmd = NULL;
-		psb->cur_iocbq.iocb_flag = LPFC_IO_FCP;
-		spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
-		list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list_put);
-		spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+		lpfc_release_io_buf(phba, (struct lpfc_io_buf *)psb, qp);
 	}
 }
 
@@ -1144,7 +854,7 @@ lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
  * lpfc_scsi_buf_list list.
  **/
 static void
-lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_io_buf *psb)
 {
 	if ((psb->flags & LPFC_SBUF_BUMP_QDEPTH) && psb->ndlp)
 		atomic_dec(&psb->ndlp->cmd_pending);
@@ -1168,12 +878,12 @@ lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
  *   0 - Success
  **/
 static int
-lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 {
 	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
 	struct scatterlist *sgel = NULL;
 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
-	struct ulp_bde64 *bpl = lpfc_cmd->fcp_bpl;
+	struct ulp_bde64 *bpl = (struct ulp_bde64 *)lpfc_cmd->dma_sgl;
 	struct lpfc_iocbq *iocbq = &lpfc_cmd->cur_iocbq;
 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
 	struct ulp_bde64 *data_bde = iocb_cmd->unsli3.fcp_ext.dbde;
@@ -1320,7 +1030,7 @@ lpfc_bg_err_inject(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		uint32_t *reftag, uint16_t *apptag, uint32_t new_guard)
 {
 	struct scatterlist *sgpe; /* s/g prot entry */
-	struct lpfc_scsi_buf *lpfc_cmd = NULL;
+	struct lpfc_io_buf *lpfc_cmd = NULL;
 	struct scsi_dif_tuple *src = NULL;
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_rport_data *rdata;
@@ -1379,7 +1089,7 @@ lpfc_bg_err_inject(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 	if (sgpe) {
 		src = (struct scsi_dif_tuple *)sg_virt(sgpe);
 		src += blockoff;
-		lpfc_cmd = (struct lpfc_scsi_buf *)sc->host_scribble;
+		lpfc_cmd = (struct lpfc_io_buf *)sc->host_scribble;
 	}
 
 	/* Should we change the Reference Tag */
@@ -2684,7 +2394,7 @@ lpfc_prot_group_type(struct lpfc_hba *phba, struct scsi_cmnd *sc)
  **/
 static int
 lpfc_bg_scsi_adjust_dl(struct lpfc_hba *phba,
-		       struct lpfc_scsi_buf *lpfc_cmd)
+		       struct lpfc_io_buf *lpfc_cmd)
 {
 	struct scsi_cmnd *sc = lpfc_cmd->pCmd;
 	int fcpdl;
@@ -2724,11 +2434,11 @@ lpfc_bg_scsi_adjust_dl(struct lpfc_hba *phba,
  **/
 static int
 lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba,
-		struct lpfc_scsi_buf *lpfc_cmd)
+		struct lpfc_io_buf *lpfc_cmd)
 {
 	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
-	struct ulp_bde64 *bpl = lpfc_cmd->fcp_bpl;
+	struct ulp_bde64 *bpl = (struct ulp_bde64 *)lpfc_cmd->dma_sgl;
 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
 	uint32_t num_bde = 0;
 	int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction;
@@ -2904,7 +2614,7 @@ lpfc_bg_csum(uint8_t *data, int count)
  * what type of T10-DIF error occurred.
  */
 static void
-lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 {
 	struct scatterlist *sgpe; /* s/g prot entry */
 	struct scatterlist *sgde; /* s/g data entry */
@@ -3089,8 +2799,8 @@ out:
  * -1 - Internal error (bad profile, ...etc)
  */
 static int
-lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd,
-			struct lpfc_iocbq *pIocbOut)
+lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
+		  struct lpfc_iocbq *pIocbOut)
 {
 	struct scsi_cmnd *cmd = lpfc_cmd->pCmd;
 	struct sli3_bg_fields *bgf = &pIocbOut->iocb.unsli3.sli3_bg;
@@ -3256,12 +2966,12 @@ out:
  *	0 - Success
  **/
 static int
-lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 {
 	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
 	struct scatterlist *sgel = NULL;
 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
-	struct sli4_sge *sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+	struct sli4_sge *sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
 	struct sli4_sge *first_data_sgl;
 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
 	dma_addr_t physaddr;
@@ -3388,6 +3098,7 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
 		lpfc_cmd->cur_iocbq.priority = ((struct lpfc_device_data *)
 			scsi_cmnd->device->hostdata)->priority;
 	}
+
 	return 0;
 }
 
@@ -3402,11 +3113,11 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
  **/
 static int
 lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
-		struct lpfc_scsi_buf *lpfc_cmd)
+		struct lpfc_io_buf *lpfc_cmd)
 {
 	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
-	struct sli4_sge *sgl = (struct sli4_sge *)(lpfc_cmd->fcp_bpl);
+	struct sli4_sge *sgl = (struct sli4_sge *)(lpfc_cmd->dma_sgl);
 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
 	uint32_t num_sge = 0;
 	int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction;
@@ -3578,7 +3289,7 @@ err:
  *	0 - Success
  **/
 static inline int
-lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 {
 	return phba->lpfc_scsi_prep_dma_buf(phba, lpfc_cmd);
 }
@@ -3597,7 +3308,7 @@ lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
  *	0 - Success
  **/
 static inline int
-lpfc_bg_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+lpfc_bg_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 {
 	return phba->lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd);
 }
@@ -3614,7 +3325,7 @@ lpfc_bg_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
  **/
 static void
 lpfc_send_scsi_error_event(struct lpfc_hba *phba, struct lpfc_vport *vport,
-		struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_iocbq *rsp_iocb) {
+		struct lpfc_io_buf *lpfc_cmd, struct lpfc_iocbq *rsp_iocb) {
 	struct scsi_cmnd *cmnd = lpfc_cmd->pCmd;
 	struct fcp_rsp *fcprsp = lpfc_cmd->fcp_rsp;
 	uint32_t resp_info = fcprsp->rspStatus2;
@@ -3706,7 +3417,7 @@ lpfc_send_scsi_error_event(struct lpfc_hba *phba, struct lpfc_vport *vport,
  * field of @lpfc_cmd for device with SLI-3 interface spec.
  **/
 static void
-lpfc_scsi_unprep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+lpfc_scsi_unprep_dma_buf(struct lpfc_hba *phba, struct lpfc_io_buf *psb)
 {
 	/*
 	 * There are only two special cases to consider.  (1) the scsi command
@@ -3725,7 +3436,7 @@ lpfc_scsi_unprep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 /**
  * lpfc_handler_fcp_err - FCP response handler
  * @vport: The virtual port for which this call is being executed.
- * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
+ * @lpfc_cmd: Pointer to lpfc_io_buf data structure.
  * @rsp_iocb: The response IOCB which contains FCP error.
  *
  * This routine is called to process response IOCB with status field
@@ -3733,7 +3444,7 @@ lpfc_scsi_unprep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
  * based upon SCSI and FCP error.
  **/
 static void
-lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd,
 		    struct lpfc_iocbq *rsp_iocb)
 {
 	struct lpfc_hba *phba = vport->phba;
@@ -3912,49 +3623,6 @@ lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 }
 
 /**
- * lpfc_sli4_scmd_to_wqidx_distr - scsi command to SLI4 WQ index distribution
- * @phba: Pointer to HBA context object.
- *
- * This routine performs a roundrobin SCSI command to SLI4 FCP WQ index
- * distribution.  This is called by __lpfc_sli_issue_iocb_s4() with the hbalock
- * held.
- * If scsi-mq is enabled, get the default block layer mapping of software queues
- * to hardware queues. This information is saved in request tag.
- *
- * Return: index into SLI4 fast-path FCP queue index.
- **/
-int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
-				  struct lpfc_scsi_buf *lpfc_cmd)
-{
-	struct scsi_cmnd *cmnd = lpfc_cmd->pCmd;
-	struct lpfc_vector_map_info *cpup;
-	int chann, cpu;
-	uint32_t tag;
-	uint16_t hwq;
-
-	if (cmnd) {
-		tag = blk_mq_unique_tag(cmnd->request);
-		hwq = blk_mq_unique_tag_to_hwq(tag);
-
-		return hwq;
-	}
-
-	if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_CPU
-	    && phba->cfg_fcp_io_channel > 1) {
-		cpu = smp_processor_id();
-		if (cpu < phba->sli4_hba.num_present_cpu) {
-			cpup = phba->sli4_hba.cpu_map;
-			cpup += cpu;
-			return cpup->channel_id;
-		}
-	}
-	chann = atomic_add_return(1, &phba->fcp_qidx);
-	chann = chann % phba->cfg_fcp_io_channel;
-	return chann;
-}
-
-
-/**
  * lpfc_scsi_cmd_iocb_cmpl - Scsi cmnd IOCB completion routine
  * @phba: The Hba for which this call is being executed.
  * @pIocbIn: The command IOCBQ for the scsi cmnd.
@@ -3968,8 +3636,8 @@ static void
 lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
 			struct lpfc_iocbq *pIocbOut)
 {
-	struct lpfc_scsi_buf *lpfc_cmd =
-		(struct lpfc_scsi_buf *) pIocbIn->context1;
+	struct lpfc_io_buf *lpfc_cmd =
+		(struct lpfc_io_buf *) pIocbIn->context1;
 	struct lpfc_vport      *vport = pIocbIn->vport;
 	struct lpfc_rport_data *rdata = lpfc_cmd->rdata;
 	struct lpfc_nodelist *pnode = rdata->pnode;
@@ -3977,14 +3645,35 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
 	unsigned long flags;
 	struct lpfc_fast_path_event *fast_path_evt;
 	struct Scsi_Host *shost;
+	int idx;
 	uint32_t logit = LOG_FCP;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	int cpu;
+#endif
 
-	atomic_inc(&phba->fc4ScsiIoCmpls);
+	/* Guard against abort handler being called at same time */
+	spin_lock(&lpfc_cmd->buf_lock);
 
 	/* Sanity check on return of outstanding command */
 	cmd = lpfc_cmd->pCmd;
-	if (!cmd)
+	if (!cmd) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
+				 "2621 IO completion: Not an active IO\n");
+		spin_unlock(&lpfc_cmd->buf_lock);
 		return;
+	}
+
+	idx = lpfc_cmd->cur_iocbq.hba_wqidx;
+	if (phba->sli4_hba.hdwq)
+		phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++;
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) {
+		cpu = smp_processor_id();
+		if (cpu < LPFC_CHECK_CPU_CNT)
+			phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++;
+	}
+#endif
 	shost = cmd->device->host;
 
 	lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK);
@@ -4178,29 +3867,24 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
 	}
 	lpfc_scsi_unprep_dma_buf(phba, lpfc_cmd);
 
-	/* If pCmd was set to NULL from abort path, do not call scsi_done */
-	if (xchg(&lpfc_cmd->pCmd, NULL) == NULL) {
-		lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
-				 "5688 FCP cmd already NULL, sid: 0x%06x, "
-				 "did: 0x%06x, oxid: 0x%04x\n",
-				 vport->fc_myDID,
-				 (pnode) ? pnode->nlp_DID : 0,
-				 phba->sli_rev == LPFC_SLI_REV4 ?
-				 lpfc_cmd->cur_iocbq.sli4_xritag : 0xffff);
-		return;
-	}
+	lpfc_cmd->pCmd = NULL;
+	spin_unlock(&lpfc_cmd->buf_lock);
 
 	/* The sdev is not guaranteed to be valid post scsi_done upcall. */
 	cmd->scsi_done(cmd);
 
 	/*
-	 * If there is a thread waiting for command completion
+	 * If there is an abort thread waiting for command completion
 	 * wake up the thread.
 	 */
-	spin_lock_irqsave(shost->host_lock, flags);
-	if (lpfc_cmd->waitq)
-		wake_up(lpfc_cmd->waitq);
-	spin_unlock_irqrestore(shost->host_lock, flags);
+	spin_lock(&lpfc_cmd->buf_lock);
+	if (unlikely(lpfc_cmd->cur_iocbq.iocb_flag & LPFC_DRIVER_ABORTED)) {
+		lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED;
+		if (lpfc_cmd->waitq)
+			wake_up(lpfc_cmd->waitq);
+		lpfc_cmd->waitq = NULL;
+	}
+	spin_unlock(&lpfc_cmd->buf_lock);
 
 	lpfc_release_scsi_buf(phba, lpfc_cmd);
 }
@@ -4233,7 +3917,7 @@ lpfc_fcpcmd_to_iocb(uint8_t *data, struct fcp_cmnd *fcp_cmnd)
  * to transfer for device with SLI3 interface spec.
  **/
 static void
-lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd,
 		    struct lpfc_nodelist *pnode)
 {
 	struct lpfc_hba *phba = vport->phba;
@@ -4241,7 +3925,9 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
 	struct lpfc_iocbq *piocbq = &(lpfc_cmd->cur_iocbq);
+	struct lpfc_sli4_hdw_queue *hdwq = NULL;
 	int datadir = scsi_cmnd->sc_data_direction;
+	int idx;
 	uint8_t *ptr;
 	bool sli4;
 	uint32_t fcpdl;
@@ -4267,6 +3953,9 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 
 	sli4 = (phba->sli_rev == LPFC_SLI_REV4);
 	piocbq->iocb.un.fcpi.fcpi_XRdy = 0;
+	idx = lpfc_cmd->hdwq_no;
+	if (phba->sli4_hba.hdwq)
+		hdwq = &phba->sli4_hba.hdwq[idx];
 
 	/*
 	 * There are three possibilities here - use scatter-gather segment, use
@@ -4288,19 +3977,22 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 						vport->cfg_first_burst_size;
 			}
 			fcp_cmnd->fcpCntl3 = WRITE_DATA;
-			atomic_inc(&phba->fc4ScsiOutputRequests);
+			if (hdwq)
+				hdwq->scsi_cstat.output_requests++;
 		} else {
 			iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR;
 			iocb_cmd->ulpPU = PARM_READ_CHECK;
 			fcp_cmnd->fcpCntl3 = READ_DATA;
-			atomic_inc(&phba->fc4ScsiInputRequests);
+			if (hdwq)
+				hdwq->scsi_cstat.input_requests++;
 		}
 	} else {
 		iocb_cmd->ulpCommand = CMD_FCP_ICMND64_CR;
 		iocb_cmd->un.fcpi.fcpi_parm = 0;
 		iocb_cmd->ulpPU = 0;
 		fcp_cmnd->fcpCntl3 = 0;
-		atomic_inc(&phba->fc4ScsiControlRequests);
+		if (hdwq)
+			hdwq->scsi_cstat.control_requests++;
 	}
 	if (phba->sli_rev == 3 &&
 	    !(phba->sli3_options & LPFC_SLI3_BG_ENABLED))
@@ -4328,7 +4020,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 /**
  * lpfc_scsi_prep_task_mgmt_cmd - Convert SLI3 scsi TM cmd to FCP info unit
  * @vport: The virtual port for which this call is being executed.
- * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
+ * @lpfc_cmd: Pointer to lpfc_io_buf data structure.
  * @lun: Logical unit number.
  * @task_mgmt_cmd: SCSI task management command.
  *
@@ -4341,7 +4033,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
  **/
 static int
 lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport,
-			     struct lpfc_scsi_buf *lpfc_cmd,
+			     struct lpfc_io_buf *lpfc_cmd,
 			     uint64_t lun,
 			     uint8_t task_mgmt_cmd)
 {
@@ -4413,14 +4105,12 @@ lpfc_scsi_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
 
 	switch (dev_grp) {
 	case LPFC_PCI_DEV_LP:
-		phba->lpfc_new_scsi_buf = lpfc_new_scsi_buf_s3;
 		phba->lpfc_scsi_prep_dma_buf = lpfc_scsi_prep_dma_buf_s3;
 		phba->lpfc_bg_scsi_prep_dma_buf = lpfc_bg_scsi_prep_dma_buf_s3;
 		phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s3;
 		phba->lpfc_get_scsi_buf = lpfc_get_scsi_buf_s3;
 		break;
 	case LPFC_PCI_DEV_OC:
-		phba->lpfc_new_scsi_buf = lpfc_new_scsi_buf_s4;
 		phba->lpfc_scsi_prep_dma_buf = lpfc_scsi_prep_dma_buf_s4;
 		phba->lpfc_bg_scsi_prep_dma_buf = lpfc_bg_scsi_prep_dma_buf_s4;
 		phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s4;
@@ -4452,8 +4142,8 @@ lpfc_tskmgmt_def_cmpl(struct lpfc_hba *phba,
 			struct lpfc_iocbq *cmdiocbq,
 			struct lpfc_iocbq *rspiocbq)
 {
-	struct lpfc_scsi_buf *lpfc_cmd =
-		(struct lpfc_scsi_buf *) cmdiocbq->context1;
+	struct lpfc_io_buf *lpfc_cmd =
+		(struct lpfc_io_buf *) cmdiocbq->context1;
 	if (lpfc_cmd)
 		lpfc_release_scsi_buf(phba, lpfc_cmd);
 	return;
@@ -4652,9 +4342,12 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	struct lpfc_hba   *phba = vport->phba;
 	struct lpfc_rport_data *rdata;
 	struct lpfc_nodelist *ndlp;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
-	int err;
+	int err, idx;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	int cpu;
+#endif
 
 	rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
 
@@ -4718,7 +4411,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 		}
 	}
 
-	lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp);
+	lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp, cmnd);
 	if (lpfc_cmd == NULL) {
 		lpfc_rampdown_queue_depth(phba);
 
@@ -4735,8 +4428,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	lpfc_cmd->pCmd  = cmnd;
 	lpfc_cmd->rdata = rdata;
 	lpfc_cmd->ndlp = ndlp;
-	lpfc_cmd->timeout = 0;
-	lpfc_cmd->start_time = jiffies;
 	cmnd->host_scribble = (unsigned char *)lpfc_cmd;
 
 	if (scsi_get_prot_op(cmnd) != SCSI_PROT_NORMAL) {
@@ -4771,6 +4462,16 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 
 	lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
 
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) {
+		cpu = smp_processor_id();
+		if (cpu < LPFC_CHECK_CPU_CNT) {
+			struct lpfc_sli4_hdw_queue *hdwq =
+					&phba->sli4_hba.hdwq[lpfc_cmd->hdwq_no];
+			hdwq->cpucheck_xmt_io[cpu]++;
+		}
+	}
+#endif
 	err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
 				  &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
 	if (err) {
@@ -4791,16 +4492,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 				 (uint32_t)
 				 (cmnd->request->timeout / 1000));
 
-		switch (lpfc_cmd->fcp_cmnd->fcpCntl3) {
-		case WRITE_DATA:
-			atomic_dec(&phba->fc4ScsiOutputRequests);
-			break;
-		case READ_DATA:
-			atomic_dec(&phba->fc4ScsiInputRequests);
-			break;
-		default:
-			atomic_dec(&phba->fc4ScsiControlRequests);
-		}
 		goto out_host_busy_free_buf;
 	}
 	if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
@@ -4811,10 +4502,26 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 			lpfc_poll_rearm_timer(phba);
 	}
 
+	if (phba->cfg_xri_rebalancing)
+		lpfc_keep_pvt_pool_above_lowwm(phba, lpfc_cmd->hdwq_no);
+
 	return 0;
 
  out_host_busy_free_buf:
+	idx = lpfc_cmd->hdwq_no;
 	lpfc_scsi_unprep_dma_buf(phba, lpfc_cmd);
+	if (phba->sli4_hba.hdwq) {
+		switch (lpfc_cmd->fcp_cmnd->fcpCntl3) {
+		case WRITE_DATA:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.output_requests--;
+			break;
+		case READ_DATA:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.input_requests--;
+			break;
+		default:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.control_requests--;
+		}
+	}
 	lpfc_release_scsi_buf(phba, lpfc_cmd);
  out_host_busy:
 	return SCSI_MLQUEUE_HOST_BUSY;
@@ -4846,7 +4553,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	struct lpfc_hba   *phba = vport->phba;
 	struct lpfc_iocbq *iocb;
 	struct lpfc_iocbq *abtsiocb;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	IOCB_t *cmd, *icmd;
 	int ret = SUCCESS, status = 0;
 	struct lpfc_sli_ring *pring_s4 = NULL;
@@ -4858,65 +4565,59 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	if (status != 0 && status != SUCCESS)
 		return status;
 
+	lpfc_cmd = (struct lpfc_io_buf *)cmnd->host_scribble;
+	if (!lpfc_cmd)
+		return ret;
+
 	spin_lock_irqsave(&phba->hbalock, flags);
 	/* driver queued commands are in process of being flushed */
 	if (phba->hba_flag & HBA_FCP_IOQ_FLUSH) {
-		spin_unlock_irqrestore(&phba->hbalock, flags);
 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
 			"3168 SCSI Layer abort requested I/O has been "
 			"flushed by LLD.\n");
-		return FAILED;
+		ret = FAILED;
+		goto out_unlock;
 	}
 
-	lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble;
-	if (!lpfc_cmd || !lpfc_cmd->pCmd) {
-		spin_unlock_irqrestore(&phba->hbalock, flags);
+	/* Guard against IO completion being called at same time */
+	spin_lock(&lpfc_cmd->buf_lock);
+
+	if (!lpfc_cmd->pCmd) {
 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
 			 "2873 SCSI Layer I/O Abort Request IO CMPL Status "
 			 "x%x ID %d LUN %llu\n",
 			 SUCCESS, cmnd->device->id, cmnd->device->lun);
-		return SUCCESS;
+		goto out_unlock_buf;
 	}
 
 	iocb = &lpfc_cmd->cur_iocbq;
 	if (phba->sli_rev == LPFC_SLI_REV4) {
-		if (!(phba->cfg_fof) ||
-		    (!(iocb->iocb_flag & LPFC_IO_FOF))) {
-			pring_s4 =
-				phba->sli4_hba.fcp_wq[iocb->hba_wqidx]->pring;
-		} else {
-			iocb->hba_wqidx = 0;
-			pring_s4 = phba->sli4_hba.oas_wq->pring;
-		}
+		pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].fcp_wq->pring;
 		if (!pring_s4) {
 			ret = FAILED;
-			goto out_unlock;
+			goto out_unlock_buf;
 		}
 		spin_lock(&pring_s4->ring_lock);
 	}
 	/* the command is in process of being cancelled */
 	if (!(iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
-		if (phba->sli_rev == LPFC_SLI_REV4)
-			spin_unlock(&pring_s4->ring_lock);
-		spin_unlock_irqrestore(&phba->hbalock, flags);
 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
 			"3169 SCSI Layer abort requested I/O has been "
 			"cancelled by LLD.\n");
-		return FAILED;
+		ret = FAILED;
+		goto out_unlock_ring;
 	}
 	/*
-	 * If pCmd field of the corresponding lpfc_scsi_buf structure
+	 * If pCmd field of the corresponding lpfc_io_buf structure
 	 * points to a different SCSI command, then the driver has
 	 * already completed this command, but the midlayer did not
 	 * see the completion before the eh fired. Just return SUCCESS.
 	 */
 	if (lpfc_cmd->pCmd != cmnd) {
-		if (phba->sli_rev == LPFC_SLI_REV4)
-			spin_unlock(&pring_s4->ring_lock);
 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
 			"3170 SCSI Layer abort requested I/O has been "
 			"completed by LLD.\n");
-		goto out_unlock;
+		goto out_unlock_ring;
 	}
 
 	BUG_ON(iocb->context1 != lpfc_cmd);
@@ -4927,6 +4628,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 			 "3389 SCSI Layer I/O Abort Request is pending\n");
 		if (phba->sli_rev == LPFC_SLI_REV4)
 			spin_unlock(&pring_s4->ring_lock);
+		spin_unlock(&lpfc_cmd->buf_lock);
 		spin_unlock_irqrestore(&phba->hbalock, flags);
 		goto wait_for_cmpl;
 	}
@@ -4934,9 +4636,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	abtsiocb = __lpfc_sli_get_iocbq(phba);
 	if (abtsiocb == NULL) {
 		ret = FAILED;
-		if (phba->sli_rev == LPFC_SLI_REV4)
-			spin_unlock(&pring_s4->ring_lock);
-		goto out_unlock;
+		goto out_unlock_ring;
 	}
 
 	/* Indicate the IO is being aborted by the driver. */
@@ -4986,24 +4686,18 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	/* no longer need the lock after this point */
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 
-
 	if (ret_val == IOCB_ERROR) {
-		if (phba->sli_rev == LPFC_SLI_REV4)
-			spin_lock_irqsave(&pring_s4->ring_lock, flags);
-		else
-			spin_lock_irqsave(&phba->hbalock, flags);
 		/* Indicate the IO is not being aborted by the driver. */
 		iocb->iocb_flag &= ~LPFC_DRIVER_ABORTED;
 		lpfc_cmd->waitq = NULL;
-		if (phba->sli_rev == LPFC_SLI_REV4)
-			spin_unlock_irqrestore(&pring_s4->ring_lock, flags);
-		else
-			spin_unlock_irqrestore(&phba->hbalock, flags);
+		spin_unlock(&lpfc_cmd->buf_lock);
 		lpfc_sli_release_iocbq(phba, abtsiocb);
 		ret = FAILED;
 		goto out;
 	}
 
+	spin_unlock(&lpfc_cmd->buf_lock);
+
 	if (phba->cfg_poll & DISABLE_FCP_RING_INT)
 		lpfc_sli_handle_fast_ring_event(phba,
 			&phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ);
@@ -5014,9 +4708,7 @@ wait_for_cmpl:
 			  (lpfc_cmd->pCmd != cmnd),
 			   msecs_to_jiffies(2*vport->cfg_devloss_tmo*1000));
 
-	spin_lock_irqsave(shost->host_lock, flags);
-	lpfc_cmd->waitq = NULL;
-	spin_unlock_irqrestore(shost->host_lock, flags);
+	spin_lock(&lpfc_cmd->buf_lock);
 
 	if (lpfc_cmd->pCmd == cmnd) {
 		ret = FAILED;
@@ -5027,8 +4719,14 @@ wait_for_cmpl:
 				 iocb->sli4_xritag, ret,
 				 cmnd->device->id, cmnd->device->lun);
 	}
+	spin_unlock(&lpfc_cmd->buf_lock);
 	goto out;
 
+out_unlock_ring:
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		spin_unlock(&pring_s4->ring_lock);
+out_unlock_buf:
+	spin_unlock(&lpfc_cmd->buf_lock);
 out_unlock:
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 out:
@@ -5066,7 +4764,7 @@ lpfc_taskmgmt_name(uint8_t task_mgmt_cmd)
 /**
  * lpfc_check_fcp_rsp - check the returned fcp_rsp to see if task failed
  * @vport: The virtual port for which this call is being executed.
- * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
+ * @lpfc_cmd: Pointer to lpfc_io_buf data structure.
  *
  * This routine checks the FCP RSP INFO to see if the tsk mgmt command succeded
  *
@@ -5075,7 +4773,7 @@ lpfc_taskmgmt_name(uint8_t task_mgmt_cmd)
  *   0x2002 - Success
  **/
 static int
-lpfc_check_fcp_rsp(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd)
+lpfc_check_fcp_rsp(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd)
 {
 	struct fcp_rsp *fcprsp = lpfc_cmd->fcp_rsp;
 	uint32_t rsp_info;
@@ -5150,7 +4848,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct scsi_cmnd *cmnd,
 		   uint8_t task_mgmt_cmd)
 {
 	struct lpfc_hba   *phba = vport->phba;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	struct lpfc_iocbq *iocbq;
 	struct lpfc_iocbq *iocbqrsp;
 	struct lpfc_rport_data *rdata;
@@ -5163,7 +4861,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct scsi_cmnd *cmnd,
 		return FAILED;
 	pnode = rdata->pnode;
 
-	lpfc_cmd = lpfc_get_scsi_buf(phba, pnode);
+	lpfc_cmd = lpfc_get_scsi_buf(phba, pnode, NULL);
 	if (lpfc_cmd == NULL)
 		return FAILED;
 	lpfc_cmd->timeout = phba->cfg_task_mgmt_tmo;
@@ -5671,6 +5369,12 @@ lpfc_slave_alloc(struct scsi_device *sdev)
 	}
 	sdev_cnt = atomic_inc_return(&phba->sdev_cnt);
 
+	/* For SLI4, all IO buffers are pre-allocated */
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		return 0;
+
+	/* This code path is now ONLY for SLI3 adapters */
+
 	/*
 	 * Populate the cmds_per_lun count scsi_bufs into this host's globally
 	 * available list of scsi buffers.  Don't allocate more than the
@@ -5702,7 +5406,7 @@ lpfc_slave_alloc(struct scsi_device *sdev)
 				 (phba->cfg_hba_queue_depth - total));
 		num_to_alloc = phba->cfg_hba_queue_depth - total;
 	}
-	num_allocated = lpfc_new_scsi_buf(vport, num_to_alloc);
+	num_allocated = lpfc_new_scsi_buf_s3(vport, num_to_alloc);
 	if (num_to_alloc != num_allocated) {
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
 					 "0708 Allocation request of %d "
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index b759b089432c..f76667b7da7b 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -130,62 +130,6 @@ struct lpfc_scsicmd_bkt {
 	uint32_t cmd_count;
 };
 
-struct lpfc_scsi_buf {
-	struct list_head list;
-	struct scsi_cmnd *pCmd;
-	struct lpfc_rport_data *rdata;
-	struct lpfc_nodelist *ndlp;
-
-	uint32_t timeout;
-
-	uint16_t flags;  /* TBD convert exch_busy to flags */
-#define LPFC_SBUF_XBUSY         0x1     /* SLI4 hba reported XB on WCQE cmpl */
-#define LPFC_SBUF_BUMP_QDEPTH	0x8	/* bumped queue depth counter */
-	uint16_t exch_busy;     /* SLI4 hba reported XB on complete WCQE */
-	uint16_t status;	/* From IOCB Word 7- ulpStatus */
-	uint32_t result;	/* From IOCB Word 4. */
-
-	uint32_t   seg_cnt;	/* Number of scatter-gather segments returned by
-				 * dma_map_sg.  The driver needs this for calls
-				 * to dma_unmap_sg. */
-	uint32_t prot_seg_cnt;  /* seg_cnt's counterpart for protection data */
-
-	dma_addr_t nonsg_phys;	/* Non scatter-gather physical address. */
-
-	/*
-	 * data and dma_handle are the kernel virtual and bus address of the
-	 * dma-able buffer containing the fcp_cmd, fcp_rsp and a scatter
-	 * gather bde list that supports the sg_tablesize value.
-	 */
-	void *data;
-	dma_addr_t dma_handle;
-
-	struct fcp_cmnd *fcp_cmnd;
-	struct fcp_rsp *fcp_rsp;
-	struct ulp_bde64 *fcp_bpl;
-
-	dma_addr_t dma_phys_bpl;
-
-	/* cur_iocbq has phys of the dma-able buffer.
-	 * Iotag is in here
-	 */
-	struct lpfc_iocbq cur_iocbq;
-	uint16_t cpu;
-
-	wait_queue_head_t *waitq;
-	unsigned long start_time;
-
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-	/* Used to restore any changes to protection data for error injection */
-	void *prot_data_segment;
-	uint32_t prot_data;
-	uint32_t prot_data_type;
-#define	LPFC_INJERR_REFTAG	1
-#define	LPFC_INJERR_APPTAG	2
-#define	LPFC_INJERR_GUARD	3
-#endif
-};
-
 #define LPFC_SCSI_DMA_EXT_SIZE	264
 #define LPFC_BPL_SIZE		1024
 #define MDAC_DIRECT_CMD		0x22
@@ -200,5 +144,6 @@ struct lpfc_scsi_buf {
 
 #define TXRDY_PAYLOAD_LEN	12
 
-int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
-				  struct lpfc_scsi_buf *lpfc_cmd);
+/* For sysfs/debugfs tmp string max len */
+#define LPFC_MAX_SCSI_INFO_TMP_LEN	79
+
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2242e9b3ca12..d0817facdae3 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -78,12 +78,13 @@ static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *,
 				      struct hbq_dmabuf *);
 static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
 					  struct hbq_dmabuf *dmabuf);
-static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *,
-				    struct lpfc_cqe *);
+static bool lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba,
+				   struct lpfc_queue *cq, struct lpfc_cqe *cqe);
 static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
 				       int);
 static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba,
-				     struct lpfc_eqe *eqe, uint32_t qidx);
+				     struct lpfc_queue *eq,
+				     struct lpfc_eqe *eqe);
 static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba);
 static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba);
 static int lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba,
@@ -160,7 +161,7 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 *wqe)
 	}
 	q->WQ_posted++;
 	/* set consumption flag every once in a while */
-	if (!((q->host_index + 1) % q->entry_repost))
+	if (!((q->host_index + 1) % q->notify_interval))
 		bf_set(wqe_wqec, &wqe->generic.wqe_com, 1);
 	else
 		bf_set(wqe_wqec, &wqe->generic.wqe_com, 0);
@@ -325,29 +326,16 @@ lpfc_sli4_mq_release(struct lpfc_queue *q)
 static struct lpfc_eqe *
 lpfc_sli4_eq_get(struct lpfc_queue *q)
 {
-	struct lpfc_hba *phba;
 	struct lpfc_eqe *eqe;
-	uint32_t idx;
 
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	phba = q->phba;
-	eqe = q->qe[q->hba_index].eqe;
+	eqe = q->qe[q->host_index].eqe;
 
 	/* If the next EQE is not valid then we are done */
 	if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
 		return NULL;
-	/* If the host has not yet processed the next entry then we are done */
-	idx = ((q->hba_index + 1) % q->entry_count);
-	if (idx == q->host_index)
-		return NULL;
-
-	q->hba_index = idx;
-	/* if the index wrapped around, toggle the valid bit */
-	if (phba->sli4_hba.pc_sli4_params.eqav && !q->hba_index)
-		q->qe_valid = (q->qe_valid) ? 0 : 1;
-
 
 	/*
 	 * insert barrier for instruction interlock : data from the hardware
@@ -397,44 +385,25 @@ lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q)
 }
 
 /**
- * lpfc_sli4_eq_release - Indicates the host has finished processing an EQ
+ * lpfc_sli4_write_eq_db - write EQ DB for eqe's consumed or arm state
+ * @phba: adapter with EQ
  * @q: The Event Queue that the host has completed processing for.
+ * @count: Number of elements that have been consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Event Queue Entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of EQEs that were popped.
+ * This routine will notify the HBA, by ringing the doorbell, that count
+ * number of EQEs have been processed. The @arm parameter indicates whether
+ * the queue should be rearmed when ringing the doorbell.
  **/
-uint32_t
-lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+		     uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_eqe *temp_eqe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.eqav) {
-			temp_eqe = q->qe[q->host_index].eqe;
-			bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
@@ -442,7 +411,7 @@ lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
 		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
 		bf_set(lpfc_eqcq_doorbell_eqci, &doorbell, 1);
 	}
-	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_EVENT);
 	bf_set(lpfc_eqcq_doorbell_eqid_hi, &doorbell,
 			(q->queue_id >> LPFC_EQID_HI_FIELD_SHIFT));
@@ -451,60 +420,112 @@ lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
 	/* PCI read to flush PCI pipeline on re-arming for INTx mode */
 	if ((q->phba->intr_type == INTx) && (arm == LPFC_QUEUE_REARM))
 		readl(q->phba->sli4_hba.EQDBregaddr);
-	return released;
 }
 
 /**
- * lpfc_sli4_if6_eq_release - Indicates the host has finished processing an EQ
+ * lpfc_sli4_if6_write_eq_db - write EQ DB for eqe's consumed or arm state
+ * @phba: adapter with EQ
  * @q: The Event Queue that the host has completed processing for.
+ * @count: Number of elements that have been consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Event Queue Entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of EQEs that were popped.
+ * This routine will notify the HBA, by ringing the doorbell, that count
+ * number of EQEs have been processed. The @arm parameter indicates whether
+ * the queue should be rearmed when ringing the doorbell.
  **/
-uint32_t
-lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_if6_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			  uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_eqe *temp_eqe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.eqav) {
-			temp_eqe = q->qe[q->host_index].eqe;
-			bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_if6_eq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_if6_eq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_if6_eq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_if6_eq_doorbell_eqid, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.EQDBregaddr);
 	/* PCI read to flush PCI pipeline on re-arming for INTx mode */
 	if ((q->phba->intr_type == INTx) && (arm == LPFC_QUEUE_REARM))
 		readl(q->phba->sli4_hba.EQDBregaddr);
-	return released;
+}
+
+static void
+__lpfc_sli4_consume_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq,
+			struct lpfc_eqe *eqe)
+{
+	if (!phba->sli4_hba.pc_sli4_params.eqav)
+		bf_set_le32(lpfc_eqe_valid, eqe, 0);
+
+	eq->host_index = ((eq->host_index + 1) % eq->entry_count);
+
+	/* if the index wrapped around, toggle the valid bit */
+	if (phba->sli4_hba.pc_sli4_params.eqav && !eq->host_index)
+		eq->qe_valid = (eq->qe_valid) ? 0 : 1;
+}
+
+static void
+lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	struct lpfc_eqe *eqe;
+	uint32_t count = 0;
+
+	/* walk all the EQ entries and drop on the floor */
+	eqe = lpfc_sli4_eq_get(eq);
+	while (eqe) {
+		__lpfc_sli4_consume_eqe(phba, eq, eqe);
+		count++;
+		eqe = lpfc_sli4_eq_get(eq);
+	}
+
+	/* Clear and re-arm the EQ */
+	phba->sli4_hba.sli4_write_eq_db(phba, eq, count, LPFC_QUEUE_REARM);
+}
+
+static int
+lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	struct lpfc_eqe *eqe;
+	int count = 0, consumed = 0;
+
+	if (cmpxchg(&eq->queue_claimed, 0, 1) != 0)
+		goto rearm_and_exit;
+
+	eqe = lpfc_sli4_eq_get(eq);
+	while (eqe) {
+		lpfc_sli4_hba_handle_eqe(phba, eq, eqe);
+		__lpfc_sli4_consume_eqe(phba, eq, eqe);
+
+		consumed++;
+		if (!(++count % eq->max_proc_limit))
+			break;
+
+		if (!(count % eq->notify_interval)) {
+			phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed,
+							LPFC_QUEUE_NOARM);
+			consumed = 0;
+		}
+
+		eqe = lpfc_sli4_eq_get(eq);
+	}
+	eq->EQ_processed += count;
+
+	/* Track the max number of EQEs processed in 1 intr */
+	if (count > eq->EQ_max_eqe)
+		eq->EQ_max_eqe = count;
+
+	eq->queue_claimed = 0;
+
+rearm_and_exit:
+	/* Always clear and re-arm the EQ */
+	phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, LPFC_QUEUE_REARM);
+
+	return count;
 }
 
 /**
@@ -519,28 +540,16 @@ lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
 static struct lpfc_cqe *
 lpfc_sli4_cq_get(struct lpfc_queue *q)
 {
-	struct lpfc_hba *phba;
 	struct lpfc_cqe *cqe;
-	uint32_t idx;
 
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	phba = q->phba;
-	cqe = q->qe[q->hba_index].cqe;
+	cqe = q->qe[q->host_index].cqe;
 
 	/* If the next CQE is not valid then we are done */
 	if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
 		return NULL;
-	/* If the host has not yet processed the next entry then we are done */
-	idx = ((q->hba_index + 1) % q->entry_count);
-	if (idx == q->host_index)
-		return NULL;
-
-	q->hba_index = idx;
-	/* if the index wrapped around, toggle the valid bit */
-	if (phba->sli4_hba.pc_sli4_params.cqav && !q->hba_index)
-		q->qe_valid = (q->qe_valid) ? 0 : 1;
 
 	/*
 	 * insert barrier for instruction interlock : data from the hardware
@@ -554,107 +563,81 @@ lpfc_sli4_cq_get(struct lpfc_queue *q)
 	return cqe;
 }
 
+static void
+__lpfc_sli4_consume_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			struct lpfc_cqe *cqe)
+{
+	if (!phba->sli4_hba.pc_sli4_params.cqav)
+		bf_set_le32(lpfc_cqe_valid, cqe, 0);
+
+	cq->host_index = ((cq->host_index + 1) % cq->entry_count);
+
+	/* if the index wrapped around, toggle the valid bit */
+	if (phba->sli4_hba.pc_sli4_params.cqav && !cq->host_index)
+		cq->qe_valid = (cq->qe_valid) ? 0 : 1;
+}
+
 /**
- * lpfc_sli4_cq_release - Indicates the host has finished processing a CQ
+ * lpfc_sli4_write_cq_db - write cq DB for entries consumed or arm state.
+ * @phba: the adapter with the CQ
  * @q: The Completion Queue that the host has completed processing for.
+ * @count: the number of elements that were consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Completion queue entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of CQEs that were released.
+ * This routine will notify the HBA, by ringing the doorbell, that the
+ * CQEs have been processed. The @arm parameter specifies whether the
+ * queue should be rearmed when ringing the doorbell.
  **/
-uint32_t
-lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+		     uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_cqe *temp_qe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.cqav) {
-			temp_qe = q->qe[q->host_index].cqe;
-			bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_COMPLETION);
 	bf_set(lpfc_eqcq_doorbell_cqid_hi, &doorbell,
 			(q->queue_id >> LPFC_CQID_HI_FIELD_SHIFT));
 	bf_set(lpfc_eqcq_doorbell_cqid_lo, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.CQDBregaddr);
-	return released;
 }
 
 /**
- * lpfc_sli4_if6_cq_release - Indicates the host has finished processing a CQ
+ * lpfc_sli4_if6_write_cq_db - write cq DB for entries consumed or arm state.
+ * @phba: the adapter with the CQ
  * @q: The Completion Queue that the host has completed processing for.
+ * @count: the number of elements that were consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Completion queue entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of CQEs that were released.
+ * This routine will notify the HBA, by ringing the doorbell, that the
+ * CQEs have been processed. The @arm parameter specifies whether the
+ * queue should be rearmed when ringing the doorbell.
  **/
-uint32_t
-lpfc_sli4_if6_cq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_if6_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			 uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_cqe *temp_qe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.cqav) {
-			temp_qe = q->qe[q->host_index].cqe;
-			bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_if6_cq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_if6_cq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_if6_cq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_if6_cq_doorbell_cqid, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.CQDBregaddr);
-	return released;
 }
 
 /**
@@ -703,15 +686,15 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 	hq->RQ_buf_posted++;
 
 	/* Ring The Header Receive Queue Doorbell */
-	if (!(hq->host_index % hq->entry_repost)) {
+	if (!(hq->host_index % hq->notify_interval)) {
 		doorbell.word0 = 0;
 		if (hq->db_format == LPFC_DB_RING_FORMAT) {
 			bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell,
-			       hq->entry_repost);
+			       hq->notify_interval);
 			bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id);
 		} else if (hq->db_format == LPFC_DB_LIST_FORMAT) {
 			bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell,
-			       hq->entry_repost);
+			       hq->notify_interval);
 			bf_set(lpfc_rq_db_list_fm_index, &doorbell,
 			       hq->host_index);
 			bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id);
@@ -1025,7 +1008,7 @@ lpfc_test_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
 	if (!ndlp->active_rrqs_xri_bitmap)
 		return 0;
 	if (test_bit(xritag, ndlp->active_rrqs_xri_bitmap))
-			return 1;
+		return 1;
 	else
 		return 0;
 }
@@ -1133,14 +1116,14 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq)
 	struct list_head *lpfc_els_sgl_list = &phba->sli4_hba.lpfc_els_sgl_list;
 	struct lpfc_sglq *sglq = NULL;
 	struct lpfc_sglq *start_sglq = NULL;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	struct lpfc_nodelist *ndlp;
 	int found = 0;
 
 	lockdep_assert_held(&phba->hbalock);
 
 	if (piocbq->iocb_flag &  LPFC_IO_FCP) {
-		lpfc_cmd = (struct lpfc_scsi_buf *) piocbq->context1;
+		lpfc_cmd = (struct lpfc_io_buf *) piocbq->context1;
 		ndlp = lpfc_cmd->rdata->pnode;
 	} else  if ((piocbq->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) &&
 			!(piocbq->iocb_flag & LPFC_IO_LIBDFC)) {
@@ -1596,6 +1579,7 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 
 	list_add_tail(&piocb->list, &pring->txcmplq);
 	piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ;
+	pring->txcmplq_cnt++;
 
 	if ((unlikely(pring->ringno == LPFC_ELS_RING)) &&
 	   (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
@@ -3008,6 +2992,7 @@ lpfc_sli_iocbq_lookup(struct lpfc_hba *phba,
 			/* remove from txcmpl queue list */
 			list_del_init(&cmd_iocb->list);
 			cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
+			pring->txcmplq_cnt--;
 			return cmd_iocb;
 		}
 	}
@@ -3045,6 +3030,7 @@ lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba,
 			/* remove from txcmpl queue list */
 			list_del_init(&cmd_iocb->list);
 			cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
+			pring->txcmplq_cnt--;
 			return cmd_iocb;
 		}
 	}
@@ -3981,8 +3967,8 @@ lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba)
 
 	/* Look on all the FCP Rings for the iotag */
 	if (phba->sli_rev >= LPFC_SLI_REV4) {
-		for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
-			pring = phba->sli4_hba.fcp_wq[i]->pring;
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
 			lpfc_sli_abort_iocb_ring(phba, pring);
 		}
 	} else {
@@ -4006,12 +3992,13 @@ lpfc_sli_abort_nvme_rings(struct lpfc_hba *phba)
 	struct lpfc_sli_ring  *pring;
 	uint32_t i;
 
-	if (phba->sli_rev < LPFC_SLI_REV4)
+	if ((phba->sli_rev < LPFC_SLI_REV4) ||
+	    !(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 		return;
 
 	/* Abort all IO on each NVME ring. */
-	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-		pring = phba->sli4_hba.nvme_wq[i]->pring;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
 		lpfc_sli_abort_wqe_ring(phba, pring);
 	}
 }
@@ -4044,8 +4031,8 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
 
 	/* Look on all the FCP Rings for the iotag */
 	if (phba->sli_rev >= LPFC_SLI_REV4) {
-		for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
-			pring = phba->sli4_hba.fcp_wq[i]->pring;
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
 
 			spin_lock_irq(&pring->ring_lock);
 			/* Retrieve everything on txq */
@@ -4110,7 +4097,8 @@ lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba)
 	uint32_t i;
 	struct lpfc_iocbq *piocb, *next_iocb;
 
-	if (phba->sli_rev < LPFC_SLI_REV4)
+	if ((phba->sli_rev < LPFC_SLI_REV4) ||
+	    !(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 		return;
 
 	/* Hint to other driver operations that a flush is in progress. */
@@ -4122,8 +4110,8 @@ lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba)
 	 * a local driver reason code.  This is a flush so no
 	 * abort exchange to FW.
 	 */
-	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-		pring = phba->sli4_hba.nvme_wq[i]->pring;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
 
 		spin_lock_irq(&pring->ring_lock);
 		list_for_each_entry_safe(piocb, next_iocb,
@@ -5564,41 +5552,35 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
 {
 	int qidx;
 	struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
+	struct lpfc_sli4_hdw_queue *qp;
 
-	sli4_hba->sli4_cq_release(sli4_hba->mbx_cq, LPFC_QUEUE_REARM);
-	sli4_hba->sli4_cq_release(sli4_hba->els_cq, LPFC_QUEUE_REARM);
+	sli4_hba->sli4_write_cq_db(phba, sli4_hba->mbx_cq, 0, LPFC_QUEUE_REARM);
+	sli4_hba->sli4_write_cq_db(phba, sli4_hba->els_cq, 0, LPFC_QUEUE_REARM);
 	if (sli4_hba->nvmels_cq)
-		sli4_hba->sli4_cq_release(sli4_hba->nvmels_cq,
-						LPFC_QUEUE_REARM);
+		sli4_hba->sli4_write_cq_db(phba, sli4_hba->nvmels_cq, 0,
+					   LPFC_QUEUE_REARM);
 
-	if (sli4_hba->fcp_cq)
-		for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++)
-			sli4_hba->sli4_cq_release(sli4_hba->fcp_cq[qidx],
-						LPFC_QUEUE_REARM);
-
-	if (sli4_hba->nvme_cq)
-		for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++)
-			sli4_hba->sli4_cq_release(sli4_hba->nvme_cq[qidx],
-						LPFC_QUEUE_REARM);
-
-	if (phba->cfg_fof)
-		sli4_hba->sli4_cq_release(sli4_hba->oas_cq, LPFC_QUEUE_REARM);
+	qp = sli4_hba->hdwq;
+	if (sli4_hba->hdwq) {
+		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+			sli4_hba->sli4_write_cq_db(phba, qp[qidx].fcp_cq, 0,
+						   LPFC_QUEUE_REARM);
+			sli4_hba->sli4_write_cq_db(phba, qp[qidx].nvme_cq, 0,
+						   LPFC_QUEUE_REARM);
+		}
 
-	if (sli4_hba->hba_eq)
-		for (qidx = 0; qidx < phba->io_channel_irqs; qidx++)
-			sli4_hba->sli4_eq_release(sli4_hba->hba_eq[qidx],
-							LPFC_QUEUE_REARM);
+		for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++)
+			sli4_hba->sli4_write_eq_db(phba, qp[qidx].hba_eq,
+						0, LPFC_QUEUE_REARM);
+	}
 
 	if (phba->nvmet_support) {
 		for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) {
-			sli4_hba->sli4_cq_release(
-				sli4_hba->nvmet_cqset[qidx],
+			sli4_hba->sli4_write_cq_db(phba,
+				sli4_hba->nvmet_cqset[qidx], 0,
 				LPFC_QUEUE_REARM);
 		}
 	}
-
-	if (phba->cfg_fof)
-		sli4_hba->sli4_eq_release(sli4_hba->fof_eq, LPFC_QUEUE_REARM);
 }
 
 /**
@@ -6027,11 +6009,8 @@ lpfc_sli4_alloc_extent(struct lpfc_hba *phba, uint16_t type)
 		list_add_tail(&rsrc_blks->list, ext_blk_list);
 		rsrc_start = rsrc_id;
 		if ((type == LPFC_RSC_TYPE_FCOE_XRI) && (j == 0)) {
-			phba->sli4_hba.scsi_xri_start = rsrc_start +
+			phba->sli4_hba.io_xri_start = rsrc_start +
 				lpfc_sli4_get_iocb_cnt(phba);
-			phba->sli4_hba.nvme_xri_start =
-				phba->sli4_hba.scsi_xri_start +
-				phba->sli4_hba.scsi_xri_max;
 		}
 
 		while (rsrc_id < (rsrc_start + rsrc_size)) {
@@ -7056,6 +7035,38 @@ lpfc_sli4_repost_sgl_list(struct lpfc_hba *phba,
 	return total_cnt;
 }
 
+/**
+ * lpfc_sli4_repost_io_sgl_list - Repost all the allocated nvme buffer sgls
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine walks the list of nvme buffers that have been allocated and
+ * repost them to the port by using SGL block post. This is needed after a
+ * pci_function_reset/warm_start or start. The lpfc_hba_down_post_s4 routine
+ * is responsible for moving all nvme buffers on the lpfc_abts_nvme_sgl_list
+ * to the lpfc_io_buf_list. If the repost fails, reject all nvme buffers.
+ *
+ * Returns: 0 = success, non-zero failure.
+ **/
+int
+lpfc_sli4_repost_io_sgl_list(struct lpfc_hba *phba)
+{
+	LIST_HEAD(post_nblist);
+	int num_posted, rc = 0;
+
+	/* get all NVME buffers need to repost to a local list */
+	lpfc_io_buf_flush(phba, &post_nblist);
+
+	/* post the list of nvme buffer sgls to port if available */
+	if (!list_empty(&post_nblist)) {
+		num_posted = lpfc_sli4_post_io_sgl_list(
+			phba, &post_nblist, phba->sli4_hba.io_xri_cnt);
+		/* failed to post any nvme buffer, return error */
+		if (num_posted == 0)
+			rc = -EIO;
+	}
+	return rc;
+}
+
 void
 lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 {
@@ -7144,7 +7155,7 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 int
 lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 {
-	int rc, i, cnt;
+	int rc, i, cnt, len;
 	LPFC_MBOXQ_t *mboxq;
 	struct lpfc_mqe *mqe;
 	uint8_t *vpd;
@@ -7517,24 +7528,26 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		/* We need 1 iocbq for every SGL, for IO processing */
 		cnt += phba->sli4_hba.nvmet_xri_cnt;
 	} else {
-		/* update host scsi xri-sgl sizes and mappings */
-		rc = lpfc_sli4_scsi_sgl_update(phba);
+		/* update host common xri-sgl sizes and mappings */
+		rc = lpfc_sli4_io_sgl_update(phba);
 		if (unlikely(rc)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-					"6309 Failed to update scsi-sgl size "
+					"6082 Failed to update nvme-sgl size "
 					"and mapping: %d\n", rc);
 			goto out_destroy_queue;
 		}
 
-		/* update host nvme xri-sgl sizes and mappings */
-		rc = lpfc_sli4_nvme_sgl_update(phba);
+		/* register the allocated common sgl pool to the port */
+		rc = lpfc_sli4_repost_io_sgl_list(phba);
 		if (unlikely(rc)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-					"6082 Failed to update nvme-sgl size "
-					"and mapping: %d\n", rc);
+					"6116 Error %d during nvme sgl post "
+					"operation\n", rc);
+			/* Some NVME buffers were moved to abort nvme list */
+			/* A pci function reset will repost them */
+			rc = -ENODEV;
 			goto out_destroy_queue;
 		}
-
 		cnt = phba->cfg_iocb_cnt * 1024;
 	}
 
@@ -7571,36 +7584,6 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		}
 	}
 
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
-		/* register the allocated scsi sgl pool to the port */
-		rc = lpfc_sli4_repost_scsi_sgl_list(phba);
-		if (unlikely(rc)) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-					"0383 Error %d during scsi sgl post "
-					"operation\n", rc);
-			/* Some Scsi buffers were moved to abort scsi list */
-			/* A pci function reset will repost them */
-			rc = -ENODEV;
-			goto out_destroy_queue;
-		}
-	}
-
-	if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
-	    (phba->nvmet_support == 0)) {
-
-		/* register the allocated nvme sgl pool to the port */
-		rc = lpfc_repost_nvme_sgl_list(phba);
-		if (unlikely(rc)) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-					"6116 Error %d during nvme sgl post "
-					"operation\n", rc);
-			/* Some NVME buffers were moved to abort nvme list */
-			/* A pci function reset will repost them */
-			rc = -ENODEV;
-			goto out_destroy_queue;
-		}
-	}
-
 	/* Post the rpi header region to the device. */
 	rc = lpfc_sli4_post_all_rpi_hdrs(phba);
 	if (unlikely(rc)) {
@@ -7650,6 +7633,25 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		lpfc_sli_read_link_ste(phba);
 	}
 
+	/* Don't post more new bufs if repost already recovered
+	 * the nvme sgls.
+	 */
+	if (phba->nvmet_support == 0) {
+		if (phba->sli4_hba.io_xri_cnt == 0) {
+			len = lpfc_new_io_buf(
+					      phba, phba->sli4_hba.io_xri_max);
+			if (len == 0) {
+				rc = -ENOMEM;
+				goto out_unset_queue;
+			}
+
+			if (phba->cfg_xri_rebalancing)
+				lpfc_create_multixri_pools(phba);
+		}
+	} else {
+		phba->cfg_xri_rebalancing = 0;
+	}
+
 	/* Arm the CQs and then EQs on device */
 	lpfc_sli4_arm_cqeq_intr(phba);
 
@@ -7678,6 +7680,11 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 	phba->hb_outstanding = 0;
 	phba->last_completion_time = jiffies;
 
+	/* start eq_delay heartbeat */
+	if (phba->cfg_auto_imax)
+		queue_delayed_work(phba->wq, &phba->eq_delay_work,
+				   msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+
 	/* Start error attention (ERATT) polling timer */
 	mod_timer(&phba->eratt_poll,
 		  jiffies + msecs_to_jiffies(1000 * phba->eratt_poll_interval));
@@ -7729,18 +7736,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_SLI,
 					"3104 Adapter failed to issue "
 					"DOWN_LINK mbox cmd, rc:x%x\n", rc);
-			goto out_unset_queue;
+			goto out_io_buff_free;
 		}
 	} else if (phba->cfg_suppress_link_up == LPFC_INITIALIZE_LINK) {
 		/* don't perform init_link on SLI4 FC port loopback test */
 		if (!(phba->link_flag & LS_LOOPBACK_MODE)) {
 			rc = phba->lpfc_hba_init_link(phba, MBX_NOWAIT);
 			if (rc)
-				goto out_unset_queue;
+				goto out_io_buff_free;
 		}
 	}
 	mempool_free(mboxq, phba->mbox_mem_pool);
 	return rc;
+out_io_buff_free:
+	/* Free allocated IO Buffers */
+	lpfc_io_free(phba);
 out_unset_queue:
 	/* Unset all the queues set up in this routine when error out */
 	lpfc_sli4_queue_unset(phba);
@@ -7846,7 +7856,6 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 	struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
 	uint32_t eqidx;
 	struct lpfc_queue *fpeq = NULL;
-	struct lpfc_eqe *eqe;
 	bool mbox_pending;
 
 	if (unlikely(!phba) || (phba->sli_rev != LPFC_SLI_REV4))
@@ -7854,11 +7863,11 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 
 	/* Find the eq associated with the mcq */
 
-	if (sli4_hba->hba_eq)
-		for (eqidx = 0; eqidx < phba->io_channel_irqs; eqidx++)
-			if (sli4_hba->hba_eq[eqidx]->queue_id ==
+	if (sli4_hba->hdwq)
+		for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++)
+			if (sli4_hba->hdwq[eqidx].hba_eq->queue_id ==
 			    sli4_hba->mbx_cq->assoc_qid) {
-				fpeq = sli4_hba->hba_eq[eqidx];
+				fpeq = sli4_hba->hdwq[eqidx].hba_eq;
 				break;
 			}
 	if (!fpeq)
@@ -7880,14 +7889,11 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 	 */
 
 	if (mbox_pending)
-		while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-			lpfc_sli4_hba_handle_eqe(phba, eqe, eqidx);
-			fpeq->EQ_processed++;
-		}
-
-	/* Always clear and re-arm the EQ */
-
-	sli4_hba->sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+		/* process and rearm the EQ */
+		lpfc_sli4_process_eq(phba, fpeq);
+	else
+		/* Always clear and re-arm the EQ */
+		sli4_hba->sli4_write_eq_db(phba, fpeq, 0, LPFC_QUEUE_REARM);
 
 	return mbox_pending;
 
@@ -8557,7 +8563,6 @@ lpfc_sli4_post_sync_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
 	rc = lpfc_sli4_wait_bmbx_ready(phba, mboxq);
 	if (rc)
 		goto exit;
-
 	/*
 	 * Initialize the bootstrap memory region to avoid stale data areas
 	 * in the mailbox post.  Then copy the caller's mailbox contents to
@@ -9476,7 +9481,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			bf_set(wqe_pbde, &wqe->fcp_iwrite.wqe_com, 0);
 
 		if (phba->fcp_embed_io) {
-			struct lpfc_scsi_buf *lpfc_cmd;
+			struct lpfc_io_buf *lpfc_cmd;
 			struct sli4_sge *sgl;
 			struct fcp_cmnd *fcp_cmnd;
 			uint32_t *ptr;
@@ -9484,7 +9489,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			/* 128 byte wqe support here */
 
 			lpfc_cmd = iocbq->context1;
-			sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+			sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
 			fcp_cmnd = lpfc_cmd->fcp_cmnd;
 
 			/* Word 0-2 - FCP_CMND */
@@ -9540,7 +9545,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			bf_set(wqe_pbde, &wqe->fcp_iread.wqe_com, 0);
 
 		if (phba->fcp_embed_io) {
-			struct lpfc_scsi_buf *lpfc_cmd;
+			struct lpfc_io_buf *lpfc_cmd;
 			struct sli4_sge *sgl;
 			struct fcp_cmnd *fcp_cmnd;
 			uint32_t *ptr;
@@ -9548,7 +9553,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			/* 128 byte wqe support here */
 
 			lpfc_cmd = iocbq->context1;
-			sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+			sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
 			fcp_cmnd = lpfc_cmd->fcp_cmnd;
 
 			/* Word 0-2 - FCP_CMND */
@@ -9597,7 +9602,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		/* Note, word 10 is already initialized to 0 */
 
 		if (phba->fcp_embed_io) {
-			struct lpfc_scsi_buf *lpfc_cmd;
+			struct lpfc_io_buf *lpfc_cmd;
 			struct sli4_sge *sgl;
 			struct fcp_cmnd *fcp_cmnd;
 			uint32_t *ptr;
@@ -9605,7 +9610,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			/* 128 byte wqe support here */
 
 			lpfc_cmd = iocbq->context1;
-			sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+			sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
 			fcp_cmnd = lpfc_cmd->fcp_cmnd;
 
 			/* Word 0-2 - FCP_CMND */
@@ -9864,10 +9869,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
 	/* Get the WQ */
 	if ((piocb->iocb_flag & LPFC_IO_FCP) ||
 	    (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
-		if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS)))
-			wq = phba->sli4_hba.fcp_wq[piocb->hba_wqidx];
-		else
-			wq = phba->sli4_hba.oas_wq;
+		wq = phba->sli4_hba.hdwq[piocb->hba_wqidx].fcp_wq;
 	} else {
 		wq = phba->sli4_hba.els_wq;
 	}
@@ -10001,29 +10003,20 @@ lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
 struct lpfc_sli_ring *
 lpfc_sli4_calc_ring(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
 {
+	struct lpfc_io_buf *lpfc_cmd;
+
 	if (piocb->iocb_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) {
-		if (!(phba->cfg_fof) ||
-		    (!(piocb->iocb_flag & LPFC_IO_FOF))) {
-			if (unlikely(!phba->sli4_hba.fcp_wq))
-				return NULL;
-			/*
-			 * for abort iocb hba_wqidx should already
-			 * be setup based on what work queue we used.
-			 */
-			if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
-				piocb->hba_wqidx =
-					lpfc_sli4_scmd_to_wqidx_distr(phba,
-							      piocb->context1);
-				piocb->hba_wqidx = piocb->hba_wqidx %
-					phba->cfg_fcp_io_channel;
-			}
-			return phba->sli4_hba.fcp_wq[piocb->hba_wqidx]->pring;
-		} else {
-			if (unlikely(!phba->sli4_hba.oas_wq))
-				return NULL;
-			piocb->hba_wqidx = 0;
-			return phba->sli4_hba.oas_wq->pring;
+		if (unlikely(!phba->sli4_hba.hdwq))
+			return NULL;
+		/*
+		 * for abort iocb hba_wqidx should already
+		 * be setup based on what work queue we used.
+		 */
+		if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
+			lpfc_cmd = (struct lpfc_io_buf *)piocb->context1;
+			piocb->hba_wqidx = lpfc_cmd->hdwq_no;
 		}
+		return phba->sli4_hba.hdwq[piocb->hba_wqidx].fcp_wq->pring;
 	} else {
 		if (unlikely(!phba->sli4_hba.els_wq))
 			return NULL;
@@ -10049,12 +10042,9 @@ int
 lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
 		    struct lpfc_iocbq *piocb, uint32_t flag)
 {
-	struct lpfc_hba_eq_hdl *hba_eq_hdl;
 	struct lpfc_sli_ring *pring;
-	struct lpfc_queue *fpeq;
-	struct lpfc_eqe *eqe;
 	unsigned long iflags;
-	int rc, idx;
+	int rc;
 
 	if (phba->sli_rev == LPFC_SLI_REV4) {
 		pring = lpfc_sli4_calc_ring(phba, piocb);
@@ -10064,34 +10054,6 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
 		spin_lock_irqsave(&pring->ring_lock, iflags);
 		rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
 		spin_unlock_irqrestore(&pring->ring_lock, iflags);
-
-		if (lpfc_fcp_look_ahead && (piocb->iocb_flag &  LPFC_IO_FCP)) {
-			idx = piocb->hba_wqidx;
-			hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx];
-
-			if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) {
-
-				/* Get associated EQ with this index */
-				fpeq = phba->sli4_hba.hba_eq[idx];
-
-				/* Turn off interrupts from this EQ */
-				phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-
-				/*
-				 * Process all the events on FCP EQ
-				 */
-				while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-					lpfc_sli4_hba_handle_eqe(phba,
-						eqe, idx);
-					fpeq->EQ_processed++;
-				}
-
-				/* Always clear and re-arm the EQ */
-				phba->sli4_hba.sli4_eq_release(fpeq,
-					LPFC_QUEUE_REARM);
-			}
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-		}
 	} else {
 		/* For now, SLI2/3 will still use hbalock */
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -10506,19 +10468,11 @@ lpfc_sli4_queue_init(struct lpfc_hba *phba)
 	INIT_LIST_HEAD(&psli->mboxq);
 	INIT_LIST_HEAD(&psli->mboxq_cmpl);
 	/* Initialize list headers for txq and txcmplq as double linked lists */
-	for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
-		pring = phba->sli4_hba.fcp_wq[i]->pring;
-		pring->flag = 0;
-		pring->ringno = LPFC_FCP_RING;
-		INIT_LIST_HEAD(&pring->txq);
-		INIT_LIST_HEAD(&pring->txcmplq);
-		INIT_LIST_HEAD(&pring->iocb_continueq);
-		spin_lock_init(&pring->ring_lock);
-	}
-	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-		pring = phba->sli4_hba.nvme_wq[i]->pring;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
 		pring->flag = 0;
 		pring->ringno = LPFC_FCP_RING;
+		pring->txcmplq_cnt = 0;
 		INIT_LIST_HEAD(&pring->txq);
 		INIT_LIST_HEAD(&pring->txcmplq);
 		INIT_LIST_HEAD(&pring->iocb_continueq);
@@ -10527,25 +10481,27 @@ lpfc_sli4_queue_init(struct lpfc_hba *phba)
 	pring = phba->sli4_hba.els_wq->pring;
 	pring->flag = 0;
 	pring->ringno = LPFC_ELS_RING;
+	pring->txcmplq_cnt = 0;
 	INIT_LIST_HEAD(&pring->txq);
 	INIT_LIST_HEAD(&pring->txcmplq);
 	INIT_LIST_HEAD(&pring->iocb_continueq);
 	spin_lock_init(&pring->ring_lock);
 
-	if (phba->cfg_nvme_io_channel) {
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
+			pring->flag = 0;
+			pring->ringno = LPFC_FCP_RING;
+			pring->txcmplq_cnt = 0;
+			INIT_LIST_HEAD(&pring->txq);
+			INIT_LIST_HEAD(&pring->txcmplq);
+			INIT_LIST_HEAD(&pring->iocb_continueq);
+			spin_lock_init(&pring->ring_lock);
+		}
 		pring = phba->sli4_hba.nvmels_wq->pring;
 		pring->flag = 0;
 		pring->ringno = LPFC_ELS_RING;
-		INIT_LIST_HEAD(&pring->txq);
-		INIT_LIST_HEAD(&pring->txcmplq);
-		INIT_LIST_HEAD(&pring->iocb_continueq);
-		spin_lock_init(&pring->ring_lock);
-	}
-
-	if (phba->cfg_fof) {
-		pring = phba->sli4_hba.oas_wq->pring;
-		pring->flag = 0;
-		pring->ringno = LPFC_FCP_RING;
+		pring->txcmplq_cnt = 0;
 		INIT_LIST_HEAD(&pring->txq);
 		INIT_LIST_HEAD(&pring->txcmplq);
 		INIT_LIST_HEAD(&pring->iocb_continueq);
@@ -11327,6 +11283,7 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	struct lpfc_iocbq *abtsiocbp;
 	union lpfc_wqe128 *abts_wqe;
 	int retval;
+	int idx = cmdiocb->hba_wqidx;
 
 	/*
 	 * There are certain command types we don't want to abort.  And we
@@ -11382,7 +11339,8 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	abtsiocbp->iocb_flag |= LPFC_IO_NVME;
 	abtsiocbp->vport = vport;
 	abtsiocbp->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl;
-	retval = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abtsiocbp);
+	retval = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[idx],
+				     abtsiocbp);
 	if (retval) {
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
 				 "6147 Failed abts issue_wqe with status x%x "
@@ -11457,7 +11415,7 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
 			   uint16_t tgt_id, uint64_t lun_id,
 			   lpfc_ctx_cmd ctx_cmd)
 {
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	int rc = 1;
 
 	if (iocbq->vport != vport)
@@ -11467,7 +11425,7 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
 	    !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ))
 		return rc;
 
-	lpfc_cmd = container_of(iocbq, struct lpfc_scsi_buf, cur_iocbq);
+	lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
 
 	if (lpfc_cmd->pCmd == NULL)
 		return rc;
@@ -11694,14 +11652,14 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 			uint16_t tgt_id, uint64_t lun_id, lpfc_ctx_cmd cmd)
 {
 	struct lpfc_hba *phba = vport->phba;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	struct lpfc_iocbq *abtsiocbq;
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_iocbq *iocbq;
 	IOCB_t *icmd;
 	int sum, i, ret_val;
 	unsigned long iflags;
-	struct lpfc_sli_ring *pring_s4;
+	struct lpfc_sli_ring *pring_s4 = NULL;
 
 	spin_lock_irqsave(&phba->hbalock, iflags);
 
@@ -11719,17 +11677,46 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 					       cmd) != 0)
 			continue;
 
+		/* Guard against IO completion being called at same time */
+		lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
+		spin_lock(&lpfc_cmd->buf_lock);
+
+		if (!lpfc_cmd->pCmd) {
+			spin_unlock(&lpfc_cmd->buf_lock);
+			continue;
+		}
+
+		if (phba->sli_rev == LPFC_SLI_REV4) {
+			pring_s4 =
+			    phba->sli4_hba.hdwq[iocbq->hba_wqidx].fcp_wq->pring;
+			if (!pring_s4) {
+				spin_unlock(&lpfc_cmd->buf_lock);
+				continue;
+			}
+			/* Note: both hbalock and ring_lock must be set here */
+			spin_lock(&pring_s4->ring_lock);
+		}
+
 		/*
 		 * If the iocbq is already being aborted, don't take a second
 		 * action, but do count it.
 		 */
-		if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+		if ((iocbq->iocb_flag & LPFC_DRIVER_ABORTED) ||
+		    !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
+			if (phba->sli_rev == LPFC_SLI_REV4)
+				spin_unlock(&pring_s4->ring_lock);
+			spin_unlock(&lpfc_cmd->buf_lock);
 			continue;
+		}
 
 		/* issue ABTS for this IOCB based on iotag */
 		abtsiocbq = __lpfc_sli_get_iocbq(phba);
-		if (abtsiocbq == NULL)
+		if (!abtsiocbq) {
+			if (phba->sli_rev == LPFC_SLI_REV4)
+				spin_unlock(&pring_s4->ring_lock);
+			spin_unlock(&lpfc_cmd->buf_lock);
 			continue;
+		}
 
 		icmd = &iocbq->iocb;
 		abtsiocbq->iocb.un.acxri.abortType = ABORT_TYPE_ABTS;
@@ -11750,7 +11737,6 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 		if (iocbq->iocb_flag & LPFC_IO_FOF)
 			abtsiocbq->iocb_flag |= LPFC_IO_FOF;
 
-		lpfc_cmd = container_of(iocbq, struct lpfc_scsi_buf, cur_iocbq);
 		ndlp = lpfc_cmd->rdata->pnode;
 
 		if (lpfc_is_link_up(phba) &&
@@ -11769,11 +11755,6 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 		iocbq->iocb_flag |= LPFC_DRIVER_ABORTED;
 
 		if (phba->sli_rev == LPFC_SLI_REV4) {
-			pring_s4 = lpfc_sli4_calc_ring(phba, abtsiocbq);
-			if (!pring_s4)
-				continue;
-			/* Note: both hbalock and ring_lock must be set here */
-			spin_lock(&pring_s4->ring_lock);
 			ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
 							abtsiocbq, 0);
 			spin_unlock(&pring_s4->ring_lock);
@@ -11782,6 +11763,7 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 							abtsiocbq, 0);
 		}
 
+		spin_unlock(&lpfc_cmd->buf_lock);
 
 		if (ret_val == IOCB_ERROR)
 			__lpfc_sli_release_iocbq(phba, abtsiocbq);
@@ -11816,7 +11798,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
 {
 	wait_queue_head_t *pdone_q;
 	unsigned long iflags;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 
 	spin_lock_irqsave(&phba->hbalock, iflags);
 	if (cmdiocbq->iocb_flag & LPFC_IO_WAKE_TMO) {
@@ -11845,7 +11827,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
 	/* Set the exchange busy flag for task management commands */
 	if ((cmdiocbq->iocb_flag & LPFC_IO_FCP) &&
 		!(cmdiocbq->iocb_flag & LPFC_IO_LIBDFC)) {
-		lpfc_cmd = container_of(cmdiocbq, struct lpfc_scsi_buf,
+		lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf,
 			cur_iocbq);
 		lpfc_cmd->exch_busy = rspiocbq->iocb_flag & LPFC_EXCHANGE_BUSY;
 	}
@@ -12919,35 +12901,6 @@ lpfc_sli_intr_handler(int irq, void *dev_id)
 }  /* lpfc_sli_intr_handler */
 
 /**
- * lpfc_sli4_fcp_xri_abort_event_proc - Process fcp xri abort event
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked by the worker thread to process all the pending
- * SLI4 FCP abort XRI events.
- **/
-void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
-{
-	struct lpfc_cq_event *cq_event;
-
-	/* First, declare the fcp xri abort event has been handled */
-	spin_lock_irq(&phba->hbalock);
-	phba->hba_flag &= ~FCP_XRI_ABORT_EVENT;
-	spin_unlock_irq(&phba->hbalock);
-	/* Now, handle all the fcp xri abort events */
-	while (!list_empty(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue)) {
-		/* Get the first event from the head of the event queue */
-		spin_lock_irq(&phba->hbalock);
-		list_remove_head(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue,
-				 cq_event, struct lpfc_cq_event, list);
-		spin_unlock_irq(&phba->hbalock);
-		/* Notify aborted XRI for FCP work queue */
-		lpfc_sli4_fcp_xri_aborted(phba, &cq_event->cqe.wcqe_axri);
-		/* Free the event processed back to the free pool */
-		lpfc_sli4_cq_event_release(phba, cq_event);
-	}
-}
-
-/**
  * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
  * @phba: pointer to lpfc hba data structure.
  *
@@ -13320,11 +13273,14 @@ out_no_mqe_complete:
  * Return: true if work posted to worker thread, otherwise false.
  **/
 static bool
-lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
+lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			 struct lpfc_cqe *cqe)
 {
 	struct lpfc_mcqe mcqe;
 	bool workposted;
 
+	cq->CQ_mbox++;
+
 	/* Copy the mailbox MCQE and convert endian order as needed */
 	lpfc_sli4_pcimem_bcopy(cqe, &mcqe, sizeof(struct lpfc_mcqe));
 
@@ -13443,17 +13399,8 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
 
 	switch (cq->subtype) {
 	case LPFC_FCP:
-		cq_event = lpfc_cq_event_setup(
-			phba, wcqe, sizeof(struct sli4_wcqe_xri_aborted));
-		if (!cq_event)
-			return false;
-		spin_lock_irqsave(&phba->hbalock, iflags);
-		list_add_tail(&cq_event->list,
-			      &phba->sli4_hba.sp_fcp_xri_aborted_work_queue);
-		/* Set the fcp xri abort event flag */
-		phba->hba_flag |= FCP_XRI_ABORT_EVENT;
-		spin_unlock_irqrestore(&phba->hbalock, iflags);
-		workposted = true;
+		lpfc_sli4_fcp_xri_aborted(phba, wcqe, cq->hdwq);
+		workposted = false;
 		break;
 	case LPFC_NVME_LS: /* NVME LS uses ELS resources */
 	case LPFC_ELS:
@@ -13461,6 +13408,7 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
 			phba, wcqe, sizeof(struct sli4_wcqe_xri_aborted));
 		if (!cq_event)
 			return false;
+		cq_event->hdwq = cq->hdwq;
 		spin_lock_irqsave(&phba->hbalock, iflags);
 		list_add_tail(&cq_event->list,
 			      &phba->sli4_hba.sp_els_xri_aborted_work_queue);
@@ -13474,7 +13422,7 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
 		if (phba->nvmet_support)
 			lpfc_sli4_nvmet_xri_aborted(phba, wcqe);
 		else
-			lpfc_sli4_nvme_xri_aborted(phba, wcqe);
+			lpfc_sli4_nvme_xri_aborted(phba, wcqe, cq->hdwq);
 
 		workposted = false;
 		break;
@@ -13592,7 +13540,7 @@ out:
  * lpfc_sli4_sp_handle_cqe - Process a slow path completion queue entry
  * @phba: Pointer to HBA context object.
  * @cq: Pointer to the completion queue.
- * @wcqe: Pointer to a completion queue entry.
+ * @cqe: Pointer to a completion queue entry.
  *
  * This routine process a slow-path work-queue or receive queue completion queue
  * entry.
@@ -13684,7 +13632,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 	/* Save EQ associated with this CQ */
 	cq->assoc_qp = speq;
 
-	if (!queue_work(phba->wq, &cq->spwork))
+	if (!queue_work_on(cq->chann, phba->wq, &cq->spwork))
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0390 Cannot schedule soft IRQ "
 				"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
@@ -13692,60 +13640,129 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 }
 
 /**
- * lpfc_sli4_sp_process_cq - Process a slow-path event queue entry
+ * __lpfc_sli4_process_cq - Process elements of a CQ
  * @phba: Pointer to HBA context object.
+ * @cq: Pointer to CQ to be processed
+ * @handler: Routine to process each cqe
+ * @delay: Pointer to usdelay to set in case of rescheduling of the handler
  *
- * This routine process a event queue entry from the slow-path event queue.
- * It will check the MajorCode and MinorCode to determine this is for a
- * completion event on a completion queue, if not, an error shall be logged
- * and just return. Otherwise, it will get to the corresponding completion
- * queue and process all the entries on that completion queue, rearm the
- * completion queue, and then return.
+ * This routine processes completion queue entries in a CQ. While a valid
+ * queue element is found, the handler is called. During processing checks
+ * are made for periodic doorbell writes to let the hardware know of
+ * element consumption.
+ *
+ * If the max limit on cqes to process is hit, or there are no more valid
+ * entries, the loop stops. If we processed a sufficient number of elements,
+ * meaning there is sufficient load, rather than rearming and generating
+ * another interrupt, a cq rescheduling delay will be set. A delay of 0
+ * indicates no rescheduling.
  *
+ * Returns True if work scheduled, False otherwise.
  **/
-static void
-lpfc_sli4_sp_process_cq(struct work_struct *work)
+static bool
+__lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq,
+	bool (*handler)(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_cqe *), unsigned long *delay)
 {
-	struct lpfc_queue *cq =
-		container_of(work, struct lpfc_queue, spwork);
-	struct lpfc_hba *phba = cq->phba;
 	struct lpfc_cqe *cqe;
 	bool workposted = false;
-	int ccount = 0;
+	int count = 0, consumed = 0;
+	bool arm = true;
+
+	/* default - no reschedule */
+	*delay = 0;
+
+	if (cmpxchg(&cq->queue_claimed, 0, 1) != 0)
+		goto rearm_and_exit;
 
 	/* Process all the entries to the CQ */
+	cqe = lpfc_sli4_cq_get(cq);
+	while (cqe) {
+#if defined(CONFIG_SCSI_LPFC_DEBUG_FS) && defined(BUILD_NVME)
+		if (phba->ktime_on)
+			cq->isr_timestamp = ktime_get_ns();
+		else
+			cq->isr_timestamp = 0;
+#endif
+		workposted |= handler(phba, cq, cqe);
+		__lpfc_sli4_consume_cqe(phba, cq, cqe);
+
+		consumed++;
+		if (!(++count % cq->max_proc_limit))
+			break;
+
+		if (!(count % cq->notify_interval)) {
+			phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed,
+						LPFC_QUEUE_NOARM);
+			consumed = 0;
+		}
+
+		cqe = lpfc_sli4_cq_get(cq);
+	}
+	if (count >= phba->cfg_cq_poll_threshold) {
+		*delay = 1;
+		arm = false;
+	}
+
+	/* Track the max number of CQEs processed in 1 EQ */
+	if (count > cq->CQ_max_cqe)
+		cq->CQ_max_cqe = count;
+
+	cq->assoc_qp->EQ_cqe_cnt += count;
+
+	/* Catch the no cq entry condition */
+	if (unlikely(count == 0))
+		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+				"0369 No entry from completion queue "
+				"qid=%d\n", cq->queue_id);
+
+	cq->queue_claimed = 0;
+
+rearm_and_exit:
+	phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed,
+			arm ?  LPFC_QUEUE_REARM : LPFC_QUEUE_NOARM);
+
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_process_cq - Process a slow-path event queue entry
+ * @cq: pointer to CQ to process
+ *
+ * This routine calls the cq processing routine with a handler specific
+ * to the type of queue bound to it.
+ *
+ * The CQ routine returns two values: the first is the calling status,
+ * which indicates whether work was queued to the  background discovery
+ * thread. If true, the routine should wakeup the discovery thread;
+ * the second is the delay parameter. If non-zero, rather than rearming
+ * the CQ and yet another interrupt, the CQ handler should be queued so
+ * that it is processed in a subsequent polling action. The value of
+ * the delay indicates when to reschedule it.
+ **/
+static void
+__lpfc_sli4_sp_process_cq(struct lpfc_queue *cq)
+{
+	struct lpfc_hba *phba = cq->phba;
+	unsigned long delay;
+	bool workposted = false;
+
+	/* Process and rearm the CQ */
 	switch (cq->type) {
 	case LPFC_MCQ:
-		while ((cqe = lpfc_sli4_cq_get(cq))) {
-			workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
-			if (!(++ccount % cq->entry_repost))
-				break;
-			cq->CQ_mbox++;
-		}
+		workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_sp_handle_mcqe,
+						&delay);
 		break;
 	case LPFC_WCQ:
-		while ((cqe = lpfc_sli4_cq_get(cq))) {
-			if (cq->subtype == LPFC_FCP ||
-			    cq->subtype == LPFC_NVME) {
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-				if (phba->ktime_on)
-					cq->isr_timestamp = ktime_get_ns();
-				else
-					cq->isr_timestamp = 0;
-#endif
-				workposted |= lpfc_sli4_fp_handle_cqe(phba, cq,
-								       cqe);
-			} else {
-				workposted |= lpfc_sli4_sp_handle_cqe(phba, cq,
-								      cqe);
-			}
-			if (!(++ccount % cq->entry_repost))
-				break;
-		}
-
-		/* Track the max number of CQEs processed in 1 EQ */
-		if (ccount > cq->CQ_max_cqe)
-			cq->CQ_max_cqe = ccount;
+		if (cq->subtype == LPFC_FCP || cq->subtype == LPFC_NVME)
+			workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_fp_handle_cqe,
+						&delay);
+		else
+			workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_sp_handle_cqe,
+						&delay);
 		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
@@ -13754,14 +13771,14 @@ lpfc_sli4_sp_process_cq(struct work_struct *work)
 		return;
 	}
 
-	/* Catch the no cq entry condition, log an error */
-	if (unlikely(ccount == 0))
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0371 No entry from the CQ: identifier "
-				"(x%x), type (%d)\n", cq->queue_id, cq->type);
-
-	/* In any case, flash and re-arm the RCQ */
-	phba->sli4_hba.sli4_cq_release(cq, LPFC_QUEUE_REARM);
+	if (delay) {
+		if (!queue_delayed_work_on(cq->chann, phba->wq,
+					   &cq->sched_spwork, delay))
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0394 Cannot schedule soft IRQ "
+				"for cqid=%d on CPU %d\n",
+				cq->queue_id, cq->chann);
+	}
 
 	/* wake up worker thread if there are works to be done */
 	if (workposted)
@@ -13769,6 +13786,36 @@ lpfc_sli4_sp_process_cq(struct work_struct *work)
 }
 
 /**
+ * lpfc_sli4_sp_process_cq - slow-path work handler when started by
+ *   interrupt
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the slow-path handler.
+ **/
+static void
+lpfc_sli4_sp_process_cq(struct work_struct *work)
+{
+	struct lpfc_queue *cq = container_of(work, struct lpfc_queue, spwork);
+
+	__lpfc_sli4_sp_process_cq(cq);
+}
+
+/**
+ * lpfc_sli4_dly_sp_process_cq - slow-path work handler when started by timer
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the slow-path handler.
+ **/
+static void
+lpfc_sli4_dly_sp_process_cq(struct work_struct *work)
+{
+	struct lpfc_queue *cq = container_of(to_delayed_work(work),
+					struct lpfc_queue, sched_spwork);
+
+	__lpfc_sli4_sp_process_cq(cq);
+}
+
+/**
  * lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
  * @phba: Pointer to HBA context object.
  * @cq: Pointer to associated CQ
@@ -13999,13 +14046,16 @@ out:
 
 /**
  * lpfc_sli4_fp_handle_cqe - Process fast-path work queue completion entry
+ * @phba: adapter with cq
  * @cq: Pointer to the completion queue.
  * @eqe: Pointer to fast-path completion queue entry.
  *
  * This routine process a fast-path work queue completion entry from fast-path
  * event queue for FCP command response completion.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
  **/
-static int
+static bool
 lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 			 struct lpfc_cqe *cqe)
 {
@@ -14072,10 +14122,11 @@ lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
  * completion queue, and then return.
  **/
 static void
-lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
-			uint32_t qidx)
+lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq,
+			 struct lpfc_eqe *eqe)
 {
 	struct lpfc_queue *cq = NULL;
+	uint32_t qidx = eq->hdwq;
 	uint16_t cqid, id;
 
 	if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) {
@@ -14090,6 +14141,14 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 	/* Get the reference to the corresponding CQ */
 	cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
 
+	/* Use the fast lookup method first */
+	if (cqid <= phba->sli4_hba.cq_max) {
+		cq = phba->sli4_hba.cq_lookup[cqid];
+		if (cq)
+			goto  work_cq;
+	}
+
+	/* Next check for NVMET completion */
 	if (phba->cfg_nvmet_mrq && phba->sli4_hba.nvmet_cqset) {
 		id = phba->sli4_hba.nvmet_cqset[0]->queue_id;
 		if ((cqid >= id) && (cqid < (id + phba->cfg_nvmet_mrq))) {
@@ -14099,20 +14158,6 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 		}
 	}
 
-	if (phba->sli4_hba.nvme_cq_map &&
-	    (cqid == phba->sli4_hba.nvme_cq_map[qidx])) {
-		/* Process NVME / NVMET command completion */
-		cq = phba->sli4_hba.nvme_cq[qidx];
-		goto  process_cq;
-	}
-
-	if (phba->sli4_hba.fcp_cq_map &&
-	    (cqid == phba->sli4_hba.fcp_cq_map[qidx])) {
-		/* Process FCP command completion */
-		cq = phba->sli4_hba.fcp_cq[qidx];
-		goto  process_cq;
-	}
-
 	if (phba->sli4_hba.nvmels_cq &&
 	    (cqid == phba->sli4_hba.nvmels_cq->queue_id)) {
 		/* Process NVME unsol rcv */
@@ -14121,7 +14166,8 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 
 	/* Otherwise this is a Slow path event */
 	if (cq == NULL) {
-		lpfc_sli4_sp_handle_eqe(phba, eqe, phba->sli4_hba.hba_eq[qidx]);
+		lpfc_sli4_sp_handle_eqe(phba, eqe,
+					phba->sli4_hba.hdwq[qidx].hba_eq);
 		return;
 	}
 
@@ -14134,10 +14180,8 @@ process_cq:
 		return;
 	}
 
-	/* Save EQ associated with this CQ */
-	cq->assoc_qp = phba->sli4_hba.hba_eq[qidx];
-
-	if (!queue_work(phba->wq, &cq->irqwork))
+work_cq:
+	if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork))
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0363 Cannot schedule soft IRQ "
 				"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
@@ -14145,219 +14189,73 @@ process_cq:
 }
 
 /**
- * lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
- * @phba: Pointer to HBA context object.
- * @eqe: Pointer to fast-path event queue entry.
+ * __lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
+ * @cq: Pointer to CQ to be processed
  *
- * This routine process a event queue entry from the fast-path event queue.
- * It will check the MajorCode and MinorCode to determine this is for a
- * completion event on a completion queue, if not, an error shall be logged
- * and just return. Otherwise, it will get to the corresponding completion
- * queue and process all the entries on the completion queue, rearm the
- * completion queue, and then return.
+ * This routine calls the cq processing routine with the handler for
+ * fast path CQEs.
+ *
+ * The CQ routine returns two values: the first is the calling status,
+ * which indicates whether work was queued to the  background discovery
+ * thread. If true, the routine should wakeup the discovery thread;
+ * the second is the delay parameter. If non-zero, rather than rearming
+ * the CQ and yet another interrupt, the CQ handler should be queued so
+ * that it is processed in a subsequent polling action. The value of
+ * the delay indicates when to reschedule it.
  **/
 static void
-lpfc_sli4_hba_process_cq(struct work_struct *work)
+__lpfc_sli4_hba_process_cq(struct lpfc_queue *cq)
 {
-	struct lpfc_queue *cq =
-		container_of(work, struct lpfc_queue, irqwork);
 	struct lpfc_hba *phba = cq->phba;
-	struct lpfc_cqe *cqe;
+	unsigned long delay;
 	bool workposted = false;
-	int ccount = 0;
-
-	/* Process all the entries to the CQ */
-	while ((cqe = lpfc_sli4_cq_get(cq))) {
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-		if (phba->ktime_on)
-			cq->isr_timestamp = ktime_get_ns();
-		else
-			cq->isr_timestamp = 0;
-#endif
-		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
-		if (!(++ccount % cq->entry_repost))
-			break;
-	}
 
-	/* Track the max number of CQEs processed in 1 EQ */
-	if (ccount > cq->CQ_max_cqe)
-		cq->CQ_max_cqe = ccount;
-	cq->assoc_qp->EQ_cqe_cnt += ccount;
+	/* process and rearm the CQ */
+	workposted |= __lpfc_sli4_process_cq(phba, cq, lpfc_sli4_fp_handle_cqe,
+					     &delay);
 
-	/* Catch the no cq entry condition */
-	if (unlikely(ccount == 0))
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0369 No entry from fast-path completion "
-				"queue fcpcqid=%d\n", cq->queue_id);
-
-	/* In any case, flash and re-arm the CQ */
-	phba->sli4_hba.sli4_cq_release(cq, LPFC_QUEUE_REARM);
+	if (delay) {
+		if (!queue_delayed_work_on(cq->chann, phba->wq,
+					   &cq->sched_irqwork, delay))
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0367 Cannot schedule soft IRQ "
+				"for cqid=%d on CPU %d\n",
+				cq->queue_id, cq->chann);
+	}
 
 	/* wake up worker thread if there are works to be done */
 	if (workposted)
 		lpfc_worker_wake_up(phba);
 }
 
-static void
-lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
-{
-	struct lpfc_eqe *eqe;
-
-	/* walk all the EQ entries and drop on the floor */
-	while ((eqe = lpfc_sli4_eq_get(eq)))
-		;
-
-	/* Clear and re-arm the EQ */
-	phba->sli4_hba.sli4_eq_release(eq, LPFC_QUEUE_REARM);
-}
-
-
 /**
- * lpfc_sli4_fof_handle_eqe - Process a Flash Optimized Fabric event queue
- *			     entry
- * @phba: Pointer to HBA context object.
- * @eqe: Pointer to fast-path event queue entry.
+ * lpfc_sli4_hba_process_cq - fast-path work handler when started by
+ *   interrupt
+ * @work: pointer to work element
  *
- * This routine process a event queue entry from the Flash Optimized Fabric
- * event queue.  It will check the MajorCode and MinorCode to determine this
- * is for a completion event on a completion queue, if not, an error shall be
- * logged and just return. Otherwise, it will get to the corresponding
- * completion queue and process all the entries on the completion queue, rearm
- * the completion queue, and then return.
+ * translates from the work handler and calls the fast-path handler.
  **/
 static void
-lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
+lpfc_sli4_hba_process_cq(struct work_struct *work)
 {
-	struct lpfc_queue *cq;
-	uint16_t cqid;
+	struct lpfc_queue *cq = container_of(work, struct lpfc_queue, irqwork);
 
-	if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"9147 Not a valid completion "
-				"event: majorcode=x%x, minorcode=x%x\n",
-				bf_get_le32(lpfc_eqe_major_code, eqe),
-				bf_get_le32(lpfc_eqe_minor_code, eqe));
-		return;
-	}
-
-	/* Get the reference to the corresponding CQ */
-	cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
-
-	/* Next check for OAS */
-	cq = phba->sli4_hba.oas_cq;
-	if (unlikely(!cq)) {
-		if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
-			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-					"9148 OAS completion queue "
-					"does not exist\n");
-		return;
-	}
-
-	if (unlikely(cqid != cq->queue_id)) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"9149 Miss-matched fast-path compl "
-				"queue id: eqcqid=%d, fcpcqid=%d\n",
-				cqid, cq->queue_id);
-		return;
-	}
-
-	/* Save EQ associated with this CQ */
-	cq->assoc_qp = phba->sli4_hba.fof_eq;
-
-	/* CQ work will be processed on CPU affinitized to this IRQ */
-	if (!queue_work(phba->wq, &cq->irqwork))
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0367 Cannot schedule soft IRQ "
-				"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
-				cqid, cq->queue_id, smp_processor_id());
+	__lpfc_sli4_hba_process_cq(cq);
 }
 
 /**
- * lpfc_sli4_fof_intr_handler - HBA interrupt handler to SLI-4 device
- * @irq: Interrupt number.
- * @dev_id: The device context pointer.
+ * lpfc_sli4_hba_process_cq - fast-path work handler when started by timer
+ * @work: pointer to work element
  *
- * This function is directly called from the PCI layer as an interrupt
- * service routine when device with SLI-4 interface spec is enabled with
- * MSI-X multi-message interrupt mode and there is a Flash Optimized Fabric
- * IOCB ring event in the HBA. However, when the device is enabled with either
- * MSI or Pin-IRQ interrupt mode, this function is called as part of the
- * device-level interrupt handler. When the PCI slot is in error recovery
- * or the HBA is undergoing initialization, the interrupt handler will not
- * process the interrupt. The Flash Optimized Fabric ring event are handled in
- * the intrrupt context. This function is called without any lock held.
- * It gets the hbalock to access and update SLI data structures. Note that,
- * the EQ to CQ are one-to-one map such that the EQ index is
- * equal to that of CQ index.
- *
- * This function returns IRQ_HANDLED when interrupt is handled else it
- * returns IRQ_NONE.
+ * translates from the work handler and calls the fast-path handler.
  **/
-irqreturn_t
-lpfc_sli4_fof_intr_handler(int irq, void *dev_id)
+static void
+lpfc_sli4_dly_hba_process_cq(struct work_struct *work)
 {
-	struct lpfc_hba *phba;
-	struct lpfc_hba_eq_hdl *hba_eq_hdl;
-	struct lpfc_queue *eq;
-	struct lpfc_eqe *eqe;
-	unsigned long iflag;
-	int ecount = 0;
-
-	/* Get the driver's phba structure from the dev_id */
-	hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id;
-	phba = hba_eq_hdl->phba;
-
-	if (unlikely(!phba))
-		return IRQ_NONE;
-
-	/* Get to the EQ struct associated with this vector */
-	eq = phba->sli4_hba.fof_eq;
-	if (unlikely(!eq))
-		return IRQ_NONE;
-
-	/* Check device state for handling interrupt */
-	if (unlikely(lpfc_intr_state_check(phba))) {
-		/* Check again for link_state with lock held */
-		spin_lock_irqsave(&phba->hbalock, iflag);
-		if (phba->link_state < LPFC_LINK_DOWN)
-			/* Flush, clear interrupt, and rearm the EQ */
-			lpfc_sli4_eq_flush(phba, eq);
-		spin_unlock_irqrestore(&phba->hbalock, iflag);
-		return IRQ_NONE;
-	}
-
-	/*
-	 * Process all the event on FCP fast-path EQ
-	 */
-	while ((eqe = lpfc_sli4_eq_get(eq))) {
-		lpfc_sli4_fof_handle_eqe(phba, eqe);
-		if (!(++ecount % eq->entry_repost))
-			break;
-		eq->EQ_processed++;
-	}
+	struct lpfc_queue *cq = container_of(to_delayed_work(work),
+					struct lpfc_queue, sched_irqwork);
 
-	/* Track the max number of EQEs processed in 1 intr */
-	if (ecount > eq->EQ_max_eqe)
-		eq->EQ_max_eqe = ecount;
-
-
-	if (unlikely(ecount == 0)) {
-		eq->EQ_no_entry++;
-
-		if (phba->intr_type == MSIX)
-			/* MSI-X treated interrupt served as no EQ share INT */
-			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
-					"9145 MSI-X interrupt with no EQE\n");
-		else {
-			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-					"9146 ISR interrupt with no EQE\n");
-			/* Non MSI-X treated on interrupt as EQ share INT */
-			return IRQ_NONE;
-		}
-	}
-	/* Always clear and re-arm the fast-path EQ */
-	phba->sli4_hba.sli4_eq_release(eq, LPFC_QUEUE_REARM);
-	return IRQ_HANDLED;
+	__lpfc_sli4_hba_process_cq(cq);
 }
 
 /**
@@ -14392,10 +14290,11 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 	struct lpfc_hba *phba;
 	struct lpfc_hba_eq_hdl *hba_eq_hdl;
 	struct lpfc_queue *fpeq;
-	struct lpfc_eqe *eqe;
 	unsigned long iflag;
 	int ecount = 0;
 	int hba_eqidx;
+	struct lpfc_eq_intr_info *eqi;
+	uint32_t icnt;
 
 	/* Get the driver's phba structure from the dev_id */
 	hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id;
@@ -14404,23 +14303,14 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 
 	if (unlikely(!phba))
 		return IRQ_NONE;
-	if (unlikely(!phba->sli4_hba.hba_eq))
+	if (unlikely(!phba->sli4_hba.hdwq))
 		return IRQ_NONE;
 
 	/* Get to the EQ struct associated with this vector */
-	fpeq = phba->sli4_hba.hba_eq[hba_eqidx];
+	fpeq = phba->sli4_hba.hdwq[hba_eqidx].hba_eq;
 	if (unlikely(!fpeq))
 		return IRQ_NONE;
 
-	if (lpfc_fcp_look_ahead) {
-		if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use))
-			phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-		else {
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-			return IRQ_NONE;
-		}
-	}
-
 	/* Check device state for handling interrupt */
 	if (unlikely(lpfc_intr_state_check(phba))) {
 		/* Check again for link_state with lock held */
@@ -14429,36 +14319,25 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 			/* Flush, clear interrupt, and rearm the EQ */
 			lpfc_sli4_eq_flush(phba, fpeq);
 		spin_unlock_irqrestore(&phba->hbalock, iflag);
-		if (lpfc_fcp_look_ahead)
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
 		return IRQ_NONE;
 	}
 
-	/*
-	 * Process all the event on FCP fast-path EQ
-	 */
-	while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-		lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
-		if (!(++ecount % fpeq->entry_repost))
-			break;
-		fpeq->EQ_processed++;
-	}
+	eqi = phba->sli4_hba.eq_info;
+	icnt = this_cpu_inc_return(eqi->icnt);
+	fpeq->last_cpu = smp_processor_id();
 
-	/* Track the max number of EQEs processed in 1 intr */
-	if (ecount > fpeq->EQ_max_eqe)
-		fpeq->EQ_max_eqe = ecount;
+	if (icnt > LPFC_EQD_ISR_TRIGGER &&
+	    phba->cfg_irq_chann == 1 &&
+	    phba->cfg_auto_imax &&
+	    fpeq->q_mode != LPFC_MAX_AUTO_EQ_DELAY &&
+	    phba->sli.sli_flag & LPFC_SLI_USE_EQDR)
+		lpfc_sli4_mod_hba_eq_delay(phba, fpeq, LPFC_MAX_AUTO_EQ_DELAY);
 
-	/* Always clear and re-arm the fast-path EQ */
-	phba->sli4_hba.sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+	/* process and rearm the EQ */
+	ecount = lpfc_sli4_process_eq(phba, fpeq);
 
 	if (unlikely(ecount == 0)) {
 		fpeq->EQ_no_entry++;
-
-		if (lpfc_fcp_look_ahead) {
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-			return IRQ_NONE;
-		}
-
 		if (phba->intr_type == MSIX)
 			/* MSI-X treated interrupt served as no EQ share INT */
 			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
@@ -14468,9 +14347,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 			return IRQ_NONE;
 	}
 
-	if (lpfc_fcp_look_ahead)
-		atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-
 	return IRQ_HANDLED;
 } /* lpfc_sli4_fp_intr_handler */
 
@@ -14508,20 +14384,13 @@ lpfc_sli4_intr_handler(int irq, void *dev_id)
 	/*
 	 * Invoke fast-path host attention interrupt handling as appropriate.
 	 */
-	for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) {
+	for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
 		hba_irq_rc = lpfc_sli4_hba_intr_handler(irq,
 					&phba->sli4_hba.hba_eq_hdl[qidx]);
 		if (hba_irq_rc == IRQ_HANDLED)
 			hba_handled |= true;
 	}
 
-	if (phba->cfg_fof) {
-		hba_irq_rc = lpfc_sli4_fof_intr_handler(irq,
-					&phba->sli4_hba.hba_eq_hdl[qidx]);
-		if (hba_irq_rc == IRQ_HANDLED)
-			hba_handled |= true;
-	}
-
 	return (hba_handled == true) ? IRQ_HANDLED : IRQ_NONE;
 } /* lpfc_sli4_intr_handler */
 
@@ -14553,6 +14422,9 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
 		kfree(queue->rqbp);
 	}
 
+	if (!list_empty(&queue->cpu_list))
+		list_del(&queue->cpu_list);
+
 	if (!list_empty(&queue->wq_list))
 		list_del(&queue->wq_list);
 
@@ -14601,6 +14473,7 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	INIT_LIST_HEAD(&queue->wqfull_list);
 	INIT_LIST_HEAD(&queue->page_list);
 	INIT_LIST_HEAD(&queue->child_list);
+	INIT_LIST_HEAD(&queue->cpu_list);
 
 	/* Set queue parameters now.  If the system cannot provide memory
 	 * resources, the free routine needs to know what was allocated.
@@ -14633,8 +14506,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	}
 	INIT_WORK(&queue->irqwork, lpfc_sli4_hba_process_cq);
 	INIT_WORK(&queue->spwork, lpfc_sli4_sp_process_cq);
+	INIT_DELAYED_WORK(&queue->sched_irqwork, lpfc_sli4_dly_hba_process_cq);
+	INIT_DELAYED_WORK(&queue->sched_spwork, lpfc_sli4_dly_sp_process_cq);
 
-	/* entry_repost will be set during q creation */
+	/* notify_interval will be set during q creation */
 
 	return queue;
 out_fail:
@@ -14671,43 +14546,76 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
 }
 
 /**
- * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on FCP EQs
- * @phba: HBA structure that indicates port to create a queue on.
- * @startq: The starting FCP EQ to modify
+ * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on EQs
+ * @phba: HBA structure that EQs are on.
+ * @startq: The starting EQ index to modify
+ * @numq: The number of EQs (consecutive indexes) to modify
+ * @usdelay: amount of delay
  *
- * This function sends an MODIFY_EQ_DELAY mailbox command to the HBA.
- * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be
- * updated in one mailbox command.
+ * This function revises the EQ delay on 1 or more EQs. The EQ delay
+ * is set either by writing to a register (if supported by the SLI Port)
+ * or by mailbox command. The mailbox command allows several EQs to be
+ * updated at once.
  *
- * The @phba struct is used to send mailbox command to HBA. The @startq
- * is used to get the starting FCP EQ to change.
- * This function is asynchronous and will wait for the mailbox
- * command to finish before continuing.
+ * The @phba struct is used to send a mailbox command to HBA. The @startq
+ * is used to get the starting EQ index to change. The @numq value is
+ * used to specify how many consecutive EQ indexes, starting at EQ index,
+ * are to be changed. This function is asynchronous and will wait for any
+ * mailbox commands to finish before returning.
  *
- * On success this function will return a zero. If unable to allocate enough
- * memory this function will return -ENOMEM. If the queue create mailbox command
- * fails this function will return -ENXIO.
+ * On success this function will return a zero. If unable to allocate
+ * enough memory this function will return -ENOMEM. If a mailbox command
+ * fails this function will return -ENXIO. Note: on ENXIO, some EQs may
+ * have had their delay multipler changed.
  **/
-int
+void
 lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
-			 uint32_t numq, uint32_t imax)
+			 uint32_t numq, uint32_t usdelay)
 {
 	struct lpfc_mbx_modify_eq_delay *eq_delay;
 	LPFC_MBOXQ_t *mbox;
 	struct lpfc_queue *eq;
-	int cnt, rc, length, status = 0;
+	int cnt = 0, rc, length;
 	uint32_t shdr_status, shdr_add_status;
-	uint32_t result, val;
+	uint32_t dmult;
 	int qidx;
 	union lpfc_sli4_cfg_shdr *shdr;
-	uint16_t dmult;
 
-	if (startq >= phba->io_channel_irqs)
-		return 0;
+	if (startq >= phba->cfg_irq_chann)
+		return;
+
+	if (usdelay > 0xFFFF) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP | LOG_NVME,
+				"6429 usdelay %d too large. Scaled down to "
+				"0xFFFF.\n", usdelay);
+		usdelay = 0xFFFF;
+	}
+
+	/* set values by EQ_DELAY register if supported */
+	if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
+		for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
+			eq = phba->sli4_hba.hdwq[qidx].hba_eq;
+			if (!eq)
+				continue;
+
+			lpfc_sli4_mod_hba_eq_delay(phba, eq, usdelay);
+
+			if (++cnt >= numq)
+				break;
+		}
+
+		return;
+	}
+
+	/* Otherwise, set values by mailbox cmd */
 
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
-	if (!mbox)
-		return -ENOMEM;
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_FCP | LOG_NVME,
+				"6428 Failed allocating mailbox cmd buffer."
+				" EQ delay was not set.\n");
+		return;
+	}
 	length = (sizeof(struct lpfc_mbx_modify_eq_delay) -
 		  sizeof(struct lpfc_sli4_cfg_mhdr));
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
@@ -14716,45 +14624,22 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 	eq_delay = &mbox->u.mqe.un.eq_delay;
 
 	/* Calculate delay multiper from maximum interrupt per second */
-	result = imax / phba->io_channel_irqs;
-	if (result > LPFC_DMULT_CONST || result == 0)
-		dmult = 0;
-	else
-		dmult = LPFC_DMULT_CONST/result - 1;
+	dmult = (usdelay * LPFC_DMULT_CONST) / LPFC_SEC_TO_USEC;
+	if (dmult)
+		dmult--;
 	if (dmult > LPFC_DMULT_MAX)
 		dmult = LPFC_DMULT_MAX;
 
-	cnt = 0;
-	for (qidx = startq; qidx < phba->io_channel_irqs; qidx++) {
-		eq = phba->sli4_hba.hba_eq[qidx];
+	for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
+		eq = phba->sli4_hba.hdwq[qidx].hba_eq;
 		if (!eq)
 			continue;
-		eq->q_mode = imax;
+		eq->q_mode = usdelay;
 		eq_delay->u.request.eq[cnt].eq_id = eq->queue_id;
 		eq_delay->u.request.eq[cnt].phase = 0;
 		eq_delay->u.request.eq[cnt].delay_multi = dmult;
-		cnt++;
-
-		/* q_mode is only used for auto_imax */
-		if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
-			/* Use EQ Delay Register method for q_mode */
-
-			/* Convert for EQ Delay register */
-			val =  phba->cfg_fcp_imax;
-			if (val) {
-				/* First, interrupts per sec per EQ */
-				val = phba->cfg_fcp_imax /
-					phba->io_channel_irqs;
-
-				/* us delay between each interrupt */
-				val = LPFC_SEC_TO_USEC / val;
-			}
-			eq->q_mode = val;
-		} else {
-			eq->q_mode = imax;
-		}
 
-		if (cnt >= numq)
+		if (++cnt >= numq)
 			break;
 	}
 	eq_delay->u.request.num_eq = cnt;
@@ -14772,10 +14657,9 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 				"2512 MODIFY_EQ_DELAY mailbox failed with "
 				"status x%x add_status x%x, mbx status x%x\n",
 				shdr_status, shdr_add_status, rc);
-		status = -ENXIO;
 	}
 	mempool_free(mbox, phba->mbox_mem_pool);
-	return status;
+	return;
 }
 
 /**
@@ -14900,8 +14784,8 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
 	if (eq->queue_id == 0xFFFF)
 		status = -ENXIO;
 	eq->host_index = 0;
-	eq->hba_index = 0;
-	eq->entry_repost = LPFC_EQ_REPOST;
+	eq->notify_interval = LPFC_EQ_NOTIFY_INTRVL;
+	eq->max_proc_limit = LPFC_EQ_MAX_PROC_LIMIT;
 
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
@@ -15039,10 +14923,13 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	cq->subtype = subtype;
 	cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
 	cq->assoc_qid = eq->queue_id;
+	cq->assoc_qp = eq;
 	cq->host_index = 0;
-	cq->hba_index = 0;
-	cq->entry_repost = LPFC_CQ_REPOST;
+	cq->notify_interval = LPFC_CQ_NOTIFY_INTRVL;
+	cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit, cq->entry_count);
 
+	if (cq->queue_id > phba->sli4_hba.cq_max)
+		phba->sli4_hba.cq_max = cq->queue_id;
 out:
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
@@ -15052,7 +14939,7 @@ out:
  * lpfc_cq_create_set - Create a set of Completion Queues on the HBA for MRQ
  * @phba: HBA structure that indicates port to create a queue on.
  * @cqp: The queue structure array to use to create the completion queues.
- * @eqp: The event queue array to bind these completion queues to.
+ * @hdwq: The hardware queue array  with the EQ to bind completion queues to.
  *
  * This function creates a set of  completion queue, s to support MRQ
  * as detailed in @cqp, on a port,
@@ -15072,7 +14959,8 @@ out:
  **/
 int
 lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
-		   struct lpfc_queue **eqp, uint32_t type, uint32_t subtype)
+		   struct lpfc_sli4_hdw_queue *hdwq, uint32_t type,
+		   uint32_t subtype)
 {
 	struct lpfc_queue *cq;
 	struct lpfc_queue *eq;
@@ -15087,7 +14975,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 
 	/* sanity check on queue memory */
 	numcq = phba->cfg_nvmet_mrq;
-	if (!cqp || !eqp || !numcq)
+	if (!cqp || !hdwq || !numcq)
 		return -ENODEV;
 
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -15114,7 +15002,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 
 	for (idx = 0; idx < numcq; idx++) {
 		cq = cqp[idx];
-		eq = eqp[idx];
+		eq = hdwq[idx].hba_eq;
 		if (!cq || !eq) {
 			status = -ENOMEM;
 			goto out;
@@ -15247,9 +15135,11 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 		cq->type = type;
 		cq->subtype = subtype;
 		cq->assoc_qid = eq->queue_id;
+		cq->assoc_qp = eq;
 		cq->host_index = 0;
-		cq->hba_index = 0;
-		cq->entry_repost = LPFC_CQ_REPOST;
+		cq->notify_interval = LPFC_CQ_NOTIFY_INTRVL;
+		cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit,
+					 cq->entry_count);
 		cq->chann = idx;
 
 		rc = 0;
@@ -15287,6 +15177,8 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 	for (idx = 0; idx < numcq; idx++) {
 		cq = cqp[idx];
 		cq->queue_id = rc + idx;
+		if (cq->queue_id > phba->sli4_hba.cq_max)
+			phba->sli4_hba.cq_max = cq->queue_id;
 	}
 
 out:
@@ -15499,7 +15391,6 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
 	mq->subtype = subtype;
 	mq->host_index = 0;
 	mq->hba_index = 0;
-	mq->entry_repost = LPFC_MQ_REPOST;
 
 	/* link the mq onto the parent cq child list */
 	list_add_tail(&mq->list, &cq->child_list);
@@ -15765,7 +15656,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 	wq->subtype = subtype;
 	wq->host_index = 0;
 	wq->hba_index = 0;
-	wq->entry_repost = LPFC_RELEASE_NOTIFICATION_INTERVAL;
+	wq->notify_interval = LPFC_WQ_NOTIFY_INTRVL;
 
 	/* link the wq onto the parent cq child list */
 	list_add_tail(&wq->list, &cq->child_list);
@@ -15959,7 +15850,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	hrq->subtype = subtype;
 	hrq->host_index = 0;
 	hrq->hba_index = 0;
-	hrq->entry_repost = LPFC_RQ_REPOST;
+	hrq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 	/* now create the data queue */
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
@@ -16052,7 +15943,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	drq->subtype = subtype;
 	drq->host_index = 0;
 	drq->hba_index = 0;
-	drq->entry_repost = LPFC_RQ_REPOST;
+	drq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 	/* link the header and data RQs onto the parent cq child list */
 	list_add_tail(&hrq->list, &cq->child_list);
@@ -16210,7 +16101,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		hrq->subtype = subtype;
 		hrq->host_index = 0;
 		hrq->hba_index = 0;
-		hrq->entry_repost = LPFC_RQ_REPOST;
+		hrq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 		drq->db_format = LPFC_DB_RING_FORMAT;
 		drq->db_regaddr = phba->sli4_hba.RQDBregaddr;
@@ -16219,7 +16110,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		drq->subtype = subtype;
 		drq->host_index = 0;
 		drq->hba_index = 0;
-		drq->entry_repost = LPFC_RQ_REPOST;
+		drq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 		list_add_tail(&hrq->list, &cq->child_list);
 		list_add_tail(&drq->list, &cq->child_list);
@@ -16279,6 +16170,7 @@ lpfc_eq_destroy(struct lpfc_hba *phba, struct lpfc_queue *eq)
 	/* sanity check on queue memory */
 	if (!eq)
 		return -ENODEV;
+
 	mbox = mempool_alloc(eq->phba->mbox_mem_pool, GFP_KERNEL);
 	if (!mbox)
 		return -ENOMEM;
@@ -16828,22 +16720,21 @@ lpfc_sli4_post_sgl_list(struct lpfc_hba *phba,
 }
 
 /**
- * lpfc_sli4_post_scsi_sgl_block - post a block of scsi sgl list to firmware
+ * lpfc_sli4_post_io_sgl_block - post a block of nvme sgl list to firmware
  * @phba: pointer to lpfc hba data structure.
- * @sblist: pointer to scsi buffer list.
+ * @nblist: pointer to nvme buffer list.
  * @count: number of scsi buffers on the list.
  *
  * This routine is invoked to post a block of @count scsi sgl pages from a
- * SCSI buffer list @sblist to the HBA using non-embedded mailbox command.
+ * SCSI buffer list @nblist to the HBA using non-embedded mailbox command.
  * No Lock is held.
  *
  **/
-int
-lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
-			      struct list_head *sblist,
-			      int count)
+static int
+lpfc_sli4_post_io_sgl_block(struct lpfc_hba *phba, struct list_head *nblist,
+			    int count)
 {
-	struct lpfc_scsi_buf *psb;
+	struct lpfc_io_buf *lpfc_ncmd;
 	struct lpfc_mbx_post_uembed_sgl_page1 *sgl;
 	struct sgl_page_pairs *sgl_pg_pairs;
 	void *viraddr;
@@ -16861,25 +16752,25 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 		 sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t);
 	if (reqlen > SLI4_PAGE_SIZE) {
 		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"0217 Block sgl registration required DMA "
+				"6118 Block sgl registration required DMA "
 				"size (%d) great than a page\n", reqlen);
 		return -ENOMEM;
 	}
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (!mbox) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0283 Failed to allocate mbox cmd memory\n");
+				"6119 Failed to allocate mbox cmd memory\n");
 		return -ENOMEM;
 	}
 
 	/* Allocate DMA memory and set up the non-embedded mailbox command */
 	alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
-				LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen,
-				LPFC_SLI4_MBX_NEMBED);
+				    LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES,
+				    reqlen, LPFC_SLI4_MBX_NEMBED);
 
 	if (alloclen < reqlen) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"2561 Allocated DMA memory size (%d) is "
+				"6120 Allocated DMA memory size (%d) is "
 				"less than the requested DMA memory "
 				"size (%d)\n", alloclen, reqlen);
 		lpfc_sli4_mbox_cmd_free(phba, mbox);
@@ -16894,14 +16785,15 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 	sgl_pg_pairs = &sgl->sgl_pg_pairs;
 
 	pg_pairs = 0;
-	list_for_each_entry(psb, sblist, list) {
+	list_for_each_entry(lpfc_ncmd, nblist, list) {
 		/* Set up the sge entry */
 		sgl_pg_pairs->sgl_pg0_addr_lo =
-			cpu_to_le32(putPaddrLow(psb->dma_phys_bpl));
+			cpu_to_le32(putPaddrLow(lpfc_ncmd->dma_phys_sgl));
 		sgl_pg_pairs->sgl_pg0_addr_hi =
-			cpu_to_le32(putPaddrHigh(psb->dma_phys_bpl));
+			cpu_to_le32(putPaddrHigh(lpfc_ncmd->dma_phys_sgl));
 		if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
-			pdma_phys_bpl1 = psb->dma_phys_bpl + SGL_PAGE_SIZE;
+			pdma_phys_bpl1 = lpfc_ncmd->dma_phys_sgl +
+						SGL_PAGE_SIZE;
 		else
 			pdma_phys_bpl1 = 0;
 		sgl_pg_pairs->sgl_pg1_addr_lo =
@@ -16910,7 +16802,7 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 			cpu_to_le32(putPaddrHigh(pdma_phys_bpl1));
 		/* Keep the first xritag on the list */
 		if (pg_pairs == 0)
-			xritag_start = psb->cur_iocbq.sli4_xritag;
+			xritag_start = lpfc_ncmd->cur_iocbq.sli4_xritag;
 		sgl_pg_pairs++;
 		pg_pairs++;
 	}
@@ -16919,20 +16811,20 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 	/* Perform endian conversion if necessary */
 	sgl->word0 = cpu_to_le32(sgl->word0);
 
-	if (!phba->sli4_hba.intr_enable)
+	if (!phba->sli4_hba.intr_enable) {
 		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
-	else {
+	} else {
 		mbox_tmo = lpfc_mbox_tmo_val(phba, mbox);
 		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
 	}
-	shdr = (union lpfc_sli4_cfg_shdr *) &sgl->cfg_shdr;
+	shdr = (union lpfc_sli4_cfg_shdr *)&sgl->cfg_shdr;
 	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
 	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
 	if (rc != MBX_TIMEOUT)
 		lpfc_sli4_mbox_cmd_free(phba, mbox);
 	if (shdr_status || shdr_add_status || rc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"2564 POST_SGL_BLOCK mailbox command failed "
+				"6125 POST_SGL_BLOCK mailbox command failed "
 				"status x%x add_status x%x mbx status x%x\n",
 				shdr_status, shdr_add_status, rc);
 		rc = -ENXIO;
@@ -16941,6 +16833,134 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 }
 
 /**
+ * lpfc_sli4_post_io_sgl_list - Post blocks of nvme buffer sgls from a list
+ * @phba: pointer to lpfc hba data structure.
+ * @post_nblist: pointer to the nvme buffer list.
+ *
+ * This routine walks a list of nvme buffers that was passed in. It attempts
+ * to construct blocks of nvme buffer sgls which contains contiguous xris and
+ * uses the non-embedded SGL block post mailbox commands to post to the port.
+ * For single NVME buffer sgl with non-contiguous xri, if any, it shall use
+ * embedded SGL post mailbox command for posting. The @post_nblist passed in
+ * must be local list, thus no lock is needed when manipulate the list.
+ *
+ * Returns: 0 = failure, non-zero number of successfully posted buffers.
+ **/
+int
+lpfc_sli4_post_io_sgl_list(struct lpfc_hba *phba,
+			   struct list_head *post_nblist, int sb_count)
+{
+	struct lpfc_io_buf *lpfc_ncmd, *lpfc_ncmd_next;
+	int status, sgl_size;
+	int post_cnt = 0, block_cnt = 0, num_posting = 0, num_posted = 0;
+	dma_addr_t pdma_phys_sgl1;
+	int last_xritag = NO_XRI;
+	int cur_xritag;
+	LIST_HEAD(prep_nblist);
+	LIST_HEAD(blck_nblist);
+	LIST_HEAD(nvme_nblist);
+
+	/* sanity check */
+	if (sb_count <= 0)
+		return -EINVAL;
+
+	sgl_size = phba->cfg_sg_dma_buf_size;
+	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, post_nblist, list) {
+		list_del_init(&lpfc_ncmd->list);
+		block_cnt++;
+		if ((last_xritag != NO_XRI) &&
+		    (lpfc_ncmd->cur_iocbq.sli4_xritag != last_xritag + 1)) {
+			/* a hole in xri block, form a sgl posting block */
+			list_splice_init(&prep_nblist, &blck_nblist);
+			post_cnt = block_cnt - 1;
+			/* prepare list for next posting block */
+			list_add_tail(&lpfc_ncmd->list, &prep_nblist);
+			block_cnt = 1;
+		} else {
+			/* prepare list for next posting block */
+			list_add_tail(&lpfc_ncmd->list, &prep_nblist);
+			/* enough sgls for non-embed sgl mbox command */
+			if (block_cnt == LPFC_NEMBED_MBOX_SGL_CNT) {
+				list_splice_init(&prep_nblist, &blck_nblist);
+				post_cnt = block_cnt;
+				block_cnt = 0;
+			}
+		}
+		num_posting++;
+		last_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag;
+
+		/* end of repost sgl list condition for NVME buffers */
+		if (num_posting == sb_count) {
+			if (post_cnt == 0) {
+				/* last sgl posting block */
+				list_splice_init(&prep_nblist, &blck_nblist);
+				post_cnt = block_cnt;
+			} else if (block_cnt == 1) {
+				/* last single sgl with non-contiguous xri */
+				if (sgl_size > SGL_PAGE_SIZE)
+					pdma_phys_sgl1 =
+						lpfc_ncmd->dma_phys_sgl +
+						SGL_PAGE_SIZE;
+				else
+					pdma_phys_sgl1 = 0;
+				cur_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag;
+				status = lpfc_sli4_post_sgl(
+						phba, lpfc_ncmd->dma_phys_sgl,
+						pdma_phys_sgl1, cur_xritag);
+				if (status) {
+					/* Post error.  Buffer unavailable. */
+					lpfc_ncmd->flags |=
+						LPFC_SBUF_NOT_POSTED;
+				} else {
+					/* Post success. Bffer available. */
+					lpfc_ncmd->flags &=
+						~LPFC_SBUF_NOT_POSTED;
+					lpfc_ncmd->status = IOSTAT_SUCCESS;
+					num_posted++;
+				}
+				/* success, put on NVME buffer sgl list */
+				list_add_tail(&lpfc_ncmd->list, &nvme_nblist);
+			}
+		}
+
+		/* continue until a nembed page worth of sgls */
+		if (post_cnt == 0)
+			continue;
+
+		/* post block of NVME buffer list sgls */
+		status = lpfc_sli4_post_io_sgl_block(phba, &blck_nblist,
+						     post_cnt);
+
+		/* don't reset xirtag due to hole in xri block */
+		if (block_cnt == 0)
+			last_xritag = NO_XRI;
+
+		/* reset NVME buffer post count for next round of posting */
+		post_cnt = 0;
+
+		/* put posted NVME buffer-sgl posted on NVME buffer sgl list */
+		while (!list_empty(&blck_nblist)) {
+			list_remove_head(&blck_nblist, lpfc_ncmd,
+					 struct lpfc_io_buf, list);
+			if (status) {
+				/* Post error.  Mark buffer unavailable. */
+				lpfc_ncmd->flags |= LPFC_SBUF_NOT_POSTED;
+			} else {
+				/* Post success, Mark buffer available. */
+				lpfc_ncmd->flags &= ~LPFC_SBUF_NOT_POSTED;
+				lpfc_ncmd->status = IOSTAT_SUCCESS;
+				num_posted++;
+			}
+			list_add_tail(&lpfc_ncmd->list, &nvme_nblist);
+		}
+	}
+	/* Push NVME buffers with sgl posted to the available list */
+	lpfc_io_buf_replenish(phba, &nvme_nblist);
+
+	return num_posted;
+}
+
+/**
  * lpfc_fc_frame_check - Check that this frame is a valid frame to handle
  * @phba: pointer to lpfc_hba struct that the frame was received on
  * @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
@@ -19500,7 +19520,7 @@ lpfc_drain_txq(struct lpfc_hba *phba)
 
 	if (phba->link_flag & LS_MDS_LOOPBACK) {
 		/* MDS WQE are posted only to first WQ*/
-		wq = phba->sli4_hba.fcp_wq[0];
+		wq = phba->sli4_hba.hdwq[0].fcp_wq;
 		if (unlikely(!wq))
 			return 0;
 		pring = wq->pring;
@@ -19708,7 +19728,7 @@ lpfc_wqe_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeq,
  * @pwqe: Pointer to command WQE.
  **/
 int
-lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
+lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
 		    struct lpfc_iocbq *pwqe)
 {
 	union lpfc_wqe128 *wqe = &pwqe->wqe;
@@ -19722,7 +19742,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 	/* NVME_LS and NVME_LS ABTS requests. */
 	if (pwqe->iocb_flag & LPFC_IO_NVME_LS) {
 		pring =  phba->sli4_hba.nvmels_wq->pring;
-		spin_lock_irqsave(&pring->ring_lock, iflags);
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		sglq = __lpfc_sli_get_els_sglq(phba, pwqe);
 		if (!sglq) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19750,12 +19771,13 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 	/* NVME_FCREQ and NVME_ABTS requests */
 	if (pwqe->iocb_flag & LPFC_IO_NVME) {
 		/* Get the IO distribution (hba_wqidx) for WQ assignment. */
-		pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring;
+		wq = qp->nvme_wq;
+		pring = wq->pring;
 
-		spin_lock_irqsave(&pring->ring_lock, iflags);
-		wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx];
-		bf_set(wqe_cqid, &wqe->generic.wqe_com,
-		      phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id);
+		bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
+
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		ret = lpfc_sli4_wq_put(wq, wqe);
 		if (ret) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19769,9 +19791,9 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 	/* NVMET requests */
 	if (pwqe->iocb_flag & LPFC_IO_NVMET) {
 		/* Get the IO distribution (hba_wqidx) for WQ assignment. */
-		pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring;
+		wq = qp->nvme_wq;
+		pring = wq->pring;
 
-		spin_lock_irqsave(&pring->ring_lock, iflags);
 		ctxp = pwqe->context2;
 		sglq = ctxp->ctxbuf->sglq;
 		if (pwqe->sli4_xritag ==  NO_XRI) {
@@ -19780,9 +19802,10 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 		}
 		bf_set(wqe_xri_tag, &pwqe->wqe.xmit_bls_rsp.wqe_com,
 		       pwqe->sli4_xritag);
-		wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx];
-		bf_set(wqe_cqid, &wqe->generic.wqe_com,
-		      phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id);
+		bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
+
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		ret = lpfc_sli4_wq_put(wq, wqe);
 		if (ret) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19794,3 +19817,647 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 	}
 	return WQE_ERROR;
 }
+
+#ifdef LPFC_MXP_STAT
+/**
+ * lpfc_snapshot_mxp - Snapshot pbl, pvt and busy count
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ *
+ * The purpose of this routine is to take a snapshot of pbl, pvt and busy count
+ * 15 seconds after a test case is running.
+ *
+ * The user should call lpfc_debugfs_multixripools_write before running a test
+ * case to clear stat_snapshot_taken. Then the user starts a test case. During
+ * test case is running, stat_snapshot_taken is incremented by 1 every time when
+ * this routine is called from heartbeat timer. When stat_snapshot_taken is
+ * equal to LPFC_MXP_SNAPSHOT_TAKEN, a snapshot is taken.
+ **/
+void lpfc_snapshot_mxp(struct lpfc_hba *phba, u32 hwqid)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_pbl_pool *pbl_pool;
+	u32 txcmplq_cnt;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	multixri_pool = qp->p_multixri_pool;
+	if (!multixri_pool)
+		return;
+
+	if (multixri_pool->stat_snapshot_taken == LPFC_MXP_SNAPSHOT_TAKEN) {
+		pvt_pool = &qp->p_multixri_pool->pvt_pool;
+		pbl_pool = &qp->p_multixri_pool->pbl_pool;
+		txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+		if (qp->nvme_wq)
+			txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+
+		multixri_pool->stat_pbl_count = pbl_pool->count;
+		multixri_pool->stat_pvt_count = pvt_pool->count;
+		multixri_pool->stat_busy_count = txcmplq_cnt;
+	}
+
+	multixri_pool->stat_snapshot_taken++;
+}
+#endif
+
+/**
+ * lpfc_adjust_pvt_pool_count - Adjust private pool count
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ *
+ * This routine moves some XRIs from private to public pool when private pool
+ * is not busy.
+ **/
+void lpfc_adjust_pvt_pool_count(struct lpfc_hba *phba, u32 hwqid)
+{
+	struct lpfc_multixri_pool *multixri_pool;
+	u32 io_req_count;
+	u32 prev_io_req_count;
+
+	multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+	if (!multixri_pool)
+		return;
+	io_req_count = multixri_pool->io_req_count;
+	prev_io_req_count = multixri_pool->prev_io_req_count;
+
+	if (prev_io_req_count != io_req_count) {
+		/* Private pool is busy */
+		multixri_pool->prev_io_req_count = io_req_count;
+	} else {
+		/* Private pool is not busy.
+		 * Move XRIs from private to public pool.
+		 */
+		lpfc_move_xri_pvt_to_pbl(phba, hwqid);
+	}
+}
+
+/**
+ * lpfc_adjust_high_watermark - Adjust high watermark
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ *
+ * This routine sets high watermark as number of outstanding XRIs,
+ * but make sure the new value is between xri_limit/2 and xri_limit.
+ **/
+void lpfc_adjust_high_watermark(struct lpfc_hba *phba, u32 hwqid)
+{
+	u32 new_watermark;
+	u32 watermark_max;
+	u32 watermark_min;
+	u32 xri_limit;
+	u32 txcmplq_cnt;
+	u32 abts_io_bufs;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_sli4_hdw_queue *qp;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	multixri_pool = qp->p_multixri_pool;
+	if (!multixri_pool)
+		return;
+	xri_limit = multixri_pool->xri_limit;
+
+	watermark_max = xri_limit;
+	watermark_min = xri_limit / 2;
+
+	txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+	abts_io_bufs = qp->abts_scsi_io_bufs;
+	if (qp->nvme_wq) {
+		txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+		abts_io_bufs += qp->abts_nvme_io_bufs;
+	}
+
+	new_watermark = txcmplq_cnt + abts_io_bufs;
+	new_watermark = min(watermark_max, new_watermark);
+	new_watermark = max(watermark_min, new_watermark);
+	multixri_pool->pvt_pool.high_watermark = new_watermark;
+
+#ifdef LPFC_MXP_STAT
+	multixri_pool->stat_max_hwm = max(multixri_pool->stat_max_hwm,
+					  new_watermark);
+#endif
+}
+
+/**
+ * lpfc_move_xri_pvt_to_pbl - Move some XRIs from private to public pool
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ *
+ * This routine is called from hearbeat timer when pvt_pool is idle.
+ * All free XRIs are moved from private to public pool on hwqid with 2 steps.
+ * The first step moves (all - low_watermark) amount of XRIs.
+ * The second step moves the rest of XRIs.
+ **/
+void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
+{
+	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+	struct list_head tmp_list;
+	u32 tmp_count;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	pbl_pool = &qp->p_multixri_pool->pbl_pool;
+	pvt_pool = &qp->p_multixri_pool->pvt_pool;
+	tmp_count = 0;
+
+	lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag, qp, mv_to_pub_pool);
+	lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_from_pvt_pool);
+
+	if (pvt_pool->count > pvt_pool->low_watermark) {
+		/* Step 1: move (all - low_watermark) from pvt_pool
+		 * to pbl_pool
+		 */
+
+		/* Move low watermark of bufs from pvt_pool to tmp_list */
+		INIT_LIST_HEAD(&tmp_list);
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &pvt_pool->list, list) {
+			list_move_tail(&lpfc_ncmd->list, &tmp_list);
+			tmp_count++;
+			if (tmp_count >= pvt_pool->low_watermark)
+				break;
+		}
+
+		/* Move all bufs from pvt_pool to pbl_pool */
+		list_splice_init(&pvt_pool->list, &pbl_pool->list);
+
+		/* Move all bufs from tmp_list to pvt_pool */
+		list_splice(&tmp_list, &pvt_pool->list);
+
+		pbl_pool->count += (pvt_pool->count - tmp_count);
+		pvt_pool->count = tmp_count;
+	} else {
+		/* Step 2: move the rest from pvt_pool to pbl_pool */
+		list_splice_init(&pvt_pool->list, &pbl_pool->list);
+		pbl_pool->count += pvt_pool->count;
+		pvt_pool->count = 0;
+	}
+
+	spin_unlock(&pvt_pool->lock);
+	spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+}
+
+/**
+ * _lpfc_move_xri_pbl_to_pvt - Move some XRIs from public to private pool
+ * @phba: pointer to lpfc hba data structure
+ * @pbl_pool: specified public free XRI pool
+ * @pvt_pool: specified private free XRI pool
+ * @count: number of XRIs to move
+ *
+ * This routine tries to move some free common bufs from the specified pbl_pool
+ * to the specified pvt_pool. It might move less than count XRIs if there's not
+ * enough in public pool.
+ *
+ * Return:
+ *   true - if XRIs are successfully moved from the specified pbl_pool to the
+ *          specified pvt_pool
+ *   false - if the specified pbl_pool is empty or locked by someone else
+ **/
+static bool
+_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
+			  struct lpfc_pbl_pool *pbl_pool,
+			  struct lpfc_pvt_pool *pvt_pool, u32 count)
+{
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+	int ret;
+
+	ret = spin_trylock_irqsave(&pbl_pool->lock, iflag);
+	if (ret) {
+		if (pbl_pool->count) {
+			/* Move a batch of XRIs from public to private pool */
+			lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_to_pvt_pool);
+			list_for_each_entry_safe(lpfc_ncmd,
+						 lpfc_ncmd_next,
+						 &pbl_pool->list,
+						 list) {
+				list_move_tail(&lpfc_ncmd->list,
+					       &pvt_pool->list);
+				pvt_pool->count++;
+				pbl_pool->count--;
+				count--;
+				if (count == 0)
+					break;
+			}
+
+			spin_unlock(&pvt_pool->lock);
+			spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+			return true;
+		}
+		spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+	}
+
+	return false;
+}
+
+/**
+ * lpfc_move_xri_pbl_to_pvt - Move some XRIs from public to private pool
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ * @count: number of XRIs to move
+ *
+ * This routine tries to find some free common bufs in one of public pools with
+ * Round Robin method. The search always starts from local hwqid, then the next
+ * HWQ which was found last time (rrb_next_hwqid). Once a public pool is found,
+ * a batch of free common bufs are moved to private pool on hwqid.
+ * It might move less than count XRIs if there's not enough in public pool.
+ **/
+void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
+{
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_multixri_pool *next_multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_sli4_hdw_queue *qp;
+	u32 next_hwqid;
+	u32 hwq_count;
+	int ret;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	multixri_pool = qp->p_multixri_pool;
+	pvt_pool = &multixri_pool->pvt_pool;
+	pbl_pool = &multixri_pool->pbl_pool;
+
+	/* Check if local pbl_pool is available */
+	ret = _lpfc_move_xri_pbl_to_pvt(phba, qp, pbl_pool, pvt_pool, count);
+	if (ret) {
+#ifdef LPFC_MXP_STAT
+		multixri_pool->local_pbl_hit_count++;
+#endif
+		return;
+	}
+
+	hwq_count = phba->cfg_hdw_queue;
+
+	/* Get the next hwqid which was found last time */
+	next_hwqid = multixri_pool->rrb_next_hwqid;
+
+	do {
+		/* Go to next hwq */
+		next_hwqid = (next_hwqid + 1) % hwq_count;
+
+		next_multixri_pool =
+			phba->sli4_hba.hdwq[next_hwqid].p_multixri_pool;
+		pbl_pool = &next_multixri_pool->pbl_pool;
+
+		/* Check if the public free xri pool is available */
+		ret = _lpfc_move_xri_pbl_to_pvt(
+			phba, qp, pbl_pool, pvt_pool, count);
+
+		/* Exit while-loop if success or all hwqid are checked */
+	} while (!ret && next_hwqid != multixri_pool->rrb_next_hwqid);
+
+	/* Starting point for the next time */
+	multixri_pool->rrb_next_hwqid = next_hwqid;
+
+	if (!ret) {
+		/* stats: all public pools are empty*/
+		multixri_pool->pbl_empty_count++;
+	}
+
+#ifdef LPFC_MXP_STAT
+	if (ret) {
+		if (next_hwqid == hwqid)
+			multixri_pool->local_pbl_hit_count++;
+		else
+			multixri_pool->other_pbl_hit_count++;
+	}
+#endif
+}
+
+/**
+ * lpfc_keep_pvt_pool_above_lowwm - Keep pvt_pool above low watermark
+ * @phba: pointer to lpfc hba data structure.
+ * @qp: belong to which HWQ.
+ *
+ * This routine get a batch of XRIs from pbl_pool if pvt_pool is less than
+ * low watermark.
+ **/
+void lpfc_keep_pvt_pool_above_lowwm(struct lpfc_hba *phba, u32 hwqid)
+{
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+
+	multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+	pvt_pool = &multixri_pool->pvt_pool;
+
+	if (pvt_pool->count < pvt_pool->low_watermark)
+		lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
+}
+
+/**
+ * lpfc_release_io_buf - Return one IO buf back to free pool
+ * @phba: pointer to lpfc hba data structure.
+ * @lpfc_ncmd: IO buf to be returned.
+ * @qp: belong to which HWQ.
+ *
+ * This routine returns one IO buf back to free pool. If this is an urgent IO,
+ * the IO buf is returned to expedite pool. If cfg_xri_rebalancing==1,
+ * the IO buf is returned to pbl_pool or pvt_pool based on watermark and
+ * xri_limit.  If cfg_xri_rebalancing==0, the IO buf is returned to
+ * lpfc_io_buf_list_put.
+ **/
+void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
+			 struct lpfc_sli4_hdw_queue *qp)
+{
+	unsigned long iflag;
+	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_epd_pool *epd_pool;
+	u32 txcmplq_cnt;
+	u32 xri_owned;
+	u32 xri_limit;
+	u32 abts_io_bufs;
+
+	/* MUST zero fields if buffer is reused by another protocol */
+	lpfc_ncmd->nvmeCmd = NULL;
+	lpfc_ncmd->cur_iocbq.wqe_cmpl = NULL;
+	lpfc_ncmd->cur_iocbq.iocb_cmpl = NULL;
+
+	if (phba->cfg_xri_rebalancing) {
+		if (lpfc_ncmd->expedite) {
+			/* Return to expedite pool */
+			epd_pool = &phba->epd_pool;
+			spin_lock_irqsave(&epd_pool->lock, iflag);
+			list_add_tail(&lpfc_ncmd->list, &epd_pool->list);
+			epd_pool->count++;
+			spin_unlock_irqrestore(&epd_pool->lock, iflag);
+			return;
+		}
+
+		/* Avoid invalid access if an IO sneaks in and is being rejected
+		 * just _after_ xri pools are destroyed in lpfc_offline.
+		 * Nothing much can be done at this point.
+		 */
+		if (!qp->p_multixri_pool)
+			return;
+
+		pbl_pool = &qp->p_multixri_pool->pbl_pool;
+		pvt_pool = &qp->p_multixri_pool->pvt_pool;
+
+		txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+		abts_io_bufs = qp->abts_scsi_io_bufs;
+		if (qp->nvme_wq) {
+			txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+			abts_io_bufs += qp->abts_nvme_io_bufs;
+		}
+
+		xri_owned = pvt_pool->count + txcmplq_cnt + abts_io_bufs;
+		xri_limit = qp->p_multixri_pool->xri_limit;
+
+#ifdef LPFC_MXP_STAT
+		if (xri_owned <= xri_limit)
+			qp->p_multixri_pool->below_limit_count++;
+		else
+			qp->p_multixri_pool->above_limit_count++;
+#endif
+
+		/* XRI goes to either public or private free xri pool
+		 *     based on watermark and xri_limit
+		 */
+		if ((pvt_pool->count < pvt_pool->low_watermark) ||
+		    (xri_owned < xri_limit &&
+		     pvt_pool->count < pvt_pool->high_watermark)) {
+			lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag,
+						  qp, free_pvt_pool);
+			list_add_tail(&lpfc_ncmd->list,
+				      &pvt_pool->list);
+			pvt_pool->count++;
+			spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+		} else {
+			lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag,
+						  qp, free_pub_pool);
+			list_add_tail(&lpfc_ncmd->list,
+				      &pbl_pool->list);
+			pbl_pool->count++;
+			spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+		}
+	} else {
+		lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag,
+					  qp, free_xri);
+		list_add_tail(&lpfc_ncmd->list,
+			      &qp->lpfc_io_buf_list_put);
+		qp->put_io_bufs++;
+		spin_unlock_irqrestore(&qp->io_buf_list_put_lock,
+				       iflag);
+	}
+}
+
+/**
+ * lpfc_get_io_buf_from_private_pool - Get one free IO buf from private pool
+ * @phba: pointer to lpfc hba data structure.
+ * @pvt_pool: pointer to private pool data structure.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ *
+ * This routine tries to get one free IO buf from private pool.
+ *
+ * Return:
+ *   pointer to one free IO buf - if private pool is not empty
+ *   NULL - if private pool is empty
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
+				  struct lpfc_sli4_hdw_queue *qp,
+				  struct lpfc_pvt_pool *pvt_pool,
+				  struct lpfc_nodelist *ndlp)
+{
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+
+	lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag, qp, alloc_pvt_pool);
+	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+				 &pvt_pool->list, list) {
+		if (lpfc_test_rrq_active(
+			phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag))
+			continue;
+		list_del(&lpfc_ncmd->list);
+		pvt_pool->count--;
+		spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+		return lpfc_ncmd;
+	}
+	spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+
+	return NULL;
+}
+
+/**
+ * lpfc_get_io_buf_from_expedite_pool - Get one free IO buf from expedite pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine tries to get one free IO buf from expedite pool.
+ *
+ * Return:
+ *   pointer to one free IO buf - if expedite pool is not empty
+ *   NULL - if expedite pool is empty
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_expedite_pool(struct lpfc_hba *phba)
+{
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+	struct lpfc_epd_pool *epd_pool;
+
+	epd_pool = &phba->epd_pool;
+	lpfc_ncmd = NULL;
+
+	spin_lock_irqsave(&epd_pool->lock, iflag);
+	if (epd_pool->count > 0) {
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &epd_pool->list, list) {
+			list_del(&lpfc_ncmd->list);
+			epd_pool->count--;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&epd_pool->lock, iflag);
+
+	return lpfc_ncmd;
+}
+
+/**
+ * lpfc_get_io_buf_from_multixri_pools - Get one free IO bufs
+ * @phba: pointer to lpfc hba data structure.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ * @hwqid: belong to which HWQ
+ * @expedite: 1 means this request is urgent.
+ *
+ * This routine will do the following actions and then return a pointer to
+ * one free IO buf.
+ *
+ * 1. If private free xri count is empty, move some XRIs from public to
+ *    private pool.
+ * 2. Get one XRI from private free xri pool.
+ * 3. If we fail to get one from pvt_pool and this is an expedite request,
+ *    get one free xri from expedite pool.
+ *
+ * Note: ndlp is only used on SCSI side for RRQ testing.
+ *       The caller should pass NULL for ndlp on NVME side.
+ *
+ * Return:
+ *   pointer to one free IO buf - if private pool is not empty
+ *   NULL - if private pool is empty
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba,
+				    struct lpfc_nodelist *ndlp,
+				    int hwqid, int expedite)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_io_buf *lpfc_ncmd;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	lpfc_ncmd = NULL;
+	multixri_pool = qp->p_multixri_pool;
+	pvt_pool = &multixri_pool->pvt_pool;
+	multixri_pool->io_req_count++;
+
+	/* If pvt_pool is empty, move some XRIs from public to private pool */
+	if (pvt_pool->count == 0)
+		lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
+
+	/* Get one XRI from private free xri pool */
+	lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, qp, pvt_pool, ndlp);
+
+	if (lpfc_ncmd) {
+		lpfc_ncmd->hdwq = qp;
+		lpfc_ncmd->hdwq_no = hwqid;
+	} else if (expedite) {
+		/* If we fail to get one from pvt_pool and this is an expedite
+		 * request, get one free xri from expedite pool.
+		 */
+		lpfc_ncmd = lpfc_get_io_buf_from_expedite_pool(phba);
+	}
+
+	return lpfc_ncmd;
+}
+
+static inline struct lpfc_io_buf *
+lpfc_io_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, int idx)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_io_buf *lpfc_cmd, *lpfc_cmd_next;
+
+	qp = &phba->sli4_hba.hdwq[idx];
+	list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next,
+				 &qp->lpfc_io_buf_list_get, list) {
+		if (lpfc_test_rrq_active(phba, ndlp,
+					 lpfc_cmd->cur_iocbq.sli4_lxritag))
+			continue;
+
+		if (lpfc_cmd->flags & LPFC_SBUF_NOT_POSTED)
+			continue;
+
+		list_del_init(&lpfc_cmd->list);
+		qp->get_io_bufs--;
+		lpfc_cmd->hdwq = qp;
+		lpfc_cmd->hdwq_no = idx;
+		return lpfc_cmd;
+	}
+	return NULL;
+}
+
+/**
+ * lpfc_get_io_buf - Get one IO buffer from free pool
+ * @phba: The HBA for which this call is being executed.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ * @hwqid: belong to which HWQ
+ * @expedite: 1 means this request is urgent.
+ *
+ * This routine gets one IO buffer from free pool. If cfg_xri_rebalancing==1,
+ * removes a IO buffer from multiXRI pools. If cfg_xri_rebalancing==0, removes
+ * a IO buffer from head of @hdwq io_buf_list and returns to caller.
+ *
+ * Note: ndlp is only used on SCSI side for RRQ testing.
+ *       The caller should pass NULL for ndlp on NVME side.
+ *
+ * Return codes:
+ *   NULL - Error
+ *   Pointer to lpfc_io_buf - Success
+ **/
+struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
+				    struct lpfc_nodelist *ndlp,
+				    u32 hwqid, int expedite)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	unsigned long iflag;
+	struct lpfc_io_buf *lpfc_cmd;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	lpfc_cmd = NULL;
+
+	if (phba->cfg_xri_rebalancing)
+		lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
+			phba, ndlp, hwqid, expedite);
+	else {
+		lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag,
+					  qp, alloc_xri_get);
+		if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
+			lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
+		if (!lpfc_cmd) {
+			lpfc_qp_spin_lock(&qp->io_buf_list_put_lock,
+					  qp, alloc_xri_put);
+			list_splice(&qp->lpfc_io_buf_list_put,
+				    &qp->lpfc_io_buf_list_get);
+			qp->get_io_bufs += qp->put_io_bufs;
+			INIT_LIST_HEAD(&qp->lpfc_io_buf_list_put);
+			qp->put_io_bufs = 0;
+			spin_unlock(&qp->io_buf_list_put_lock);
+			if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT ||
+			    expedite)
+				lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
+		}
+		spin_unlock_irqrestore(&qp->io_buf_list_get_lock, iflag);
+	}
+
+	return lpfc_cmd;
+}
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 7abb395bb64a..7a1a761efdd6 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -20,6 +20,10 @@
  * included with this package.                                     *
  *******************************************************************/
 
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_SCSI_LPFC_DEBUG_FS)
+#define CONFIG_SCSI_LPFC_DEBUG_FS
+#endif
+
 /* forward declaration for LPFC_IOCB_t's use */
 struct lpfc_hba;
 struct lpfc_vport;
@@ -33,6 +37,7 @@ typedef enum _lpfc_ctx_cmd {
 
 struct lpfc_cq_event {
 	struct list_head list;
+	uint16_t hdwq;
 	union {
 		struct lpfc_mcqe		mcqe_cmpl;
 		struct lpfc_acqe_link		acqe_link;
@@ -351,3 +356,85 @@ struct lpfc_sli {
 #define LPFC_MBOX_SLI4_CONFIG_EXTENDED_TMO	300
 /* Timeout for other flash-based outstanding mbox command (Seconds) */
 #define LPFC_MBOX_TMO_FLASH_CMD			300
+
+struct lpfc_io_buf {
+	/* Common fields */
+	struct list_head list;
+	void *data;
+	dma_addr_t dma_handle;
+	dma_addr_t dma_phys_sgl;
+	struct sli4_sge *dma_sgl;
+	struct lpfc_iocbq cur_iocbq;
+	struct lpfc_sli4_hdw_queue *hdwq;
+	uint16_t hdwq_no;
+	uint16_t cpu;
+
+	struct lpfc_nodelist *ndlp;
+	uint32_t timeout;
+	uint16_t flags;  /* TBD convert exch_busy to flags */
+#define LPFC_SBUF_XBUSY		0x1	/* SLI4 hba reported XB on WCQE cmpl */
+#define LPFC_SBUF_BUMP_QDEPTH	0x2	/* bumped queue depth counter */
+					/* External DIF device IO conversions */
+#define LPFC_SBUF_NORMAL_DIF	0x4	/* normal mode to insert/strip */
+#define LPFC_SBUF_PASS_DIF	0x8	/* insert/strip mode to passthru */
+#define LPFC_SBUF_NOT_POSTED    0x10    /* SGL failed post to FW. */
+	uint16_t exch_busy;     /* SLI4 hba reported XB on complete WCQE */
+	uint16_t status;	/* From IOCB Word 7- ulpStatus */
+	uint32_t result;	/* From IOCB Word 4. */
+
+	uint32_t   seg_cnt;	/* Number of scatter-gather segments returned by
+				 * dma_map_sg.  The driver needs this for calls
+				 * to dma_unmap_sg.
+				 */
+	unsigned long start_time;
+	spinlock_t buf_lock;	/* lock used in case of simultaneous abort */
+	bool expedite;		/* this is an expedite io_buf */
+
+	union {
+		/* SCSI specific fields */
+		struct {
+			struct scsi_cmnd *pCmd;
+			struct lpfc_rport_data *rdata;
+			uint32_t prot_seg_cnt;  /* seg_cnt's counterpart for
+						 * protection data
+						 */
+
+			/*
+			 * data and dma_handle are the kernel virtual and bus
+			 * address of the dma-able buffer containing the
+			 * fcp_cmd, fcp_rsp and a scatter gather bde list that
+			 * supports the sg_tablesize value.
+			 */
+			struct fcp_cmnd *fcp_cmnd;
+			struct fcp_rsp *fcp_rsp;
+
+			wait_queue_head_t *waitq;
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+			/* Used to restore any changes to protection data for
+			 * error injection
+			 */
+			void *prot_data_segment;
+			uint32_t prot_data;
+			uint32_t prot_data_type;
+#define	LPFC_INJERR_REFTAG	1
+#define	LPFC_INJERR_APPTAG	2
+#define	LPFC_INJERR_GUARD	3
+#endif
+		};
+
+		/* NVME specific fields */
+		struct {
+			struct nvmefc_fcp_req *nvmeCmd;
+			uint16_t qidx;
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+			uint64_t ts_cmd_start;
+			uint64_t ts_last_cmd;
+			uint64_t ts_cmd_wqput;
+			uint64_t ts_isr_cmpl;
+			uint64_t ts_data_nvme;
+#endif
+		};
+	};
+};
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 6b2d2350e2c6..40c85091c805 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2009-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -20,6 +20,10 @@
  * included with this package.                                     *
  *******************************************************************/
 
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_SCSI_LPFC_DEBUG_FS)
+#define CONFIG_SCSI_LPFC_DEBUG_FS
+#endif
+
 #define LPFC_ACTIVE_MBOX_WAIT_CNT               100
 #define LPFC_XRI_EXCH_BUSY_WAIT_TMO		10000
 #define LPFC_XRI_EXCH_BUSY_WAIT_T1   		10
@@ -36,14 +40,12 @@
 #define LPFC_NEMBED_MBOX_SGL_CNT		254
 
 /* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */
-#define LPFC_HBA_IO_CHAN_MIN	0
-#define LPFC_HBA_IO_CHAN_MAX	32
-#define LPFC_FCP_IO_CHAN_DEF	4
-#define LPFC_NVME_IO_CHAN_DEF	0
-
-/* Number of channels used for Flash Optimized Fabric (FOF) operations */
+#define LPFC_HBA_HDWQ_MIN	0
+#define LPFC_HBA_HDWQ_MAX	128
+#define LPFC_HBA_HDWQ_DEF	0
 
-#define LPFC_FOF_IO_CHAN_NUM       1
+/* Common buffer size to accomidate SCSI and NVME IO buffers */
+#define LPFC_COMMON_IO_BUF_SZ	768
 
 /*
  * Provide the default FCF Record attributes used by the driver
@@ -152,28 +154,58 @@ struct lpfc_queue {
 	struct list_head child_list;
 	struct list_head page_list;
 	struct list_head sgl_list;
+	struct list_head cpu_list;
 	uint32_t entry_count;	/* Number of entries to support on the queue */
 	uint32_t entry_size;	/* Size of each queue entry. */
-	uint32_t entry_repost;	/* Count of entries before doorbell is rung */
-#define LPFC_EQ_REPOST		8
-#define LPFC_MQ_REPOST		8
-#define LPFC_CQ_REPOST		64
-#define LPFC_RQ_REPOST		64
-#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32  /* For WQs */
+	uint32_t notify_interval; /* Queue Notification Interval
+				   * For chip->host queues (EQ, CQ, RQ):
+				   *  specifies the interval (number of
+				   *  entries) where the doorbell is rung to
+				   *  notify the chip of entry consumption.
+				   * For host->chip queues (WQ):
+				   *  specifies the interval (number of
+				   *  entries) where consumption CQE is
+				   *  requested to indicate WQ entries
+				   *  consumed by the chip.
+				   * Not used on an MQ.
+				   */
+#define LPFC_EQ_NOTIFY_INTRVL	16
+#define LPFC_CQ_NOTIFY_INTRVL	16
+#define LPFC_WQ_NOTIFY_INTRVL	16
+#define LPFC_RQ_NOTIFY_INTRVL	16
+	uint32_t max_proc_limit; /* Queue Processing Limit
+				  * For chip->host queues (EQ, CQ):
+				  *  specifies the maximum number of
+				  *  entries to be consumed in one
+				  *  processing iteration sequence. Queue
+				  *  will be rearmed after each iteration.
+				  * Not used on an MQ, RQ or WQ.
+				  */
+#define LPFC_EQ_MAX_PROC_LIMIT		256
+#define LPFC_CQ_MIN_PROC_LIMIT		64
+#define LPFC_CQ_MAX_PROC_LIMIT		LPFC_CQE_EXP_COUNT	// 4096
+#define LPFC_CQ_DEF_MAX_PROC_LIMIT	LPFC_CQE_DEF_COUNT	// 1024
+#define LPFC_CQ_MIN_THRESHOLD_TO_POLL	64
+#define LPFC_CQ_MAX_THRESHOLD_TO_POLL	LPFC_CQ_DEF_MAX_PROC_LIMIT
+#define LPFC_CQ_DEF_THRESHOLD_TO_POLL	LPFC_CQ_DEF_MAX_PROC_LIMIT
+	uint32_t queue_claimed; /* indicates queue is being processed */
 	uint32_t queue_id;	/* Queue ID assigned by the hardware */
 	uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */
 	uint32_t host_index;	/* The host's index for putting or getting */
 	uint32_t hba_index;	/* The last known hba index for get or put */
+	uint32_t q_mode;
 
 	struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
 	struct lpfc_rqb *rqbp;	/* ptr to RQ buffers */
 
-	uint32_t q_mode;
 	uint16_t page_count;	/* Number of pages allocated for this queue */
 	uint16_t page_size;	/* size of page allocated for this queue */
 #define LPFC_EXPANDED_PAGE_SIZE	16384
 #define LPFC_DEFAULT_PAGE_SIZE	4096
-	uint16_t chann;		/* IO channel this queue is associated with */
+	uint16_t chann;		/* Hardware Queue association WQ/CQ */
+				/* CPU affinity for EQ */
+#define LPFC_FIND_BY_EQ		0
+#define LPFC_FIND_BY_HDWQ	1
 	uint8_t db_format;
 #define LPFC_DB_RING_FORMAT	0x01
 #define LPFC_DB_LIST_FORMAT	0x02
@@ -212,10 +244,14 @@ struct lpfc_queue {
 #define	RQ_buf_posted		q_cnt_3
 #define	RQ_rcv_buf		q_cnt_4
 
-	struct work_struct irqwork;
-	struct work_struct spwork;
+	struct work_struct	irqwork;
+	struct work_struct	spwork;
+	struct delayed_work	sched_irqwork;
+	struct delayed_work	sched_spwork;
 
 	uint64_t isr_timestamp;
+	uint16_t hdwq;
+	uint16_t last_cpu;	/* most recent cpu */
 	uint8_t	qe_valid;
 	struct lpfc_queue *assoc_qp;
 	union sli4_qe qe[1];	/* array to index entries (must be last) */
@@ -428,11 +464,6 @@ struct lpfc_hba_eq_hdl {
 	uint32_t idx;
 	char handler_name[LPFC_SLI4_HANDLER_NAME_SZ];
 	struct lpfc_hba *phba;
-	atomic_t hba_eq_in_use;
-	struct cpumask *cpumask;
-	/* CPU affinitsed to or 0xffffffff if multiple */
-	uint32_t cpu;
-#define LPFC_MULTI_CPU_AFFINITY 0xffffffff
 };
 
 /*BB Credit recovery value*/
@@ -526,11 +557,165 @@ struct lpfc_vector_map_info {
 	uint16_t	phys_id;
 	uint16_t	core_id;
 	uint16_t	irq;
-	uint16_t	channel_id;
+	uint16_t	eq;
+	uint16_t	hdwq;
+	uint16_t	hyper;
 };
 #define LPFC_VECTOR_MAP_EMPTY	0xffff
 
+/* Multi-XRI pool */
+#define XRI_BATCH               8
+
+struct lpfc_pbl_pool {
+	struct list_head list;
+	u32 count;
+	spinlock_t lock;	/* lock for pbl_pool*/
+};
+
+struct lpfc_pvt_pool {
+	u32 low_watermark;
+	u32 high_watermark;
+
+	struct list_head list;
+	u32 count;
+	spinlock_t lock;	/* lock for pvt_pool */
+};
+
+struct lpfc_multixri_pool {
+	u32 xri_limit;
+
+	/* Starting point when searching a pbl_pool with round-robin method */
+	u32 rrb_next_hwqid;
+
+	/* Used by lpfc_adjust_pvt_pool_count.
+	 * io_req_count is incremented by 1 during IO submission. The heartbeat
+	 * handler uses these two variables to determine if pvt_pool is idle or
+	 * busy.
+	 */
+	u32 prev_io_req_count;
+	u32 io_req_count;
+
+	/* statistics */
+	u32 pbl_empty_count;
+#ifdef LPFC_MXP_STAT
+	u32 above_limit_count;
+	u32 below_limit_count;
+	u32 local_pbl_hit_count;
+	u32 other_pbl_hit_count;
+	u32 stat_max_hwm;
+
+#define LPFC_MXP_SNAPSHOT_TAKEN 3 /* snapshot is taken at 3rd heartbeats */
+	u32 stat_pbl_count;
+	u32 stat_pvt_count;
+	u32 stat_busy_count;
+	u32 stat_snapshot_taken;
+#endif
+
+	/* TODO: Separate pvt_pool into get and put list */
+	struct lpfc_pbl_pool pbl_pool;   /* Public free XRI pool */
+	struct lpfc_pvt_pool pvt_pool;   /* Private free XRI pool */
+};
+
+struct lpfc_fc4_ctrl_stat {
+	u32 input_requests;
+	u32 output_requests;
+	u32 control_requests;
+	u32 io_cmpls;
+};
+
+#ifdef LPFC_HDWQ_LOCK_STAT
+struct lpfc_lock_stat {
+	uint32_t alloc_xri_get;
+	uint32_t alloc_xri_put;
+	uint32_t free_xri;
+	uint32_t wq_access;
+	uint32_t alloc_pvt_pool;
+	uint32_t mv_from_pvt_pool;
+	uint32_t mv_to_pub_pool;
+	uint32_t mv_to_pvt_pool;
+	uint32_t free_pub_pool;
+	uint32_t free_pvt_pool;
+};
+#endif
+
+struct lpfc_eq_intr_info {
+	struct list_head list;
+	uint32_t icnt;
+};
+
 /* SLI4 HBA data structure entries */
+struct lpfc_sli4_hdw_queue {
+	/* Pointers to the constructed SLI4 queues */
+	struct lpfc_queue *hba_eq;  /* Event queues for HBA */
+	struct lpfc_queue *fcp_cq;  /* Fast-path FCP compl queue */
+	struct lpfc_queue *nvme_cq; /* Fast-path NVME compl queue */
+	struct lpfc_queue *fcp_wq;  /* Fast-path FCP work queue */
+	struct lpfc_queue *nvme_wq; /* Fast-path NVME work queue */
+	uint16_t fcp_cq_map;
+	uint16_t nvme_cq_map;
+
+	/* Keep track of IO buffers for this hardware queue */
+	spinlock_t io_buf_list_get_lock;  /* Common buf alloc list lock */
+	struct list_head lpfc_io_buf_list_get;
+	spinlock_t io_buf_list_put_lock;  /* Common buf free list lock */
+	struct list_head lpfc_io_buf_list_put;
+	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
+	struct list_head lpfc_abts_scsi_buf_list;
+	spinlock_t abts_nvme_buf_list_lock; /* list of aborted NVME IOs */
+	struct list_head lpfc_abts_nvme_buf_list;
+	uint32_t total_io_bufs;
+	uint32_t get_io_bufs;
+	uint32_t put_io_bufs;
+	uint32_t empty_io_bufs;
+	uint32_t abts_scsi_io_bufs;
+	uint32_t abts_nvme_io_bufs;
+
+	/* Multi-XRI pool per HWQ */
+	struct lpfc_multixri_pool *p_multixri_pool;
+
+	/* FC-4 Stats counters */
+	struct lpfc_fc4_ctrl_stat nvme_cstat;
+	struct lpfc_fc4_ctrl_stat scsi_cstat;
+#ifdef LPFC_HDWQ_LOCK_STAT
+	struct lpfc_lock_stat lock_conflict;
+#endif
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+#define LPFC_CHECK_CPU_CNT    128
+	uint32_t cpucheck_rcv_io[LPFC_CHECK_CPU_CNT];
+	uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT];
+	uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT];
+#endif
+};
+
+#ifdef LPFC_HDWQ_LOCK_STAT
+/* compile time trylock stats */
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+	{ \
+	int only_once = 1; \
+	while (spin_trylock_irqsave(lock, flag) == 0) { \
+		if (only_once) { \
+			only_once = 0; \
+			qp->lock_conflict.lstat++; \
+		} \
+	} \
+	}
+#define lpfc_qp_spin_lock(lock, qp, lstat) \
+	{ \
+	int only_once = 1; \
+	while (spin_trylock(lock) == 0) { \
+		if (only_once) { \
+			only_once = 0; \
+			qp->lock_conflict.lstat++; \
+		} \
+	} \
+	}
+#else
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+	spin_lock_irqsave(lock, flag)
+#define lpfc_qp_spin_lock(lock, qp, lstat) spin_lock(lock)
+#endif
+
 struct lpfc_sli4_hba {
 	void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
 					   * config space registers
@@ -599,21 +784,19 @@ struct lpfc_sli4_hba {
 	struct lpfc_hba_eq_hdl *hba_eq_hdl; /* HBA per-WQ handle */
 
 	void (*sli4_eq_clr_intr)(struct lpfc_queue *q);
-	uint32_t (*sli4_eq_release)(struct lpfc_queue *q, bool arm);
-	uint32_t (*sli4_cq_release)(struct lpfc_queue *q, bool arm);
+	void (*sli4_write_eq_db)(struct lpfc_hba *phba, struct lpfc_queue *eq,
+				uint32_t count, bool arm);
+	void (*sli4_write_cq_db)(struct lpfc_hba *phba, struct lpfc_queue *cq,
+				uint32_t count, bool arm);
 
 	/* Pointers to the constructed SLI4 queues */
-	struct lpfc_queue **hba_eq;  /* Event queues for HBA */
-	struct lpfc_queue **fcp_cq;  /* Fast-path FCP compl queue */
-	struct lpfc_queue **nvme_cq; /* Fast-path NVME compl queue */
+	struct lpfc_sli4_hdw_queue *hdwq;
+	struct list_head lpfc_wq_list;
+
+	/* Pointers to the constructed SLI4 queues for NVMET */
 	struct lpfc_queue **nvmet_cqset; /* Fast-path NVMET CQ Set queues */
 	struct lpfc_queue **nvmet_mrq_hdr; /* Fast-path NVMET hdr MRQs */
 	struct lpfc_queue **nvmet_mrq_data; /* Fast-path NVMET data MRQs */
-	struct lpfc_queue **fcp_wq;  /* Fast-path FCP work queue */
-	struct lpfc_queue **nvme_wq; /* Fast-path NVME work queue */
-	uint16_t *fcp_cq_map;
-	uint16_t *nvme_cq_map;
-	struct list_head lpfc_wq_list;
 
 	struct lpfc_queue *mbx_cq; /* Slow-path mailbox complete queue */
 	struct lpfc_queue *els_cq; /* Slow-path ELS response complete queue */
@@ -631,13 +814,7 @@ struct lpfc_sli4_hba {
 	uint32_t ulp0_mode;	/* ULP0 protocol mode */
 	uint32_t ulp1_mode;	/* ULP1 protocol mode */
 
-	struct lpfc_queue *fof_eq; /* Flash Optimized Fabric Event queue */
-
 	/* Optimized Access Storage specific queues/structures */
-
-	struct lpfc_queue *oas_cq; /* OAS completion queue */
-	struct lpfc_queue *oas_wq; /* OAS Work queue */
-	struct lpfc_sli_ring *oas_ring;
 	uint64_t oas_next_lun;
 	uint8_t oas_next_tgt_wwpn[8];
 	uint8_t oas_next_vpt_wwpn[8];
@@ -663,22 +840,22 @@ struct lpfc_sli4_hba {
 	uint16_t rpi_hdrs_in_use; /* must post rpi hdrs if set. */
 	uint16_t next_xri; /* last_xri - max_cfg_param.xri_base = used */
 	uint16_t next_rpi;
-	uint16_t nvme_xri_max;
-	uint16_t nvme_xri_cnt;
-	uint16_t nvme_xri_start;
-	uint16_t scsi_xri_max;
-	uint16_t scsi_xri_cnt;
-	uint16_t scsi_xri_start;
+	uint16_t io_xri_max;
+	uint16_t io_xri_cnt;
+	uint16_t io_xri_start;
 	uint16_t els_xri_cnt;
 	uint16_t nvmet_xri_cnt;
 	uint16_t nvmet_io_wait_cnt;
 	uint16_t nvmet_io_wait_total;
+	uint16_t cq_max;
+	struct lpfc_queue **cq_lookup;
 	struct list_head lpfc_els_sgl_list;
 	struct list_head lpfc_abts_els_sgl_list;
+	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
+	struct list_head lpfc_abts_scsi_buf_list;
 	struct list_head lpfc_nvmet_sgl_list;
+	spinlock_t abts_nvmet_buf_list_lock; /* list of aborted NVMET IOs */
 	struct list_head lpfc_abts_nvmet_ctx_list;
-	struct list_head lpfc_abts_scsi_buf_list;
-	struct list_head lpfc_abts_nvme_buf_list;
 	struct list_head lpfc_nvmet_io_wait_list;
 	struct lpfc_nvmet_ctx_info *nvmet_ctx_info;
 	struct lpfc_sglq **lpfc_sglq_active_list;
@@ -707,17 +884,16 @@ struct lpfc_sli4_hba {
 #define LPFC_SLI4_PPNAME_NON	0
 #define LPFC_SLI4_PPNAME_GET	1
 	struct lpfc_iov iov;
-	spinlock_t abts_nvme_buf_list_lock; /* list of aborted SCSI IOs */
-	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
 	spinlock_t sgl_list_lock; /* list of aborted els IOs */
 	spinlock_t nvmet_io_wait_lock; /* IOs waiting for ctx resources */
 	uint32_t physical_port;
 
 	/* CPU to vector mapping information */
 	struct lpfc_vector_map_info *cpu_map;
-	uint16_t num_online_cpu;
+	uint16_t num_possible_cpu;
 	uint16_t num_present_cpu;
 	uint16_t curr_disp_cpu;
+	struct lpfc_eq_intr_info __percpu *eq_info;
 	uint32_t conf_trunk;
 #define lpfc_conf_trunk_port0_WORD	conf_trunk
 #define lpfc_conf_trunk_port0_SHIFT	0
@@ -818,12 +994,12 @@ struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t,
 					 uint32_t, uint32_t);
 void lpfc_sli4_queue_free(struct lpfc_queue *);
 int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t);
-int lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
-			     uint32_t numq, uint32_t imax);
+void lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
+			     uint32_t numq, uint32_t usdelay);
 int lpfc_cq_create(struct lpfc_hba *, struct lpfc_queue *,
 			struct lpfc_queue *, uint32_t, uint32_t);
 int lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
-			struct lpfc_queue **eqp, uint32_t type,
+			struct lpfc_sli4_hdw_queue *hdwq, uint32_t type,
 			uint32_t subtype);
 int32_t lpfc_mq_create(struct lpfc_hba *, struct lpfc_queue *,
 		       struct lpfc_queue *, uint32_t);
@@ -843,12 +1019,10 @@ int lpfc_rq_destroy(struct lpfc_hba *, struct lpfc_queue *,
 int lpfc_sli4_queue_setup(struct lpfc_hba *);
 void lpfc_sli4_queue_unset(struct lpfc_hba *);
 int lpfc_sli4_post_sgl(struct lpfc_hba *, dma_addr_t, dma_addr_t, uint16_t);
-int lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *);
-int lpfc_repost_nvme_sgl_list(struct lpfc_hba *phba);
+int lpfc_repost_io_sgl_list(struct lpfc_hba *phba);
 uint16_t lpfc_sli4_next_xritag(struct lpfc_hba *);
 void lpfc_sli4_free_xri(struct lpfc_hba *, int);
 int lpfc_sli4_post_async_mbox(struct lpfc_hba *);
-int lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *, struct list_head *, int);
 struct lpfc_cq_event *__lpfc_sli4_cq_event_alloc(struct lpfc_hba *);
 struct lpfc_cq_event *lpfc_sli4_cq_event_alloc(struct lpfc_hba *);
 void __lpfc_sli4_cq_event_release(struct lpfc_hba *, struct lpfc_cq_event *);
@@ -868,9 +1042,9 @@ int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
 void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
 void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
 void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
-			       struct sli4_wcqe_xri_aborted *);
+			       struct sli4_wcqe_xri_aborted *, int);
 void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
-				struct sli4_wcqe_xri_aborted *axri);
+				struct sli4_wcqe_xri_aborted *axri, int idx);
 void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 				 struct sli4_wcqe_xri_aborted *axri);
 void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
@@ -884,11 +1058,15 @@ int lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *);
 int lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba);
 int lpfc_sli4_init_vpi(struct lpfc_vport *);
 inline void lpfc_sli4_eq_clr_intr(struct lpfc_queue *);
-uint32_t lpfc_sli4_cq_release(struct lpfc_queue *, bool);
-uint32_t lpfc_sli4_eq_release(struct lpfc_queue *, bool);
+void lpfc_sli4_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			   uint32_t count, bool arm);
+void lpfc_sli4_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			   uint32_t count, bool arm);
 inline void lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q);
-uint32_t lpfc_sli4_if6_cq_release(struct lpfc_queue *q, bool arm);
-uint32_t lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm);
+void lpfc_sli4_if6_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			       uint32_t count, bool arm);
+void lpfc_sli4_if6_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			       uint32_t count, bool arm);
 void lpfc_sli4_fcfi_unreg(struct lpfc_hba *, uint16_t);
 int lpfc_sli4_fcf_scan_read_fcf_rec(struct lpfc_hba *, uint16_t);
 int lpfc_sli4_fcf_rr_read_fcf_rec(struct lpfc_hba *, uint16_t);
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 3f4398ffb567..43fd693cf042 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "12.0.0.10"
+#define LPFC_DRIVER_VERSION "12.2.0.0"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index 102a011ff6d4..343bc71d4615 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -313,11 +313,11 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
 		goto error_out;
 	}
 
-	/* NPIV is not supported if HBA has NVME enabled */
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+	/* NPIV is not supported if HBA has NVME Target enabled */
+	if (phba->nvmet_support) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
 				"3189 Create VPORT failed: "
-				"NPIV is not supported on NVME\n");
+				"NPIV is not supported on NVME Target\n");
 		rc = VPORT_INVAL;
 		goto error_out;
 	}
@@ -403,6 +403,9 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
 	/* Set the DFT_LUN_Q_DEPTH accordingly */
 	vport->cfg_lun_queue_depth  = phba->pport->cfg_lun_queue_depth;
 
+	/* Only the physical port can support NVME for now */
+	vport->cfg_enable_fc4_type = LPFC_ENABLE_FCP;
+
 	*(struct lpfc_vport **)fc_vport->dd_data = vport;
 	vport->fc_vport = fc_vport;
 
@@ -415,22 +418,6 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
 		vport->fdmi_port_mask = phba->pport->fdmi_port_mask;
 	}
 
-	if ((phba->nvmet_support == 0) &&
-	    ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-	     (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME))) {
-		/* Create NVME binding with nvme_fc_transport. This
-		 * ensures the vport is initialized.
-		 */
-		rc = lpfc_nvme_create_localport(vport);
-		if (rc) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"6003 %s status x%x\n",
-					"NVME registration failed, ",
-					rc);
-			goto error_out;
-		}
-	}
-
 	/*
 	 * In SLI4, the vpi must be activated before it can be used
 	 * by the port.