7 files changed, 729 insertions, 476 deletions
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 9fd2811ffa8b..0bc498172add 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -686,6 +686,7 @@ struct lpfc_hba {
 	struct lpfc_sli4_hba sli4_hba;
 
 	struct workqueue_struct *wq;
+	struct delayed_work     eq_delay_work;
 
 	struct lpfc_sli sli;
 	uint8_t pci_dev_grp;	/* lpfc PCI dev group: 0x0, 0x1, 0x2,... */
@@ -789,7 +790,6 @@ struct lpfc_hba {
 	uint8_t  nvmet_support;	/* driver supports NVMET */
 #define LPFC_NVMET_MAX_PORTS	32
 	uint8_t  mds_diags_support;
-	uint32_t initial_imax;
 	uint8_t  bbcredit_support;
 	uint8_t  enab_exp_wqcq_pages;
 
@@ -817,6 +817,8 @@ struct lpfc_hba {
 	uint32_t cfg_use_msi;
 	uint32_t cfg_auto_imax;
 	uint32_t cfg_fcp_imax;
+	uint32_t cfg_cq_poll_threshold;
+	uint32_t cfg_cq_max_proc_limit;
 	uint32_t cfg_fcp_cpu_map;
 	uint32_t cfg_hdw_queue;
 	uint32_t cfg_irq_chann;
@@ -1084,7 +1086,6 @@ struct lpfc_hba {
 
 	uint8_t temp_sensor_support;
 	/* Fields used for heart beat. */
-	unsigned long last_eqdelay_time;
 	unsigned long last_completion_time;
 	unsigned long skipped_hb;
 	struct timer_list hb_tmofunc;
@@ -1287,3 +1288,23 @@ lpfc_phba_elsring(struct lpfc_hba *phba)
 	}
 	return &phba->sli.sli3_ring[LPFC_ELS_RING];
 }
+
+/**
+ * lpfc_sli4_mod_hba_eq_delay - update EQ delay
+ * @phba: Pointer to HBA context object.
+ * @q: The Event Queue to update.
+ * @delay: The delay value (in us) to be written.
+ *
+ **/
+static inline void
+lpfc_sli4_mod_hba_eq_delay(struct lpfc_hba *phba, struct lpfc_queue *eq,
+			   u32 delay)
+{
+	struct lpfc_register reg_data;
+
+	reg_data.word0 = 0;
+	bf_set(lpfc_sliport_eqdelay_id, &reg_data, eq->queue_id);
+	bf_set(lpfc_sliport_eqdelay_delay, &reg_data, delay);
+	writel(reg_data.word0, phba->sli4_hba.u.if_type2.EQDregaddr);
+	eq->q_mode = delay;
+}
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index ed8caeefe3a2..2864cb53b1e8 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -4935,6 +4935,7 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
 	struct Scsi_Host *shost = class_to_shost(dev);
 	struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
 	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_eq_intr_info *eqi;
 	uint32_t usdelay;
 	int val = 0, i;
 
@@ -4956,8 +4957,18 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
 	if (val && (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX))
 		return -EINVAL;
 
+	phba->cfg_auto_imax = (val) ? 0 : 1;
+	if (phba->cfg_fcp_imax && !val) {
+		queue_delayed_work(phba->wq, &phba->eq_delay_work,
+				   msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+
+		for_each_present_cpu(i) {
+			eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
+			eqi->icnt = 0;
+		}
+	}
+
 	phba->cfg_fcp_imax = (uint32_t)val;
-	phba->initial_imax = phba->cfg_fcp_imax;
 
 	if (phba->cfg_fcp_imax)
 		usdelay = LPFC_SEC_TO_USEC / phba->cfg_fcp_imax;
@@ -5020,15 +5031,119 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val)
 
 static DEVICE_ATTR_RW(lpfc_fcp_imax);
 
+/**
+ * lpfc_cq_max_proc_limit_store
+ *
+ * @dev: class device that is converted into a Scsi_host.
+ * @attr: device attribute, not used.
+ * @buf: string with the cq max processing limit of cqes
+ * @count: unused variable.
+ *
+ * Description:
+ * If val is in a valid range, then set value on each cq
+ *
+ * Returns:
+ * The length of the buf: if successful
+ * -ERANGE: if val is not in the valid range
+ * -EINVAL: if bad value format or intended mode is not supported.
+ **/
+static ssize_t
+lpfc_cq_max_proc_limit_store(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_queue *eq, *cq;
+	unsigned long val;
+	int i;
+
+	/* cq_max_proc_limit is only valid for SLI4 */
+	if (phba->sli_rev != LPFC_SLI_REV4)
+		return -EINVAL;
+
+	/* Sanity check on user data */
+	if (!isdigit(buf[0]))
+		return -EINVAL;
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	if (val < LPFC_CQ_MIN_PROC_LIMIT || val > LPFC_CQ_MAX_PROC_LIMIT)
+		return -ERANGE;
+
+	phba->cfg_cq_max_proc_limit = (uint32_t)val;
+
+	/* set the values on the cq's */
+	for (i = 0; i < phba->cfg_irq_chann; i++) {
+		eq = phba->sli4_hba.hdwq[i].hba_eq;
+		if (!eq)
+			continue;
+
+		list_for_each_entry(cq, &eq->child_list, list)
+			cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit,
+						 cq->entry_count);
+	}
+
+	return strlen(buf);
+}
+
 /*
- * lpfc_auto_imax: Controls Auto-interrupt coalescing values support.
- *       0       No auto_imax support
- *       1       auto imax on
- * Auto imax will change the value of fcp_imax on a per EQ basis, using
- * the EQ Delay Multiplier, depending on the activity for that EQ.
- * Value range [0,1]. Default value is 1.
+ * lpfc_cq_max_proc_limit: The maximum number CQE entries processed in an
+ *   itteration of CQ processing.
  */
-LPFC_ATTR_RW(auto_imax, 1, 0, 1, "Enable Auto imax");
+static int lpfc_cq_max_proc_limit = LPFC_CQ_DEF_MAX_PROC_LIMIT;
+module_param(lpfc_cq_max_proc_limit, int, 0644);
+MODULE_PARM_DESC(lpfc_cq_max_proc_limit,
+	    "Set the maximum number CQEs processed in an iteration of "
+	    "CQ processing");
+lpfc_param_show(cq_max_proc_limit)
+
+/*
+ * lpfc_cq_poll_threshold: Set the threshold of CQE completions in a
+ *   single handler call which should request a polled completion rather
+ *   than re-enabling interrupts.
+ */
+LPFC_ATTR_RW(cq_poll_threshold, LPFC_CQ_DEF_THRESHOLD_TO_POLL,
+	     LPFC_CQ_MIN_THRESHOLD_TO_POLL,
+	     LPFC_CQ_MAX_THRESHOLD_TO_POLL,
+	     "CQE Processing Threshold to enable Polling");
+
+/**
+ * lpfc_cq_max_proc_limit_init - Set the initial cq max_proc_limit
+ * @phba: lpfc_hba pointer.
+ * @val: entry limit
+ *
+ * Description:
+ * If val is in a valid range, then initialize the adapter's maximum
+ * value.
+ *
+ * Returns:
+ *  Always returns 0 for success, even if value not always set to
+ *  requested value. If value out of range or not supported, will fall
+ *  back to default.
+ **/
+static int
+lpfc_cq_max_proc_limit_init(struct lpfc_hba *phba, int val)
+{
+	phba->cfg_cq_max_proc_limit = LPFC_CQ_DEF_MAX_PROC_LIMIT;
+
+	if (phba->sli_rev != LPFC_SLI_REV4)
+		return 0;
+
+	if (val >= LPFC_CQ_MIN_PROC_LIMIT && val <= LPFC_CQ_MAX_PROC_LIMIT) {
+		phba->cfg_cq_max_proc_limit = val;
+		return 0;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"0371 "LPFC_DRIVER_NAME"_cq_max_proc_limit: "
+			"%d out of range, using default\n",
+			phba->cfg_cq_max_proc_limit);
+
+	return 0;
+}
+
+static DEVICE_ATTR_RW(lpfc_cq_max_proc_limit);
 
 /**
  * lpfc_state_show - Display current driver CPU affinity
@@ -5788,8 +5903,9 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_use_msi,
 	&dev_attr_lpfc_nvme_oas,
 	&dev_attr_lpfc_nvme_embed_cmd,
-	&dev_attr_lpfc_auto_imax,
 	&dev_attr_lpfc_fcp_imax,
+	&dev_attr_lpfc_cq_poll_threshold,
+	&dev_attr_lpfc_cq_max_proc_limit,
 	&dev_attr_lpfc_fcp_cpu_map,
 	&dev_attr_lpfc_hdw_queue,
 	&dev_attr_lpfc_irq_chann,
@@ -6834,8 +6950,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_use_msi_init(phba, lpfc_use_msi);
 	lpfc_nvme_oas_init(phba, lpfc_nvme_oas);
 	lpfc_nvme_embed_cmd_init(phba, lpfc_nvme_embed_cmd);
-	lpfc_auto_imax_init(phba, lpfc_auto_imax);
 	lpfc_fcp_imax_init(phba, lpfc_fcp_imax);
+	lpfc_cq_poll_threshold_init(phba, lpfc_cq_poll_threshold);
+	lpfc_cq_max_proc_limit_init(phba, lpfc_cq_max_proc_limit);
 	lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map);
 	lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset);
 	lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat);
@@ -6888,9 +7005,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 			phba->cfg_enable_fc4_type |= LPFC_ENABLE_FCP;
 	}
 
-	if (phba->cfg_auto_imax && !phba->cfg_fcp_imax)
-		phba->cfg_auto_imax = 0;
-	phba->initial_imax = phba->cfg_fcp_imax;
+	phba->cfg_auto_imax = (phba->cfg_fcp_imax) ? 0 : 1;
 
 	phba->cfg_enable_pbde = 0;
 
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 72076b2cd4ff..16b461778e59 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -3766,10 +3766,10 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype,
 			(unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+			"HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index, qp->entry_repost);
+			qp->hba_index, qp->notify_interval);
 	len +=  snprintf(pbuffer + len,
 			LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 	return len;
@@ -3819,10 +3819,10 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+			"HST-IDX[%04d], NTFI[%03d], PLMT[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index, qp->entry_repost);
+			qp->notify_interval, qp->max_proc_limit);
 
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
@@ -3845,15 +3845,15 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
+			"HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]\n",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index, qp->entry_repost);
+			qp->host_index, qp->hba_index, qp->notify_interval);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
+			"HST-IDX[%04d], PRT-IDX[%04d], NTFI[%03d]\n",
 			datqp->queue_id, datqp->entry_count,
 			datqp->entry_size, datqp->host_index,
-			datqp->hba_index, datqp->entry_repost);
+			datqp->hba_index, datqp->notify_interval);
 	return len;
 }
 
@@ -3934,10 +3934,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,
 			(unsigned long long)qp->q_cnt_4, qp->q_mode);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
-			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d] AFFIN[%03d]",
+			"HST-IDX[%04d], NTFI[%03d], PLMT[%03d], AFFIN[%03d]",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index, qp->entry_repost,
-			qp->chann);
+			qp->host_index, qp->notify_interval,
+			qp->max_proc_limit, qp->chann);
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
 	return len;
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 665852291a4f..c9a056ef321a 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -208,7 +208,14 @@ struct lpfc_sli_intf {
 /* Configuration of Interrupts / sec for entire HBA port */
 #define LPFC_MIN_IMAX          5000
 #define LPFC_MAX_IMAX          5000000
-#define LPFC_DEF_IMAX          150000
+#define LPFC_DEF_IMAX          0
+
+#define LPFC_IMAX_THRESHOLD    1000
+#define LPFC_MAX_AUTO_EQ_DELAY 120
+#define LPFC_EQ_DELAY_STEP     15
+#define LPFC_EQD_ISR_TRIGGER   20000
+/* 1s intervals */
+#define LPFC_EQ_DELAY_MSECS    1000
 
 #define LPFC_MIN_CPU_MAP       0
 #define LPFC_MAX_CPU_MAP       1
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 22aec1a9b689..3fa7240a8e31 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1251,6 +1251,69 @@ lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
 	return;
 }
 
+static void
+lpfc_hb_eq_delay_work(struct work_struct *work)
+{
+	struct lpfc_hba *phba = container_of(to_delayed_work(work),
+					     struct lpfc_hba, eq_delay_work);
+	struct lpfc_eq_intr_info *eqi, *eqi_new;
+	struct lpfc_queue *eq, *eq_next;
+	unsigned char *eqcnt = NULL;
+	uint32_t usdelay;
+	int i;
+
+	if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING)
+		return;
+
+	if (phba->link_state == LPFC_HBA_ERROR ||
+	    phba->pport->fc_flag & FC_OFFLINE_MODE)
+		goto requeue;
+
+	eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char),
+			GFP_KERNEL);
+	if (!eqcnt)
+		goto requeue;
+
+	for (i = 0; i < phba->cfg_irq_chann; i++) {
+		eq = phba->sli4_hba.hdwq[i].hba_eq;
+		if (eq && eqcnt[eq->last_cpu] < 2)
+			eqcnt[eq->last_cpu]++;
+		continue;
+	}
+
+	for_each_present_cpu(i) {
+		if (phba->cfg_irq_chann > 1 && eqcnt[i] < 2)
+			continue;
+
+		eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
+
+		usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) *
+			   LPFC_EQ_DELAY_STEP;
+		if (usdelay > LPFC_MAX_AUTO_EQ_DELAY)
+			usdelay = LPFC_MAX_AUTO_EQ_DELAY;
+
+		eqi->icnt = 0;
+
+		list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) {
+			if (eq->last_cpu != i) {
+				eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info,
+						      eq->last_cpu);
+				list_move_tail(&eq->cpu_list, &eqi_new->list);
+				continue;
+			}
+			if (usdelay != eq->q_mode)
+				lpfc_modify_hba_eq_delay(phba, eq->hdwq, 1,
+							 usdelay);
+		}
+	}
+
+	kfree(eqcnt);
+
+requeue:
+	queue_delayed_work(phba->wq, &phba->eq_delay_work,
+			   msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+}
+
 /**
  * lpfc_hb_mxp_handler - Multi-XRI pools handler to adjust XRI distribution
  * @phba: pointer to lpfc hba data structure.
@@ -1303,16 +1366,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 	int retval, i;
 	struct lpfc_sli *psli = &phba->sli;
 	LIST_HEAD(completions);
-	struct lpfc_queue *qp;
-	unsigned long time_elapsed;
-	uint32_t tick_cqe, max_cqe, val;
-	uint64_t tot, data1, data2, data3;
-	struct lpfc_nvmet_tgtport *tgtp;
-	struct lpfc_register reg_data;
-	struct nvme_fc_local_port *localport;
-	struct lpfc_nvme_lport *lport;
-	struct lpfc_fc4_ctrl_stat *cstat;
-	void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr;
 
 	if (phba->cfg_xri_rebalancing) {
 		/* Multi-XRI pools handler */
@@ -1332,104 +1385,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 		(phba->pport->fc_flag & FC_OFFLINE_MODE))
 		return;
 
-	if (phba->cfg_auto_imax) {
-		if (!phba->last_eqdelay_time) {
-			phba->last_eqdelay_time = jiffies;
-			goto skip_eqdelay;
-		}
-		time_elapsed = jiffies - phba->last_eqdelay_time;
-		phba->last_eqdelay_time = jiffies;
-
-		tot = 0xffff;
-		/* Check outstanding IO count */
-		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
-			if (phba->nvmet_support) {
-				tgtp = phba->targetport->private;
-				/* Calculate outstanding IOs */
-				tot = atomic_read(&tgtp->rcv_fcp_cmd_drop);
-				tot += atomic_read(&tgtp->xmt_fcp_release);
-				tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot;
-			} else {
-				localport = phba->pport->localport;
-				if (!localport || !localport->private)
-					goto skip_eqdelay;
-				lport = (struct lpfc_nvme_lport *)
-					localport->private;
-				tot = 0;
-				for (i = 0;
-					i < phba->cfg_hdw_queue; i++) {
-					cstat =
-					     &phba->sli4_hba.hdwq[i].nvme_cstat;
-					data1 = cstat->input_requests;
-					data2 = cstat->output_requests;
-					data3 = cstat->control_requests;
-					tot += (data1 + data2 + data3);
-					tot -= cstat->io_cmpls;
-				}
-			}
-		}
-
-		/* Interrupts per sec per EQ */
-		val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
-		tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */
-
-		/* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */
-		max_cqe = time_elapsed * tick_cqe;
-
-		for (i = 0; i < phba->cfg_irq_chann; i++) {
-			/* Fast-path EQ */
-			qp = phba->sli4_hba.hdwq[i].hba_eq;
-			if (!qp)
-				continue;
-
-			/* Use no EQ delay if we don't have many outstanding
-			 * IOs, or if we are only processing 1 CQE/ISR or less.
-			 * Otherwise, assume we can process up to lpfc_fcp_imax
-			 * interrupts per HBA.
-			 */
-			if (tot < LPFC_NODELAY_MAX_IO ||
-			    qp->EQ_cqe_cnt <= max_cqe)
-				val = 0;
-			else
-				val = phba->cfg_fcp_imax;
-
-			if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
-				/* Use EQ Delay Register method */
-
-				/* Convert for EQ Delay register */
-				if (val) {
-					/* First, interrupts per sec per EQ */
-					val = phba->cfg_fcp_imax /
-						phba->cfg_irq_chann;
-
-					/* us delay between each interrupt */
-					val = LPFC_SEC_TO_USEC / val;
-				}
-				if (val != qp->q_mode) {
-					reg_data.word0 = 0;
-					bf_set(lpfc_sliport_eqdelay_id,
-					       &reg_data, qp->queue_id);
-					bf_set(lpfc_sliport_eqdelay_delay,
-					       &reg_data, val);
-					writel(reg_data.word0, eqdreg);
-				}
-			} else {
-				/* Use mbox command method */
-				if (val != qp->q_mode)
-					lpfc_modify_hba_eq_delay(phba, i,
-								 1, val);
-			}
-
-			/*
-			 * val is cfg_fcp_imax or 0 for mbox delay or us delay
-			 * between interrupts for EQDR.
-			 */
-			qp->q_mode = val;
-			qp->EQ_cqe_cnt = 0;
-		}
-	}
-
-skip_eqdelay:
 	spin_lock_irq(&phba->pport->work_port_lock);
 
 	if (time_after(phba->last_completion_time +
@@ -2986,6 +2941,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba)
 {
 	if (phba->pport)
 		lpfc_stop_vport_timers(phba->pport);
+	cancel_delayed_work_sync(&phba->eq_delay_work);
 	del_timer_sync(&phba->sli.mbox_tmo);
 	del_timer_sync(&phba->fabric_block_timer);
 	del_timer_sync(&phba->eratt_poll);
@@ -6234,6 +6190,8 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
 	/* Heartbeat timer */
 	timer_setup(&phba->hb_tmofunc, lpfc_hb_timeout, 0);
 
+	INIT_DELAYED_WORK(&phba->eq_delay_work, lpfc_hb_eq_delay_work);
+
 	return 0;
 }
 
@@ -6849,6 +6807,13 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		goto out_free_hba_eq_hdl;
 	}
 
+	phba->sli4_hba.eq_info = alloc_percpu(struct lpfc_eq_intr_info);
+	if (!phba->sli4_hba.eq_info) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"3321 Failed allocation for per_cpu stats\n");
+		rc = -ENOMEM;
+		goto out_free_hba_cpu_map;
+	}
 	/*
 	 * Enable sr-iov virtual functions if supported and configured
 	 * through the module parameter.
@@ -6868,6 +6833,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 
 	return 0;
 
+out_free_hba_cpu_map:
+	kfree(phba->sli4_hba.cpu_map);
 out_free_hba_eq_hdl:
 	kfree(phba->sli4_hba.hba_eq_hdl);
 out_free_fcf_rr_bmask:
@@ -6897,6 +6864,8 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
 {
 	struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
 
+	free_percpu(phba->sli4_hba.eq_info);
+
 	/* Free memory allocated for msi-x interrupt vector to CPU mapping */
 	kfree(phba->sli4_hba.cpu_map);
 	phba->sli4_hba.num_present_cpu = 0;
@@ -8753,6 +8722,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	struct lpfc_queue *qdesc;
 	int idx, eqidx;
 	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_eq_intr_info *eqi;
 
 	/*
 	 * Create HBA Record arrays.
@@ -8865,6 +8835,9 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 		qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
 						    LPFC_FIND_BY_EQ);
 		phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
+		qdesc->last_cpu = qdesc->chann;
+		eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
+		list_add(&qdesc->cpu_list, &eqi->list);
 	}
 
 
@@ -10246,13 +10219,13 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
 	case LPFC_SLI_INTF_IF_TYPE_0:
 	case LPFC_SLI_INTF_IF_TYPE_2:
 		phba->sli4_hba.sli4_eq_clr_intr = lpfc_sli4_eq_clr_intr;
-		phba->sli4_hba.sli4_eq_release = lpfc_sli4_eq_release;
-		phba->sli4_hba.sli4_cq_release = lpfc_sli4_cq_release;
+		phba->sli4_hba.sli4_write_eq_db = lpfc_sli4_write_eq_db;
+		phba->sli4_hba.sli4_write_cq_db = lpfc_sli4_write_cq_db;
 		break;
 	case LPFC_SLI_INTF_IF_TYPE_6:
 		phba->sli4_hba.sli4_eq_clr_intr = lpfc_sli4_if6_eq_clr_intr;
-		phba->sli4_hba.sli4_eq_release = lpfc_sli4_if6_eq_release;
-		phba->sli4_hba.sli4_cq_release = lpfc_sli4_if6_cq_release;
+		phba->sli4_hba.sli4_write_eq_db = lpfc_sli4_if6_write_eq_db;
+		phba->sli4_hba.sli4_write_cq_db = lpfc_sli4_if6_write_cq_db;
 		break;
 	default:
 		break;
@@ -10773,6 +10746,14 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 		cpup++;
 	}
 
+	for_each_possible_cpu(i) {
+		struct lpfc_eq_intr_info *eqi =
+			per_cpu_ptr(phba->sli4_hba.eq_info, i);
+
+		INIT_LIST_HEAD(&eqi->list);
+		eqi->icnt = 0;
+	}
+
 	/*
 	 * If the number of IRQ vectors == number of CPUs,
 	 * mapping is pretty simple: 1 to 1.
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index a3477cecb0c9..ec385d3413f2 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -78,12 +78,13 @@ static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *,
 				      struct hbq_dmabuf *);
 static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
 					  struct hbq_dmabuf *dmabuf);
-static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *,
-				    struct lpfc_cqe *);
+static bool lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba,
+				   struct lpfc_queue *cq, struct lpfc_cqe *cqe);
 static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
 				       int);
 static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba,
-				     struct lpfc_eqe *eqe, uint32_t qidx);
+				     struct lpfc_queue *eq,
+				     struct lpfc_eqe *eqe);
 static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba);
 static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba);
 static int lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba,
@@ -160,7 +161,7 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 *wqe)
 	}
 	q->WQ_posted++;
 	/* set consumption flag every once in a while */
-	if (!((q->host_index + 1) % q->entry_repost))
+	if (!((q->host_index + 1) % q->notify_interval))
 		bf_set(wqe_wqec, &wqe->generic.wqe_com, 1);
 	else
 		bf_set(wqe_wqec, &wqe->generic.wqe_com, 0);
@@ -325,29 +326,16 @@ lpfc_sli4_mq_release(struct lpfc_queue *q)
 static struct lpfc_eqe *
 lpfc_sli4_eq_get(struct lpfc_queue *q)
 {
-	struct lpfc_hba *phba;
 	struct lpfc_eqe *eqe;
-	uint32_t idx;
 
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	phba = q->phba;
-	eqe = q->qe[q->hba_index].eqe;
+	eqe = q->qe[q->host_index].eqe;
 
 	/* If the next EQE is not valid then we are done */
 	if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
 		return NULL;
-	/* If the host has not yet processed the next entry then we are done */
-	idx = ((q->hba_index + 1) % q->entry_count);
-	if (idx == q->host_index)
-		return NULL;
-
-	q->hba_index = idx;
-	/* if the index wrapped around, toggle the valid bit */
-	if (phba->sli4_hba.pc_sli4_params.eqav && !q->hba_index)
-		q->qe_valid = (q->qe_valid) ? 0 : 1;
-
 
 	/*
 	 * insert barrier for instruction interlock : data from the hardware
@@ -397,44 +385,25 @@ lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q)
 }
 
 /**
- * lpfc_sli4_eq_release - Indicates the host has finished processing an EQ
+ * lpfc_sli4_write_eq_db - write EQ DB for eqe's consumed or arm state
+ * @phba: adapter with EQ
  * @q: The Event Queue that the host has completed processing for.
+ * @count: Number of elements that have been consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Event Queue Entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of EQEs that were popped.
+ * This routine will notify the HBA, by ringing the doorbell, that count
+ * number of EQEs have been processed. The @arm parameter indicates whether
+ * the queue should be rearmed when ringing the doorbell.
  **/
-uint32_t
-lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+		     uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_eqe *temp_eqe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.eqav) {
-			temp_eqe = q->qe[q->host_index].eqe;
-			bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
@@ -442,7 +411,7 @@ lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
 		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
 		bf_set(lpfc_eqcq_doorbell_eqci, &doorbell, 1);
 	}
-	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_EVENT);
 	bf_set(lpfc_eqcq_doorbell_eqid_hi, &doorbell,
 			(q->queue_id >> LPFC_EQID_HI_FIELD_SHIFT));
@@ -451,60 +420,112 @@ lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
 	/* PCI read to flush PCI pipeline on re-arming for INTx mode */
 	if ((q->phba->intr_type == INTx) && (arm == LPFC_QUEUE_REARM))
 		readl(q->phba->sli4_hba.EQDBregaddr);
-	return released;
 }
 
 /**
- * lpfc_sli4_if6_eq_release - Indicates the host has finished processing an EQ
+ * lpfc_sli4_if6_write_eq_db - write EQ DB for eqe's consumed or arm state
+ * @phba: adapter with EQ
  * @q: The Event Queue that the host has completed processing for.
+ * @count: Number of elements that have been consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Event Queue Entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of EQEs that were popped.
+ * This routine will notify the HBA, by ringing the doorbell, that count
+ * number of EQEs have been processed. The @arm parameter indicates whether
+ * the queue should be rearmed when ringing the doorbell.
  **/
-uint32_t
-lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_if6_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			  uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_eqe *temp_eqe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.eqav) {
-			temp_eqe = q->qe[q->host_index].eqe;
-			bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_if6_eq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_if6_eq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_if6_eq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_if6_eq_doorbell_eqid, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.EQDBregaddr);
 	/* PCI read to flush PCI pipeline on re-arming for INTx mode */
 	if ((q->phba->intr_type == INTx) && (arm == LPFC_QUEUE_REARM))
 		readl(q->phba->sli4_hba.EQDBregaddr);
-	return released;
+}
+
+static void
+__lpfc_sli4_consume_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq,
+			struct lpfc_eqe *eqe)
+{
+	if (!phba->sli4_hba.pc_sli4_params.eqav)
+		bf_set_le32(lpfc_eqe_valid, eqe, 0);
+
+	eq->host_index = ((eq->host_index + 1) % eq->entry_count);
+
+	/* if the index wrapped around, toggle the valid bit */
+	if (phba->sli4_hba.pc_sli4_params.eqav && !eq->host_index)
+		eq->qe_valid = (eq->qe_valid) ? 0 : 1;
+}
+
+static void
+lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	struct lpfc_eqe *eqe;
+	uint32_t count = 0;
+
+	/* walk all the EQ entries and drop on the floor */
+	eqe = lpfc_sli4_eq_get(eq);
+	while (eqe) {
+		__lpfc_sli4_consume_eqe(phba, eq, eqe);
+		count++;
+		eqe = lpfc_sli4_eq_get(eq);
+	}
+
+	/* Clear and re-arm the EQ */
+	phba->sli4_hba.sli4_write_eq_db(phba, eq, count, LPFC_QUEUE_REARM);
+}
+
+static int
+lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	struct lpfc_eqe *eqe;
+	int count = 0, consumed = 0;
+
+	if (cmpxchg(&eq->queue_claimed, 0, 1) != 0)
+		goto rearm_and_exit;
+
+	eqe = lpfc_sli4_eq_get(eq);
+	while (eqe) {
+		lpfc_sli4_hba_handle_eqe(phba, eq, eqe);
+		__lpfc_sli4_consume_eqe(phba, eq, eqe);
+
+		consumed++;
+		if (!(++count % eq->max_proc_limit))
+			break;
+
+		if (!(count % eq->notify_interval)) {
+			phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed,
+							LPFC_QUEUE_NOARM);
+			consumed = 0;
+		}
+
+		eqe = lpfc_sli4_eq_get(eq);
+	}
+	eq->EQ_processed += count;
+
+	/* Track the max number of EQEs processed in 1 intr */
+	if (count > eq->EQ_max_eqe)
+		eq->EQ_max_eqe = count;
+
+	eq->queue_claimed = 0;
+
+rearm_and_exit:
+	/* Always clear and re-arm the EQ */
+	phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, LPFC_QUEUE_REARM);
+
+	return count;
 }
 
 /**
@@ -519,28 +540,16 @@ lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
 static struct lpfc_cqe *
 lpfc_sli4_cq_get(struct lpfc_queue *q)
 {
-	struct lpfc_hba *phba;
 	struct lpfc_cqe *cqe;
-	uint32_t idx;
 
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	phba = q->phba;
-	cqe = q->qe[q->hba_index].cqe;
+	cqe = q->qe[q->host_index].cqe;
 
 	/* If the next CQE is not valid then we are done */
 	if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
 		return NULL;
-	/* If the host has not yet processed the next entry then we are done */
-	idx = ((q->hba_index + 1) % q->entry_count);
-	if (idx == q->host_index)
-		return NULL;
-
-	q->hba_index = idx;
-	/* if the index wrapped around, toggle the valid bit */
-	if (phba->sli4_hba.pc_sli4_params.cqav && !q->hba_index)
-		q->qe_valid = (q->qe_valid) ? 0 : 1;
 
 	/*
 	 * insert barrier for instruction interlock : data from the hardware
@@ -554,107 +563,81 @@ lpfc_sli4_cq_get(struct lpfc_queue *q)
 	return cqe;
 }
 
+static void
+__lpfc_sli4_consume_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			struct lpfc_cqe *cqe)
+{
+	if (!phba->sli4_hba.pc_sli4_params.cqav)
+		bf_set_le32(lpfc_cqe_valid, cqe, 0);
+
+	cq->host_index = ((cq->host_index + 1) % cq->entry_count);
+
+	/* if the index wrapped around, toggle the valid bit */
+	if (phba->sli4_hba.pc_sli4_params.cqav && !cq->host_index)
+		cq->qe_valid = (cq->qe_valid) ? 0 : 1;
+}
+
 /**
- * lpfc_sli4_cq_release - Indicates the host has finished processing a CQ
+ * lpfc_sli4_write_cq_db - write cq DB for entries consumed or arm state.
+ * @phba: the adapter with the CQ
  * @q: The Completion Queue that the host has completed processing for.
+ * @count: the number of elements that were consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Completion queue entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of CQEs that were released.
+ * This routine will notify the HBA, by ringing the doorbell, that the
+ * CQEs have been processed. The @arm parameter specifies whether the
+ * queue should be rearmed when ringing the doorbell.
  **/
-uint32_t
-lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+		     uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_cqe *temp_qe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.cqav) {
-			temp_qe = q->qe[q->host_index].cqe;
-			bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_COMPLETION);
 	bf_set(lpfc_eqcq_doorbell_cqid_hi, &doorbell,
 			(q->queue_id >> LPFC_CQID_HI_FIELD_SHIFT));
 	bf_set(lpfc_eqcq_doorbell_cqid_lo, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.CQDBregaddr);
-	return released;
 }
 
 /**
- * lpfc_sli4_if6_cq_release - Indicates the host has finished processing a CQ
+ * lpfc_sli4_if6_write_cq_db - write cq DB for entries consumed or arm state.
+ * @phba: the adapter with the CQ
  * @q: The Completion Queue that the host has completed processing for.
+ * @count: the number of elements that were consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Completion queue entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of CQEs that were released.
+ * This routine will notify the HBA, by ringing the doorbell, that the
+ * CQEs have been processed. The @arm parameter specifies whether the
+ * queue should be rearmed when ringing the doorbell.
  **/
-uint32_t
-lpfc_sli4_if6_cq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_if6_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			 uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_cqe *temp_qe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.cqav) {
-			temp_qe = q->qe[q->host_index].cqe;
-			bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_if6_cq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_if6_cq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_if6_cq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_if6_cq_doorbell_cqid, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.CQDBregaddr);
-	return released;
 }
 
 /**
@@ -703,15 +686,15 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 	hq->RQ_buf_posted++;
 
 	/* Ring The Header Receive Queue Doorbell */
-	if (!(hq->host_index % hq->entry_repost)) {
+	if (!(hq->host_index % hq->notify_interval)) {
 		doorbell.word0 = 0;
 		if (hq->db_format == LPFC_DB_RING_FORMAT) {
 			bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell,
-			       hq->entry_repost);
+			       hq->notify_interval);
 			bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id);
 		} else if (hq->db_format == LPFC_DB_LIST_FORMAT) {
 			bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell,
-			       hq->entry_repost);
+			       hq->notify_interval);
 			bf_set(lpfc_rq_db_list_fm_index, &doorbell,
 			       hq->host_index);
 			bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id);
@@ -5571,30 +5554,30 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
 	struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
 	struct lpfc_sli4_hdw_queue *qp;
 
-	sli4_hba->sli4_cq_release(sli4_hba->mbx_cq, LPFC_QUEUE_REARM);
-	sli4_hba->sli4_cq_release(sli4_hba->els_cq, LPFC_QUEUE_REARM);
+	sli4_hba->sli4_write_cq_db(phba, sli4_hba->mbx_cq, 0, LPFC_QUEUE_REARM);
+	sli4_hba->sli4_write_cq_db(phba, sli4_hba->els_cq, 0, LPFC_QUEUE_REARM);
 	if (sli4_hba->nvmels_cq)
-		sli4_hba->sli4_cq_release(sli4_hba->nvmels_cq,
-						LPFC_QUEUE_REARM);
+		sli4_hba->sli4_write_cq_db(phba, sli4_hba->nvmels_cq, 0,
+					   LPFC_QUEUE_REARM);
 
 	qp = sli4_hba->hdwq;
 	if (sli4_hba->hdwq) {
 		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-			sli4_hba->sli4_cq_release(qp[qidx].fcp_cq,
-						LPFC_QUEUE_REARM);
-			sli4_hba->sli4_cq_release(qp[qidx].nvme_cq,
-						LPFC_QUEUE_REARM);
+			sli4_hba->sli4_write_cq_db(phba, qp[qidx].fcp_cq, 0,
+						   LPFC_QUEUE_REARM);
+			sli4_hba->sli4_write_cq_db(phba, qp[qidx].nvme_cq, 0,
+						   LPFC_QUEUE_REARM);
 		}
 
 		for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++)
-			sli4_hba->sli4_eq_release(qp[qidx].hba_eq,
-						LPFC_QUEUE_REARM);
+			sli4_hba->sli4_write_eq_db(phba, qp[qidx].hba_eq,
+						0, LPFC_QUEUE_REARM);
 	}
 
 	if (phba->nvmet_support) {
 		for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) {
-			sli4_hba->sli4_cq_release(
-				sli4_hba->nvmet_cqset[qidx],
+			sli4_hba->sli4_write_cq_db(phba,
+				sli4_hba->nvmet_cqset[qidx], 0,
 				LPFC_QUEUE_REARM);
 		}
 	}
@@ -7698,6 +7681,11 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 	phba->hb_outstanding = 0;
 	phba->last_completion_time = jiffies;
 
+	/* start eq_delay heartbeat */
+	if (phba->cfg_auto_imax)
+		queue_delayed_work(phba->wq, &phba->eq_delay_work,
+				   msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+
 	/* Start error attention (ERATT) polling timer */
 	mod_timer(&phba->eratt_poll,
 		  jiffies + msecs_to_jiffies(1000 * phba->eratt_poll_interval));
@@ -7869,7 +7857,6 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 	struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
 	uint32_t eqidx;
 	struct lpfc_queue *fpeq = NULL;
-	struct lpfc_eqe *eqe;
 	bool mbox_pending;
 
 	if (unlikely(!phba) || (phba->sli_rev != LPFC_SLI_REV4))
@@ -7903,14 +7890,11 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 	 */
 
 	if (mbox_pending)
-		while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-			lpfc_sli4_hba_handle_eqe(phba, eqe, eqidx);
-			fpeq->EQ_processed++;
-		}
-
-	/* Always clear and re-arm the EQ */
-
-	sli4_hba->sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+		/* process and rearm the EQ */
+		lpfc_sli4_process_eq(phba, fpeq);
+	else
+		/* Always clear and re-arm the EQ */
+		sli4_hba->sli4_write_eq_db(phba, fpeq, 0, LPFC_QUEUE_REARM);
 
 	return mbox_pending;
 
@@ -13265,11 +13249,14 @@ out_no_mqe_complete:
  * Return: true if work posted to worker thread, otherwise false.
  **/
 static bool
-lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
+lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			 struct lpfc_cqe *cqe)
 {
 	struct lpfc_mcqe mcqe;
 	bool workposted;
 
+	cq->CQ_mbox++;
+
 	/* Copy the mailbox MCQE and convert endian order as needed */
 	lpfc_sli4_pcimem_bcopy(cqe, &mcqe, sizeof(struct lpfc_mcqe));
 
@@ -13528,7 +13515,7 @@ out:
  * lpfc_sli4_sp_handle_cqe - Process a slow path completion queue entry
  * @phba: Pointer to HBA context object.
  * @cq: Pointer to the completion queue.
- * @wcqe: Pointer to a completion queue entry.
+ * @cqe: Pointer to a completion queue entry.
  *
  * This routine process a slow-path work-queue or receive queue completion queue
  * entry.
@@ -13628,60 +13615,129 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 }
 
 /**
- * lpfc_sli4_sp_process_cq - Process a slow-path event queue entry
+ * __lpfc_sli4_process_cq - Process elements of a CQ
  * @phba: Pointer to HBA context object.
+ * @cq: Pointer to CQ to be processed
+ * @handler: Routine to process each cqe
+ * @delay: Pointer to usdelay to set in case of rescheduling of the handler
  *
- * This routine process a event queue entry from the slow-path event queue.
- * It will check the MajorCode and MinorCode to determine this is for a
- * completion event on a completion queue, if not, an error shall be logged
- * and just return. Otherwise, it will get to the corresponding completion
- * queue and process all the entries on that completion queue, rearm the
- * completion queue, and then return.
+ * This routine processes completion queue entries in a CQ. While a valid
+ * queue element is found, the handler is called. During processing checks
+ * are made for periodic doorbell writes to let the hardware know of
+ * element consumption.
  *
+ * If the max limit on cqes to process is hit, or there are no more valid
+ * entries, the loop stops. If we processed a sufficient number of elements,
+ * meaning there is sufficient load, rather than rearming and generating
+ * another interrupt, a cq rescheduling delay will be set. A delay of 0
+ * indicates no rescheduling.
+ *
+ * Returns True if work scheduled, False otherwise.
  **/
-static void
-lpfc_sli4_sp_process_cq(struct work_struct *work)
+static bool
+__lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq,
+	bool (*handler)(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_cqe *), unsigned long *delay)
 {
-	struct lpfc_queue *cq =
-		container_of(work, struct lpfc_queue, spwork);
-	struct lpfc_hba *phba = cq->phba;
 	struct lpfc_cqe *cqe;
 	bool workposted = false;
-	int ccount = 0;
+	int count = 0, consumed = 0;
+	bool arm = true;
+
+	/* default - no reschedule */
+	*delay = 0;
+
+	if (cmpxchg(&cq->queue_claimed, 0, 1) != 0)
+		goto rearm_and_exit;
 
 	/* Process all the entries to the CQ */
+	cqe = lpfc_sli4_cq_get(cq);
+	while (cqe) {
+#if defined(CONFIG_SCSI_LPFC_DEBUG_FS) && defined(BUILD_NVME)
+		if (phba->ktime_on)
+			cq->isr_timestamp = ktime_get_ns();
+		else
+			cq->isr_timestamp = 0;
+#endif
+		workposted |= handler(phba, cq, cqe);
+		__lpfc_sli4_consume_cqe(phba, cq, cqe);
+
+		consumed++;
+		if (!(++count % cq->max_proc_limit))
+			break;
+
+		if (!(count % cq->notify_interval)) {
+			phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed,
+						LPFC_QUEUE_NOARM);
+			consumed = 0;
+		}
+
+		cqe = lpfc_sli4_cq_get(cq);
+	}
+	if (count >= phba->cfg_cq_poll_threshold) {
+		*delay = 1;
+		arm = false;
+	}
+
+	/* Track the max number of CQEs processed in 1 EQ */
+	if (count > cq->CQ_max_cqe)
+		cq->CQ_max_cqe = count;
+
+	cq->assoc_qp->EQ_cqe_cnt += count;
+
+	/* Catch the no cq entry condition */
+	if (unlikely(count == 0))
+		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+				"0369 No entry from completion queue "
+				"qid=%d\n", cq->queue_id);
+
+	cq->queue_claimed = 0;
+
+rearm_and_exit:
+	phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed,
+			arm ?  LPFC_QUEUE_REARM : LPFC_QUEUE_NOARM);
+
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_process_cq - Process a slow-path event queue entry
+ * @cq: pointer to CQ to process
+ *
+ * This routine calls the cq processing routine with a handler specific
+ * to the type of queue bound to it.
+ *
+ * The CQ routine returns two values: the first is the calling status,
+ * which indicates whether work was queued to the  background discovery
+ * thread. If true, the routine should wakeup the discovery thread;
+ * the second is the delay parameter. If non-zero, rather than rearming
+ * the CQ and yet another interrupt, the CQ handler should be queued so
+ * that it is processed in a subsequent polling action. The value of
+ * the delay indicates when to reschedule it.
+ **/
+static void
+__lpfc_sli4_sp_process_cq(struct lpfc_queue *cq)
+{
+	struct lpfc_hba *phba = cq->phba;
+	unsigned long delay;
+	bool workposted = false;
+
+	/* Process and rearm the CQ */
 	switch (cq->type) {
 	case LPFC_MCQ:
-		while ((cqe = lpfc_sli4_cq_get(cq))) {
-			workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
-			if (!(++ccount % cq->entry_repost))
-				break;
-			cq->CQ_mbox++;
-		}
+		workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_sp_handle_mcqe,
+						&delay);
 		break;
 	case LPFC_WCQ:
-		while ((cqe = lpfc_sli4_cq_get(cq))) {
-			if (cq->subtype == LPFC_FCP ||
-			    cq->subtype == LPFC_NVME) {
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-				if (phba->ktime_on)
-					cq->isr_timestamp = ktime_get_ns();
-				else
-					cq->isr_timestamp = 0;
-#endif
-				workposted |= lpfc_sli4_fp_handle_cqe(phba, cq,
-								       cqe);
-			} else {
-				workposted |= lpfc_sli4_sp_handle_cqe(phba, cq,
-								      cqe);
-			}
-			if (!(++ccount % cq->entry_repost))
-				break;
-		}
-
-		/* Track the max number of CQEs processed in 1 EQ */
-		if (ccount > cq->CQ_max_cqe)
-			cq->CQ_max_cqe = ccount;
+		if (cq->subtype == LPFC_FCP || cq->subtype == LPFC_NVME)
+			workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_fp_handle_cqe,
+						&delay);
+		else
+			workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_sp_handle_cqe,
+						&delay);
 		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
@@ -13690,14 +13746,14 @@ lpfc_sli4_sp_process_cq(struct work_struct *work)
 		return;
 	}
 
-	/* Catch the no cq entry condition, log an error */
-	if (unlikely(ccount == 0))
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0371 No entry from the CQ: identifier "
-				"(x%x), type (%d)\n", cq->queue_id, cq->type);
-
-	/* In any case, flash and re-arm the RCQ */
-	phba->sli4_hba.sli4_cq_release(cq, LPFC_QUEUE_REARM);
+	if (delay) {
+		if (!queue_delayed_work_on(cq->chann, phba->wq,
+					   &cq->sched_spwork, delay))
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0394 Cannot schedule soft IRQ "
+				"for cqid=%d on CPU %d\n",
+				cq->queue_id, cq->chann);
+	}
 
 	/* wake up worker thread if there are works to be done */
 	if (workposted)
@@ -13705,6 +13761,36 @@ lpfc_sli4_sp_process_cq(struct work_struct *work)
 }
 
 /**
+ * lpfc_sli4_sp_process_cq - slow-path work handler when started by
+ *   interrupt
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the slow-path handler.
+ **/
+static void
+lpfc_sli4_sp_process_cq(struct work_struct *work)
+{
+	struct lpfc_queue *cq = container_of(work, struct lpfc_queue, spwork);
+
+	__lpfc_sli4_sp_process_cq(cq);
+}
+
+/**
+ * lpfc_sli4_dly_sp_process_cq - slow-path work handler when started by timer
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the slow-path handler.
+ **/
+static void
+lpfc_sli4_dly_sp_process_cq(struct work_struct *work)
+{
+	struct lpfc_queue *cq = container_of(to_delayed_work(work),
+					struct lpfc_queue, sched_spwork);
+
+	__lpfc_sli4_sp_process_cq(cq);
+}
+
+/**
  * lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
  * @phba: Pointer to HBA context object.
  * @cq: Pointer to associated CQ
@@ -13935,13 +14021,16 @@ out:
 
 /**
  * lpfc_sli4_fp_handle_cqe - Process fast-path work queue completion entry
+ * @phba: adapter with cq
  * @cq: Pointer to the completion queue.
  * @eqe: Pointer to fast-path completion queue entry.
  *
  * This routine process a fast-path work queue completion entry from fast-path
  * event queue for FCP command response completion.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
  **/
-static int
+static bool
 lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 			 struct lpfc_cqe *cqe)
 {
@@ -14008,10 +14097,11 @@ lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
  * completion queue, and then return.
  **/
 static void
-lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
-			uint32_t qidx)
+lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq,
+			 struct lpfc_eqe *eqe)
 {
 	struct lpfc_queue *cq = NULL;
+	uint32_t qidx = eq->hdwq;
 	uint16_t cqid, id;
 
 	if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) {
@@ -14074,72 +14164,74 @@ work_cq:
 }
 
 /**
- * lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
- * @phba: Pointer to HBA context object.
- * @eqe: Pointer to fast-path event queue entry.
+ * __lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
+ * @cq: Pointer to CQ to be processed
  *
- * This routine process a event queue entry from the fast-path event queue.
- * It will check the MajorCode and MinorCode to determine this is for a
- * completion event on a completion queue, if not, an error shall be logged
- * and just return. Otherwise, it will get to the corresponding completion
- * queue and process all the entries on the completion queue, rearm the
- * completion queue, and then return.
+ * This routine calls the cq processing routine with the handler for
+ * fast path CQEs.
+ *
+ * The CQ routine returns two values: the first is the calling status,
+ * which indicates whether work was queued to the  background discovery
+ * thread. If true, the routine should wakeup the discovery thread;
+ * the second is the delay parameter. If non-zero, rather than rearming
+ * the CQ and yet another interrupt, the CQ handler should be queued so
+ * that it is processed in a subsequent polling action. The value of
+ * the delay indicates when to reschedule it.
  **/
 static void
-lpfc_sli4_hba_process_cq(struct work_struct *work)
+__lpfc_sli4_hba_process_cq(struct lpfc_queue *cq)
 {
-	struct lpfc_queue *cq =
-		container_of(work, struct lpfc_queue, irqwork);
 	struct lpfc_hba *phba = cq->phba;
-	struct lpfc_cqe *cqe;
+	unsigned long delay;
 	bool workposted = false;
-	int ccount = 0;
-
-	/* Process all the entries to the CQ */
-	while ((cqe = lpfc_sli4_cq_get(cq))) {
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-		if (phba->ktime_on)
-			cq->isr_timestamp = ktime_get_ns();
-		else
-			cq->isr_timestamp = 0;
-#endif
-		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
-		if (!(++ccount % cq->entry_repost))
-			break;
-	}
-
-	/* Track the max number of CQEs processed in 1 EQ */
-	if (ccount > cq->CQ_max_cqe)
-		cq->CQ_max_cqe = ccount;
-	cq->assoc_qp->EQ_cqe_cnt += ccount;
 
-	/* Catch the no cq entry condition */
-	if (unlikely(ccount == 0))
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0369 No entry from fast-path completion "
-				"queue fcpcqid=%d\n", cq->queue_id);
+	/* process and rearm the CQ */
+	workposted |= __lpfc_sli4_process_cq(phba, cq, lpfc_sli4_fp_handle_cqe,
+					     &delay);
 
-	/* In any case, flash and re-arm the CQ */
-	phba->sli4_hba.sli4_cq_release(cq, LPFC_QUEUE_REARM);
+	if (delay) {
+		if (!queue_delayed_work_on(cq->chann, phba->wq,
+					   &cq->sched_irqwork, delay))
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0367 Cannot schedule soft IRQ "
+				"for cqid=%d on CPU %d\n",
+				cq->queue_id, cq->chann);
+	}
 
 	/* wake up worker thread if there are works to be done */
 	if (workposted)
 		lpfc_worker_wake_up(phba);
 }
 
+/**
+ * lpfc_sli4_hba_process_cq - fast-path work handler when started by
+ *   interrupt
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the fast-path handler.
+ **/
 static void
-lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
+lpfc_sli4_hba_process_cq(struct work_struct *work)
 {
-	struct lpfc_eqe *eqe;
-
-	/* walk all the EQ entries and drop on the floor */
-	while ((eqe = lpfc_sli4_eq_get(eq)))
-		;
+	struct lpfc_queue *cq = container_of(work, struct lpfc_queue, irqwork);
 
-	/* Clear and re-arm the EQ */
-	phba->sli4_hba.sli4_eq_release(eq, LPFC_QUEUE_REARM);
+	__lpfc_sli4_hba_process_cq(cq);
 }
 
+/**
+ * lpfc_sli4_hba_process_cq - fast-path work handler when started by timer
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the fast-path handler.
+ **/
+static void
+lpfc_sli4_dly_hba_process_cq(struct work_struct *work)
+{
+	struct lpfc_queue *cq = container_of(to_delayed_work(work),
+					struct lpfc_queue, sched_irqwork);
+
+	__lpfc_sli4_hba_process_cq(cq);
+}
 
 /**
  * lpfc_sli4_hba_intr_handler - HBA interrupt handler to SLI-4 device
@@ -14173,10 +14265,11 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 	struct lpfc_hba *phba;
 	struct lpfc_hba_eq_hdl *hba_eq_hdl;
 	struct lpfc_queue *fpeq;
-	struct lpfc_eqe *eqe;
 	unsigned long iflag;
 	int ecount = 0;
 	int hba_eqidx;
+	struct lpfc_eq_intr_info *eqi;
+	uint32_t icnt;
 
 	/* Get the driver's phba structure from the dev_id */
 	hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id;
@@ -14204,22 +14297,19 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
-	/*
-	 * Process all the event on FCP fast-path EQ
-	 */
-	while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-		lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
-		if (!(++ecount % fpeq->entry_repost))
-			break;
-		fpeq->EQ_processed++;
-	}
+	eqi = phba->sli4_hba.eq_info;
+	icnt = this_cpu_inc_return(eqi->icnt);
+	fpeq->last_cpu = smp_processor_id();
 
-	/* Track the max number of EQEs processed in 1 intr */
-	if (ecount > fpeq->EQ_max_eqe)
-		fpeq->EQ_max_eqe = ecount;
+	if (icnt > LPFC_EQD_ISR_TRIGGER &&
+	    phba->cfg_irq_chann == 1 &&
+	    phba->cfg_auto_imax &&
+	    fpeq->q_mode != LPFC_MAX_AUTO_EQ_DELAY &&
+	    phba->sli.sli_flag & LPFC_SLI_USE_EQDR)
+		lpfc_sli4_mod_hba_eq_delay(phba, fpeq, LPFC_MAX_AUTO_EQ_DELAY);
 
-	/* Always clear and re-arm the fast-path EQ */
-	phba->sli4_hba.sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+	/* process and rearm the EQ */
+	ecount = lpfc_sli4_process_eq(phba, fpeq);
 
 	if (unlikely(ecount == 0)) {
 		fpeq->EQ_no_entry++;
@@ -14307,6 +14397,9 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
 		kfree(queue->rqbp);
 	}
 
+	if (!list_empty(&queue->cpu_list))
+		list_del(&queue->cpu_list);
+
 	if (!list_empty(&queue->wq_list))
 		list_del(&queue->wq_list);
 
@@ -14355,6 +14448,7 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	INIT_LIST_HEAD(&queue->wqfull_list);
 	INIT_LIST_HEAD(&queue->page_list);
 	INIT_LIST_HEAD(&queue->child_list);
+	INIT_LIST_HEAD(&queue->cpu_list);
 
 	/* Set queue parameters now.  If the system cannot provide memory
 	 * resources, the free routine needs to know what was allocated.
@@ -14387,8 +14481,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	}
 	INIT_WORK(&queue->irqwork, lpfc_sli4_hba_process_cq);
 	INIT_WORK(&queue->spwork, lpfc_sli4_sp_process_cq);
+	INIT_DELAYED_WORK(&queue->sched_irqwork, lpfc_sli4_dly_hba_process_cq);
+	INIT_DELAYED_WORK(&queue->sched_spwork, lpfc_sli4_dly_sp_process_cq);
 
-	/* entry_repost will be set during q creation */
+	/* notify_interval will be set during q creation */
 
 	return queue;
 out_fail:
@@ -14457,7 +14553,6 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 	int cnt = 0, rc, length;
 	uint32_t shdr_status, shdr_add_status;
 	uint32_t dmult;
-	struct lpfc_register reg_data;
 	int qidx;
 	union lpfc_sli4_cfg_shdr *shdr;
 
@@ -14478,16 +14573,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 			if (!eq)
 				continue;
 
-			/* save value last set */
-			eq->q_mode = usdelay;
-
-			/* write register */
-			reg_data.word0 = 0;
-			bf_set(lpfc_sliport_eqdelay_id, &reg_data,
-					eq->queue_id);
-			bf_set(lpfc_sliport_eqdelay_delay, &reg_data, usdelay);
-			writel(reg_data.word0,
-					phba->sli4_hba.u.if_type2.EQDregaddr);
+			lpfc_sli4_mod_hba_eq_delay(phba, eq, usdelay);
 
 			if (++cnt >= numq)
 				break;
@@ -14673,8 +14759,8 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
 	if (eq->queue_id == 0xFFFF)
 		status = -ENXIO;
 	eq->host_index = 0;
-	eq->hba_index = 0;
-	eq->entry_repost = LPFC_EQ_REPOST;
+	eq->notify_interval = LPFC_EQ_NOTIFY_INTRVL;
+	eq->max_proc_limit = LPFC_EQ_MAX_PROC_LIMIT;
 
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
@@ -14814,8 +14900,8 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	cq->assoc_qid = eq->queue_id;
 	cq->assoc_qp = eq;
 	cq->host_index = 0;
-	cq->hba_index = 0;
-	cq->entry_repost = LPFC_CQ_REPOST;
+	cq->notify_interval = LPFC_CQ_NOTIFY_INTRVL;
+	cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit, cq->entry_count);
 
 	if (cq->queue_id > phba->sli4_hba.cq_max)
 		phba->sli4_hba.cq_max = cq->queue_id;
@@ -15026,8 +15112,9 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 		cq->assoc_qid = eq->queue_id;
 		cq->assoc_qp = eq;
 		cq->host_index = 0;
-		cq->hba_index = 0;
-		cq->entry_repost = LPFC_CQ_REPOST;
+		cq->notify_interval = LPFC_CQ_NOTIFY_INTRVL;
+		cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit,
+					 cq->entry_count);
 		cq->chann = idx;
 
 		rc = 0;
@@ -15279,7 +15366,6 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
 	mq->subtype = subtype;
 	mq->host_index = 0;
 	mq->hba_index = 0;
-	mq->entry_repost = LPFC_MQ_REPOST;
 
 	/* link the mq onto the parent cq child list */
 	list_add_tail(&mq->list, &cq->child_list);
@@ -15545,7 +15631,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 	wq->subtype = subtype;
 	wq->host_index = 0;
 	wq->hba_index = 0;
-	wq->entry_repost = LPFC_RELEASE_NOTIFICATION_INTERVAL;
+	wq->notify_interval = LPFC_WQ_NOTIFY_INTRVL;
 
 	/* link the wq onto the parent cq child list */
 	list_add_tail(&wq->list, &cq->child_list);
@@ -15739,7 +15825,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	hrq->subtype = subtype;
 	hrq->host_index = 0;
 	hrq->hba_index = 0;
-	hrq->entry_repost = LPFC_RQ_REPOST;
+	hrq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 	/* now create the data queue */
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
@@ -15832,7 +15918,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	drq->subtype = subtype;
 	drq->host_index = 0;
 	drq->hba_index = 0;
-	drq->entry_repost = LPFC_RQ_REPOST;
+	drq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 	/* link the header and data RQs onto the parent cq child list */
 	list_add_tail(&hrq->list, &cq->child_list);
@@ -15990,7 +16076,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		hrq->subtype = subtype;
 		hrq->host_index = 0;
 		hrq->hba_index = 0;
-		hrq->entry_repost = LPFC_RQ_REPOST;
+		hrq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 		drq->db_format = LPFC_DB_RING_FORMAT;
 		drq->db_regaddr = phba->sli4_hba.RQDBregaddr;
@@ -15999,7 +16085,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		drq->subtype = subtype;
 		drq->host_index = 0;
 		drq->hba_index = 0;
-		drq->entry_repost = LPFC_RQ_REPOST;
+		drq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 		list_add_tail(&hrq->list, &cq->child_list);
 		list_add_tail(&drq->list, &cq->child_list);
@@ -16059,6 +16145,7 @@ lpfc_eq_destroy(struct lpfc_hba *phba, struct lpfc_queue *eq)
 	/* sanity check on queue memory */
 	if (!eq)
 		return -ENODEV;
+
 	mbox = mempool_alloc(eq->phba->mbox_mem_pool, GFP_KERNEL);
 	if (!mbox)
 		return -ENOMEM;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index accccca3a027..20566c506e5f 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -154,14 +154,41 @@ struct lpfc_queue {
 	struct list_head child_list;
 	struct list_head page_list;
 	struct list_head sgl_list;
+	struct list_head cpu_list;
 	uint32_t entry_count;	/* Number of entries to support on the queue */
 	uint32_t entry_size;	/* Size of each queue entry. */
-	uint32_t entry_repost;	/* Count of entries before doorbell is rung */
-#define LPFC_EQ_REPOST		8
-#define LPFC_MQ_REPOST		8
-#define LPFC_CQ_REPOST		64
-#define LPFC_RQ_REPOST		64
-#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32  /* For WQs */
+	uint32_t notify_interval; /* Queue Notification Interval
+				   * For chip->host queues (EQ, CQ, RQ):
+				   *  specifies the interval (number of
+				   *  entries) where the doorbell is rung to
+				   *  notify the chip of entry consumption.
+				   * For host->chip queues (WQ):
+				   *  specifies the interval (number of
+				   *  entries) where consumption CQE is
+				   *  requested to indicate WQ entries
+				   *  consumed by the chip.
+				   * Not used on an MQ.
+				   */
+#define LPFC_EQ_NOTIFY_INTRVL	16
+#define LPFC_CQ_NOTIFY_INTRVL	16
+#define LPFC_WQ_NOTIFY_INTRVL	16
+#define LPFC_RQ_NOTIFY_INTRVL	16
+	uint32_t max_proc_limit; /* Queue Processing Limit
+				  * For chip->host queues (EQ, CQ):
+				  *  specifies the maximum number of
+				  *  entries to be consumed in one
+				  *  processing iteration sequence. Queue
+				  *  will be rearmed after each iteration.
+				  * Not used on an MQ, RQ or WQ.
+				  */
+#define LPFC_EQ_MAX_PROC_LIMIT		256
+#define LPFC_CQ_MIN_PROC_LIMIT		64
+#define LPFC_CQ_MAX_PROC_LIMIT		LPFC_CQE_EXP_COUNT	// 4096
+#define LPFC_CQ_DEF_MAX_PROC_LIMIT	LPFC_CQE_DEF_COUNT	// 1024
+#define LPFC_CQ_MIN_THRESHOLD_TO_POLL	64
+#define LPFC_CQ_MAX_THRESHOLD_TO_POLL	LPFC_CQ_DEF_MAX_PROC_LIMIT
+#define LPFC_CQ_DEF_THRESHOLD_TO_POLL	LPFC_CQ_DEF_MAX_PROC_LIMIT
+	uint32_t queue_claimed; /* indicates queue is being processed */
 	uint32_t queue_id;	/* Queue ID assigned by the hardware */
 	uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */
 	uint32_t host_index;	/* The host's index for putting or getting */
@@ -217,11 +244,14 @@ struct lpfc_queue {
 #define	RQ_buf_posted		q_cnt_3
 #define	RQ_rcv_buf		q_cnt_4
 
-	struct work_struct irqwork;
-	struct work_struct spwork;
+	struct work_struct	irqwork;
+	struct work_struct	spwork;
+	struct delayed_work	sched_irqwork;
+	struct delayed_work	sched_spwork;
 
 	uint64_t isr_timestamp;
 	uint16_t hdwq;
+	uint16_t last_cpu;	/* most recent cpu */
 	uint8_t	qe_valid;
 	struct lpfc_queue *assoc_qp;
 	union sli4_qe qe[1];	/* array to index entries (must be last) */
@@ -608,6 +638,11 @@ struct lpfc_lock_stat {
 };
 #endif
 
+struct lpfc_eq_intr_info {
+	struct list_head list;
+	uint32_t icnt;
+};
+
 /* SLI4 HBA data structure entries */
 struct lpfc_sli4_hdw_queue {
 	/* Pointers to the constructed SLI4 queues */
@@ -749,8 +784,10 @@ struct lpfc_sli4_hba {
 	struct lpfc_hba_eq_hdl *hba_eq_hdl; /* HBA per-WQ handle */
 
 	void (*sli4_eq_clr_intr)(struct lpfc_queue *q);
-	uint32_t (*sli4_eq_release)(struct lpfc_queue *q, bool arm);
-	uint32_t (*sli4_cq_release)(struct lpfc_queue *q, bool arm);
+	void (*sli4_write_eq_db)(struct lpfc_hba *phba, struct lpfc_queue *eq,
+				uint32_t count, bool arm);
+	void (*sli4_write_cq_db)(struct lpfc_hba *phba, struct lpfc_queue *cq,
+				uint32_t count, bool arm);
 
 	/* Pointers to the constructed SLI4 queues */
 	struct lpfc_sli4_hdw_queue *hdwq;
@@ -856,6 +893,7 @@ struct lpfc_sli4_hba {
 	uint16_t num_online_cpu;
 	uint16_t num_present_cpu;
 	uint16_t curr_disp_cpu;
+	struct lpfc_eq_intr_info __percpu *eq_info;
 	uint32_t conf_trunk;
 #define lpfc_conf_trunk_port0_WORD	conf_trunk
 #define lpfc_conf_trunk_port0_SHIFT	0
@@ -1020,11 +1058,15 @@ int lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *);
 int lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba);
 int lpfc_sli4_init_vpi(struct lpfc_vport *);
 inline void lpfc_sli4_eq_clr_intr(struct lpfc_queue *);
-uint32_t lpfc_sli4_cq_release(struct lpfc_queue *, bool);
-uint32_t lpfc_sli4_eq_release(struct lpfc_queue *, bool);
+void lpfc_sli4_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			   uint32_t count, bool arm);
+void lpfc_sli4_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			   uint32_t count, bool arm);
 inline void lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q);
-uint32_t lpfc_sli4_if6_cq_release(struct lpfc_queue *q, bool arm);
-uint32_t lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm);
+void lpfc_sli4_if6_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			       uint32_t count, bool arm);
+void lpfc_sli4_if6_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			       uint32_t count, bool arm);
 void lpfc_sli4_fcfi_unreg(struct lpfc_hba *, uint16_t);
 int lpfc_sli4_fcf_scan_read_fcf_rec(struct lpfc_hba *, uint16_t);
 int lpfc_sli4_fcf_rr_read_fcf_rec(struct lpfc_hba *, uint16_t);