scsi: lpfc: Revise interrupt coalescing for missing scenarios

The existing "auto eq delay" mechanism was sometimes skipping over an EQ, not ramping the coalescing down under light load fast enough, and in other cases never kicked in as cpu sharing by multiple vectors didn't quite add up right. Tweak the interrupt mechanism such that: - Add a flag to the EQ to force checking for colaescing values when being serviced in the interrupt handler. The flag will be set by any CQ bound to the EQ whenever the number of CQ elements process in a single scan meets or exceeds the hardware queue notify level. E.g. there's a significant number of completions happening. - In the heartbeat work item that checks coalescing: - Replace the structure that was counting the number of EQs that interrupted on a single cpu with a new structure that looks at the EQ to see whether EQ currently has a coalescing value (thus it should be re-evaluate) or was marked by the new flag indicating heavy completions. - When a cpu, which may be servicing multiple vectors, had at least 1 EQ that should be checked, a new coalescing delay is calculated based on the number of interrupts that occurred on the cpu. - The new coalescing value is then applied to the EQs that had interrupted on the cpu. Link: https://lore.kernel.org/r/20191018211832.7917-11-jsmart2021@gmail.com Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <jsmart2021@gmail.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
author: James Smart <jsmart2021@gmail.com> 2019-10-18 14:18:26 -0700
committer: Martin K. Petersen <martin.petersen@oracle.com> 2019-10-24 21:02:05 -0400
commit: 8156d378c4cbf8ca19df5d8f0c610ce6923b61e2 (patch)
tree: bbb8067946f1688d6fb6c79ee359f28cd4b58710 /drivers/scsi/lpfc/lpfc_init.c
parent: scsi: lpfc: Remove lock contention target write path (diff)
download: linux-dev-8156d378c4cbf8ca19df5d8f0c610ce6923b61e2.tar.xz
linux-dev-8156d378c4cbf8ca19df5d8f0c610ce6923b61e2.zip
1 files changed, 21 insertions, 30 deletions
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 8292b66e4b07..316a2c2beb0c 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1235,10 +1235,9 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
 					     struct lpfc_hba, eq_delay_work);
 	struct lpfc_eq_intr_info *eqi, *eqi_new;
 	struct lpfc_queue *eq, *eq_next;
-	unsigned char *eqcnt = NULL;
+	unsigned char *ena_delay = NULL;
 	uint32_t usdelay;
 	int i;
-	bool update = false;
 
 	if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING)
 		return;
@@ -1247,44 +1246,36 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
 	    phba->pport->fc_flag & FC_OFFLINE_MODE)
 		goto requeue;
 
-	eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char),
-			GFP_KERNEL);
-	if (!eqcnt)
+	ena_delay = kcalloc(phba->sli4_hba.num_possible_cpu, sizeof(*ena_delay),
+			    GFP_KERNEL);
+	if (!ena_delay)
 		goto requeue;
 
-	if (phba->cfg_irq_chann > 1) {
-		/* Loop thru all IRQ vectors */
-		for (i = 0; i < phba->cfg_irq_chann; i++) {
-			/* Get the EQ corresponding to the IRQ vector */
-			eq = phba->sli4_hba.hba_eq_hdl[i].eq;
-			if (!eq)
-				continue;
-			if (eq->q_mode) {
-				update = true;
-				break;
-			}
-			if (eqcnt[eq->last_cpu] < 2)
-				eqcnt[eq->last_cpu]++;
+	for (i = 0; i < phba->cfg_irq_chann; i++) {
+		/* Get the EQ corresponding to the IRQ vector */
+		eq = phba->sli4_hba.hba_eq_hdl[i].eq;
+		if (!eq)
+			continue;
+		if (eq->q_mode || eq->q_flag & HBA_EQ_DELAY_CHK) {
+			eq->q_flag &= ~HBA_EQ_DELAY_CHK;
+			ena_delay[eq->last_cpu] = 1;
 		}
-	} else
-		update = true;
+	}
 
 	for_each_present_cpu(i) {
 		eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
-		if (!update && eqcnt[i] < 2) {
-			eqi->icnt = 0;
-			continue;
+		if (ena_delay[i]) {
+			usdelay = (eqi->icnt >> 10) * LPFC_EQ_DELAY_STEP;
+			if (usdelay > LPFC_MAX_AUTO_EQ_DELAY)
+				usdelay = LPFC_MAX_AUTO_EQ_DELAY;
+		} else {
+			usdelay = 0;
 		}
 
-		usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) *
-			   LPFC_EQ_DELAY_STEP;
-		if (usdelay > LPFC_MAX_AUTO_EQ_DELAY)
-			usdelay = LPFC_MAX_AUTO_EQ_DELAY;
-
 		eqi->icnt = 0;
 
 		list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) {
-			if (eq->last_cpu != i) {
+			if (unlikely(eq->last_cpu != i)) {
 				eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info,
 						      eq->last_cpu);
 				list_move_tail(&eq->cpu_list, &eqi_new->list);
@@ -1296,7 +1287,7 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
 		}
 	}
 
-	kfree(eqcnt);
+	kfree(ena_delay);
 
 requeue:
 	queue_delayed_work(phba->wq, &phba->eq_delay_work,
author	James Smart <jsmart2021@gmail.com>	2019-10-18 14:18:26 -0700
committer	Martin K. Petersen <martin.petersen@oracle.com>	2019-10-24 21:02:05 -0400
commit	8156d378c4cbf8ca19df5d8f0c610ce6923b61e2 (patch)
tree	bbb8067946f1688d6fb6c79ee359f28cd4b58710 /drivers/scsi/lpfc/lpfc_init.c
parent	scsi: lpfc: Remove lock contention target write path (diff)
download	linux-dev-8156d378c4cbf8ca19df5d8f0c610ce6923b61e2.tar.xz linux-dev-8156d378c4cbf8ca19df5d8f0c610ce6923b61e2.zip