4 files changed, 255 insertions, 113 deletions
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 58f26e5f3a59..065c526218b2 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -5115,7 +5115,8 @@ lpfc_cq_max_proc_limit_store(struct device *dev, struct device_attribute *attr,
 
 	/* set the values on the cq's */
 	for (i = 0; i < phba->cfg_irq_chann; i++) {
-		eq = phba->sli4_hba.hdwq[i].hba_eq;
+		/* Get the EQ corresponding to the IRQ vector */
+		eq = phba->sli4_hba.hba_eq_hdl[i].eq;
 		if (!eq)
 			continue;
 
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 021b01561597..416f0fb155f5 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -93,7 +93,6 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *);
 static void lpfc_sli4_disable_intr(struct lpfc_hba *);
 static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
 static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
-static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t);
 static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
@@ -1274,8 +1273,10 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
 	if (!eqcnt)
 		goto requeue;
 
+	/* Loop thru all IRQ vectors */
 	for (i = 0; i < phba->cfg_irq_chann; i++) {
-		eq = phba->sli4_hba.hdwq[i].hba_eq;
+		/* Get the EQ corresponding to the IRQ vector */
+		eq = phba->sli4_hba.hba_eq_hdl[i].eq;
 		if (eq && eqcnt[eq->last_cpu] < 2)
 			eqcnt[eq->last_cpu]++;
 		continue;
@@ -8748,8 +8749,10 @@ int
 lpfc_sli4_queue_create(struct lpfc_hba *phba)
 {
 	struct lpfc_queue *qdesc;
-	int idx, eqidx, cpu;
+	int idx, cpu, eqcpu;
 	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_vector_map_info *cpup;
+	struct lpfc_vector_map_info *eqcpup;
 	struct lpfc_eq_intr_info *eqi;
 
 	/*
@@ -8834,40 +8837,60 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list);
 
 	/* Create HBA Event Queues (EQs) */
-	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
-		/* determine EQ affinity */
-		eqidx = lpfc_find_eq_handle(phba, idx);
-		cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ);
-		/*
-		 * If there are more Hardware Queues than available
-		 * EQs, multiple Hardware Queues may share a common EQ.
+	for_each_present_cpu(cpu) {
+		/* We only want to create 1 EQ per vector, even though
+		 * multiple CPUs might be using that vector. so only
+		 * selects the CPUs that are LPFC_CPU_FIRST_IRQ.
 		 */
-		if (idx >= phba->cfg_irq_chann) {
-			/* Share an existing EQ */
-			phba->sli4_hba.hdwq[idx].hba_eq =
-				phba->sli4_hba.hdwq[eqidx].hba_eq;
+		cpup = &phba->sli4_hba.cpu_map[cpu];
+		if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
 			continue;
-		}
-		/* Create an EQ */
+
+		/* Get a ptr to the Hardware Queue associated with this CPU */
+		qp = &phba->sli4_hba.hdwq[cpup->hdwq];
+
+		/* Allocate an EQ */
 		qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
 					      phba->sli4_hba.eq_esize,
 					      phba->sli4_hba.eq_ecount, cpu);
 		if (!qdesc) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"0497 Failed allocate EQ (%d)\n", idx);
+					"0497 Failed allocate EQ (%d)\n",
+					cpup->hdwq);
 			goto out_error;
 		}
 		qdesc->qe_valid = 1;
-		qdesc->hdwq = idx;
-
-		/* Save the CPU this EQ is affinitised to */
-		qdesc->chann = cpu;
-		phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
+		qdesc->hdwq = cpup->hdwq;
+		qdesc->chann = cpu; /* First CPU this EQ is affinitised to */
 		qdesc->last_cpu = qdesc->chann;
+
+		/* Save the allocated EQ in the Hardware Queue */
+		qp->hba_eq = qdesc;
+
 		eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu);
 		list_add(&qdesc->cpu_list, &eqi->list);
 	}
 
+	/* Now we need to populate the other Hardware Queues, that share
+	 * an IRQ vector, with the associated EQ ptr.
+	 */
+	for_each_present_cpu(cpu) {
+		cpup = &phba->sli4_hba.cpu_map[cpu];
+
+		/* Check for EQ already allocated in previous loop */
+		if (cpup->flag & LPFC_CPU_FIRST_IRQ)
+			continue;
+
+		/* Check for multiple CPUs per hdwq */
+		qp = &phba->sli4_hba.hdwq[cpup->hdwq];
+		if (qp->hba_eq)
+			continue;
+
+		/* We need to share an EQ for this hdwq */
+		eqcpu = lpfc_find_cpu_handle(phba, cpup->eq, LPFC_FIND_BY_EQ);
+		eqcpup = &phba->sli4_hba.cpu_map[eqcpu];
+		qp->hba_eq = phba->sli4_hba.hdwq[eqcpup->hdwq].hba_eq;
+	}
 
 	/* Allocate SCSI SLI4 CQ/WQs */
 	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
@@ -9130,23 +9153,31 @@ static inline void
 lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
 {
 	struct lpfc_sli4_hdw_queue *hdwq;
+	struct lpfc_queue *eq;
 	uint32_t idx;
 
 	hdwq = phba->sli4_hba.hdwq;
-	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
-		if (idx < phba->cfg_irq_chann)
-			lpfc_sli4_queue_free(hdwq[idx].hba_eq);
-		hdwq[idx].hba_eq = NULL;
 
+	/* Loop thru all Hardware Queues */
+	for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+		/* Free the CQ/WQ corresponding to the Hardware Queue */
 		lpfc_sli4_queue_free(hdwq[idx].fcp_cq);
 		lpfc_sli4_queue_free(hdwq[idx].nvme_cq);
 		lpfc_sli4_queue_free(hdwq[idx].fcp_wq);
 		lpfc_sli4_queue_free(hdwq[idx].nvme_wq);
+		hdwq[idx].hba_eq = NULL;
 		hdwq[idx].fcp_cq = NULL;
 		hdwq[idx].nvme_cq = NULL;
 		hdwq[idx].fcp_wq = NULL;
 		hdwq[idx].nvme_wq = NULL;
 	}
+	/* Loop thru all IRQ vectors */
+	for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
+		/* Free the EQ corresponding to the IRQ vector */
+		eq = phba->sli4_hba.hba_eq_hdl[idx].eq;
+		lpfc_sli4_queue_free(eq);
+		phba->sli4_hba.hba_eq_hdl[idx].eq = NULL;
+	}
 }
 
 /**
@@ -9330,10 +9361,13 @@ lpfc_setup_cq_lookup(struct lpfc_hba *phba)
 	qp = phba->sli4_hba.hdwq;
 	memset(phba->sli4_hba.cq_lookup, 0,
 	       (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1)));
+	/* Loop thru all IRQ vectors */
 	for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
-		eq = qp[qidx].hba_eq;
+		/* Get the EQ corresponding to the IRQ vector */
+		eq = phba->sli4_hba.hba_eq_hdl[qidx].eq;
 		if (!eq)
 			continue;
+		/* Loop through all CQs associated with that EQ */
 		list_for_each_entry(childq, &eq->child_list, list) {
 			if (childq->queue_id > phba->sli4_hba.cq_max)
 				continue;
@@ -9362,9 +9396,10 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 {
 	uint32_t shdr_status, shdr_add_status;
 	union lpfc_sli4_cfg_shdr *shdr;
+	struct lpfc_vector_map_info *cpup;
 	struct lpfc_sli4_hdw_queue *qp;
 	LPFC_MBOXQ_t *mboxq;
-	int qidx;
+	int qidx, cpu;
 	uint32_t length, usdelay;
 	int rc = -ENOMEM;
 
@@ -9425,32 +9460,55 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		rc = -ENOMEM;
 		goto out_error;
 	}
+
+	/* Loop thru all IRQ vectors */
 	for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
-		if (!qp[qidx].hba_eq) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"0522 Fast-path EQ (%d) not "
-					"allocated\n", qidx);
-			rc = -ENOMEM;
-			goto out_destroy;
-		}
-		rc = lpfc_eq_create(phba, qp[qidx].hba_eq,
-				    phba->cfg_fcp_imax);
-		if (rc) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"0523 Failed setup of fast-path EQ "
-					"(%d), rc = 0x%x\n", qidx,
-					(uint32_t)rc);
-			goto out_destroy;
+		/* Create HBA Event Queues (EQs) in order */
+		for_each_present_cpu(cpu) {
+			cpup = &phba->sli4_hba.cpu_map[cpu];
+
+			/* Look for the CPU thats using that vector with
+			 * LPFC_CPU_FIRST_IRQ set.
+			 */
+			if (!(cpup->flag & LPFC_CPU_FIRST_IRQ))
+				continue;
+			if (qidx != cpup->eq)
+				continue;
+
+			/* Create an EQ for that vector */
+			rc = lpfc_eq_create(phba, qp[cpup->hdwq].hba_eq,
+					    phba->cfg_fcp_imax);
+			if (rc) {
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"0523 Failed setup of fast-path"
+						" EQ (%d), rc = 0x%x\n",
+						cpup->eq, (uint32_t)rc);
+				goto out_destroy;
+			}
+
+			/* Save the EQ for that vector in the hba_eq_hdl */
+			phba->sli4_hba.hba_eq_hdl[cpup->eq].eq =
+				qp[cpup->hdwq].hba_eq;
+
+			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+					"2584 HBA EQ setup: queue[%d]-id=%d\n",
+					cpup->eq,
+					qp[cpup->hdwq].hba_eq->queue_id);
 		}
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"2584 HBA EQ setup: queue[%d]-id=%d\n", qidx,
-				qp[qidx].hba_eq->queue_id);
 	}
 
+	/* Loop thru all Hardware Queues */
 	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
 		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+			cpu = lpfc_find_cpu_handle(phba, qidx,
+						   LPFC_FIND_BY_HDWQ);
+			cpup = &phba->sli4_hba.cpu_map[cpu];
+
+			/* Create the CQ/WQ corresponding to the
+			 * Hardware Queue
+			 */
 			rc = lpfc_create_wq_cq(phba,
-					qp[qidx].hba_eq,
+					phba->sli4_hba.hdwq[cpup->hdwq].hba_eq,
 					qp[qidx].nvme_cq,
 					qp[qidx].nvme_wq,
 					&phba->sli4_hba.hdwq[qidx].nvme_cq_map,
@@ -9466,8 +9524,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 	}
 
 	for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+		cpu = lpfc_find_cpu_handle(phba, qidx, LPFC_FIND_BY_HDWQ);
+		cpup = &phba->sli4_hba.cpu_map[cpu];
+
+		/* Create the CQ/WQ corresponding to the Hardware Queue */
 		rc = lpfc_create_wq_cq(phba,
-				       qp[qidx].hba_eq,
+				       phba->sli4_hba.hdwq[cpup->hdwq].hba_eq,
 				       qp[qidx].fcp_cq,
 				       qp[qidx].fcp_wq,
 				       &phba->sli4_hba.hdwq[qidx].fcp_cq_map,
@@ -9719,6 +9781,7 @@ void
 lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 {
 	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_queue *eq;
 	int qidx;
 
 	/* Unset mailbox command work queue */
@@ -9770,14 +9833,20 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
 
 	/* Unset fast-path SLI4 queues */
 	if (phba->sli4_hba.hdwq) {
+		/* Loop thru all Hardware Queues */
 		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+			/* Destroy the CQ/WQ corresponding to Hardware Queue */
 			qp = &phba->sli4_hba.hdwq[qidx];
 			lpfc_wq_destroy(phba, qp->fcp_wq);
 			lpfc_wq_destroy(phba, qp->nvme_wq);
 			lpfc_cq_destroy(phba, qp->fcp_cq);
 			lpfc_cq_destroy(phba, qp->nvme_cq);
-			if (qidx < phba->cfg_irq_chann)
-				lpfc_eq_destroy(phba, qp->hba_eq);
+		}
+		/* Loop thru all IRQ vectors */
+		for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+			/* Destroy the EQ corresponding to the IRQ vector */
+			eq = phba->sli4_hba.hba_eq_hdl[qidx].eq;
+			lpfc_eq_destroy(phba, eq);
 		}
 	}
 
@@ -10567,11 +10636,12 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ
+ * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified Queue
  * @phba: pointer to lpfc hba data structure.
  * @id: EQ vector index or Hardware Queue index
  * @match: LPFC_FIND_BY_EQ = match by EQ
  *         LPFC_FIND_BY_HDWQ = match by Hardware Queue
+ * Return the CPU that matches the selection criteria
  */
 static uint16_t
 lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
@@ -10579,40 +10649,27 @@ lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
 	struct lpfc_vector_map_info *cpup;
 	int cpu;
 
-	/* Find the desired phys_id for the specified EQ */
+	/* Loop through all CPUs */
 	for_each_present_cpu(cpu) {
 		cpup = &phba->sli4_hba.cpu_map[cpu];
+
+		/* If we are matching by EQ, there may be multiple CPUs using
+		 * using the same vector, so select the one with
+		 * LPFC_CPU_FIRST_IRQ set.
+		 */
 		if ((match == LPFC_FIND_BY_EQ) &&
+		    (cpup->flag & LPFC_CPU_FIRST_IRQ) &&
 		    (cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
 		    (cpup->eq == id))
 			return cpu;
+
+		/* If matching by HDWQ, select the first CPU that matches */
 		if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id))
 			return cpu;
 	}
 	return 0;
 }
 
-/**
- * lpfc_find_eq_handle - Find the EQ that corresponds to the specified
- *                       Hardware Queue
- * @phba: pointer to lpfc hba data structure.
- * @hdwq: Hardware Queue index
- */
-static uint16_t
-lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq)
-{
-	struct lpfc_vector_map_info *cpup;
-	int cpu;
-
-	/* Find the desired phys_id for the specified EQ */
-	for_each_present_cpu(cpu) {
-		cpup = &phba->sli4_hba.cpu_map[cpu];
-		if (cpup->hdwq == hdwq)
-			return cpup->eq;
-	}
-	return 0;
-}
-
 #ifdef CONFIG_X86
 /**
  * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded
@@ -10719,32 +10776,40 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 
 	/* This loop sets up all CPUs that are affinitized with a
 	 * irq vector assigned to the driver. All affinitized CPUs
-	 * will get a link to that vectors IRQ and EQ. For now we
-	 * are assuming all CPUs using the same EQ will all share
-	 * the same hardware queue.
+	 * will get a link to that vectors IRQ and EQ.
 	 */
 	for (idx = 0; idx <  phba->cfg_irq_chann; idx++) {
+		/* Get a CPU mask for all CPUs affinitized to this vector */
 		maskp = pci_irq_get_affinity(phba->pcidev, idx);
 		if (!maskp)
 			continue;
 
+		i = 0;
+		/* Loop through all CPUs associated with vector idx */
 		for_each_cpu_and(cpu, maskp, cpu_present_mask) {
+			/* Set the EQ index and IRQ for that vector */
 			cpup = &phba->sli4_hba.cpu_map[cpu];
 			cpup->eq = idx;
-			cpup->hdwq = idx;
 			cpup->irq = pci_irq_vector(phba->pcidev, idx);
 
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 					"3336 Set Affinity: CPU %d "
-					"hdwq %d irq %d\n",
-					cpu, cpup->hdwq, cpup->irq);
+					"irq %d eq %d\n",
+					cpu, cpup->irq, cpup->eq);
+
+			/* If this is the first CPU thats assigned to this
+			 * vector, set LPFC_CPU_FIRST_IRQ.
+			 */
+			if (!i)
+				cpup->flag |= LPFC_CPU_FIRST_IRQ;
+			i++;
 		}
 	}
 
 	/* After looking at each irq vector assigned to this pcidev, its
 	 * possible to see that not ALL CPUs have been accounted for.
-	 * Next we will set any unassigned cpu map entries to a IRQ
-	 * on the same phys_id
+	 * Next we will set any unassigned (unaffinitized) cpu map
+	 * entries to a IRQ on the same phys_id.
 	 */
 	first_cpu = cpumask_first(cpu_present_mask);
 	start_cpu = first_cpu;
@@ -10757,7 +10822,7 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 			/* Mark CPU as IRQ not assigned by the kernel */
 			cpup->flag |= LPFC_CPU_MAP_UNASSIGN;
 
-			/* If so, find a new_cpup thats on the the same
+			/* If so, find a new_cpup thats on the the SAME
 			 * phys_id as cpup. start_cpu will start where we
 			 * left off so all unassigned entries don't get assgined
 			 * the IRQ of the first entry.
@@ -10779,7 +10844,6 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 found_same:
 			/* We found a matching phys_id, so copy the IRQ info */
 			cpup->eq = new_cpup->eq;
-			cpup->hdwq = new_cpup->hdwq;
 			cpup->irq = new_cpup->irq;
 
 			/* Bump start_cpu to the next slot to minmize the
@@ -10790,12 +10854,11 @@ found_same:
 			if (start_cpu == nr_cpumask_bits)
 				start_cpu = first_cpu;
 
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 					"3337 Set Affinity: CPU %d "
-					"hdwq %d irq %d from id %d same "
+					"irq %d from id %d same "
 					"phys_id (%d)\n",
-					cpu, cpup->hdwq, cpup->irq,
-					new_cpu, cpup->phys_id);
+					cpu, cpup->irq, new_cpu, cpup->phys_id);
 		}
 	}
 
@@ -10810,7 +10873,7 @@ found_same:
 			/* Mark it as IRQ not assigned by the kernel */
 			cpup->flag |= LPFC_CPU_MAP_UNASSIGN;
 
-			/* If so, find a new_cpup thats on any phys_id
+			/* If so, find a new_cpup thats on ANY phys_id
 			 * as the cpup. start_cpu will start where we
 			 * left off so all unassigned entries don't get
 			 * assigned the IRQ of the first entry.
@@ -10829,13 +10892,12 @@ found_same:
 			/* We should never leave an entry unassigned */
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"3339 Set Affinity: CPU %d "
-					"hdwq %d irq %d UNASSIGNED\n",
-					cpu, cpup->hdwq, cpup->irq);
+					"irq %d UNASSIGNED\n",
+					cpup->hdwq, cpup->irq);
 			continue;
 found_any:
 			/* We found an available entry, copy the IRQ info */
 			cpup->eq = new_cpup->eq;
-			cpup->hdwq = new_cpup->hdwq;
 			cpup->irq = new_cpup->irq;
 
 			/* Bump start_cpu to the next slot to minmize the
@@ -10846,13 +10908,82 @@ found_any:
 			if (start_cpu == nr_cpumask_bits)
 				start_cpu = first_cpu;
 
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 					"3338 Set Affinity: CPU %d "
-					"hdwq %d irq %d from id %d (%d/%d)\n",
-					cpu, cpup->hdwq, cpup->irq, new_cpu,
+					"irq %d from id %d (%d/%d)\n",
+					cpu, cpup->irq, new_cpu,
 					new_cpup->phys_id, new_cpup->core_id);
 		}
 	}
+
+	/* Finally we need to associate a hdwq with each cpu_map entry
+	 * This will be 1 to 1 - hdwq to cpu, unless there are less
+	 * hardware queues then CPUs. For that case we will just round-robin
+	 * the available hardware queues as they get assigned to CPUs.
+	 */
+	idx = 0;
+	start_cpu = 0;
+	for_each_present_cpu(cpu) {
+		cpup = &phba->sli4_hba.cpu_map[cpu];
+		if (idx >=  phba->cfg_hdw_queue) {
+			/* We need to reuse a Hardware Queue for another CPU,
+			 * so be smart about it and pick one that has its
+			 * IRQ/EQ mapped to the same phys_id (CPU package).
+			 * and core_id.
+			 */
+			new_cpu = start_cpu;
+			for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+				new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+				if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) &&
+				    (new_cpup->phys_id == cpup->phys_id) &&
+				    (new_cpup->core_id == cpup->core_id))
+					goto found_hdwq;
+				new_cpu = cpumask_next(
+					new_cpu, cpu_present_mask);
+				if (new_cpu == nr_cpumask_bits)
+					new_cpu = first_cpu;
+			}
+
+			/* If we can't match both phys_id and core_id,
+			 * settle for just a phys_id match.
+			 */
+			new_cpu = start_cpu;
+			for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+				new_cpup = &phba->sli4_hba.cpu_map[new_cpu];
+				if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) &&
+				    (new_cpup->phys_id == cpup->phys_id))
+					goto found_hdwq;
+				new_cpu = cpumask_next(
+					new_cpu, cpu_present_mask);
+				if (new_cpu == nr_cpumask_bits)
+					new_cpu = first_cpu;
+			}
+
+			/* Otherwise just round robin on cfg_hdw_queue */
+			cpup->hdwq = idx % phba->cfg_hdw_queue;
+			goto logit;
+found_hdwq:
+			/* We found an available entry, copy the IRQ info */
+			start_cpu = cpumask_next(new_cpu, cpu_present_mask);
+			if (start_cpu == nr_cpumask_bits)
+				start_cpu = first_cpu;
+			cpup->hdwq = new_cpup->hdwq;
+		} else {
+			/* 1 to 1, CPU to hdwq */
+			cpup->hdwq = idx;
+		}
+logit:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"3335 Set Affinity: CPU %d (phys %d core %d): "
+				"hdwq %d eq %d irq %d flg x%x\n",
+				cpu, cpup->phys_id, cpup->core_id,
+				cpup->hdwq, cpup->eq, cpup->irq, cpup->flag);
+		idx++;
+	}
+
+	/* The cpu_map array will be used later during initialization
+	 * when EQ / CQ / WQs are allocated and configured.
+	 */
 	return;
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 4432060f7315..d55259fc0af5 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -5548,6 +5548,7 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
 	int qidx;
 	struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
 	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_queue *eq;
 
 	sli4_hba->sli4_write_cq_db(phba, sli4_hba->mbx_cq, 0, LPFC_QUEUE_REARM);
 	sli4_hba->sli4_write_cq_db(phba, sli4_hba->els_cq, 0, LPFC_QUEUE_REARM);
@@ -5555,18 +5556,24 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
 		sli4_hba->sli4_write_cq_db(phba, sli4_hba->nvmels_cq, 0,
 					   LPFC_QUEUE_REARM);
 
-	qp = sli4_hba->hdwq;
 	if (sli4_hba->hdwq) {
+		/* Loop thru all Hardware Queues */
 		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
-			sli4_hba->sli4_write_cq_db(phba, qp[qidx].fcp_cq, 0,
+			qp = &sli4_hba->hdwq[qidx];
+			/* ARM the corresponding CQ */
+			sli4_hba->sli4_write_cq_db(phba, qp->fcp_cq, 0,
 						   LPFC_QUEUE_REARM);
-			sli4_hba->sli4_write_cq_db(phba, qp[qidx].nvme_cq, 0,
+			sli4_hba->sli4_write_cq_db(phba, qp->nvme_cq, 0,
 						   LPFC_QUEUE_REARM);
 		}
 
-		for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++)
-			sli4_hba->sli4_write_eq_db(phba, qp[qidx].hba_eq,
-						0, LPFC_QUEUE_REARM);
+		/* Loop thru all IRQ vectors */
+		for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+			eq = sli4_hba->hba_eq_hdl[qidx].eq;
+			/* ARM the corresponding EQ */
+			sli4_hba->sli4_write_eq_db(phba, eq,
+						   0, LPFC_QUEUE_REARM);
+		}
 	}
 
 	if (phba->nvmet_support) {
@@ -7858,20 +7865,22 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 	struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
 	uint32_t eqidx;
 	struct lpfc_queue *fpeq = NULL;
+	struct lpfc_queue *eq;
 	bool mbox_pending;
 
 	if (unlikely(!phba) || (phba->sli_rev != LPFC_SLI_REV4))
 		return false;
 
-	/* Find the eq associated with the mcq */
-
-	if (sli4_hba->hdwq)
-		for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++)
-			if (sli4_hba->hdwq[eqidx].hba_eq->queue_id ==
-			    sli4_hba->mbx_cq->assoc_qid) {
-				fpeq = sli4_hba->hdwq[eqidx].hba_eq;
+	/* Find the EQ associated with the mbox CQ */
+	if (sli4_hba->hdwq) {
+		for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) {
+			eq = phba->sli4_hba.hba_eq_hdl[eqidx].eq;
+			if (eq->queue_id == sli4_hba->mbx_cq->assoc_qid) {
+				fpeq = eq;
 				break;
 			}
+		}
+	}
 	if (!fpeq)
 		return false;
 
@@ -14217,7 +14226,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 		return IRQ_NONE;
 
 	/* Get to the EQ struct associated with this vector */
-	fpeq = phba->sli4_hba.hdwq[hba_eqidx].hba_eq;
+	fpeq = phba->sli4_hba.hba_eq_hdl[hba_eqidx].eq;
 	if (unlikely(!fpeq))
 		return IRQ_NONE;
 
@@ -14502,7 +14511,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 	/* set values by EQ_DELAY register if supported */
 	if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
 		for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
-			eq = phba->sli4_hba.hdwq[qidx].hba_eq;
+			eq = phba->sli4_hba.hba_eq_hdl[qidx].eq;
 			if (!eq)
 				continue;
 
@@ -14511,7 +14520,6 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 			if (++cnt >= numq)
 				break;
 		}
-
 		return;
 	}
 
@@ -14539,7 +14547,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 		dmult = LPFC_DMULT_MAX;
 
 	for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
-		eq = phba->sli4_hba.hdwq[qidx].hba_eq;
+		eq = phba->sli4_hba.hba_eq_hdl[qidx].eq;
 		if (!eq)
 			continue;
 		eq->q_mode = usdelay;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 69c6dba77dce..3aeca387b22a 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -452,6 +452,7 @@ struct lpfc_hba_eq_hdl {
 	uint32_t idx;
 	char handler_name[LPFC_SLI4_HANDLER_NAME_SZ];
 	struct lpfc_hba *phba;
+	struct lpfc_queue *eq;
 };
 
 /*BB Credit recovery value*/
@@ -552,6 +553,7 @@ struct lpfc_vector_map_info {
 	uint16_t	flag;
 #define LPFC_CPU_MAP_HYPER	0x1
 #define LPFC_CPU_MAP_UNASSIGN	0x2
+#define LPFC_CPU_FIRST_IRQ	0x4
 };
 #define LPFC_VECTOR_MAP_EMPTY	0xffff