aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/lpfc/lpfc_nvmet.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_nvmet.c')
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.c279
1 files changed, 211 insertions, 68 deletions
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index bbbd0f84160d..0b7c1a49e203 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -170,12 +170,14 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
struct rqb_dmabuf *nvmebuf;
+ struct lpfc_nvmet_ctx_info *infop;
uint32_t *payload;
uint32_t size, oxid, sid, rc;
+ int cpu;
unsigned long iflag;
if (ctxp->txrdy) {
- pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
+ dma_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
ctxp->txrdy_phys);
ctxp->txrdy = NULL;
ctxp->txrdy_phys = 0;
@@ -267,11 +269,16 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
}
spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
- spin_lock_irqsave(&phba->sli4_hba.nvmet_ctx_put_lock, iflag);
- list_add_tail(&ctx_buf->list,
- &phba->sli4_hba.lpfc_nvmet_ctx_put_list);
- phba->sli4_hba.nvmet_ctx_put_cnt++;
- spin_unlock_irqrestore(&phba->sli4_hba.nvmet_ctx_put_lock, iflag);
+ /*
+ * Use the CPU context list, from the MRQ the IO was received on
+ * (ctxp->idx), to save context structure.
+ */
+ cpu = smp_processor_id();
+ infop = lpfc_get_ctx_list(phba, cpu, ctxp->idx);
+ spin_lock_irqsave(&infop->nvmet_ctx_list_lock, iflag);
+ list_add_tail(&ctx_buf->list, &infop->nvmet_ctx_list);
+ infop->nvmet_ctx_list_cnt++;
+ spin_unlock_irqrestore(&infop->nvmet_ctx_list_lock, iflag);
#endif
}
@@ -552,7 +559,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
/* lpfc_nvmet_xmt_fcp_release() will recycle the context */
} else {
ctxp->entry_cnt++;
- start_clean = offsetof(struct lpfc_iocbq, wqe);
+ start_clean = offsetof(struct lpfc_iocbq, iocb_flag);
memset(((char *)cmdwqe) + start_clean, 0,
(sizeof(struct lpfc_iocbq) - start_clean));
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -879,51 +886,54 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
};
static void
-lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
+__lpfc_nvmet_clean_io_for_cpu(struct lpfc_hba *phba,
+ struct lpfc_nvmet_ctx_info *infop)
{
struct lpfc_nvmet_ctxbuf *ctx_buf, *next_ctx_buf;
unsigned long flags;
- spin_lock_irqsave(&phba->sli4_hba.nvmet_ctx_get_lock, flags);
- spin_lock(&phba->sli4_hba.nvmet_ctx_put_lock);
+ spin_lock_irqsave(&infop->nvmet_ctx_list_lock, flags);
list_for_each_entry_safe(ctx_buf, next_ctx_buf,
- &phba->sli4_hba.lpfc_nvmet_ctx_get_list, list) {
+ &infop->nvmet_ctx_list, list) {
spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
list_del_init(&ctx_buf->list);
spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
- __lpfc_clear_active_sglq(phba,
- ctx_buf->sglq->sli4_lxritag);
+
+ __lpfc_clear_active_sglq(phba, ctx_buf->sglq->sli4_lxritag);
ctx_buf->sglq->state = SGL_FREED;
ctx_buf->sglq->ndlp = NULL;
spin_lock(&phba->sli4_hba.sgl_list_lock);
list_add_tail(&ctx_buf->sglq->list,
- &phba->sli4_hba.lpfc_nvmet_sgl_list);
+ &phba->sli4_hba.lpfc_nvmet_sgl_list);
spin_unlock(&phba->sli4_hba.sgl_list_lock);
lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
kfree(ctx_buf->context);
}
- list_for_each_entry_safe(ctx_buf, next_ctx_buf,
- &phba->sli4_hba.lpfc_nvmet_ctx_put_list, list) {
- spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
- list_del_init(&ctx_buf->list);
- spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
- __lpfc_clear_active_sglq(phba,
- ctx_buf->sglq->sli4_lxritag);
- ctx_buf->sglq->state = SGL_FREED;
- ctx_buf->sglq->ndlp = NULL;
+ spin_unlock_irqrestore(&infop->nvmet_ctx_list_lock, flags);
+}
- spin_lock(&phba->sli4_hba.sgl_list_lock);
- list_add_tail(&ctx_buf->sglq->list,
- &phba->sli4_hba.lpfc_nvmet_sgl_list);
- spin_unlock(&phba->sli4_hba.sgl_list_lock);
+static void
+lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
+{
+ struct lpfc_nvmet_ctx_info *infop;
+ int i, j;
- lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
- kfree(ctx_buf->context);
+ /* The first context list, MRQ 0 CPU 0 */
+ infop = phba->sli4_hba.nvmet_ctx_info;
+ if (!infop)
+ return;
+
+ /* Cycle the the entire CPU context list for every MRQ */
+ for (i = 0; i < phba->cfg_nvmet_mrq; i++) {
+ for (j = 0; j < phba->sli4_hba.num_present_cpu; j++) {
+ __lpfc_nvmet_clean_io_for_cpu(phba, infop);
+ infop++; /* next */
+ }
}
- spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
- spin_unlock_irqrestore(&phba->sli4_hba.nvmet_ctx_get_lock, flags);
+ kfree(phba->sli4_hba.nvmet_ctx_info);
+ phba->sli4_hba.nvmet_ctx_info = NULL;
}
static int
@@ -932,15 +942,71 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
struct lpfc_nvmet_ctxbuf *ctx_buf;
struct lpfc_iocbq *nvmewqe;
union lpfc_wqe128 *wqe;
- int i;
+ struct lpfc_nvmet_ctx_info *last_infop;
+ struct lpfc_nvmet_ctx_info *infop;
+ int i, j, idx;
lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
"6403 Allocate NVMET resources for %d XRIs\n",
phba->sli4_hba.nvmet_xri_cnt);
+ phba->sli4_hba.nvmet_ctx_info = kcalloc(
+ phba->sli4_hba.num_present_cpu * phba->cfg_nvmet_mrq,
+ sizeof(struct lpfc_nvmet_ctx_info), GFP_KERNEL);
+ if (!phba->sli4_hba.nvmet_ctx_info) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "6419 Failed allocate memory for "
+ "nvmet context lists\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Assuming X CPUs in the system, and Y MRQs, allocate some
+ * lpfc_nvmet_ctx_info structures as follows:
+ *
+ * cpu0/mrq0 cpu1/mrq0 ... cpuX/mrq0
+ * cpu0/mrq1 cpu1/mrq1 ... cpuX/mrq1
+ * ...
+ * cpuX/mrqY cpuX/mrqY ... cpuX/mrqY
+ *
+ * Each line represents a MRQ "silo" containing an entry for
+ * every CPU.
+ *
+ * MRQ X is initially assumed to be associated with CPU X, thus
+ * contexts are initially distributed across all MRQs using
+ * the MRQ index (N) as follows cpuN/mrqN. When contexts are
+ * freed, the are freed to the MRQ silo based on the CPU number
+ * of the IO completion. Thus a context that was allocated for MRQ A
+ * whose IO completed on CPU B will be freed to cpuB/mrqA.
+ */
+ infop = phba->sli4_hba.nvmet_ctx_info;
+ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+ for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
+ INIT_LIST_HEAD(&infop->nvmet_ctx_list);
+ spin_lock_init(&infop->nvmet_ctx_list_lock);
+ infop->nvmet_ctx_list_cnt = 0;
+ infop++;
+ }
+ }
+
+ /*
+ * Setup the next CPU context info ptr for each MRQ.
+ * MRQ 0 will cycle thru CPUs 0 - X separately from
+ * MRQ 1 cycling thru CPUs 0 - X, and so on.
+ */
+ for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
+ last_infop = lpfc_get_ctx_list(phba, 0, j);
+ for (i = phba->sli4_hba.num_present_cpu - 1; i >= 0; i--) {
+ infop = lpfc_get_ctx_list(phba, i, j);
+ infop->nvmet_ctx_next_cpu = last_infop;
+ last_infop = infop;
+ }
+ }
+
/* For all nvmet xris, allocate resources needed to process a
* received command on a per xri basis.
*/
+ idx = 0;
for (i = 0; i < phba->sli4_hba.nvmet_xri_cnt; i++) {
ctx_buf = kzalloc(sizeof(*ctx_buf), GFP_KERNEL);
if (!ctx_buf) {
@@ -977,7 +1043,6 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
/* Word 7 */
bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI);
bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3);
- bf_set(wqe_pu, &wqe->generic.wqe_com, 1);
/* Word 10 */
bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1);
bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
@@ -995,12 +1060,35 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
"6407 Ran out of NVMET XRIs\n");
return -ENOMEM;
}
- spin_lock(&phba->sli4_hba.nvmet_ctx_get_lock);
- list_add_tail(&ctx_buf->list,
- &phba->sli4_hba.lpfc_nvmet_ctx_get_list);
- spin_unlock(&phba->sli4_hba.nvmet_ctx_get_lock);
+
+ /*
+ * Add ctx to MRQidx context list. Our initial assumption
+ * is MRQidx will be associated with CPUidx. This association
+ * can change on the fly.
+ */
+ infop = lpfc_get_ctx_list(phba, idx, idx);
+ spin_lock(&infop->nvmet_ctx_list_lock);
+ list_add_tail(&ctx_buf->list, &infop->nvmet_ctx_list);
+ infop->nvmet_ctx_list_cnt++;
+ spin_unlock(&infop->nvmet_ctx_list_lock);
+
+ /* Spread ctx structures evenly across all MRQs */
+ idx++;
+ if (idx >= phba->cfg_nvmet_mrq)
+ idx = 0;
+ }
+
+ infop = phba->sli4_hba.nvmet_ctx_info;
+ for (j = 0; j < phba->cfg_nvmet_mrq; j++) {
+ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
+ "6408 TOTAL NVMET ctx for CPU %d "
+ "MRQ %d: cnt %d nextcpu %p\n",
+ i, j, infop->nvmet_ctx_list_cnt,
+ infop->nvmet_ctx_next_cpu);
+ infop++;
+ }
}
- phba->sli4_hba.nvmet_ctx_get_cnt = phba->sli4_hba.nvmet_xri_cnt;
return 0;
}
@@ -1365,10 +1453,65 @@ dropit:
#endif
}
+static struct lpfc_nvmet_ctxbuf *
+lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
+ struct lpfc_nvmet_ctx_info *current_infop)
+{
+ struct lpfc_nvmet_ctxbuf *ctx_buf = NULL;
+ struct lpfc_nvmet_ctx_info *get_infop;
+ int i;
+
+ /*
+ * The current_infop for the MRQ a NVME command IU was received
+ * on is empty. Our goal is to replenish this MRQs context
+ * list from a another CPUs.
+ *
+ * First we need to pick a context list to start looking on.
+ * nvmet_ctx_start_cpu has available context the last time
+ * we needed to replenish this CPU where nvmet_ctx_next_cpu
+ * is just the next sequential CPU for this MRQ.
+ */
+ if (current_infop->nvmet_ctx_start_cpu)
+ get_infop = current_infop->nvmet_ctx_start_cpu;
+ else
+ get_infop = current_infop->nvmet_ctx_next_cpu;
+
+ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+ if (get_infop == current_infop) {
+ get_infop = get_infop->nvmet_ctx_next_cpu;
+ continue;
+ }
+ spin_lock(&get_infop->nvmet_ctx_list_lock);
+
+ /* Just take the entire context list, if there are any */
+ if (get_infop->nvmet_ctx_list_cnt) {
+ list_splice_init(&get_infop->nvmet_ctx_list,
+ &current_infop->nvmet_ctx_list);
+ current_infop->nvmet_ctx_list_cnt =
+ get_infop->nvmet_ctx_list_cnt - 1;
+ get_infop->nvmet_ctx_list_cnt = 0;
+ spin_unlock(&get_infop->nvmet_ctx_list_lock);
+
+ current_infop->nvmet_ctx_start_cpu = get_infop;
+ list_remove_head(&current_infop->nvmet_ctx_list,
+ ctx_buf, struct lpfc_nvmet_ctxbuf,
+ list);
+ return ctx_buf;
+ }
+
+ /* Otherwise, move on to the next CPU for this MRQ */
+ spin_unlock(&get_infop->nvmet_ctx_list_lock);
+ get_infop = get_infop->nvmet_ctx_next_cpu;
+ }
+
+ /* Nothing found, all contexts for the MRQ are in-flight */
+ return NULL;
+}
+
/**
* lpfc_nvmet_unsol_fcp_buffer - Process an unsolicited event data buffer
* @phba: pointer to lpfc hba data structure.
- * @pring: pointer to a SLI ring.
+ * @idx: relative index of MRQ vector
* @nvmebuf: pointer to lpfc nvme command HBQ data structure.
*
* This routine is used for processing the WQE associated with a unsolicited
@@ -1380,22 +1523,26 @@ dropit:
**/
static void
lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
- struct lpfc_sli_ring *pring,
+ uint32_t idx,
struct rqb_dmabuf *nvmebuf,
uint64_t isr_timestamp)
{
-#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_nvmet_rcv_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
struct lpfc_nvmet_ctxbuf *ctx_buf;
+ struct lpfc_nvmet_ctx_info *current_infop;
uint32_t *payload;
uint32_t size, oxid, sid, rc, qno;
unsigned long iflag;
+ int current_cpu;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint32_t id;
#endif
+ if (!IS_ENABLED(CONFIG_NVME_TARGET_FC))
+ return;
+
ctx_buf = NULL;
if (!nvmebuf || !phba->targetport) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -1407,31 +1554,24 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
goto dropit;
}
- spin_lock_irqsave(&phba->sli4_hba.nvmet_ctx_get_lock, iflag);
- if (phba->sli4_hba.nvmet_ctx_get_cnt) {
- list_remove_head(&phba->sli4_hba.lpfc_nvmet_ctx_get_list,
+ /*
+ * Get a pointer to the context list for this MRQ based on
+ * the CPU this MRQ IRQ is associated with. If the CPU association
+ * changes from our initial assumption, the context list could
+ * be empty, thus it would need to be replenished with the
+ * context list from another CPU for this MRQ.
+ */
+ current_cpu = smp_processor_id();
+ current_infop = lpfc_get_ctx_list(phba, current_cpu, idx);
+ spin_lock_irqsave(&current_infop->nvmet_ctx_list_lock, iflag);
+ if (current_infop->nvmet_ctx_list_cnt) {
+ list_remove_head(&current_infop->nvmet_ctx_list,
ctx_buf, struct lpfc_nvmet_ctxbuf, list);
- phba->sli4_hba.nvmet_ctx_get_cnt--;
+ current_infop->nvmet_ctx_list_cnt--;
} else {
- spin_lock(&phba->sli4_hba.nvmet_ctx_put_lock);
- if (phba->sli4_hba.nvmet_ctx_put_cnt) {
- list_splice(&phba->sli4_hba.lpfc_nvmet_ctx_put_list,
- &phba->sli4_hba.lpfc_nvmet_ctx_get_list);
- INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_put_list);
- phba->sli4_hba.nvmet_ctx_get_cnt =
- phba->sli4_hba.nvmet_ctx_put_cnt;
- phba->sli4_hba.nvmet_ctx_put_cnt = 0;
- spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
-
- list_remove_head(
- &phba->sli4_hba.lpfc_nvmet_ctx_get_list,
- ctx_buf, struct lpfc_nvmet_ctxbuf, list);
- phba->sli4_hba.nvmet_ctx_get_cnt--;
- } else {
- spin_unlock(&phba->sli4_hba.nvmet_ctx_put_lock);
- }
+ ctx_buf = lpfc_nvmet_replenish_context(phba, current_infop);
}
- spin_unlock_irqrestore(&phba->sli4_hba.nvmet_ctx_get_lock, iflag);
+ spin_unlock_irqrestore(&current_infop->nvmet_ctx_list_lock, iflag);
fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
oxid = be16_to_cpu(fc_hdr->fh_ox_id);
@@ -1483,6 +1623,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
ctxp->size = size;
ctxp->oxid = oxid;
ctxp->sid = sid;
+ ctxp->idx = idx;
ctxp->state = LPFC_NVMET_STE_RCV;
ctxp->entry_cnt = 1;
ctxp->flag = 0;
@@ -1556,7 +1697,6 @@ dropit:
if (nvmebuf)
lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
-#endif
}
/**
@@ -1591,7 +1731,7 @@ lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
/**
* lpfc_nvmet_unsol_fcp_event - Process an unsolicited event from an nvme nport
* @phba: pointer to lpfc hba data structure.
- * @pring: pointer to a SLI ring.
+ * @idx: relative index of MRQ vector
* @nvmebuf: pointer to received nvme data structure.
*
* This routine is used to process an unsolicited event received from a SLI
@@ -1602,7 +1742,7 @@ lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
**/
void
lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
- struct lpfc_sli_ring *pring,
+ uint32_t idx,
struct rqb_dmabuf *nvmebuf,
uint64_t isr_timestamp)
{
@@ -1610,7 +1750,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
lpfc_rq_buf_free(phba, &nvmebuf->hbuf);
return;
}
- lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf,
+ lpfc_nvmet_unsol_fcp_buffer(phba, idx, nvmebuf,
isr_timestamp);
}
@@ -1863,6 +2003,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
nvmewqe->sli4_xritag);
/* Word 7 */
+ bf_set(wqe_pu, &wqe->fcp_tsend.wqe_com, 1);
bf_set(wqe_cmnd, &wqe->fcp_tsend.wqe_com, CMD_FCP_TSEND64_WQE);
/* Word 8 */
@@ -1939,7 +2080,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
case NVMET_FCOP_WRITEDATA:
/* Words 0 - 2 : The first sg segment */
- txrdy = pci_pool_alloc(phba->txrdy_payload_pool,
+ txrdy = dma_pool_alloc(phba->txrdy_payload_pool,
GFP_KERNEL, &physaddr);
if (!txrdy) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -1971,6 +2112,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
nvmewqe->sli4_xritag);
/* Word 7 */
+ bf_set(wqe_pu, &wqe->fcp_treceive.wqe_com, 1);
bf_set(wqe_ar, &wqe->fcp_treceive.wqe_com, 0);
bf_set(wqe_cmnd, &wqe->fcp_treceive.wqe_com,
CMD_FCP_TRECEIVE64_WQE);
@@ -2054,6 +2196,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
nvmewqe->sli4_xritag);
/* Word 7 */
+ bf_set(wqe_pu, &wqe->fcp_trsp.wqe_com, 0);
bf_set(wqe_ag, &wqe->fcp_trsp.wqe_com, 1);
bf_set(wqe_cmnd, &wqe->fcp_trsp.wqe_com, CMD_FCP_TRSP64_WQE);