From 1cf24dcef4e1dd0c34d8c39b09a9ce9a01accc72 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 6 Aug 2013 21:04:35 +0530 Subject: RDMA/cxgb4: Fix QP flush logic This patch makes following fixes in QP flush logic: - correctly flushes unsignaled WRs followed by a signaled WR - supports for flushing a CQ bound to multiple QPs - resets cidx_flush if a active queue starts getting HW CQEs again - marks WQ in error when we leave RTS. This was only being done for user queues, but we need it for kernel queues too so that post_send/post_recv will start returning the appropriate error synchronously - eats unsignaled read resp CQEs. HW always inserts CQEs so we must silently discard them if the read work request was unsignaled. - handles QP flushes with pending SW CQEs. The flush and out of order completion logic has a bug where if out of order completions are flushed but not yet polled by the consumer and the qp is then flushed then we end up inserting duplicate completions. - c4iw_flush_sq() should only flush wrs that have not already been flushed. Since we already track where in the SQ we've flushed via sq.cidx_flush, just start at that point and flush any remaining. This bug only caused a problem in the presence of unsignaled work requests. Signed-off-by: Steve Wise Signed-off-by: Vipul Pandya [ Fixed sparse warning due to htonl/ntohl confusion. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 326 +++++++++++++++++++++++++-------------- 1 file changed, 210 insertions(+), 116 deletions(-) (limited to 'drivers/infiniband/hw/cxgb4/cq.c') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 0f1607c8325a..6657390de956 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -225,43 +225,186 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq, t4_swcq_produce(cq); } -int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count) +static void advance_oldest_read(struct t4_wq *wq); + +int c4iw_flush_sq(struct c4iw_qp *qhp) { int flushed = 0; - struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count]; - int in_use = wq->sq.in_use - count; - - BUG_ON(in_use < 0); - while (in_use--) { - swsqe->signaled = 0; - insert_sq_cqe(wq, cq, swsqe); - swsqe++; - if (swsqe == (wq->sq.sw_sq + wq->sq.size)) - swsqe = wq->sq.sw_sq; - flushed++; + struct t4_wq *wq = &qhp->wq; + struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq); + struct t4_cq *cq = &chp->cq; + int idx; + struct t4_swsqe *swsqe; + int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING && + qhp->attr.state != C4IW_QP_STATE_IDLE); + + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + idx = wq->sq.flush_cidx; + BUG_ON(idx >= wq->sq.size); + while (idx != wq->sq.pidx) { + if (error) { + swsqe = &wq->sq.sw_sq[idx]; + BUG_ON(swsqe->flushed); + swsqe->flushed = 1; + insert_sq_cqe(wq, cq, swsqe); + if (wq->sq.oldest_read == swsqe) { + BUG_ON(swsqe->opcode != FW_RI_READ_REQ); + advance_oldest_read(wq); + } + flushed++; + } else { + t4_sq_consume(wq); + } + if (++idx == wq->sq.size) + idx = 0; } + wq->sq.flush_cidx += flushed; + if (wq->sq.flush_cidx >= wq->sq.size) + wq->sq.flush_cidx -= wq->sq.size; return flushed; } +static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) +{ + struct t4_swsqe *swsqe; + int cidx; + + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + cidx = wq->sq.flush_cidx; + BUG_ON(cidx > wq->sq.size); + + while (cidx != wq->sq.pidx) { + swsqe = &wq->sq.sw_sq[cidx]; + if (!swsqe->signaled) { + if (++cidx == wq->sq.size) + cidx = 0; + } else if (swsqe->complete) { + + BUG_ON(swsqe->flushed); + + /* + * Insert this completed cqe into the swcq. + */ + PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n", + __func__, cidx, cq->sw_pidx); + swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); + cq->sw_queue[cq->sw_pidx] = swsqe->cqe; + t4_swcq_produce(cq); + swsqe->flushed = 1; + if (++cidx == wq->sq.size) + cidx = 0; + wq->sq.flush_cidx = cidx; + } else + break; + } +} + +static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, + struct t4_cqe *read_cqe) +{ + read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; + read_cqe->len = htonl(wq->sq.oldest_read->read_len); + read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | + V_CQE_SWCQE(SW_CQE(hw_cqe)) | + V_CQE_OPCODE(FW_RI_READ_REQ) | + V_CQE_TYPE(1)); + read_cqe->bits_type_ts = hw_cqe->bits_type_ts; +} + +static void advance_oldest_read(struct t4_wq *wq) +{ + + u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; + + if (rptr == wq->sq.size) + rptr = 0; + while (rptr != wq->sq.pidx) { + wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; + + if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) + return; + if (++rptr == wq->sq.size) + rptr = 0; + } + wq->sq.oldest_read = NULL; +} + /* * Move all CQEs from the HWCQ into the SWCQ. + * Deal with out-of-order and/or completions that complete + * prior unsignalled WRs. */ -void c4iw_flush_hw_cq(struct t4_cq *cq) +void c4iw_flush_hw_cq(struct c4iw_cq *chp) { - struct t4_cqe *cqe = NULL, *swcqe; + struct t4_cqe *hw_cqe, *swcqe, read_cqe; + struct c4iw_qp *qhp; + struct t4_swsqe *swsqe; int ret; - PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); - ret = t4_next_hw_cqe(cq, &cqe); + PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid); + ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); + + /* + * This logic is similar to poll_cq(), but not quite the same + * unfortunately. Need to move pertinent HW CQEs to the SW CQ but + * also do any translation magic that poll_cq() normally does. + */ while (!ret) { - PDBG("%s flushing hwcq cidx 0x%x swcq pidx 0x%x\n", - __func__, cq->cidx, cq->sw_pidx); - swcqe = &cq->sw_queue[cq->sw_pidx]; - *swcqe = *cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - t4_swcq_produce(cq); - t4_hwcq_consume(cq); - ret = t4_next_hw_cqe(cq, &cqe); + qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe)); + + /* + * drop CQEs with no associated QP + */ + if (qhp == NULL) + goto next_cqe; + + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) + goto next_cqe; + + if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { + + /* + * drop peer2peer RTR reads. + */ + if (CQE_WRID_STAG(hw_cqe) == 1) + goto next_cqe; + + /* + * Eat completions for unsignaled read WRs. + */ + if (!qhp->wq.sq.oldest_read->signaled) { + advance_oldest_read(&qhp->wq); + goto next_cqe; + } + + /* + * Don't write to the HWCQ, create a new read req CQE + * in local memory and move it into the swcq. + */ + create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe); + hw_cqe = &read_cqe; + advance_oldest_read(&qhp->wq); + } + + /* if its a SQ completion, then do the magic to move all the + * unsignaled and now in-order completions into the swcq. + */ + if (SQ_TYPE(hw_cqe)) { + swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; + swsqe->cqe = *hw_cqe; + swsqe->complete = 1; + flush_completed_wrs(&qhp->wq, &chp->cq); + } else { + swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx]; + *swcqe = *hw_cqe; + swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); + t4_swcq_produce(&chp->cq); + } +next_cqe: + t4_hwcq_consume(&chp->cq); + ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); } } @@ -281,25 +424,6 @@ static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) return 1; } -void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count) -{ - struct t4_cqe *cqe; - u32 ptr; - - *count = 0; - ptr = cq->sw_cidx; - while (ptr != cq->sw_pidx) { - cqe = &cq->sw_queue[ptr]; - if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && - wq->sq.oldest_read)) && - (CQE_QPID(cqe) == wq->sq.qid)) - (*count)++; - if (++ptr == cq->size) - ptr = 0; - } - PDBG("%s cq %p count %d\n", __func__, cq, *count); -} - void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) { struct t4_cqe *cqe; @@ -319,70 +443,6 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) PDBG("%s cq %p count %d\n", __func__, cq, *count); } -static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) -{ - struct t4_swsqe *swsqe; - u16 ptr = wq->sq.cidx; - int count = wq->sq.in_use; - int unsignaled = 0; - - swsqe = &wq->sq.sw_sq[ptr]; - while (count--) - if (!swsqe->signaled) { - if (++ptr == wq->sq.size) - ptr = 0; - swsqe = &wq->sq.sw_sq[ptr]; - unsignaled++; - } else if (swsqe->complete) { - - /* - * Insert this completed cqe into the swcq. - */ - PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n", - __func__, ptr, cq->sw_pidx); - swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); - cq->sw_queue[cq->sw_pidx] = swsqe->cqe; - t4_swcq_produce(cq); - swsqe->signaled = 0; - wq->sq.in_use -= unsignaled; - break; - } else - break; -} - -static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, - struct t4_cqe *read_cqe) -{ - read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; - read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len); - read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | - V_CQE_SWCQE(SW_CQE(hw_cqe)) | - V_CQE_OPCODE(FW_RI_READ_REQ) | - V_CQE_TYPE(1)); - read_cqe->bits_type_ts = hw_cqe->bits_type_ts; -} - -/* - * Return a ptr to the next read wr in the SWSQ or NULL. - */ -static void advance_oldest_read(struct t4_wq *wq) -{ - - u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; - - if (rptr == wq->sq.size) - rptr = 0; - while (rptr != wq->sq.pidx) { - wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; - - if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) - return; - if (++rptr == wq->sq.size) - rptr = 0; - } - wq->sq.oldest_read = NULL; -} - /* * poll_cq * @@ -426,6 +486,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, goto skip_cqe; } + /* + * skip hw cqe's if the wq is flushed. + */ + if (wq->flushed && !SW_CQE(hw_cqe)) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * skip TERMINATE cqes... + */ + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { + ret = -EAGAIN; + goto skip_cqe; + } + /* * Gotta tweak READ completions: * 1) the cqe doesn't contain the sq_wptr from the wr. @@ -440,13 +516,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, * was generated by the kernel driver as part of peer-2-peer * connection setup. So ignore the completion. */ - if (!wq->sq.oldest_read) { + if (CQE_WRID_STAG(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) t4_set_wq_in_error(wq); ret = -EAGAIN; goto skip_cqe; } + /* + * Eat completions for unsignaled read WRs. + */ + if (!wq->sq.oldest_read->signaled) { + advance_oldest_read(wq); + ret = -EAGAIN; + goto skip_cqe; + } + /* * Don't write to the HWCQ, so create a new read req CQE * in local memory. @@ -457,14 +542,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, } if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { - *cqe_flushed = t4_wq_in_error(wq); + *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); t4_set_wq_in_error(wq); - goto proc_cqe; - } - - if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { - ret = -EAGAIN; - goto skip_cqe; } /* @@ -523,7 +602,21 @@ proc_cqe: * completion. */ if (SQ_TYPE(hw_cqe)) { - wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe); + int idx = CQE_WRID_SQ_IDX(hw_cqe); + BUG_ON(idx > wq->sq.size); + + /* + * Account for any unsignaled completions completed by + * this signaled completion. In this case, cidx points + * to the first unsignaled one, and idx points to the + * signaled one. So adjust in_use based on this delta. + * if this is not completing any unsigned wrs, then the + * delta will be 0. + */ + wq->sq.in_use -= idx - wq->sq.cidx; + BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size); + + wq->sq.cidx = (uint16_t)idx; PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx); *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; t4_sq_consume(wq); @@ -532,6 +625,7 @@ proc_cqe: *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; BUG_ON(t4_rq_empty(wq)); t4_rq_consume(wq); + goto skip_cqe; } flush_wq: -- cgit v1.2.3-59-g8ed1b