diff options
author | Steve Wise <swise@opengridcomputing.com> | 2016-12-22 07:04:59 -0800 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2017-01-10 14:01:38 -0500 |
commit | 4fe7c2962e110dfd58e61888514726aac419562f (patch) | |
tree | a526b5b26a1cbb848545446cfa627f427410da2d /drivers/infiniband/hw/cxgb4/qp.c | |
parent | Linux 4.10-rc3 (diff) | |
download | linux-dev-4fe7c2962e110dfd58e61888514726aac419562f.tar.xz linux-dev-4fe7c2962e110dfd58e61888514726aac419562f.zip |
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/qp.c')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/qp.c | 112 |
1 files changed, 68 insertions, 44 deletions
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cda5542e13a2..31ab4512f827 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -776,6 +776,64 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) return 0; } +static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +{ + struct t4_cqe cqe = {}; + struct c4iw_cq *schp; + unsigned long flag; + struct t4_cq *cq; + + schp = to_c4iw_cq(qhp->ibqp.send_cq); + cq = &schp->cq; + + cqe.u.drain_cookie = wr->wr_id; + cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | + CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_TYPE_V(1) | + CQE_SWCQE_V(1) | + CQE_QPID_V(qhp->wq.sq.qid)); + + spin_lock_irqsave(&schp->lock, flag); + cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); + spin_unlock_irqrestore(&schp->lock, flag); + + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); +} + +static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +{ + struct t4_cqe cqe = {}; + struct c4iw_cq *rchp; + unsigned long flag; + struct t4_cq *cq; + + rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + cq = &rchp->cq; + + cqe.u.drain_cookie = wr->wr_id; + cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | + CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_TYPE_V(0) | + CQE_SWCQE_V(1) | + CQE_QPID_V(qhp->wq.sq.qid)); + + spin_lock_irqsave(&rchp->lock, flag); + cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); + spin_unlock_irqrestore(&rchp->lock, flag); + + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -794,8 +852,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); - *bad_wr = wr; - return -EINVAL; + complete_sq_drain_wr(qhp, wr); + return err; } num_wrs = t4_sq_avail(&qhp->wq); if (num_wrs == 0) { @@ -937,8 +995,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); - *bad_wr = wr; - return -EINVAL; + complete_rq_drain_wr(qhp, wr); + return err; } num_wrs = t4_rq_avail(&qhp->wq); if (num_wrs == 0) { @@ -1550,7 +1608,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } break; case C4IW_QP_STATE_CLOSING: - if (!internal) { + + /* + * Allow kernel users to move to ERROR for qp draining. + */ + if (!internal && (qhp->ibqp.uobject || attrs->next_state != + C4IW_QP_STATE_ERROR)) { ret = -EINVAL; goto out; } @@ -1763,8 +1826,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.max_ird = 0; qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); - init_completion(&qhp->sq_drained); - init_completion(&qhp->rq_drained); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); kref_init(&qhp->kref); @@ -1958,40 +2019,3 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; } - -static void move_qp_to_err(struct c4iw_qp *qp) -{ - struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR }; - - (void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); -} - -void c4iw_drain_sq(struct ib_qp *ibqp) -{ - struct c4iw_qp *qp = to_c4iw_qp(ibqp); - unsigned long flag; - bool need_to_wait; - - move_qp_to_err(qp); - spin_lock_irqsave(&qp->lock, flag); - need_to_wait = !t4_sq_empty(&qp->wq); - spin_unlock_irqrestore(&qp->lock, flag); - - if (need_to_wait) - wait_for_completion(&qp->sq_drained); -} - -void c4iw_drain_rq(struct ib_qp *ibqp) -{ - struct c4iw_qp *qp = to_c4iw_qp(ibqp); - unsigned long flag; - bool need_to_wait; - - move_qp_to_err(qp); - spin_lock_irqsave(&qp->lock, flag); - need_to_wait = !t4_rq_empty(&qp->wq); - spin_unlock_irqrestore(&qp->lock, flag); - - if (need_to_wait) - wait_for_completion(&qp->rq_drained); -} |