aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1
diff options
context:
space:
mode:
authorJianxin Xiong <jianxin.xiong@intel.com>2016-07-25 13:39:45 -0700
committerDoug Ledford <dledford@redhat.com>2016-08-02 16:00:58 -0400
commitd9b13c203003cfb78c1f216049a204d385ccaeff (patch)
tree9df5767ede1a28416cb31e8df3f9ab5eeca21f85 /drivers/infiniband/hw/hfi1
parentIB/hfi1: Add the capability for reserved operations (diff)
downloadlinux-dev-d9b13c203003cfb78c1f216049a204d385ccaeff.tar.xz
linux-dev-d9b13c203003cfb78c1f216049a204d385ccaeff.zip
IB/rdmavt, hfi1: Fix NFSoRDMA failure with FRMR enabled
Hanging has been observed while writing a file over NFSoRDMA. Dmesg on the server contains messages like these: [ 931.992501] svcrdma: Error -22 posting RDMA_READ [ 952.076879] svcrdma: Error -22 posting RDMA_READ [ 982.154127] svcrdma: Error -22 posting RDMA_READ [ 1012.235884] svcrdma: Error -22 posting RDMA_READ [ 1042.319194] svcrdma: Error -22 posting RDMA_READ Here is why: With the base memory management extension enabled, FRMR is used instead of FMR. The xprtrdma server issues each RDMA read request as the following bundle: (1)IB_WR_REG_MR, signaled; (2)IB_WR_RDMA_READ, signaled; (3)IB_WR_LOCAL_INV, signaled & fencing. These requests are signaled. In order to generate completion, the fast register work request is processed by the hfi1 send engine after being posted to the work queue, and the corresponding lkey is not valid until the request is processed. However, the rdmavt driver validates lkey when the RDMA read request is posted and thus it fails immediately with error -EINVAL (-22). This patch changes the work flow of local operations (fast register and local invalidate) so that fast register work requests are always processed immediately to ensure that the corresponding lkey is valid when subsequent work requests are posted. Local invalidate requests are processed immediately if fencing is not required and no previous local invalidate request is pending. To allow completion generation for signaled local operations that have been processed before posting to the work queue, an internal send flag RVT_SEND_COMPLETION_ONLY is added. The hfi1 send engine checks this flag and only generates completion for such requests. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c17
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c13
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c15
3 files changed, 23 insertions, 22 deletions
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 0bc43b67d0b8..5da190e6011b 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -402,7 +402,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
char newreq;
int middle = 0;
int delta;
- int err;
ps->s_txreq = get_txreq(ps->dev, qp);
if (IS_ERR(ps->s_txreq))
@@ -484,25 +483,27 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
*/
if (wqe->wr.opcode == IB_WR_REG_MR ||
wqe->wr.opcode == IB_WR_LOCAL_INV) {
+ int local_ops = 0;
+ int err = 0;
+
if (qp->s_last != qp->s_cur)
goto bail;
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
if (++qp->s_tail == qp->s_size)
qp->s_tail = 0;
- if (wqe->wr.opcode == IB_WR_REG_MR)
- err = rvt_fast_reg_mr(
- qp, wqe->reg_wr.mr,
- wqe->reg_wr.key,
- wqe->reg_wr.access);
- else
+ if (!(wqe->wr.send_flags &
+ RVT_SEND_COMPLETION_ONLY)) {
err = rvt_invalidate_rkey(
qp,
wqe->wr.ex.invalidate_rkey);
+ local_ops = 1;
+ }
hfi1_send_complete(qp, wqe,
err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
- atomic_dec(&qp->local_ops_pending);
+ if (local_ops)
+ atomic_dec(&qp->local_ops_pending);
qp->s_hdrwords = 0;
goto done_free_tx;
}
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 76b9c9e42d86..7e76d33a5774 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -442,16 +442,15 @@ again:
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_REG_MR:
- if (rvt_fast_reg_mr(sqp, wqe->reg_wr.mr, wqe->reg_wr.key,
- wqe->reg_wr.access))
- send_status = IB_WC_LOC_PROT_ERR;
- local_ops = 1;
goto send_comp;
case IB_WR_LOCAL_INV:
- if (rvt_invalidate_rkey(sqp, wqe->wr.ex.invalidate_rkey))
- send_status = IB_WC_LOC_PROT_ERR;
- local_ops = 1;
+ if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
+ if (rvt_invalidate_rkey(sqp,
+ wqe->wr.ex.invalidate_rkey))
+ send_status = IB_WC_LOC_PROT_ERR;
+ local_ops = 1;
+ }
goto send_comp;
case IB_WR_SEND_WITH_INV:
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index ef6c96cd3d68..a726d96d185f 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -77,7 +77,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
u32 len;
u32 pmtu = qp->pmtu;
int middle = 0;
- int err;
ps->s_txreq = get_txreq(ps->dev, qp);
if (IS_ERR(ps->s_txreq))
@@ -125,20 +124,22 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
*/
if (wqe->wr.opcode == IB_WR_REG_MR ||
wqe->wr.opcode == IB_WR_LOCAL_INV) {
+ int local_ops = 0;
+ int err = 0;
+
if (qp->s_last != qp->s_cur)
goto bail;
if (++qp->s_cur == qp->s_size)
qp->s_cur = 0;
- if (wqe->wr.opcode == IB_WR_REG_MR)
- err = rvt_fast_reg_mr(qp, wqe->reg_wr.mr,
- wqe->reg_wr.key,
- wqe->reg_wr.access);
- else
+ if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
err = rvt_invalidate_rkey(
qp, wqe->wr.ex.invalidate_rkey);
+ local_ops = 1;
+ }
hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
- atomic_dec(&qp->local_ops_pending);
+ if (local_ops)
+ atomic_dec(&qp->local_ops_pending);
qp->s_hdrwords = 0;
goto done_free_tx;
}