aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/sw/rxe/rxe_req.c
diff options
context:
space:
mode:
authorBob Pearson <rpearsonhpe@gmail.com>2022-06-30 14:04:22 -0500
committerJason Gunthorpe <jgg@nvidia.com>2022-07-22 17:43:00 -0300
commit445fd4f4fb76d513de6b05b08b3a4d0bb980fc80 (patch)
treea3ca2186041f8be3d10592944b8b537e9d513d98 /drivers/infiniband/sw/rxe/rxe_req.c
parentRDMA/rxe: Add rxe_is_fenced() subroutine (diff)
downloadlinux-dev-445fd4f4fb76d513de6b05b08b3a4d0bb980fc80.tar.xz
linux-dev-445fd4f4fb76d513de6b05b08b3a4d0bb980fc80.zip
RDMA/rxe: Fix rnr retry behavior
Currently the completer tasklet when retransmit timer or the rnr timer fires the same flag (qp->req.need_retry) is set so that if either timer fires it will attempt to perform a retry flow on the send queue. This has the effect of responding to an RNR NAK at the first retransmit timer event which might not allow the requested rnr timeout. This patch adds a new flag (qp->req.wait_for_rnr_timer) which, if set, prevents a retry flow until the rnr nak timer fires. This patch fixes rnr retry errors which can be observed by running the pyverbs test_rdmacm_async_traffic_external_qp multiple times. With this patch applied they do not occur. Link: https://lore.kernel.org/linux-rdma/a8287823-1408-4273-bc22-99a0678db640@gmail.com/ Link: https://lore.kernel.org/linux-rdma/2bafda9e-2bb6-186d-12a1-179e8f6a2678@talpey.com/ Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20220630190425.2251-6-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_req.c')
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c15
1 files changed, 13 insertions, 2 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 12a4a47ed969..f33699a094e0 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -100,7 +100,11 @@ void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
- pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
+ pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+
+ /* request a send queue retry */
+ qp->req.need_retry = 1;
+ qp->req.wait_for_rnr_timer = 0;
rxe_run_task(&qp->req.task, 1);
}
@@ -641,10 +645,17 @@ next_wqe:
qp->req.need_rd_atomic = 0;
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
+ qp->req.wait_for_rnr_timer = 0;
goto exit;
}
- if (unlikely(qp->req.need_retry)) {
+ /* we come here if the retransmot timer has fired
+ * or if the rnr timer has fired. If the retransmit
+ * timer fires while we are processing an RNR NAK wait
+ * until the rnr timer has fired before starting the
+ * retry flow
+ */
+ if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
req_retry(qp);
qp->req.need_retry = 0;
}