aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc/xprtrdma/svc_rdma_sendto.c
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2016-09-13 10:52:50 -0400
committerJ. Bruce Fields <bfields@redhat.com>2016-09-23 10:18:52 -0400
commitcace564f8b6260e806f5e28d7f192fd0e0c603ed (patch)
treee20368a823dcfb07faeea2d9bdfb21c6dbd43358 /net/sunrpc/xprtrdma/svc_rdma_sendto.c
parentnfsd: fix dprintk in nfsd4_encode_getdeviceinfo (diff)
downloadlinux-dev-cace564f8b6260e806f5e28d7f192fd0e0c603ed.tar.xz
linux-dev-cace564f8b6260e806f5e28d7f192fd0e0c603ed.zip
svcrdma: Tail iovec leaves an orphaned DMA mapping
The ctxt's count field is overloaded to mean the number of pages in the ctxt->page array and the number of SGEs in the ctxt->sge array. Typically these two numbers are the same. However, when an inline RPC reply is constructed from an xdr_buf with a tail iovec, the head and tail often occupy the same page, but each are DMA mapped independently. In that case, ->count equals the number of pages, but it does not equal the number of SGEs. There's one more SGE, for the tail iovec. Hence there is one more DMA mapping than there are pages in the ctxt->page array. This isn't a real problem until the server's iommu is enabled. Then each RPC reply that has content in that iovec orphans a DMA mapping that consists of real resources. krb5i and krb5p always populate that tail iovec. After a couple million sent krb5i/p RPC replies, the NFS server starts behaving erratically. Reboot is needed to clear the problem. Fixes: 9d11b51ce7c1 ("svcrdma: Fix send_reply() scatter/gather set-up") Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'net/sunrpc/xprtrdma/svc_rdma_sendto.c')
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c22
1 files changed, 4 insertions, 18 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 54d533300620..e5b49e62420c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -280,7 +280,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count++;
sge_off = 0;
@@ -489,7 +489,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[0].length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
goto err;
- atomic_inc(&rdma->sc_dma_used);
+ svc_rdma_count_mappings(rdma, ctxt);
ctxt->direction = DMA_TO_DEVICE;
@@ -505,7 +505,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
- atomic_inc(&rdma->sc_dma_used);
+ svc_rdma_count_mappings(rdma, ctxt);
ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
@@ -523,23 +523,9 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
- /*
- * If there are more pages than SGE, terminate SGE
- * list so that svc_rdma_unmap_dma doesn't attempt to
- * unmap garbage.
- */
- if (page_no+1 >= sge_no)
- ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
- /* The loop above bumps sc_dma_used for each sge. The
- * xdr_buf.tail gets a separate sge, but resides in the
- * same page as xdr_buf.head. Don't count it twice.
- */
- if (sge_no > ctxt->count)
- atomic_dec(&rdma->sc_dma_used);
-
if (sge_no > rdma->sc_max_sge) {
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err;
@@ -692,7 +678,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
svc_rdma_put_context(ctxt, 1);
return;
}
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
/* Prepare SEND WR */
memset(&err_wr, 0, sizeof(err_wr));