about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-21 10:22:20 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-21 10:22:20 -0800
commit: 99f1a5872b706094ece117368170a92c66b2e242 (patch)
tree: 0cac82e658e4ed5acd43db63ef3a381c42f4d4d4 /net
parent: Merge tag 'erofs-for-5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs (diff)
parent: nfsd: skip some unnecessary stats in the v4 case (diff)
download: linux-dev-99f1a5872b706094ece117368170a92c66b2e242.tar.xz
download: linux-dev-99f1a5872b706094ece117368170a92c66b2e242.zip
Merge tag 'nfsd-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever:

 - Update NFSv2 and NFSv3 XDR decoding functions
 - Further improve support for re-exporting NFS mounts
 - Convert NFSD stats to per-CPU counters
 - Add batch Receive posting to the server's RPC/RDMA transport

* tag 'nfsd-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (65 commits)
  nfsd: skip some unnecessary stats in the v4 case
  nfs: use change attribute for NFS re-exports
  NFSv4_2: SSC helper should use its own config.
  nfsd: cstate->session->se_client -> cstate->clp
  nfsd: simplify nfsd4_check_open_reclaim
  nfsd: remove unused set_client argument
  nfsd: find_cpntf_state cleanup
  nfsd: refactor set_client
  nfsd: rename lookup_clientid->set_client
  nfsd: simplify nfsd_renew
  nfsd: simplify process_lock
  nfsd4: simplify process_lookup1
  SUNRPC: Correct a comment
  svcrdma: DMA-sync the receive buffer in svc_rdma_recvfrom()
  svcrdma: Reduce Receive doorbell rate
  svcrdma: Deprecate stat variables that are no longer used
  svcrdma: Restore read and write stats
  svcrdma: Convert rdma_stat_sq_starve to a per-CPU counter
  svcrdma: Convert rdma_stat_recv to a per-CPU counter
  svcrdma: Refactor svc_rdma_init() and svc_rdma_clean_up()
  ...
Diffstat (limited to 'net')
-rw-r--r-- net/sunrpc/svc.c 2
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma.c 198
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 91
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_rw.c 3
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c 2
5 files changed, 175 insertions, 121 deletions
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4187745887f0..61fb8a18552c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -559,7 +559,7 @@ EXPORT_SYMBOL_GPL(svc_destroy);
/*
* Allocate an RPC server's buffer space.
- * We allocate pages and place them in rq_argpages.
+ * We allocate pages and place them in rq_pages.
*/
static int
svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 526da5d4710b..5bc20e9d09cd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -62,51 +62,47 @@ static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH;
static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH;
static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH;
+static unsigned int svcrdma_stat_unused;
+static unsigned int zero;
-atomic_t rdma_stat_recv;
-atomic_t rdma_stat_read;
-atomic_t rdma_stat_write;
-atomic_t rdma_stat_sq_starve;
-atomic_t rdma_stat_rq_starve;
-atomic_t rdma_stat_rq_poll;
-atomic_t rdma_stat_rq_prod;
-atomic_t rdma_stat_sq_poll;
-atomic_t rdma_stat_sq_prod;
+struct percpu_counter svcrdma_stat_read;
+struct percpu_counter svcrdma_stat_recv;
+struct percpu_counter svcrdma_stat_sq_starve;
+struct percpu_counter svcrdma_stat_write;
-/*
- * This function implements reading and resetting an atomic_t stat
- * variable through read/write to a proc file. Any write to the file
- * resets the associated statistic to zero. Any read returns it's
- * current value.
- */
-static int read_reset_stat(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+enum {
+ SVCRDMA_COUNTER_BUFSIZ = sizeof(unsigned long long),
+};
+
+static int svcrdma_counter_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
- atomic_t *stat = (atomic_t *)table->data;
-
- if (!stat)
- return -EINVAL;
-
- if (write)
- atomic_set(stat, 0);
- else {
- char str_buf[32];
- int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
- if (len >= 32)
- return -EFAULT;
- len = strlen(str_buf);
- if (*ppos > len) {
- *lenp = 0;
- return 0;
- }
- len -= *ppos;
- if (len > *lenp)
- len = *lenp;
- if (len)
- memcpy(buffer, str_buf, len);
- *lenp = len;
- *ppos += len;
+ struct percpu_counter *stat = (struct percpu_counter *)table->data;
+ char tmp[SVCRDMA_COUNTER_BUFSIZ + 1];
+ int len;
+
+ if (write) {
+ percpu_counter_set(stat, 0);
+ return 0;
}
+
+ len = snprintf(tmp, SVCRDMA_COUNTER_BUFSIZ, "%lld\n",
+ percpu_counter_sum_positive(stat));
+ if (len >= SVCRDMA_COUNTER_BUFSIZ)
+ return -EFAULT;
+ len = strlen(tmp);
+ if (*ppos > len) {
+ *lenp = 0;
+ return 0;
+ }
+ len -= *ppos;
+ if (len > *lenp)
+ len = *lenp;
+ if (len)
+ memcpy(buffer, tmp, len);
+ *lenp = len;
+ *ppos += len;
+
return 0;
}
@@ -142,66 +138,76 @@ static struct ctl_table svcrdma_parm_table[] = {
{
.procname = "rdma_stat_read",
- .data = &rdma_stat_read,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_read,
+ .maxlen = SVCRDMA_COUNTER_BUFSIZ,
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = svcrdma_counter_handler,
},
{
.procname = "rdma_stat_recv",
- .data = &rdma_stat_recv,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_recv,
+ .maxlen = SVCRDMA_COUNTER_BUFSIZ,
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = svcrdma_counter_handler,
},
{
.procname = "rdma_stat_write",
- .data = &rdma_stat_write,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_write,
+ .maxlen = SVCRDMA_COUNTER_BUFSIZ,
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = svcrdma_counter_handler,
},
{
.procname = "rdma_stat_sq_starve",
- .data = &rdma_stat_sq_starve,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_sq_starve,
+ .maxlen = SVCRDMA_COUNTER_BUFSIZ,
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = svcrdma_counter_handler,
},
{
.procname = "rdma_stat_rq_starve",
- .data = &rdma_stat_rq_starve,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{
.procname = "rdma_stat_rq_poll",
- .data = &rdma_stat_rq_poll,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{
.procname = "rdma_stat_rq_prod",
- .data = &rdma_stat_rq_prod,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{
.procname = "rdma_stat_sq_poll",
- .data = &rdma_stat_sq_poll,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{
.procname = "rdma_stat_sq_prod",
- .data = &rdma_stat_sq_prod,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{ },
};
@@ -224,27 +230,69 @@ static struct ctl_table svcrdma_root_table[] = {
{ },
};
+static void svc_rdma_proc_cleanup(void)
+{
+ if (!svcrdma_table_header)
+ return;
+ unregister_sysctl_table(svcrdma_table_header);
+ svcrdma_table_header = NULL;
+
+ percpu_counter_destroy(&svcrdma_stat_write);
+ percpu_counter_destroy(&svcrdma_stat_sq_starve);
+ percpu_counter_destroy(&svcrdma_stat_recv);
+ percpu_counter_destroy(&svcrdma_stat_read);
+}
+
+static int svc_rdma_proc_init(void)
+{
+ int rc;
+
+ if (svcrdma_table_header)
+ return 0;
+
+ rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL);
+ if (rc)
+ goto out_err;
+ rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL);
+ if (rc)
+ goto out_err;
+ rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL);
+ if (rc)
+ goto out_err;
+ rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL);
+ if (rc)
+ goto out_err;
+
+ svcrdma_table_header = register_sysctl_table(svcrdma_root_table);
+ return 0;
+
+out_err:
+ percpu_counter_destroy(&svcrdma_stat_sq_starve);
+ percpu_counter_destroy(&svcrdma_stat_recv);
+ percpu_counter_destroy(&svcrdma_stat_read);
+ return rc;
+}
+
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
- if (svcrdma_table_header) {
- unregister_sysctl_table(svcrdma_table_header);
- svcrdma_table_header = NULL;
- }
svc_unreg_xprt_class(&svc_rdma_class);
+ svc_rdma_proc_cleanup();
}
int svc_rdma_init(void)
{
+ int rc;
+
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
- if (!svcrdma_table_header)
- svcrdma_table_header =
- register_sysctl_table(svcrdma_root_table);
+ rc = svc_rdma_proc_init();
+ if (rc)
+ return rc;
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index cbdb71247755..6d28f23ceb35 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -266,33 +266,46 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp)
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
-static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
- struct svc_rdma_recv_ctxt *ctxt)
+static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
+ unsigned int wanted, bool temp)
{
+ const struct ib_recv_wr *bad_wr = NULL;
+ struct svc_rdma_recv_ctxt *ctxt;
+ struct ib_recv_wr *recv_chain;
int ret;
- trace_svcrdma_post_recv(ctxt);
- ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
+ recv_chain = NULL;
+ while (wanted--) {
+ ctxt = svc_rdma_recv_ctxt_get(rdma);
+ if (!ctxt)
+ break;
+
+ trace_svcrdma_post_recv(ctxt);
+ ctxt->rc_temp = temp;
+ ctxt->rc_recv_wr.next = recv_chain;
+ recv_chain = &ctxt->rc_recv_wr;
+ rdma->sc_pending_recvs++;
+ }
+ if (!recv_chain)
+ return false;
+
+ ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
if (ret)
goto err_post;
- return 0;
+ return true;
err_post:
- trace_svcrdma_rq_post_err(rdma, ret);
- svc_rdma_recv_ctxt_put(rdma, ctxt);
- return ret;
-}
-
-static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
-{
- struct svc_rdma_recv_ctxt *ctxt;
+ while (bad_wr) {
+ ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
+ rc_recv_wr);
+ bad_wr = bad_wr->next;
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ }
- if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
- return 0;
- ctxt = svc_rdma_recv_ctxt_get(rdma);
- if (!ctxt)
- return -ENOMEM;
- return __svc_rdma_post_recv(rdma, ctxt);
+ trace_svcrdma_rq_post_err(rdma, ret);
+ /* Since we're destroying the xprt, no need to reset
+ * sc_pending_recvs. */
+ return false;
}
/**
@@ -303,20 +316,7 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
- struct svc_rdma_recv_ctxt *ctxt;
- unsigned int i;
- int ret;
-
- for (i = 0; i < rdma->sc_max_requests; i++) {
- ctxt = svc_rdma_recv_ctxt_get(rdma);
- if (!ctxt)
- return false;
- ctxt->rc_temp = true;
- ret = __svc_rdma_post_recv(rdma, ctxt);
- if (ret)
- return false;
- }
- return true;
+ return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
}
/**
@@ -324,8 +324,6 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
* @cq: Completion Queue context
* @wc: Work Completion object
*
- * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
- * the Receive completion handler could be running.
*/
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
@@ -333,6 +331,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_recv_ctxt *ctxt;
+ rdma->sc_pending_recvs--;
+
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
@@ -340,14 +340,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS)
goto flushed;
- if (svc_rdma_post_recv(rdma))
- goto post_err;
-
/* All wc fields are now known to be valid */
ctxt->rc_byte_len = wc->byte_len;
- ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
- ctxt->rc_recv_sge.addr,
- wc->byte_len, DMA_FROM_DEVICE);
spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
@@ -356,11 +350,18 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt);
+
+ if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) &&
+ rdma->sc_pending_recvs < rdma->sc_max_requests)
+ if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH,
+ false))
+ goto post_err;
+
return;
flushed:
-post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt);
+post_err:
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
}
@@ -845,9 +846,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
}
list_del(&ctxt->rc_list);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
+ percpu_counter_inc(&svcrdma_stat_recv);
- atomic_inc(&rdma_stat_recv);
-
+ ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
+ ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
+ DMA_FROM_DEVICE);
svc_rdma_build_arg_xdr(rqstp, ctxt);
/* Prevent svc_xprt_release from releasing pages in rq_pages
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 0b63e1321d74..693d139a8633 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -364,6 +364,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
return 0;
}
+ percpu_counter_inc(&svcrdma_stat_sq_starve);
trace_svcrdma_sq_full(rdma);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
@@ -468,6 +469,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
DMA_TO_DEVICE);
if (ret < 0)
return -EIO;
+ percpu_counter_inc(&svcrdma_stat_write);
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
@@ -718,6 +720,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
segment->rs_handle, DMA_FROM_DEVICE);
if (ret < 0)
return -EIO;
+ percpu_counter_inc(&svcrdma_stat_read);
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 68af79d4f04f..52c759a8543e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -317,7 +317,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
/* If the SQ is full, wait until an SQ entry is available */
while (1) {
if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
- atomic_inc(&rdma_stat_sq_starve);
+ percpu_counter_inc(&svcrdma_stat_sq_starve);
trace_svcrdma_sq_full(rdma);
atomic_inc(&rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,