author     Linus Torvalds <torvalds@linux-foundation.org>  2025-05-28 12:21:12 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-05-28 12:21:12 -0700
commit     2c26b68cd5c51689b8cee9cb6a21abb5d2ab2d0f
tree       becf130707976d0104b33e66666aae0bf5de9a4d
parent     Merge tag 'ext4_for_linus-6.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
parent     xdrgen: Fix code generated for counted arrays
Merge tag 'nfsd-6.16' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever:
 "The marquee feature for this release is that the limit on the maximum
  rsize and wsize has been raised to 4MB. The default remains at 1MB, but
  risk-seeking administrators now have the ability to try larger I/O sizes
  with NFS clients that support them. Eventually the default setting will
  be increased when we have confidence that this change will not have
  negative impact.

  With v6.16, NFSD now has its own debugfs file system where we can add
  experimental features and make them available outside of our development
  community without impacting production deployments. The first
  experimental setting added is one that makes all NFS READ operations use
  vfs_iter_read() instead of the NFSD splice actor. The plan is to
  eventually retire the splice actor, as that will enable a number of new
  capabilities such as the use of struct bio_vec from the top to the
  bottom of the NFSD stack.

  Jeff Layton contributed a number of observability improvements. The use
  of dprintk() in a number of high-traffic code paths has been replaced
  with static trace points.

  This release sees the continuation of efforts to harden the NFSv4.2 COPY
  operation. Soon, the restriction on async COPY operations can be lifted.

  Many thanks to the contributors, reviewers, testers, and bug reporters
  who participated during the v6.16 development cycle"

* tag 'nfsd-6.16' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (60 commits)
  xdrgen: Fix code generated for counted arrays
  SUNRPC: Bump the maximum payload size for the server
  NFSD: Add a "default" block size
  NFSD: Remove NFSSVC_MAXBLKSIZE_V2 macro
  NFSD: Remove NFSD_BUFSIZE
  sunrpc: Remove the RPCSVC_MAXPAGES macro
  svcrdma: Adjust the number of entries in svc_rdma_send_ctxt::sc_pages
  svcrdma: Adjust the number of entries in svc_rdma_recv_ctxt::rc_pages
  sunrpc: Adjust size of socket's receive page array dynamically
  SUNRPC: Remove svc_rqst :: rq_vec
  SUNRPC: Remove svc_fill_write_vector()
  NFSD: Use rqstp->rq_bvec in nfsd_iter_write()
  SUNRPC: Export xdr_buf_to_bvec()
  NFSD: De-duplicate the svc_fill_write_vector() call sites
  NFSD: Use rqstp->rq_bvec in nfsd_iter_read()
  sunrpc: Replace the rq_bvec array with dynamically-allocated memory
  sunrpc: Replace the rq_pages array with dynamically-allocated memory
  sunrpc: Remove backchannel check in svc_init_buffer()
  sunrpc: Add a helper to derive maxpages from sv_max_mesg
  svcrdma: Reduce the number of rdma_rw contexts per-QP
  ...
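Editor's note: almost every hunk below hangs off one new helper, svc_serv_maxpages(), which derives a per-service page count from sv_max_mesg (see "sunrpc: Add a helper to derive maxpages from sv_max_mesg" in the list above). The helper lands outside net/ and so does not appear in this diff; as a minimal sketch of its shape, with the exact constants being an assumption here:

	/* Sketch only: pages needed to hold one maximum-size RPC message,
	 * plus slack for the request and reply headers and a terminating
	 * NULL entry. The real definition is in include/linux/sunrpc/svc.h.
	 */
	static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
	{
		return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1;
	}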
Diffstat (limited to 'net')
-rw-r--r--  net/sunrpc/cache.c                          17
-rw-r--r--  net/sunrpc/svc.c                            80
-rw-r--r--  net/sunrpc/svc_xprt.c                       11
-rw-r--r--  net/sunrpc/svcsock.c                        17
-rw-r--r--  net/sunrpc/xdr.c                             1
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c      8
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_rw.c            2
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c       16
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c    15
9 files changed, 74 insertions, 93 deletions
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7ce5e28a6c03..131090f31e6a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -135,6 +135,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
hlist_add_head_rcu(&new->cache_list, head);
detail->entries++;
+ if (detail->nextcheck > new->expiry_time)
+ detail->nextcheck = new->expiry_time + 1;
cache_get(new);
spin_unlock(&detail->hash_lock);
@@ -462,24 +464,21 @@ static int cache_clean(void)
}
}
+ spin_lock(&current_detail->hash_lock);
+
/* find a non-empty bucket in the table */
- while (current_detail &&
- current_index < current_detail->hash_size &&
+ while (current_index < current_detail->hash_size &&
hlist_empty(&current_detail->hash_table[current_index]))
current_index++;
/* find a cleanable entry in the bucket and clean it, or set to next bucket */
-
- if (current_detail && current_index < current_detail->hash_size) {
+ if (current_index < current_detail->hash_size) {
struct cache_head *ch = NULL;
struct cache_detail *d;
struct hlist_head *head;
struct hlist_node *tmp;
- spin_lock(&current_detail->hash_lock);
-
/* Ok, now to clean this strand */
-
head = &current_detail->hash_table[current_index];
hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
if (current_detail->nextcheck > ch->expiry_time)
@@ -500,8 +499,10 @@ static int cache_clean(void)
spin_unlock(&cache_list_lock);
if (ch)
sunrpc_end_cache_remove_entry(ch, d);
- } else
+ } else {
+ spin_unlock(&current_detail->hash_lock);
spin_unlock(&cache_list_lock);
+ }
return rv;
}
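Taken together, the cache_clean() hunks widen the hash_lock critical section so that the empty-bucket scan and the entry walk run under the same lock. A simplified sketch of the resulting pattern (not the verbatim function):

	spin_lock(&current_detail->hash_lock);

	/* With the lock held across the scan, a concurrent
	 * sunrpc_cache_add_entry() cannot slip a new entry into a bucket
	 * the cleaner has already decided is empty.
	 */
	while (current_index < current_detail->hash_size &&
	       hlist_empty(&current_detail->hash_table[current_index]))
		current_index++;

	if (current_index < current_detail->hash_size) {
		/* walk the bucket, unhash at most one expired entry,
		 * then drop the lock before freeing it
		 */
	} else {
		spin_unlock(&current_detail->hash_lock);
	}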
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e7f9c295d13c..939b6239df8a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -636,24 +636,22 @@ svc_destroy(struct svc_serv **servp)
EXPORT_SYMBOL_GPL(svc_destroy);
static bool
-svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
+svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
{
- unsigned long pages, ret;
+ unsigned long ret;
- /* bc_xprt uses fore channel allocated buffers */
- if (svc_is_backchannel(rqstp))
- return true;
+ rqstp->rq_maxpages = svc_serv_maxpages(serv);
- pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
- * We assume one is at most one page
- */
- WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
- if (pages > RPCSVC_MAXPAGES)
- pages = RPCSVC_MAXPAGES;
+ /* rq_pages' last entry is NULL for historical reasons. */
+ rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages + 1,
+ sizeof(struct page *),
+ GFP_KERNEL, node);
+ if (!rqstp->rq_pages)
+ return false;
- ret = alloc_pages_bulk_node(GFP_KERNEL, node, pages,
+ ret = alloc_pages_bulk_node(GFP_KERNEL, node, rqstp->rq_maxpages,
rqstp->rq_pages);
- return ret == pages;
+ return ret == rqstp->rq_maxpages;
}
/*
@@ -662,17 +660,19 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
static void
svc_release_buffer(struct svc_rqst *rqstp)
{
- unsigned int i;
+ unsigned long i;
- for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++)
+ for (i = 0; i < rqstp->rq_maxpages; i++)
if (rqstp->rq_pages[i])
put_page(rqstp->rq_pages[i]);
+ kfree(rqstp->rq_pages);
}
static void
svc_rqst_free(struct svc_rqst *rqstp)
{
folio_batch_release(&rqstp->rq_fbatch);
+ kfree(rqstp->rq_bvec);
svc_release_buffer(rqstp);
if (rqstp->rq_scratch_page)
put_page(rqstp->rq_scratch_page);
@@ -708,7 +708,13 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp->rq_resp)
goto out_enomem;
- if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
+ if (!svc_init_buffer(rqstp, serv, node))
+ goto out_enomem;
+
+ rqstp->rq_bvec = kcalloc_node(rqstp->rq_maxpages,
+ sizeof(struct bio_vec),
+ GFP_KERNEL, node);
+ if (!rqstp->rq_bvec)
goto out_enomem;
rqstp->rq_err = -EAGAIN; /* No error yet */
@@ -900,7 +906,7 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
{
struct page **begin = rqstp->rq_pages;
- struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES];
+ struct page **end = &rqstp->rq_pages[rqstp->rq_maxpages];
if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) {
trace_svc_replace_page_err(rqstp);
@@ -1714,46 +1720,6 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset,
EXPORT_SYMBOL_GPL(svc_encode_result_payload);
/**
- * svc_fill_write_vector - Construct data argument for VFS write call
- * @rqstp: svc_rqst to operate on
- * @payload: xdr_buf containing only the write data payload
- *
- * Fills in rqstp::rq_vec, and returns the number of elements.
- */
-unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
- struct xdr_buf *payload)
-{
- struct page **pages = payload->pages;
- struct kvec *first = payload->head;
- struct kvec *vec = rqstp->rq_vec;
- size_t total = payload->len;
- unsigned int i;
-
- /* Some types of transport can present the write payload
- * entirely in rq_arg.pages. In this case, @first is empty.
- */
- i = 0;
- if (first->iov_len) {
- vec[i].iov_base = first->iov_base;
- vec[i].iov_len = min_t(size_t, total, first->iov_len);
- total -= vec[i].iov_len;
- ++i;
- }
-
- while (total) {
- vec[i].iov_base = page_address(*pages);
- vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
- total -= vec[i].iov_len;
- ++i;
- ++pages;
- }
-
- WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec));
- return i;
-}
-EXPORT_SYMBOL_GPL(svc_fill_write_vector);
-
-/**
* svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
* @rqstp: svc_rqst to operate on
* @first: buffer containing first section of pathname
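With rq_vec and svc_fill_write_vector() gone, callers convert an xdr_buf into the dynamically sized rq_bvec directly, as the svcsock.c hunks below do. The replacement pattern, sketched (payload stands for the write-data xdr_buf the removed helper took):

	struct iov_iter iter;
	unsigned int count;

	/* xdr_buf_to_bvec() flattens head/pages/tail into rq_bvec, which
	 * svc_prepare_thread() now sizes to rq_maxpages entries.
	 */
	count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload);
	iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, count, payload->len);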
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index ae25405d8bd2..cb14d6ddac6c 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -488,6 +488,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
pool = svc_pool_for_cpu(xprt->xpt_server);
percpu_counter_inc(&pool->sp_sockets_queued);
+ xprt->xpt_qtime = ktime_get();
lwq_enqueue(&xprt->xpt_ready, &pool->sp_xprts);
svc_pool_wake_idle_thread(pool);
@@ -651,18 +652,10 @@ static void svc_check_conn_limits(struct svc_serv *serv)
static bool svc_alloc_arg(struct svc_rqst *rqstp)
{
- struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg = &rqstp->rq_arg;
unsigned long pages, filled, ret;
- pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
- if (pages > RPCSVC_MAXPAGES) {
- pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
- pages, RPCSVC_MAXPAGES);
- /* use as many pages as possible */
- pages = RPCSVC_MAXPAGES;
- }
-
+ pages = rqstp->rq_maxpages;
for (filled = 0; filled < pages; filled = ret) {
ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages);
if (ret > filled)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 72e5a01df3d3..e1c85123b445 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -713,8 +713,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- count = xdr_buf_to_bvec(rqstp->rq_bvec,
- ARRAY_SIZE(rqstp->rq_bvec), xdr);
+ count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
count, rqstp->rq_res.len);
@@ -1219,8 +1218,8 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
memcpy(buf, &marker, sizeof(marker));
bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
- count = xdr_buf_to_bvec(rqstp->rq_bvec + 1,
- ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res);
+ count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages,
+ &rqstp->rq_res);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
1 + count, sizeof(marker) + rqstp->rq_res.len);
@@ -1340,7 +1339,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svsk->sk_marker = xdr_zero;
svsk->sk_tcplen = 0;
svsk->sk_datalen = 0;
- memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
+ memset(&svsk->sk_pages[0], 0,
+ svsk->sk_maxpages * sizeof(struct page *));
tcp_sock_set_nodelay(sk);
@@ -1379,10 +1379,13 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+ unsigned long pages;
- svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
+ pages = svc_serv_maxpages(serv);
+ svsk = kzalloc(struct_size(svsk, sk_pages, pages), GFP_KERNEL);
if (!svsk)
return ERR_PTR(-ENOMEM);
+ svsk->sk_maxpages = pages;
inet = sock->sk;
@@ -1542,7 +1545,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
if (protocol == IPPROTO_TCP) {
sk_net_refcnt_upgrade(sock->sk);
- if ((error = kernel_listen(sock, 64)) < 0)
+ if ((error = kernel_listen(sock, SOMAXCONN)) < 0)
goto bummer;
}
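The struct_size() call above implies that sk_pages has become a flexible array member, sized per-service at socket setup. The declaration lives in include/linux/sunrpc/svcsock.h rather than this diff; assuming it looks roughly like:

	struct svc_sock {
		/* ... existing members ... */
		unsigned long	sk_maxpages;
		/* received partial TCP record; length is sk_maxpages */
		struct page	*sk_pages[];
	};

	/* struct_size(svsk, sk_pages, pages) expands to
	 * sizeof(*svsk) + pages * sizeof(struct page *), with overflow
	 * checking, so a single kzalloc covers header and array.
	 */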
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 4e003cb516fe..2ea00e354ba6 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -213,6 +213,7 @@ bvec_overflow:
pr_warn_once("%s: bio_vec array overflow\n", __func__);
return count - 1;
}
+EXPORT_SYMBOL_GPL(xdr_buf_to_bvec);
/**
* xdr_inline_pages - Prepare receive buffer for a large reply
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 292022f0976e..e7e4a39ca6c6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -120,12 +120,16 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
int node = ibdev_to_node(rdma->sc_cm_id->device);
struct svc_rdma_recv_ctxt *ctxt;
+ unsigned long pages;
dma_addr_t addr;
void *buffer;
- ctxt = kzalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
+ pages = svc_serv_maxpages(rdma->sc_xprt.xpt_server);
+ ctxt = kzalloc_node(struct_size(ctxt, rc_pages, pages),
+ GFP_KERNEL, node);
if (!ctxt)
goto fail0;
+ ctxt->rc_maxpages = pages;
buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
goto fail1;
@@ -497,7 +501,7 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt)
* a computation, perform a simple range check. This is an
* arbitrary but sensible limit (ie, not architectural).
*/
- if (unlikely(segcount > RPCSVC_MAXPAGES))
+ if (unlikely(segcount > rctxt->rc_maxpages))
return false;
p = xdr_inline_decode(&rctxt->rc_stream,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 40797114d50a..661b3fe2779f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -765,7 +765,7 @@ static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
}
len -= seg_len;
- if (len && ((head->rc_curpage + 1) > ARRAY_SIZE(rqstp->rq_pages)))
+ if (len && ((head->rc_curpage + 1) > rqstp->rq_maxpages))
goto out_overrun;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 96154a2367a1..914cd263c2f1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -118,6 +118,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
int node = ibdev_to_node(rdma->sc_cm_id->device);
struct svc_rdma_send_ctxt *ctxt;
+ unsigned long pages;
dma_addr_t addr;
void *buffer;
int i;
@@ -126,13 +127,19 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
GFP_KERNEL, node);
if (!ctxt)
goto fail0;
+ pages = svc_serv_maxpages(rdma->sc_xprt.xpt_server);
+ ctxt->sc_pages = kcalloc_node(pages, sizeof(struct page *),
+ GFP_KERNEL, node);
+ if (!ctxt->sc_pages)
+ goto fail1;
+ ctxt->sc_maxpages = pages;
buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
- goto fail1;
+ goto fail2;
addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
rdma->sc_max_req_size, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
- goto fail2;
+ goto fail3;
svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);
@@ -151,8 +158,10 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_sges[i].lkey = rdma->sc_pd->local_dma_lkey;
return ctxt;
-fail2:
+fail3:
kfree(buffer);
+fail2:
+ kfree(ctxt->sc_pages);
fail1:
kfree(ctxt);
fail0:
@@ -176,6 +185,7 @@ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
rdma->sc_max_req_size,
DMA_TO_DEVICE);
kfree(ctxt->sc_xprt_buf);
+ kfree(ctxt->sc_pages);
kfree(ctxt);
}
}
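The relabelled error paths in svc_rdma_send_ctxt_alloc() keep the usual kernel unwind idiom: one label per allocation, released in reverse order of acquisition. Reassembled from the hunks above, the ladder now reads:

	fail3:	/* ib_dma_map_single() failed */
		kfree(buffer);
	fail2:	/* buffer allocation failed */
		kfree(ctxt->sc_pages);
	fail1:	/* sc_pages allocation failed */
		kfree(ctxt);
	fail0:
		return NULL;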
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index aca8bdf65d72..3d7f1413df02 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -406,12 +406,12 @@ static void svc_rdma_xprt_done(struct rpcrdma_notification *rn)
*/
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
{
+ unsigned int ctxts, rq_depth, maxpayload;
struct svcxprt_rdma *listen_rdma;
struct svcxprt_rdma *newxprt = NULL;
struct rdma_conn_param conn_param;
struct rpcrdma_connect_private pmsg;
struct ib_qp_init_attr qp_attr;
- unsigned int ctxts, rq_depth;
struct ib_device *dev;
int ret = 0;
RPC_IFDEBUG(struct sockaddr *sap);
@@ -462,12 +462,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_bc_requests = 2;
}
- /* Arbitrarily estimate the number of rw_ctxs needed for
- * this transport. This is enough rw_ctxs to make forward
- * progress even if the client is using one rkey per page
- * in each Read chunk.
+ /* Arbitrary estimate of the needed number of rdma_rw contexts.
*/
- ctxts = 3 * RPCSVC_MAXPAGES;
+ maxpayload = min(xprt->xpt_server->sv_max_payload,
+ RPCSVC_MAXPAYLOAD_RDMA);
+ ctxts = newxprt->sc_max_requests * 3 *
+ rdma_rw_mr_factor(dev, newxprt->sc_port_num,
+ maxpayload >> PAGE_SHIFT);
+
newxprt->sc_sq_depth = rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
@@ -575,6 +577,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
ib_destroy_qp(newxprt->sc_qp);
rdma_destroy_id(newxprt->sc_cm_id);
+ rpcrdma_rn_unregister(dev, &newxprt->sc_rn);
/* This call to put will destroy the transport */
svc_xprt_put(&newxprt->sc_xprt);
return NULL;
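Finally, a rough sense of scale for the new ctxts estimate (the numbers below are illustrative assumptions, not values from the patch):

	/* Illustrative only. With 4KB pages and maxpayload capped at 1MB,
	 * maxpayload >> PAGE_SHIFT is 256. If rdma_rw_mr_factor() reports
	 * that one MR covers those 256 pages and sc_max_requests is 64:
	 *
	 *	ctxts = 64 * 3 * 1 = 192
	 *
	 * The old fixed 3 * RPCSVC_MAXPAGES ignored both the device's MR
	 * geometry and the configured request count.
	 */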