netfs: Speed up buffered reading

Improve the efficiency of buffered reads in a number of ways: (1) Overhaul the algorithm in general so that it's a lot more compact and split the read submission code between buffered and unbuffered versions. The unbuffered version can be vastly simplified. (2) Read-result collection is handed off to a work queue rather than being done in the I/O thread. Multiple subrequests can be processes simultaneously. (3) When a subrequest is collected, any folios it fully spans are collected and "spare" data on either side is donated to either the previous or the next subrequest in the sequence. Notes: (*) Readahead expansion is massively slows down fio, presumably because it causes a load of extra allocations, both folio and xarray, up front before RPC requests can be transmitted. (*) RDMA with cifs does appear to work, both with SIW and RXE. (*) PG_private_2-based reading and copy-to-cache is split out into its own file and altered to use folio_queue. Note that the copy to the cache now creates a new write transaction against the cache and adds the folios to be copied into it. This allows it to use part of the writeback I/O code. Signed-off-by: David Howells <dhowells@redhat.com> cc: Jeff Layton <jlayton@kernel.org> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner <brauner@kernel.org>
author: David Howells <dhowells@redhat.com> 2024-07-02 00:40:22 +0100
committer: Christian Brauner <brauner@kernel.org> 2024-09-12 12:20:41 +0200
commit: ee4cdf7ba857a894ad1650d6ab77669cbbfa329e (patch)
tree: 8258e3b756adf109085d66a8b63cd08db03abad0 /fs/netfs/read_pgpriv2.c
parent: afs: Make read subreqs async (diff)
download: wireguard-linux-ee4cdf7ba857a894ad1650d6ab77669cbbfa329e.tar.xz
wireguard-linux-ee4cdf7ba857a894ad1650d6ab77669cbbfa329e.zip
1 files changed, 264 insertions, 0 deletions
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
new file mode 100644
index 000000000000..9439461d535f
--- /dev/null
+++ b/fs/netfs/read_pgpriv2.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Read with PG_private_2 [DEPRECATED].
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/task_io_accounting_ops.h>
+#include "internal.h"
+
+/*
+ * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2.  The
+ * third mark in the folio queue is used to indicate that this folio needs
+ * writing.
+ */
+void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
+				      struct netfs_io_request *rreq,
+				      struct folio_queue *folioq,
+				      int slot)
+{
+	struct folio *folio = folioq_folio(folioq, slot);
+
+	trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+	folio_start_private_2(folio);
+	folioq_mark3(folioq, slot);
+}
+
+/*
+ * [DEPRECATED] Cancel PG_private_2 on all marked folios in the event of an
+ * unrecoverable error.
+ */
+static void netfs_pgpriv2_cancel(struct folio_queue *folioq)
+{
+	struct folio *folio;
+	int slot;
+
+	while (folioq) {
+		if (!folioq->marks3) {
+			folioq = folioq->next;
+			continue;
+		}
+
+		slot = __ffs(folioq->marks3);
+		folio = folioq_folio(folioq, slot);
+
+		trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
+		folio_end_private_2(folio);
+		folioq_unmark3(folioq, slot);
+	}
+}
+
+/*
+ * [DEPRECATED] Copy a folio to the cache with PG_private_2 set.
+ */
+static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio *folio)
+{
+	struct netfs_io_stream *cache  = &wreq->io_streams[1];
+	size_t fsize = folio_size(folio), flen = fsize;
+	loff_t fpos = folio_pos(folio), i_size;
+	bool to_eof = false;
+
+	_enter("");
+
+	/* netfs_perform_write() may shift i_size around the page or from out
+	 * of the page to beyond it, but cannot move i_size into or through the
+	 * page since we have it locked.
+	 */
+	i_size = i_size_read(wreq->inode);
+
+	if (fpos >= i_size) {
+		/* mmap beyond eof. */
+		_debug("beyond eof");
+		folio_end_private_2(folio);
+		return 0;
+	}
+
+	if (fpos + fsize > wreq->i_size)
+		wreq->i_size = i_size;
+
+	if (flen > i_size - fpos) {
+		flen = i_size - fpos;
+		to_eof = true;
+	} else if (flen == i_size - fpos) {
+		to_eof = true;
+	}
+
+	_debug("folio %zx %zx", flen, fsize);
+
+	trace_netfs_folio(folio, netfs_folio_trace_store_copy);
+
+	/* Attach the folio to the rolling buffer. */
+	if (netfs_buffer_append_folio(wreq, folio, false) < 0)
+		return -ENOMEM;
+
+	cache->submit_max_len = fsize;
+	cache->submit_off = 0;
+	cache->submit_len = flen;
+
+	/* Attach the folio to one or more subrequests.  For a big folio, we
+	 * could end up with thousands of subrequests if the wsize is small -
+	 * but we might need to wait during the creation of subrequests for
+	 * network resources (eg. SMB credits).
+	 */
+	do {
+		ssize_t part;
+
+		wreq->io_iter.iov_offset = cache->submit_off;
+
+		atomic64_set(&wreq->issued_to, fpos + cache->submit_off);
+		part = netfs_advance_write(wreq, cache, fpos + cache->submit_off,
+					   cache->submit_len, to_eof);
+		cache->submit_off += part;
+		cache->submit_max_len -= part;
+		if (part > cache->submit_len)
+			cache->submit_len = 0;
+		else
+			cache->submit_len -= part;
+	} while (cache->submit_len > 0);
+
+	wreq->io_iter.iov_offset = 0;
+	iov_iter_advance(&wreq->io_iter, fsize);
+	atomic64_set(&wreq->issued_to, fpos + fsize);
+
+	if (flen < fsize)
+		netfs_issue_write(wreq, cache);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * [DEPRECATED] Go through the buffer and write any folios that are marked with
+ * the third mark to the cache.
+ */
+void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
+{
+	struct netfs_io_request *wreq;
+	struct folio_queue *folioq;
+	struct folio *folio;
+	int error = 0;
+	int slot = 0;
+
+	_enter("");
+
+	if (!fscache_resources_valid(&rreq->cache_resources))
+		goto couldnt_start;
+
+	/* Need the first folio to be able to set up the op. */
+	for (folioq = rreq->buffer; folioq; folioq = folioq->next) {
+		if (folioq->marks3) {
+			slot = __ffs(folioq->marks3);
+			break;
+		}
+	}
+	if (!folioq)
+		return;
+	folio = folioq_folio(folioq, slot);
+
+	wreq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio),
+				      NETFS_PGPRIV2_COPY_TO_CACHE);
+	if (IS_ERR(wreq)) {
+		kleave(" [create %ld]", PTR_ERR(wreq));
+		goto couldnt_start;
+	}
+
+	trace_netfs_write(wreq, netfs_write_trace_copy_to_cache);
+	netfs_stat(&netfs_n_wh_copy_to_cache);
+
+	for (;;) {
+		error = netfs_pgpriv2_copy_folio(wreq, folio);
+		if (error < 0)
+			break;
+
+		folioq_unmark3(folioq, slot);
+		if (!folioq->marks3) {
+			folioq = folioq->next;
+			if (!folioq)
+				break;
+		}
+
+		slot = __ffs(folioq->marks3);
+		folio = folioq_folio(folioq, slot);
+	}
+
+	netfs_issue_write(wreq, &wreq->io_streams[1]);
+	smp_wmb(); /* Write lists before ALL_QUEUED. */
+	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+	_leave(" = %d", error);
+couldnt_start:
+	netfs_pgpriv2_cancel(rreq->buffer);
+}
+
+/*
+ * [DEPRECATED] Remove the PG_private_2 mark from any folios we've finished
+ * copying.
+ */
+bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
+{
+	struct folio_queue *folioq = wreq->buffer;
+	unsigned long long collected_to = wreq->collected_to;
+	unsigned int slot = wreq->buffer_head_slot;
+	bool made_progress = false;
+
+	if (slot >= folioq_nr_slots(folioq)) {
+		folioq = netfs_delete_buffer_head(wreq);
+		slot = 0;
+	}
+
+	for (;;) {
+		struct folio *folio;
+		unsigned long long fpos, fend;
+		size_t fsize, flen;
+
+		folio = folioq_folio(folioq, slot);
+		if (WARN_ONCE(!folio_test_private_2(folio),
+			      "R=%08x: folio %lx is not marked private_2\n",
+			      wreq->debug_id, folio->index))
+			trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
+
+		fpos = folio_pos(folio);
+		fsize = folio_size(folio);
+		flen = fsize;
+
+		fend = min_t(unsigned long long, fpos + flen, wreq->i_size);
+
+		trace_netfs_collect_folio(wreq, folio, fend, collected_to);
+
+		/* Unlock any folio we've transferred all of. */
+		if (collected_to < fend)
+			break;
+
+		trace_netfs_folio(folio, netfs_folio_trace_end_copy);
+		folio_end_private_2(folio);
+		wreq->cleaned_to = fpos + fsize;
+		made_progress = true;
+
+		/* Clean up the head folioq.  If we clear an entire folioq, then
+		 * we can get rid of it provided it's not also the tail folioq
+		 * being filled by the issuer.
+		 */
+		folioq_clear(folioq, slot);
+		slot++;
+		if (slot >= folioq_nr_slots(folioq)) {
+			if (READ_ONCE(wreq->buffer_tail) == folioq)
+				break;
+			folioq = netfs_delete_buffer_head(wreq);
+			slot = 0;
+		}
+
+		if (fpos + fsize >= collected_to)
+			break;
+	}
+
+	wreq->buffer = folioq;
+	wreq->buffer_head_slot = slot;
+	return made_progress;
+}
author	David Howells <dhowells@redhat.com>	2024-07-02 00:40:22 +0100
committer	Christian Brauner <brauner@kernel.org>	2024-09-12 12:20:41 +0200
commit	ee4cdf7ba857a894ad1650d6ab77669cbbfa329e (patch)
tree	8258e3b756adf109085d66a8b63cd08db03abad0 /fs/netfs/read_pgpriv2.c
parent	afs: Make read subreqs async (diff)
download	wireguard-linux-ee4cdf7ba857a894ad1650d6ab77669cbbfa329e.tar.xz wireguard-linux-ee4cdf7ba857a894ad1650d6ab77669cbbfa329e.zip