From 06a17bbe1d47fec6232505c355b367797f6a635c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 27 Oct 2020 09:39:04 +0000 Subject: afs: Fix copy_file_range() The prevention of splice-write without explicit ops made the copy_file_range() syscall to an afs file (as done by the generic/112 xfstest) fail with EINVAL. Fix by using iter_file_splice_write() for afs. Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") Signed-off-by: David Howells Reviewed-by: Christoph Hellwig --- fs/afs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/file.c b/fs/afs/file.c index 371d1488cc54..91225421ad37 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -33,6 +33,7 @@ const struct file_operations afs_file_operations = { .write_iter = afs_file_write, .mmap = afs_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = afs_fsync, .lock = afs_lock, .flock = afs_flock, -- cgit v1.2.3-59-g8ed1b From acc080d15dde820bd39eb55a04f9a09c7ef52e67 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 27 Oct 2020 10:42:56 +0000 Subject: afs: Fix tracing deref-before-check The patch dca54a7bbb8c: "afs: Add tracing for cell refcount and active user count" from Oct 13, 2020, leads to the following Smatch complaint: fs/afs/cell.c:596 afs_unuse_cell() warn: variable dereferenced before check 'cell' (see line 592) Fix this by moving the retrieval of the cell debug ID to after the check of the validity of the cell pointer. Reported-by: Dan Carpenter Fixes: dca54a7bbb8c ("afs: Add tracing for cell refcount and active user count") Signed-off-by: David Howells cc: Dan Carpenter --- fs/afs/cell.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 52233fa6195f..887b673f6223 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -589,7 +589,7 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason) */ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason) { - unsigned int debug_id = cell->debug_id; + unsigned int debug_id; time64_t now, expire_delay; int u, a; @@ -604,6 +604,7 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr if (cell->vl_servers->nr_servers) expire_delay = afs_cell_gc_delay; + debug_id = cell->debug_id; u = atomic_read(&cell->ref); a = atomic_dec_return(&cell->active); trace_afs_cell(debug_id, u, a, reason); -- cgit v1.2.3-59-g8ed1b From 248c944e2159de4868bef558feea40214aaf8464 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 Aug 2020 11:58:12 +0300 Subject: afs: Fix a use after free in afs_xattr_get_acl() The "op" pointer is freed earlier when we call afs_put_operation().
Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Signed-off-by: Dan Carpenter Signed-off-by: David Howells cc: Colin Ian King --- fs/afs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 84f3c4f57531..38884d6c57cd 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -85,7 +85,7 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler, if (acl->size <= size) memcpy(buffer, acl->data, acl->size); else - op->error = -ERANGE; + ret = -ERANGE; } } -- cgit v1.2.3-59-g8ed1b From d383e346f97d6bb0d654bb3d63c44ab106d92d29 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Oct 2020 14:40:31 +0100 Subject: afs: Fix afs_launder_page to not clear PG_writeback Fix afs_launder_page() to not clear PG_writeback on the page it is laundering as the flag isn't set in this case. Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record") Signed-off-by: David Howells --- fs/afs/internal.h | 1 + fs/afs/write.c | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 81b0485fd22a..289f5dffa46f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -812,6 +812,7 @@ struct afs_operation { pgoff_t last; /* last page in mapping to deal with */ unsigned first_offset; /* offset into mapping[first] */ unsigned last_to; /* amount of mapping[last] */ + bool laundering; /* Laundering page, PG_writeback not set */ } store; struct { struct iattr *attr; diff --git a/fs/afs/write.c b/fs/afs/write.c index da12abd6db21..b937ec047ec9 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -396,7 +396,8 @@ static void afs_store_data_success(struct afs_operation *op) op->ctime = op->file[0].scb.status.mtime_client; afs_vnode_commit_status(op, &op->file[0]); if (op->error == 0) { - afs_pages_written_back(vnode, op->store.first, op->store.last); + if (!op->store.laundering) + afs_pages_written_back(vnode, op->store.first, op->store.last); afs_stat_v(vnode, n_stores); atomic_long_add((op->store.last * PAGE_SIZE + op->store.last_to) - (op->store.first * PAGE_SIZE + op->store.first_offset), @@ -415,7 +416,7 @@ static const struct afs_operation_ops afs_store_data_operation = { */ static int afs_store_data(struct address_space *mapping, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) + unsigned offset, unsigned to, bool laundering) { struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct afs_operation *op; @@ -448,6 +449,7 @@ static int afs_store_data(struct address_space *mapping, op->store.last = last; op->store.first_offset = offset; op->store.last_to = to; + op->store.laundering = laundering; op->mtime = vnode->vfs_inode.i_mtime; op->flags |= AFS_OPERATION_UNINTR; op->ops = &afs_store_data_operation; @@ -601,7 +603,7 @@ no_more: if (end > i_size) to = i_size & ~PAGE_MASK; - ret = afs_store_data(mapping, first, last, offset, to); + ret = afs_store_data(mapping, first, last, offset, to, false); switch (ret) { case 0: ret = count; @@ -921,7 +923,7 @@ int afs_launder_page(struct page *page) trace_afs_page_dirty(vnode, tracepoint_string("launder"), page->index, priv); - ret = afs_store_data(mapping, page->index, page->index, t, f); + ret = afs_store_data(mapping, page->index, page->index, t, f, true); } trace_afs_page_dirty(vnode, tracepoint_string("laundered"), -- cgit v1.2.3-59-g8ed1b From fa04a40b169fcee615afbae97f71a09332993f64 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 Oct 2020 13:22:19 +0100 Subject: afs: Fix to take ref on 
page when PG_private is set Fix afs to take a ref on a page when it sets PG_private on it and to drop the ref when removing the flag. Note that in afs_write_begin(), a lot of the time, PG_private is already set on a page to which we're going to add some data. In such a case, we leave the bit set and mustn't increment the page count. As suggested by Matthew Wilcox, use attach/detach_page_private() where possible. Fixes: 31143d5d515e ("AFS: implement basic file write support") Reported-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Reviewed-by: Matthew Wilcox (Oracle) --- fs/afs/dir.c | 12 ++++-------- fs/afs/dir_edit.c | 6 ++---- fs/afs/file.c | 8 ++------ fs/afs/write.c | 18 ++++++++++-------- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 1d2e61e0ab04..1bb5b9d7f0a2 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -281,8 +281,7 @@ retry: if (ret < 0) goto error; - set_page_private(req->pages[i], 1); - SetPagePrivate(req->pages[i]); + attach_page_private(req->pages[i], (void *)1); unlock_page(req->pages[i]); i++; } else { @@ -1975,8 +1974,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags) _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); - set_page_private(page, 0); - ClearPagePrivate(page); + detach_page_private(page); /* The directory will need reloading. */ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) @@ -2003,8 +2001,6 @@ static void afs_dir_invalidatepage(struct page *page, unsigned int offset, afs_stat_v(dvnode, n_inval); /* we clean up only if the entire page is being invalidated */ - if (offset == 0 && length == PAGE_SIZE) { - set_page_private(page, 0); - ClearPagePrivate(page); - } + if (offset == 0 && length == PAGE_SIZE) + detach_page_private(page); } diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index b108528bf010..2ffe09abae7f 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -243,10 +243,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode, index, gfp); if (!page) goto error; - if (!PagePrivate(page)) { - set_page_private(page, 1); - SetPagePrivate(page); - } + if (!PagePrivate(page)) + attach_page_private(page, (void *)1); dir_page = kmap(page); } diff --git a/fs/afs/file.c b/fs/afs/file.c index 91225421ad37..322973d12614 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -627,11 +627,9 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, #endif if (PagePrivate(page)) { - priv = page_private(page); + priv = (unsigned long)detach_page_private(page); trace_afs_page_dirty(vnode, tracepoint_string("inval"), page->index, priv); - set_page_private(page, 0); - ClearPagePrivate(page); } } @@ -661,11 +659,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) #endif if (PagePrivate(page)) { - priv = page_private(page); + priv = (unsigned long)detach_page_private(page); trace_afs_page_dirty(vnode, tracepoint_string("rel"), page->index, priv); - set_page_private(page, 0); - ClearPagePrivate(page); } /* indicate that the page can be released */ diff --git a/fs/afs/write.c b/fs/afs/write.c index b937ec047ec9..02facb19a0f1 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -151,8 +151,10 @@ try_again: priv |= f; trace_afs_page_dirty(vnode, tracepoint_string("begin"), page->index, priv); - SetPagePrivate(page); - set_page_private(page, priv); + if (PagePrivate(page)) + set_page_private(page, priv); + else + attach_page_private(page, (void *)priv); _leave(" = 0"); return 0; @@ -334,10 +336,9 @@ static void 
afs_pages_written_back(struct afs_vnode *vnode, ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { - priv = page_private(pv.pages[loop]); + priv = (unsigned long)detach_page_private(pv.pages[loop]); trace_afs_page_dirty(vnode, tracepoint_string("clear"), pv.pages[loop]->index, priv); - set_page_private(pv.pages[loop], 0); end_page_writeback(pv.pages[loop]); } first += count; @@ -863,8 +864,10 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) priv |= 0; /* From */ trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), vmf->page->index, priv); - SetPagePrivate(vmf->page); - set_page_private(vmf->page, priv); + if (PagePrivate(vmf->page)) + set_page_private(vmf->page, priv); + else + attach_page_private(vmf->page, (void *)priv); file_update_time(file); sb_end_pagefault(inode->i_sb); @@ -926,10 +929,9 @@ int afs_launder_page(struct page *page) ret = afs_store_data(mapping, page->index, page->index, t, f, true); } + priv = (unsigned long)detach_page_private(page); trace_afs_page_dirty(vnode, tracepoint_string("laundered"), page->index, priv); - set_page_private(page, 0); - ClearPagePrivate(page); #ifdef CONFIG_AFS_FSCACHE if (PageFsCache(page)) { -- cgit v1.2.3-59-g8ed1b From 21db2cdc667f744691a407105b7712bc18d74023 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Oct 2020 14:03:03 +0100 Subject: afs: Fix page leak on afs_write_begin() failure Fix the leak of the target page in afs_write_begin() when it fails. Fixes: 15b4650e55e0 ("afs: convert to new aops") Signed-off-by: David Howells cc: Nick Piggin --- fs/afs/write.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 02facb19a0f1..7fae9f8b38eb 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -76,7 +76,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, */ int afs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) + struct page **_page, void **fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct page *page; @@ -110,9 +110,6 @@ int afs_write_begin(struct file *file, struct address_space *mapping, SetPageUptodate(page); } - /* page won't leak in error case: it eventually gets cleaned off LRU */ - *pagep = page; - try_again: /* See if this page is already partially written in a way that we can * merge the new write with. @@ -155,6 +152,7 @@ try_again: set_page_private(page, priv); else attach_page_private(page, (void *)priv); + *_page = page; _leave(" = 0"); return 0; @@ -164,17 +162,18 @@ try_again: flush_conflicting_write: _debug("flush conflict"); ret = write_one_page(page); - if (ret < 0) { - _leave(" = %d", ret); - return ret; - } + if (ret < 0) + goto error; ret = lock_page_killable(page); - if (ret < 0) { - _leave(" = %d", ret); - return ret; - } + if (ret < 0) + goto error; goto try_again; + +error: + put_page(page); + _leave(" = %d", ret); + return ret; } /* -- cgit v1.2.3-59-g8ed1b From f792e3ac82fe2c6c863e93187eb7ddfccab68fa7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 26 Oct 2020 14:05:33 +0000 Subject: afs: Fix where page->private is set during write In afs, page->private is set to indicate the dirty region of a page. This is done in afs_write_begin(), but that can't take account of whether the copy into the page actually worked. Fix this by moving the change of page->private into afs_write_end(). 
Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record") Signed-off-by: David Howells --- fs/afs/write.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 7fae9f8b38eb..f28d85c38cd8 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -135,23 +135,8 @@ try_again: if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) && (to < f || from > t)) goto flush_conflicting_write; - if (from < f) - f = from; - if (to > t) - t = to; - } else { - f = from; - t = to; } - priv = (unsigned long)t << AFS_PRIV_SHIFT; - priv |= f; - trace_afs_page_dirty(vnode, tracepoint_string("begin"), - page->index, priv); - if (PagePrivate(page)) - set_page_private(page, priv); - else - attach_page_private(page, (void *)priv); *_page = page; _leave(" = 0"); return 0; @@ -185,6 +170,9 @@ int afs_write_end(struct file *file, struct address_space *mapping, { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct key *key = afs_file_key(file); + unsigned long priv; + unsigned int f, from = pos & (PAGE_SIZE - 1); + unsigned int t, to = from + copied; loff_t i_size, maybe_i_size; int ret; @@ -216,6 +204,29 @@ int afs_write_end(struct file *file, struct address_space *mapping, SetPageUptodate(page); } + if (PagePrivate(page)) { + priv = page_private(page); + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + if (from < f) + f = from; + if (to > t) + t = to; + priv = (unsigned long)t << AFS_PRIV_SHIFT; + priv |= f; + set_page_private(page, priv); + trace_afs_page_dirty(vnode, tracepoint_string("dirty+"), + page->index, priv); + } else { + f = from; + t = to; + priv = (unsigned long)t << AFS_PRIV_SHIFT; + priv |= f; + attach_page_private(page, (void *)priv); + trace_afs_page_dirty(vnode, tracepoint_string("dirty"), + page->index, priv); + } + set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); -- cgit v1.2.3-59-g8ed1b From 185f0c7073bd5c78f86265f703f5daf1306ab5a7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 26 Oct 2020 13:22:47 +0000 Subject: afs: Wrap page->private manipulations in inline functions The afs filesystem uses page->private to store the dirty range within a page such that in the event of a conflicting 3rd-party write to the server, we write back just the bits that got changed locally. However, there are a couple of problems with this: (1) I need a bit to note if the page might be mapped so that partial invalidation doesn't shrink the range. (2) There aren't necessarily sufficient bits to store the entire range of data altered (say it's a 32-bit system with 64KiB pages or transparent huge pages are in use). So wrap the accesses in inline functions so that future commits can change how this works. Also move them out of the tracing header into the in-directory header. There's not really any need for them to be in the tracing header. Signed-off-by: David Howells --- fs/afs/internal.h | 28 ++++++++++++++++++++++++++++ fs/afs/write.c | 31 +++++++++++++------------------ include/trace/events/afs.h | 19 +++---------------- 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 289f5dffa46f..edaccd07e18e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -858,6 +858,34 @@ struct afs_vnode_cache_aux { u64 data_version; } __packed; +/* + * We use page->private to hold the amount of the page that we've written to, + * splitting the field into two parts. 
However, we need to represent a range + * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. + */ +#if PAGE_SIZE > 32768 +#define __AFS_PAGE_PRIV_MASK 0xffffffffUL +#define __AFS_PAGE_PRIV_SHIFT 32 +#else +#define __AFS_PAGE_PRIV_MASK 0xffffUL +#define __AFS_PAGE_PRIV_SHIFT 16 +#endif + +static inline size_t afs_page_dirty_from(unsigned long priv) +{ + return priv & __AFS_PAGE_PRIV_MASK; +} + +static inline size_t afs_page_dirty_to(unsigned long priv) +{ + return (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK; +} + +static inline unsigned long afs_page_dirty(size_t from, size_t to) +{ + return ((unsigned long)to << __AFS_PAGE_PRIV_SHIFT) | from; +} + #include /*****************************************************************************/ diff --git a/fs/afs/write.c b/fs/afs/write.c index f28d85c38cd8..ea1768b3c0b5 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -117,8 +117,8 @@ try_again: t = f = 0; if (PagePrivate(page)) { priv = page_private(page); - f = priv & AFS_PRIV_MAX; - t = priv >> AFS_PRIV_SHIFT; + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); ASSERTCMP(f, <=, t); } @@ -206,22 +206,18 @@ int afs_write_end(struct file *file, struct address_space *mapping, if (PagePrivate(page)) { priv = page_private(page); - f = priv & AFS_PRIV_MAX; - t = priv >> AFS_PRIV_SHIFT; + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); if (from < f) f = from; if (to > t) t = to; - priv = (unsigned long)t << AFS_PRIV_SHIFT; - priv |= f; + priv = afs_page_dirty(f, t); set_page_private(page, priv); trace_afs_page_dirty(vnode, tracepoint_string("dirty+"), page->index, priv); } else { - f = from; - t = to; - priv = (unsigned long)t << AFS_PRIV_SHIFT; - priv |= f; + priv = afs_page_dirty(from, to); attach_page_private(page, (void *)priv); trace_afs_page_dirty(vnode, tracepoint_string("dirty"), page->index, priv); @@ -522,8 +518,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, */ start = primary_page->index; priv = page_private(primary_page); - offset = priv & AFS_PRIV_MAX; - to = priv >> AFS_PRIV_SHIFT; + offset = afs_page_dirty_from(priv); + to = afs_page_dirty_to(priv); trace_afs_page_dirty(vnode, tracepoint_string("store"), primary_page->index, priv); @@ -568,8 +564,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, } priv = page_private(page); - f = priv & AFS_PRIV_MAX; - t = priv >> AFS_PRIV_SHIFT; + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); if (f != 0 && !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) { unlock_page(page); @@ -870,8 +866,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) */ wait_on_page_writeback(vmf->page); - priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */ - priv |= 0; /* From */ + priv = afs_page_dirty(0, PAGE_SIZE); trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), vmf->page->index, priv); if (PagePrivate(vmf->page)) @@ -930,8 +925,8 @@ int afs_launder_page(struct page *page) f = 0; t = PAGE_SIZE; if (PagePrivate(page)) { - f = priv & AFS_PRIV_MAX; - t = priv >> AFS_PRIV_SHIFT; + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); } trace_afs_page_dirty(vnode, tracepoint_string("launder"), diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8eb49231c6bb..866fc67d5aa5 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -966,19 +966,6 @@ TRACE_EVENT(afs_dir_check_failed, __entry->vnode, __entry->off, __entry->i_size) ); -/* - * We use page->private to hold the amount 
of the page that we've written to, - * splitting the field into two parts. However, we need to represent a range - * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. - */ -#if PAGE_SIZE > 32768 -#define AFS_PRIV_MAX 0xffffffff -#define AFS_PRIV_SHIFT 32 -#else -#define AFS_PRIV_MAX 0xffff -#define AFS_PRIV_SHIFT 16 -#endif - TRACE_EVENT(afs_page_dirty, TP_PROTO(struct afs_vnode *vnode, const char *where, pgoff_t page, unsigned long priv), @@ -999,10 +986,10 @@ TRACE_EVENT(afs_page_dirty, __entry->priv = priv; ), - TP_printk("vn=%p %lx %s %lu-%lu", + TP_printk("vn=%p %lx %s %zx-%zx", __entry->vnode, __entry->page, __entry->where, - __entry->priv & AFS_PRIV_MAX, - __entry->priv >> AFS_PRIV_SHIFT) + afs_page_dirty_from(__entry->priv), + afs_page_dirty_to(__entry->priv)) ); TRACE_EVENT(afs_call_state, -- cgit v1.2.3-59-g8ed1b From 65dd2d6072d393a3aa14ded8afa9a12f27d9c8ad Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 26 Oct 2020 13:57:44 +0000 Subject: afs: Alter dirty range encoding in page->private Currently, page->private on an afs page is used to store the range of dirtied data within the page, where the range includes the lower bound, but excludes the upper bound (e.g. 0-1 is a range covering a single byte). This, however, requires a superfluous bit for the last-byte bound so that on a 4KiB page, it can say 0-4096 to indicate the whole page, the idea being that having both numbers the same would indicate an empty range. This is unnecessary as the PG_private bit is clear if it's an empty range (as is PG_dirty). Alter the way the dirty range is encoded in page->private such that the upper bound is reduced by 1 (e.g. 0-0 is then specified the same single byte range mentioned above). Applying this to both bounds frees up two bits, one of which can be used in a future commit. This allows the afs filesystem to be compiled on ppc32 with 64K pages; without this, the following warnings are seen: ../fs/afs/internal.h: In function 'afs_page_dirty_to': ../fs/afs/internal.h:881:15: warning: right shift count >= width of type [-Wshift-count-overflow] 881 | return (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK; | ^~ ../fs/afs/internal.h: In function 'afs_page_dirty': ../fs/afs/internal.h:886:28: warning: left shift count >= width of type [-Wshift-count-overflow] 886 | return ((unsigned long)to << __AFS_PAGE_PRIV_SHIFT) | from; | ^~ Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record") Signed-off-by: David Howells --- fs/afs/internal.h | 6 +++--- fs/afs/write.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index edaccd07e18e..344c545f934c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -863,7 +863,7 @@ struct afs_vnode_cache_aux { * splitting the field into two parts. However, we need to represent a range * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. 
*/ -#if PAGE_SIZE > 32768 +#ifdef CONFIG_64BIT #define __AFS_PAGE_PRIV_MASK 0xffffffffUL #define __AFS_PAGE_PRIV_SHIFT 32 #else @@ -878,12 +878,12 @@ static inline size_t afs_page_dirty_from(unsigned long priv) static inline size_t afs_page_dirty_to(unsigned long priv) { - return (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK; + return ((priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK) + 1; } static inline unsigned long afs_page_dirty(size_t from, size_t to) { - return ((unsigned long)to << __AFS_PAGE_PRIV_SHIFT) | from; + return ((unsigned long)(to - 1) << __AFS_PAGE_PRIV_SHIFT) | from; } #include diff --git a/fs/afs/write.c b/fs/afs/write.c index ea1768b3c0b5..1a49f5c89342 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -93,7 +93,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, /* We want to store information about how much of a page is altered in * page->private. */ - BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8); + BUILD_BUG_ON(PAGE_SIZE - 1 > __AFS_PAGE_PRIV_MASK && sizeof(page->private) < 8); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) -- cgit v1.2.3-59-g8ed1b From f86726a69dec5df6ba051baf9265584419478b64 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Oct 2020 14:08:23 +0100 Subject: afs: Fix afs_invalidatepage to adjust the dirty region Fix afs_invalidatepage() to adjust the dirty region recorded in page->private when truncating a page. If the dirty region is entirely removed, then the private data is cleared and the page dirty state is cleared. Without this, if the page is truncated and then expanded again by truncate, zeros from the expanded, but no-longer dirty region may get written back to the server if the page gets laundered due to a conflicting 3rd-party write. It mustn't, however, shorten the dirty region of the page if that page is still mmapped and has been marked dirty by afs_page_mkwrite(), so a flag is stored in page->private to record this. Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record") Signed-off-by: David Howells --- fs/afs/file.c | 71 +++++++++++++++++++++++++++++++++++++++------- fs/afs/internal.h | 16 +++++++++-- fs/afs/write.c | 1 + include/trace/events/afs.h | 5 ++-- 4 files changed, 79 insertions(+), 14 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index 322973d12614..85f5adf21aa0 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -601,6 +601,63 @@ static int afs_readpages(struct file *file, struct address_space *mapping, return ret; } +/* + * Adjust the dirty region of the page on truncation or full invalidation, + * getting rid of the markers altogether if the region is entirely invalidated. + */ +static void afs_invalidate_dirty(struct page *page, unsigned int offset, + unsigned int length) +{ + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + unsigned long priv; + unsigned int f, t, end = offset + length; + + priv = page_private(page); + + /* we clean up only if the entire page is being invalidated */ + if (offset == 0 && length == thp_size(page)) + goto full_invalidate; + + /* If the page was dirtied by page_mkwrite(), the PTE stays writable + * and we don't get another notification to tell us to expand it + * again. 
+ */ + if (afs_is_page_dirty_mmapped(priv)) + return; + + /* We may need to shorten the dirty region */ + f = afs_page_dirty_from(priv); + t = afs_page_dirty_to(priv); + + if (t <= offset || f >= end) + return; /* Doesn't overlap */ + + if (f < offset && t > end) + return; /* Splits the dirty region - just absorb it */ + + if (f >= offset && t <= end) + goto undirty; + + if (f < offset) + t = offset; + else + f = end; + if (f == t) + goto undirty; + + priv = afs_page_dirty(f, t); + set_page_private(page, priv); + trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page->index, priv); + return; + +undirty: + trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page->index, priv); + clear_page_dirty_for_io(page); +full_invalidate: + priv = (unsigned long)detach_page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("inval"), page->index, priv); +} + /* * invalidate part or all of a page * - release a page and clean up its private data if offset is 0 (indicating @@ -609,29 +666,23 @@ static int afs_readpages(struct file *file, struct address_space *mapping, static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length) { - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - unsigned long priv; - _enter("{%lu},%u,%u", page->index, offset, length); BUG_ON(!PageLocked(page)); +#ifdef CONFIG_AFS_FSCACHE /* we clean up only if the entire page is being invalidated */ if (offset == 0 && length == PAGE_SIZE) { -#ifdef CONFIG_AFS_FSCACHE if (PageFsCache(page)) { struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); fscache_wait_on_page_write(vnode->cache, page); fscache_uncache_page(vnode->cache, page); } + } #endif - if (PagePrivate(page)) { - priv = (unsigned long)detach_page_private(page); - trace_afs_page_dirty(vnode, tracepoint_string("inval"), - page->index, priv); - } - } + if (PagePrivate(page)) + afs_invalidate_dirty(page, offset, length); _leave(""); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 344c545f934c..b0fce1f75397 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -864,11 +864,13 @@ struct afs_vnode_cache_aux { * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. 
*/ #ifdef CONFIG_64BIT -#define __AFS_PAGE_PRIV_MASK 0xffffffffUL +#define __AFS_PAGE_PRIV_MASK 0x7fffffffUL #define __AFS_PAGE_PRIV_SHIFT 32 +#define __AFS_PAGE_PRIV_MMAPPED 0x80000000UL #else -#define __AFS_PAGE_PRIV_MASK 0xffffUL +#define __AFS_PAGE_PRIV_MASK 0x7fffUL #define __AFS_PAGE_PRIV_SHIFT 16 +#define __AFS_PAGE_PRIV_MMAPPED 0x8000UL #endif static inline size_t afs_page_dirty_from(unsigned long priv) @@ -886,6 +888,16 @@ static inline unsigned long afs_page_dirty(size_t from, size_t to) return ((unsigned long)(to - 1) << __AFS_PAGE_PRIV_SHIFT) | from; } +static inline unsigned long afs_page_dirty_mmapped(unsigned long priv) +{ + return priv | __AFS_PAGE_PRIV_MMAPPED; +} + +static inline bool afs_is_page_dirty_mmapped(unsigned long priv) +{ + return priv & __AFS_PAGE_PRIV_MMAPPED; +} + #include /*****************************************************************************/ diff --git a/fs/afs/write.c b/fs/afs/write.c index 1a49f5c89342..a2511e3ad2cc 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -867,6 +867,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) wait_on_page_writeback(vmf->page); priv = afs_page_dirty(0, PAGE_SIZE); + priv = afs_page_dirty_mmapped(priv); trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), vmf->page->index, priv); if (PagePrivate(vmf->page)) diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 866fc67d5aa5..4eef374d4413 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -986,10 +986,11 @@ TRACE_EVENT(afs_page_dirty, __entry->priv = priv; ), - TP_printk("vn=%p %lx %s %zx-%zx", + TP_printk("vn=%p %lx %s %zx-%zx%s", __entry->vnode, __entry->page, __entry->where, afs_page_dirty_from(__entry->priv), - afs_page_dirty_to(__entry->priv)) + afs_page_dirty_to(__entry->priv), + afs_is_page_dirty_mmapped(__entry->priv) ? " M" : "") ); TRACE_EVENT(afs_call_state, -- cgit v1.2.3-59-g8ed1b From 2d9900f26ad61e63a34f239bc76c80d2f8a6ff41 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Oct 2020 12:08:39 +0000 Subject: afs: Fix dirty-region encoding on ppc32 with 64K pages The dirty region bounds stored in page->private on an afs page are 15 bits on a 32-bit box and can, at most, represent a range of up to 32K within a 32K page with a resolution of 1 byte. This is a problem for powerpc32 with 64K pages enabled. Further, transparent huge pages may get up to 2M, which will be a problem for the afs filesystem on all 32-bit arches in the future. Fix this by decreasing the resolution. For the moment, a 64K page will have a resolution determined from PAGE_SIZE. In the future, the page will need to be passed in to the helper functions so that the page size can be assessed and the resolution determined dynamically. Note that this might not be the ideal way to handle this, since it may allow some leakage of undirtied zero bytes to the server's copy in the case of a 3rd-party conflict. Fixing that would require a separately allocated record and is a more complicated fix. 
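As a rough worked example of the arithmetic this patch introduces (illustrative numbers, not part of the change itself): the priv word gives 15 bits per bound on a 32-bit box, so with 64KiB pages (PAGE_SHIFT of 16) the resolution shift works out as 16 - 15 = 1 and the bounds are tracked in 2-byte steps, which is just enough to express the full 0...65536 range; with 4KiB pages the shift clamps to zero and byte resolution is retained; a 2MiB transparent huge page would need roughly 64-byte steps, which is why the helpers will eventually have to be given the page so that the shift can be sized dynamically, as noted above.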
Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record") Reported-by: kernel test robot Signed-off-by: David Howells Reviewed-by: Matthew Wilcox (Oracle) --- fs/afs/internal.h | 24 ++++++++++++++++++++---- fs/afs/write.c | 5 ----- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b0fce1f75397..14d5d75f4b6e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -861,7 +861,8 @@ struct afs_vnode_cache_aux { /* * We use page->private to hold the amount of the page that we've written to, * splitting the field into two parts. However, we need to represent a range - * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. + * 0...PAGE_SIZE, so we reduce the resolution if the size of the page + * exceeds what we can encode. */ #ifdef CONFIG_64BIT #define __AFS_PAGE_PRIV_MASK 0x7fffffffUL @@ -873,19 +874,34 @@ struct afs_vnode_cache_aux { #define __AFS_PAGE_PRIV_MMAPPED 0x8000UL #endif +static inline unsigned int afs_page_dirty_resolution(void) +{ + int shift = PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1); + return (shift > 0) ? shift : 0; +} + static inline size_t afs_page_dirty_from(unsigned long priv) { - return priv & __AFS_PAGE_PRIV_MASK; + unsigned long x = priv & __AFS_PAGE_PRIV_MASK; + + /* The lower bound is inclusive */ + return x << afs_page_dirty_resolution(); } static inline size_t afs_page_dirty_to(unsigned long priv) { - return ((priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK) + 1; + unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK; + + /* The upper bound is immediately beyond the region */ + return (x + 1) << afs_page_dirty_resolution(); } static inline unsigned long afs_page_dirty(size_t from, size_t to) { - return ((unsigned long)(to - 1) << __AFS_PAGE_PRIV_SHIFT) | from; + unsigned int res = afs_page_dirty_resolution(); + from >>= res; + to = (to - 1) >> res; + return (to << __AFS_PAGE_PRIV_SHIFT) | from; } static inline unsigned long afs_page_dirty_mmapped(unsigned long priv) diff --git a/fs/afs/write.c b/fs/afs/write.c index a2511e3ad2cc..50371207f327 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -90,11 +90,6 @@ int afs_write_begin(struct file *file, struct address_space *mapping, _enter("{%llx:%llu},{%lx},%u,%u", vnode->fid.vid, vnode->fid.vnode, index, from, to); - /* We want to store information about how much of a page is altered in - * page->private. - */ - BUILD_BUG_ON(PAGE_SIZE - 1 > __AFS_PAGE_PRIV_MASK && sizeof(page->private) < 8); - page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b
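To see how the encoding behaves end to end after the whole series, here is a minimal standalone userspace sketch of the final helpers. It is illustrative only: uint32_t stands in for page->private on a 32-bit machine, the 64KiB page size is an assumption made for the example, and all names are local to the sketch rather than taken from the kernel.

/*
 * Illustrative only: a userspace model of the dirty-range encoding that the
 * series arrives at, for the assumed case of a 32-bit machine with 64KiB
 * pages.  uint32_t stands in for page->private; the names are local to this
 * sketch, not the kernel's.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define MODEL_PAGE_SHIFT 16                     /* assumed: 64KiB pages */
#define MODEL_PAGE_SIZE  (1UL << MODEL_PAGE_SHIFT)

#define PRIV_MASK        0x7fffu                /* 15 bits for each bound */
#define PRIV_SHIFT       16
#define PRIV_MMAPPED     0x8000u                /* "dirtied by page_mkwrite" flag */

/* Bits of precision given up per bound (cf. afs_page_dirty_resolution()) */
static unsigned int dirty_resolution(void)
{
        int shift = MODEL_PAGE_SHIFT - (PRIV_SHIFT - 1);        /* 16 - 15 = 1 */
        return shift > 0 ? shift : 0;
}

/* Encode the half-open byte range [from, to) (cf. afs_page_dirty()) */
static uint32_t encode_dirty(size_t from, size_t to)
{
        unsigned int res = dirty_resolution();

        from >>= res;                   /* lower bound rounds down */
        to = (to - 1) >> res;           /* stored as the last byte covered */
        return ((uint32_t)to << PRIV_SHIFT) | (uint32_t)from;
}

static size_t dirty_from(uint32_t priv)         /* cf. afs_page_dirty_from() */
{
        return (size_t)(priv & PRIV_MASK) << dirty_resolution();
}

static size_t dirty_to(uint32_t priv)           /* cf. afs_page_dirty_to() */
{
        return (size_t)(((priv >> PRIV_SHIFT) & PRIV_MASK) + 1) << dirty_resolution();
}

int main(void)
{
        /* A write dirtying bytes 3..9 ("to" is exclusive, as in afs_write_end()) */
        uint32_t priv = encode_dirty(3, 10);

        printf("stored 0x%08x -> decodes to %zu..%zu\n",
               (unsigned int)priv, dirty_from(priv), dirty_to(priv));
        /* prints: stored 0x00040001 -> decodes to 2..10 */

        /* The whole 64KiB page fits, with the mkwrite flag alongside it */
        priv = encode_dirty(0, MODEL_PAGE_SIZE) | PRIV_MMAPPED;
        printf("whole page 0x%08x -> %zu..%zu, mmapped=%d\n",
               (unsigned int)priv, dirty_from(priv), dirty_to(priv),
               !!(priv & PRIV_MMAPPED));
        /* prints: whole page 0x7fff8000 -> 0..65536, mmapped=1 */
        return 0;
}

The widening of the lower bound from 3 to 2 in the first line of output is exactly the trade-off the last commit message accepts: undirtied bytes may be swept into the range written back, but dirty bytes are never dropped.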