From 39e3c9553f34381a1b664c27b0c696a266a5735e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 28 Nov 2012 11:30:53 -0500 Subject: vfs: remove DCACHE_NEED_LOOKUP The code that relied on that flag was ripped out of btrfs quite some time ago, and never added back. Josef indicated that he was going to take a different approach to the problem in btrfs, and that we could just eliminate this flag. Cc: Josef Bacik Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/dcache.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 59200795482e..c1754b59ddd3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -202,7 +202,6 @@ struct dentry_operations { #define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ #define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ -#define DCACHE_NEED_LOOKUP 0x80000 /* dentry requires i_op->lookup */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) @@ -408,13 +407,6 @@ static inline bool d_mountpoint(struct dentry *dentry) return dentry->d_flags & DCACHE_MOUNTED; } -static inline bool d_need_lookup(struct dentry *dentry) -{ - return dentry->d_flags & DCACHE_NEED_LOOKUP; -} - -extern void d_clear_need_lookup(struct dentry *dentry); - extern int sysctl_vfs_cache_pressure; #endif /* __LINUX_DCACHE_H */ -- cgit v1.2.3-59-g8ed1b From c4d6d8dbf335c7fa47341654a37c53a512b519bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:32 +0000 Subject: CacheFiles: Fix the marking of cached pages Under some circumstances CacheFiles defers the marking of pages with PG_fscache so that it can take advantage of pagevecs to reduce the number of calls to fscache_mark_pages_cached() and the netfs's hook to keep track of this. There are, however, two problems with this: (1) It can lead to the PG_fscache mark being applied _after_ the page is set PG_uptodate and unlocked (by the call to fscache_end_io()). (2) CacheFiles's ref on the page is dropped immediately following fscache_end_io() - and so may not still be held when the mark is applied. This can lead to the page being passed back to the allocator before the mark is applied. Fix this by, where appropriate, marking the page before calling fscache_end_io() and releasing the page. This means that we can't take advantage of pagevecs and have to make a separate call for each page to the marking routines. The symptoms of this are Bad Page state errors cropping up under memory pressure, for example: BUG: Bad page state in process tar pfn:002da page:ffffea0000009fb0 count:0 mapcount:0 mapping: (null) index:0x1447 page flags: 0x1000(private_2) Pid: 4574, comm: tar Tainted: G W 3.1.0-rc4-fsdevel+ #1064 Call Trace: [] ? dump_page+0xb9/0xbe [] bad_page+0xd5/0xea [] get_page_from_freelist+0x35b/0x46a [] __alloc_pages_nodemask+0x362/0x662 [] __do_page_cache_readahead+0x13a/0x267 [] ? __do_page_cache_readahead+0xa2/0x267 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x28b/0x29a [] ? ondemand_readahead+0x163/0x29a [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x2ab/0x67e [] nfs_file_read+0xa4/0xc9 [nfs] [] do_sync_read+0xba/0xfa [] ? security_file_permission+0x7b/0x84 [] ? rw_verify_area+0xab/0xc8 [] vfs_read+0xaa/0x13a [] sys_read+0x45/0x6c [] system_call_fastpath+0x16/0x1b As can be seen, PG_private_2 (== PG_fscache) is set in the page flags. Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was set appropriately showed that sometimes it wasn't. This led to the discovery that sometimes the page has apparently been reclaimed by the time the marker got to see it. Reported-by: M. Stevens Signed-off-by: David Howells Reviewed-by: Jeff Layton --- fs/cachefiles/rdwr.c | 34 ++++++++----------------- fs/fscache/page.c | 59 ++++++++++++++++++++++++++----------------- include/linux/fscache-cache.h | 3 +++ include/linux/fscache.h | 12 ++++----- 4 files changed, 56 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index c994691d9445..3367abdcdac4 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -176,9 +176,8 @@ static void cachefiles_read_copier(struct fscache_operation *_op) recheck: if (PageUptodate(monitor->back_page)) { copy_highpage(monitor->netfs_page, monitor->back_page); - - pagevec_add(&pagevec, monitor->netfs_page); - fscache_mark_pages_cached(monitor->op, &pagevec); + fscache_mark_page_cached(monitor->op, + monitor->netfs_page); error = 0; } else if (!PageError(monitor->back_page)) { /* the page has probably been truncated */ @@ -335,8 +334,7 @@ backing_page_already_present: backing_page_already_uptodate: _debug("- uptodate"); - pagevec_add(pagevec, netpage); - fscache_mark_pages_cached(op, pagevec); + fscache_mark_page_cached(op, netpage); copy_highpage(netpage, backpage); fscache_end_io(op, netpage, 0); @@ -448,8 +446,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, &pagevec); } else if (cachefiles_has_space(cache, 0, 1) == 0) { /* there's space in the cache we can use */ - pagevec_add(&pagevec, page); - fscache_mark_pages_cached(op, &pagevec); + fscache_mark_page_cached(op, page); ret = -ENODATA; } else { ret = -ENOBUFS; @@ -465,8 +462,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, */ static int cachefiles_read_backing_file(struct cachefiles_object *object, struct fscache_retrieval *op, - struct list_head *list, - struct pagevec *mark_pvec) + struct list_head *list) { struct cachefiles_one_read *monitor = NULL; struct address_space *bmapping = object->backer->d_inode->i_mapping; @@ -626,13 +622,13 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, page_cache_release(backpage); backpage = NULL; - if (!pagevec_add(mark_pvec, netpage)) - fscache_mark_pages_cached(op, mark_pvec); + fscache_mark_page_cached(op, netpage); page_cache_get(netpage); if (!pagevec_add(&lru_pvec, netpage)) __pagevec_lru_add_file(&lru_pvec); + /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); page_cache_release(netpage); netpage = NULL; @@ -775,15 +771,11 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, /* submit the apparently valid pages to the backing fs to be read from * disk */ if (nrbackpages > 0) { - ret2 = cachefiles_read_backing_file(object, op, &backpages, - &pagevec); + ret2 = cachefiles_read_backing_file(object, op, &backpages); if (ret2 == -ENOMEM || ret2 == -EINTR) ret = ret2; } - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - _leave(" = %d [nr=%u%s]", ret, *nr_pages, list_empty(pages) ? " empty" : ""); return ret; @@ -806,7 +798,6 @@ int cachefiles_allocate_page(struct fscache_retrieval *op, { struct cachefiles_object *object; struct cachefiles_cache *cache; - struct pagevec pagevec; int ret; object = container_of(op->op.object, @@ -817,13 +808,10 @@ int cachefiles_allocate_page(struct fscache_retrieval *op, _enter("%p,{%lx},", object, page->index); ret = cachefiles_has_space(cache, 0, 1); - if (ret == 0) { - pagevec_init(&pagevec, 0); - pagevec_add(&pagevec, page); - fscache_mark_pages_cached(op, &pagevec); - } else { + if (ret == 0) + fscache_mark_page_cached(op, page); + else ret = -ENOBUFS; - } _leave(" = %d", ret); return ret; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3f7a59bfa7ad..d7c663cfc923 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -914,6 +914,40 @@ done: } EXPORT_SYMBOL(__fscache_uncache_page); +/** + * fscache_mark_page_cached - Mark a page as being cached + * @op: The retrieval op pages are being marked for + * @page: The page to be marked + * + * Mark a netfs page as being cached. After this is called, the netfs + * must call fscache_uncache_page() to remove the mark. + */ +void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) +{ + struct fscache_cookie *cookie = op->op.object->cookie; + +#ifdef CONFIG_FSCACHE_STATS + atomic_inc(&fscache_n_marks); +#endif + + _debug("- mark %p{%lx}", page, page->index); + if (TestSetPageFsCache(page)) { + static bool once_only; + if (!once_only) { + once_only = true; + printk(KERN_WARNING "FS-Cache:" + " Cookie type %s marked page %lx" + " multiple times\n", + cookie->def->name, page->index); + } + } + + if (cookie->def->mark_page_cached) + cookie->def->mark_page_cached(cookie->netfs_data, + op->mapping, page); +} +EXPORT_SYMBOL(fscache_mark_page_cached); + /** * fscache_mark_pages_cached - Mark pages as being cached * @op: The retrieval op pages are being marked for @@ -925,32 +959,11 @@ EXPORT_SYMBOL(__fscache_uncache_page); void fscache_mark_pages_cached(struct fscache_retrieval *op, struct pagevec *pagevec) { - struct fscache_cookie *cookie = op->op.object->cookie; unsigned long loop; -#ifdef CONFIG_FSCACHE_STATS - atomic_add(pagevec->nr, &fscache_n_marks); -#endif - - for (loop = 0; loop < pagevec->nr; loop++) { - struct page *page = pagevec->pages[loop]; - - _debug("- mark %p{%lx}", page, page->index); - if (TestSetPageFsCache(page)) { - static bool once_only; - if (!once_only) { - once_only = true; - printk(KERN_WARNING "FS-Cache:" - " Cookie type %s marked page %lx" - " multiple times\n", - cookie->def->name, page->index); - } - } - } + for (loop = 0; loop < pagevec->nr; loop++) + fscache_mark_page_cached(op, pagevec->pages[loop]); - if (cookie->def->mark_pages_cached) - cookie->def->mark_pages_cached(cookie->netfs_data, - op->mapping, pagevec); pagevec_reinit(pagevec); } EXPORT_SYMBOL(fscache_mark_pages_cached); diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ce31408b1e47..9879183b55d8 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -504,6 +504,9 @@ extern void fscache_withdraw_cache(struct fscache_cache *cache); extern void fscache_io_error(struct fscache_cache *cache); +extern void fscache_mark_page_cached(struct fscache_retrieval *op, + struct page *page); + extern void fscache_mark_pages_cached(struct fscache_retrieval *op, struct pagevec *pagevec); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9ec20dec3353..f4b6353543bf 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -135,14 +135,14 @@ struct fscache_cookie_def { */ void (*put_context)(void *cookie_netfs_data, void *context); - /* indicate pages that now have cache metadata retained - * - this function should mark the specified pages as now being cached - * - the pages will have been marked with PG_fscache before this is + /* indicate page that now have cache metadata retained + * - this function should mark the specified page as now being cached + * - the page will have been marked with PG_fscache before this is * called, so this is optional */ - void (*mark_pages_cached)(void *cookie_netfs_data, - struct address_space *mapping, - struct pagevec *cached_pvec); + void (*mark_page_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct page *page); /* indicate the cookie is no longer cached * - this function is called when the backing store currently caching -- cgit v1.2.3-59-g8ed1b From ef46ed888efb1e8da33be5d33c9b54476289a43b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:35 +0000 Subject: FS-Cache: Make cookie relinquishment wait for outstanding reads Make fscache_relinquish_cookie() log a warning and wait if there are any outstanding reads left on the cookie it was given. Signed-off-by: David Howells --- fs/fscache/cookie.c | 18 ++++++++++++++---- fs/fscache/operation.c | 10 ++++++++-- include/linux/fscache-cache.h | 1 + 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 0666996adf80..66be9eccede0 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -442,22 +442,32 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; +try_again: spin_lock(&cookie->lock); /* break links with all the active objects */ while (!hlist_empty(&cookie->backing_objects)) { + int n_reads; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); _debug("RELEASE OBJ%x", object->debug_id); - if (atomic_read(&object->n_reads)) { + set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags); + n_reads = atomic_read(&object->n_reads); + if (n_reads) { + int n_ops = object->n_ops; + int n_in_progress = object->n_in_progress; spin_unlock(&cookie->lock); printk(KERN_ERR "FS-Cache:" - " Cookie '%s' still has %d outstanding reads\n", - cookie->def->name, atomic_read(&object->n_reads)); - BUG(); + " Cookie '%s' still has %d outstanding reads (%d,%d)\n", + cookie->def->name, + n_reads, n_ops, n_in_progress); + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + printk("Wait finished\n"); + goto try_again; } /* detach each cache object from the object cookie */ diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 30afdfa7aec7..c857ab824d6e 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -340,8 +340,14 @@ void fscache_put_operation(struct fscache_operation *op) object = op->object; - if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) - atomic_dec(&object->n_reads); + if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) { + if (atomic_dec_and_test(&object->n_reads)) { + clear_bit(FSCACHE_COOKIE_WAITING_ON_READS, + &object->cookie->flags); + wake_up_bit(&object->cookie->flags, + FSCACHE_COOKIE_WAITING_ON_READS); + } + } /* now... we may get called with the object spinlock held, so we * complete the cleanup here only if we can immediately acquire the diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 9879183b55d8..e3d6d939d959 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -301,6 +301,7 @@ struct fscache_cookie { #define FSCACHE_COOKIE_PENDING_FILL 3 /* T if pending initial fill on object */ #define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ #define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ +#define FSCACHE_COOKIE_WAITING_ON_READS 6 /* T if cookie is waiting on reads */ }; extern struct fscache_cookie fscache_fsdef_index; -- cgit v1.2.3-59-g8ed1b From 9f10523f891928330b7529da54c1a3cc65180b1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:35 +0000 Subject: FS-Cache: Fix operation state management and accounting Fix the state management of internal fscache operations and the accounting of what operations are in what states. This is done by: (1) Give struct fscache_operation a enum variable that directly represents the state it's currently in, rather than spreading this knowledge over a bunch of flags, who's processing the operation at the moment and whether it is queued or not. This makes it easier to write assertions to check the state at various points and to prevent invalid state transitions. (2) Add an 'operation complete' state and supply a function to indicate the completion of an operation (fscache_op_complete()) and make things call it. The final call to fscache_put_operation() can then check that an op in the appropriate state (complete or cancelled). (3) Adjust the use of object->n_ops, ->n_in_progress, ->n_exclusive to better govern the state of an object: (a) The ->n_ops is now the number of extant operations on the object and is now decremented by fscache_put_operation() only. (b) The ->n_in_progress is simply the number of objects that have been taken off of the object's pending queue for the purposes of being run. This is decremented by fscache_op_complete() only. (c) The ->n_exclusive is the number of exclusive ops that have been submitted and queued or are in progress. It is decremented by fscache_op_complete() and by fscache_cancel_op(). fscache_put_operation() and fscache_operation_gc() now no longer try to clean up ->n_exclusive and ->n_in_progress. That was leading to double decrements against fscache_cancel_op(). fscache_cancel_op() now no longer decrements ->n_ops. That was leading to double decrements against fscache_put_operation(). fscache_submit_exclusive_op() now decides whether it has to queue an op based on ->n_in_progress being > 0 rather than ->n_ops > 0 as the latter will persist in being true even after all preceding operations have been cancelled or completed. Furthermore, if an object is active and there are runnable ops against it, there must be at least one op running. (4) Add a remaining-pages counter (n_pages) to struct fscache_retrieval and provide a function to record completion of the pages as they complete. When n_pages reaches 0, the operation is deemed to be complete and fscache_op_complete() is called. Add calls to fscache_retrieval_complete() anywhere we've finished with a page we've been given to read or allocate for. This includes places where we just return pages to the netfs for reading from the server and where accessing the cache fails and we discard the proposed netfs page. The bugs in the unfixed state management manifest themselves as oopses like the following where the operation completion gets out of sync with return of the cookie by the netfs. This is possible because the cache unlocks and returns all the netfs pages before recording its completion - which means that there's nothing to stop the netfs discarding them and returning the cookie. FS-Cache: Cookie 'NFS.fh' still has outstanding reads ------------[ cut here ]------------ kernel BUG at fs/fscache/cookie.c:519! invalid opcode: 0000 [#1] SMP CPU 1 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 400, comm: kswapd0 Not tainted 3.1.0-rc7-fsdevel+ #1090 /DG965RY RIP: 0010:[] [] __fscache_relinquish_cookie+0x170/0x343 [fscache] RSP: 0018:ffff8800368cfb00 EFLAGS: 00010282 RAX: 000000000000003c RBX: ffff880023cc8790 RCX: 0000000000000000 RDX: 0000000000002f2e RSI: 0000000000000001 RDI: ffffffff813ab86c RBP: ffff8800368cfb50 R08: 0000000000000002 R09: 0000000000000000 R10: ffff88003a1b7890 R11: ffff88001df6e488 R12: ffff880023d8ed98 R13: ffff880023cc8798 R14: 0000000000000004 R15: ffff88003b8bf370 FS: 0000000000000000(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000008ba008 CR3: 0000000023d93000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kswapd0 (pid: 400, threadinfo ffff8800368ce000, task ffff88003b8bf040) Stack: ffff88003b8bf040 ffff88001df6e528 ffff88001df6e528 ffffffffa00b46b0 ffff88003b8bf040 ffff88001df6e488 ffff88001df6e620 ffffffffa00b46b0 ffff88001ebd04c8 0000000000000004 ffff8800368cfb70 ffffffffa00b2c91 Call Trace: [] nfs_fscache_release_inode_cookie+0x3b/0x47 [nfs] [] nfs_clear_inode+0x3c/0x41 [nfs] [] nfs4_evict_inode+0x2f/0x33 [nfs] [] evict+0xa1/0x15c [] dispose_list+0x2c/0x38 [] prune_icache_sb+0x28c/0x29b [] prune_super+0xd5/0x140 [] shrink_slab+0x102/0x1ab [] balance_pgdat+0x2f2/0x595 [] ? process_timeout+0xb/0xb [] kswapd+0x270/0x289 [] ? __init_waitqueue_head+0x46/0x46 [] ? balance_pgdat+0x595/0x595 [] kthread+0x7f/0x87 [] kernel_thread_helper+0x4/0x10 [] ? finish_task_switch+0x45/0xc0 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x53/0x53 [] ? gs_change+0xb/0xb Signed-off-by: David Howells --- Documentation/filesystems/caching/backend-api.txt | 26 ++++++- Documentation/filesystems/caching/operations.txt | 2 +- fs/cachefiles/rdwr.c | 31 ++++++-- fs/fscache/object.c | 2 - fs/fscache/operation.c | 91 +++++++++++++++-------- fs/fscache/page.c | 25 ++++++- include/linux/fscache-cache.h | 37 +++++++-- 7 files changed, 164 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index 382d52cdaf2d..f4769b9399df 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -419,7 +419,10 @@ performed on the denizens of the cache. These are held in a structure of type: If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS returned if possible or fscache_end_io() called with a suitable error - code.. + code. + + fscache_put_retrieval() should be called after a page or pages are dealt + with. This will complete the operation when all pages are dealt with. (*) Request pages be read from cache [mandatory]: @@ -526,6 +529,27 @@ FS-Cache provides some utilities that a cache backend may make use of: error value should be 0 if successful and an error otherwise. + (*) Record that one or more pages being retrieved or allocated have been dealt + with: + + void fscache_retrieval_complete(struct fscache_retrieval *op, + int n_pages); + + This is called to record the fact that one or more pages have been dealt + with and are no longer the concern of this operation. When the number of + pages remaining in the operation reaches 0, the operation will be + completed. + + + (*) Record operation completion: + + void fscache_op_complete(struct fscache_operation *op); + + This is called to record the completion of an operation. This deducts + this operation from the parent object's run state, potentially permitting + one or more pending operations to start running. + + (*) Set highest store limit: void fscache_set_store_limit(struct fscache_object *object, diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt index b6b070c57cbf..bee2a5f93d60 100644 --- a/Documentation/filesystems/caching/operations.txt +++ b/Documentation/filesystems/caching/operations.txt @@ -174,7 +174,7 @@ Operations are used through the following procedure: necessary (the object might have died whilst the thread was waiting). When it has finished doing its processing, it should call - fscache_put_operation() on it. + fscache_op_complete() and fscache_put_operation() on it. (4) The operation holds an effective lock upon the object, preventing other exclusive ops conflicting until it is released. The operation can be diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index bf123d9c3206..93a0815e0498 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -197,6 +197,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op) fscache_end_io(op, monitor->netfs_page, error); page_cache_release(monitor->netfs_page); + fscache_retrieval_complete(op, 1); fscache_put_retrieval(op); kfree(monitor); @@ -339,6 +340,7 @@ backing_page_already_uptodate: copy_highpage(netpage, backpage); fscache_end_io(op, netpage, 0); + fscache_retrieval_complete(op, 1); success: _debug("success"); @@ -360,6 +362,7 @@ read_error: goto out; io_error: cachefiles_io_error_obj(object, "Page read error on backing file"); + fscache_retrieval_complete(op, 1); ret = -ENOBUFS; goto out; @@ -369,6 +372,7 @@ nomem_monitor: fscache_put_retrieval(monitor->op); kfree(monitor); nomem: + fscache_retrieval_complete(op, 1); _leave(" = -ENOMEM"); return -ENOMEM; } @@ -407,7 +411,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, _enter("{%p},{%lx},,,", object, page->index); if (!object->backer) - return -ENOBUFS; + goto enobufs; inode = object->backer->d_inode; ASSERT(S_ISREG(inode->i_mode)); @@ -416,7 +420,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, /* calculate the shift required to use bmap */ if (inode->i_sb->s_blocksize > PAGE_SIZE) - return -ENOBUFS; + goto enobufs; shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -448,13 +452,19 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, } else if (cachefiles_has_space(cache, 0, 1) == 0) { /* there's space in the cache we can use */ fscache_mark_page_cached(op, page); + fscache_retrieval_complete(op, 1); ret = -ENODATA; } else { - ret = -ENOBUFS; + goto enobufs; } _leave(" = %d", ret); return ret; + +enobufs: + fscache_retrieval_complete(op, 1); + _leave(" = -ENOBUFS"); + return -ENOBUFS; } /* @@ -632,6 +642,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); + fscache_retrieval_complete(op, 1); page_cache_release(netpage); netpage = NULL; continue; @@ -659,6 +670,7 @@ out: list_for_each_entry_safe(netpage, _n, list, lru) { list_del(&netpage->lru); page_cache_release(netpage); + fscache_retrieval_complete(op, 1); } _leave(" = %d", ret); @@ -707,7 +719,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, *nr_pages); if (!object->backer) - return -ENOBUFS; + goto all_enobufs; space = 1; if (cachefiles_has_space(cache, 0, *nr_pages) < 0) @@ -720,7 +732,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, /* calculate the shift required to use bmap */ if (inode->i_sb->s_blocksize > PAGE_SIZE) - return -ENOBUFS; + goto all_enobufs; shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -760,7 +772,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, nrbackpages++; } else if (space && pagevec_add(&pagevec, page) == 0) { fscache_mark_pages_cached(op, &pagevec); + fscache_retrieval_complete(op, 1); ret = -ENODATA; + } else { + fscache_retrieval_complete(op, 1); } } @@ -781,6 +796,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, _leave(" = %d [nr=%u%s]", ret, *nr_pages, list_empty(pages) ? " empty" : ""); return ret; + +all_enobufs: + fscache_retrieval_complete(op, *nr_pages); + return -ENOBUFS; } /* @@ -815,6 +834,7 @@ int cachefiles_allocate_page(struct fscache_retrieval *op, else ret = -ENOBUFS; + fscache_retrieval_complete(op, 1); _leave(" = %d", ret); return ret; } @@ -864,6 +884,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op, ret = -ENOBUFS; } + fscache_retrieval_complete(op, *nr_pages); _leave(" = %d", ret); return ret; } diff --git a/fs/fscache/object.c b/fs/fscache/object.c index b6b897c550ac..773bc798a416 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -587,8 +587,6 @@ static void fscache_object_available(struct fscache_object *object) if (object->n_in_progress == 0) { if (object->n_ops > 0) { ASSERTCMP(object->n_ops, >=, object->n_obj_ops); - ASSERTIF(object->n_ops > object->n_obj_ops, - !list_empty(&object->pending_ops)); fscache_start_operations(object); } else { ASSERT(list_empty(&object->pending_ops)); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index c857ab824d6e..748f9553c2cb 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -37,6 +37,7 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); ASSERTCMP(atomic_read(&op->usage), >, 0); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -64,6 +65,9 @@ EXPORT_SYMBOL(fscache_enqueue_operation); static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + + op->state = FSCACHE_OP_ST_IN_PROGRESS; object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -80,22 +84,23 @@ static void fscache_run_op(struct fscache_object *object, int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op) { - int ret; - _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); + ASSERTCMP(atomic_read(&op->usage), >, 0); + spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); ASSERT(list_empty(&op->pend_link)); - ret = -ENOBUFS; + op->state = FSCACHE_OP_ST_PENDING; if (fscache_object_is_active(object)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ - if (object->n_ops > 1) { + if (object->n_in_progress > 0) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); @@ -111,7 +116,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, /* need to issue a new write op after this */ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); - ret = 0; } else if (object->state == FSCACHE_OBJECT_CREATING) { op->object = object; object->n_ops++; @@ -119,14 +123,13 @@ int fscache_submit_exclusive_op(struct fscache_object *object, atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); - ret = 0; } else { /* not allowed to submit ops in any other state */ BUG(); } spin_unlock(&object->lock); - return ret; + return 0; } /* @@ -186,6 +189,7 @@ int fscache_submit_op(struct fscache_object *object, _enter("{OBJ%x OP%x},{%u}", object->debug_id, op->debug_id, atomic_read(&op->usage)); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); ASSERTCMP(atomic_read(&op->usage), >, 0); spin_lock(&object->lock); @@ -196,6 +200,7 @@ int fscache_submit_op(struct fscache_object *object, ostate = object->state; smp_rmb(); + op->state = FSCACHE_OP_ST_PENDING; if (fscache_object_is_active(object)) { op->object = object; object->n_ops++; @@ -225,12 +230,15 @@ int fscache_submit_op(struct fscache_object *object, object->state == FSCACHE_OBJECT_LC_DYING || object->state == FSCACHE_OBJECT_WITHDRAWING) { fscache_stat(&fscache_n_op_rejected); + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else { + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } @@ -290,13 +298,18 @@ int fscache_cancel_op(struct fscache_operation *op) _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); + ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING); + ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED); + ASSERTCMP(atomic_read(&op->usage), >, 0); + spin_lock(&object->lock); ret = -EBUSY; - if (!list_empty(&op->pend_link)) { + if (op->state == FSCACHE_OP_ST_PENDING) { + ASSERT(!list_empty(&op->pend_link)); fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); - object->n_ops--; + op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) @@ -310,6 +323,37 @@ int fscache_cancel_op(struct fscache_operation *op) return ret; } +/* + * Record the completion of an in-progress operation. + */ +void fscache_op_complete(struct fscache_operation *op) +{ + struct fscache_object *object = op->object; + + _enter("OBJ%x", object->debug_id); + + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTCMP(object->n_in_progress, >, 0); + ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), + object->n_exclusive, >, 0); + ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), + object->n_in_progress, ==, 1); + + spin_lock(&object->lock); + + op->state = FSCACHE_OP_ST_COMPLETE; + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + spin_unlock(&object->lock); + _leave(""); +} +EXPORT_SYMBOL(fscache_op_complete); + /* * release an operation * - queues pending ops if this is the last in-progress op @@ -328,8 +372,9 @@ void fscache_put_operation(struct fscache_operation *op) return; _debug("PUT OP"); - if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) - BUG(); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE, + op->state, ==, FSCACHE_OP_ST_CANCELLED); + op->state = FSCACHE_OP_ST_DEAD; fscache_stat(&fscache_n_op_release); @@ -365,16 +410,6 @@ void fscache_put_operation(struct fscache_operation *op) return; } - if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { - ASSERTCMP(object->n_exclusive, >, 0); - object->n_exclusive--; - } - - ASSERTCMP(object->n_in_progress, >, 0); - object->n_in_progress--; - if (object->n_in_progress == 0) - fscache_start_operations(object); - ASSERTCMP(object->n_ops, >, 0); object->n_ops--; if (object->n_ops == 0) @@ -413,23 +448,14 @@ void fscache_operation_gc(struct work_struct *work) spin_unlock(&cache->op_gc_list_lock); object = op->object; + spin_lock(&object->lock); _debug("GC DEFERRED REL OBJ%x OP%x", object->debug_id, op->debug_id); fscache_stat(&fscache_n_op_gc); ASSERTCMP(atomic_read(&op->usage), ==, 0); - - spin_lock(&object->lock); - if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { - ASSERTCMP(object->n_exclusive, >, 0); - object->n_exclusive--; - } - - ASSERTCMP(object->n_in_progress, >, 0); - object->n_in_progress--; - if (object->n_in_progress == 0) - fscache_start_operations(object); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD); ASSERTCMP(object->n_ops, >, 0); object->n_ops--; @@ -437,6 +463,7 @@ void fscache_operation_gc(struct work_struct *work) fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); spin_unlock(&object->lock); + kfree(op); } while (count++ < 20); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 248a12e22532..b38b13d2a555 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -162,6 +162,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_abort_object(object); } + fscache_op_complete(op); _leave(""); } @@ -223,6 +224,8 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) _enter("{OP%x}", op->op.debug_id); + ASSERTCMP(op->n_pages, ==, 0); + fscache_hist(fscache_retrieval_histogram, op->start_time); if (op->context) fscache_put_context(op->op.object->cookie, op->context); @@ -320,6 +323,11 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, _debug("<<< GO"); check_if_dead: + if (op->op.state == FSCACHE_OP_ST_CANCELLED) { + fscache_stat(stat_object_dead); + _leave(" = -ENOBUFS [cancelled]"); + return -ENOBUFS; + } if (unlikely(fscache_object_is_dead(object))) { fscache_stat(stat_object_dead); return -ENOBUFS; @@ -364,6 +372,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, _leave(" = -ENOMEM"); return -ENOMEM; } + op->n_pages = 1; spin_lock(&cookie->lock); @@ -375,10 +384,10 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); atomic_inc(&object->n_reads); - set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_retrieval_ops); @@ -425,6 +434,8 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + atomic_dec(&object->n_reads); nobufs_unlock: spin_unlock(&cookie->lock); kfree(op); @@ -482,6 +493,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(mapping, end_io_func, context); if (!op) return -ENOMEM; + op->n_pages = *nr_pages; spin_lock(&cookie->lock); @@ -491,10 +503,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, struct fscache_object, cookie_link); atomic_inc(&object->n_reads); - set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_retrieval_ops); @@ -541,6 +553,8 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + atomic_dec(&object->n_reads); nobufs_unlock: spin_unlock(&cookie->lock); kfree(op); @@ -583,6 +597,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(page->mapping, NULL, NULL); if (!op) return -ENOMEM; + op->n_pages = 1; spin_lock(&cookie->lock); @@ -696,6 +711,7 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_end_page_write(object, page); if (ret < 0) { fscache_abort_object(object); + fscache_op_complete(&op->op); } else { fscache_enqueue_operation(&op->op); } @@ -710,6 +726,7 @@ superseded: spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); + fscache_op_complete(&op->op); _leave(""); } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index e3d6d939d959..f5facd1d333f 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -75,6 +75,16 @@ extern wait_queue_head_t fscache_cache_cleared_wq; typedef void (*fscache_operation_release_t)(struct fscache_operation *op); typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +enum fscache_operation_state { + FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ + FSCACHE_OP_ST_INITIALISED, /* Op is initialised */ + FSCACHE_OP_ST_PENDING, /* Op is blocked from running */ + FSCACHE_OP_ST_IN_PROGRESS, /* Op is in progress */ + FSCACHE_OP_ST_COMPLETE, /* Op is complete */ + FSCACHE_OP_ST_CANCELLED, /* Op has been cancelled */ + FSCACHE_OP_ST_DEAD /* Op is now dead */ +}; + struct fscache_operation { struct work_struct work; /* record for async ops */ struct list_head pend_link; /* link in object->pending_ops */ @@ -86,10 +96,10 @@ struct fscache_operation { #define FSCACHE_OP_MYTHREAD 0x0002 /* - processing is done be issuing thread, not pool */ #define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ #define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ -#define FSCACHE_OP_DEAD 6 /* op is now dead */ -#define FSCACHE_OP_DEC_READ_CNT 7 /* decrement object->n_reads on destruction */ -#define FSCACHE_OP_KEEP_FLAGS 0xc0 /* flags to keep when repurposing an op */ +#define FSCACHE_OP_DEC_READ_CNT 6 /* decrement object->n_reads on destruction */ +#define FSCACHE_OP_KEEP_FLAGS 0x0070 /* flags to keep when repurposing an op */ + enum fscache_operation_state state; atomic_t usage; unsigned debug_id; /* debugging ID */ @@ -106,6 +116,7 @@ extern atomic_t fscache_op_debug_id; extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); +extern void fscache_op_complete(struct fscache_operation *); extern void fscache_put_operation(struct fscache_operation *); /** @@ -122,6 +133,7 @@ static inline void fscache_operation_init(struct fscache_operation *op, { INIT_WORK(&op->work, fscache_op_work_func); atomic_set(&op->usage, 1); + op->state = FSCACHE_OP_ST_INITIALISED; op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->processor = processor; op->release = release; @@ -138,6 +150,7 @@ struct fscache_retrieval { void *context; /* netfs read context (pinned) */ struct list_head to_do; /* list of things to be done by the backend */ unsigned long start_time; /* time at which retrieval started */ + unsigned n_pages; /* number of pages to be retrieved */ }; typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, @@ -173,9 +186,23 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) fscache_enqueue_operation(&op->op); } +/** + * fscache_retrieval_complete - Record (partial) completion of a retrieval + * @op: The retrieval operation affected + * @n_pages: The number of pages to account for + */ +static inline void fscache_retrieval_complete(struct fscache_retrieval *op, + int n_pages) +{ + op->n_pages -= n_pages; + if (op->n_pages <= 0) + fscache_op_complete(&op->op); +} + /** * fscache_put_retrieval - Drop a reference to a retrieval operation * @op: The retrieval operation affected + * @n_pages: The number of pages to account for * * Drop a reference to a retrieval operation. */ @@ -333,10 +360,10 @@ struct fscache_object { int debug_id; /* debugging ID */ int n_children; /* number of child objects */ - int n_ops; /* number of ops outstanding on object */ + int n_ops; /* number of extant ops on object */ int n_obj_ops; /* number of object ops outstanding on object */ int n_in_progress; /* number of ops in progress */ - int n_exclusive; /* number of exclusive ops queued */ + int n_exclusive; /* number of exclusive ops queued or in progress */ atomic_t n_reads; /* number of read ops in progress */ spinlock_t lock; /* state and operations lock */ -- cgit v1.2.3-59-g8ed1b From ef778e7ae67cd426c30cad43378b908f5eb0bad5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: FS-Cache: Provide proper invalidation Provide a proper invalidation method rather than relying on the netfs retiring the cookie it has and getting a new one. The problem with this is that isn't easy for the netfs to make sure that it has completed/cancelled all its outstanding storage and retrieval operations on the cookie it is retiring. Instead, have the cache provide an invalidation method that will cancel or wait for all currently outstanding operations before invalidating the cache, and will cause new operations to queue up behind that. Whilst invalidation is in progress, some requests will be rejected until the cache can stack a barrier on the operation queue to cause new operations to be deferred behind it. Signed-off-by: David Howells --- Documentation/filesystems/caching/backend-api.txt | 12 ++++ Documentation/filesystems/caching/netfs-api.txt | 46 ++++++++++++--- Documentation/filesystems/caching/object.txt | 23 +++++--- fs/fscache/cookie.c | 60 +++++++++++++++++++ fs/fscache/internal.h | 10 ++++ fs/fscache/object.c | 72 +++++++++++++++++++++++ fs/fscache/operation.c | 32 ++++++++++ fs/fscache/page.c | 51 ++++++++++++++++ fs/fscache/stats.c | 11 +++- include/linux/fscache-cache.h | 8 ++- include/linux/fscache.h | 38 ++++++++++++ 11 files changed, 345 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index f4769b9399df..d78bab9622c6 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -308,6 +308,18 @@ performed on the denizens of the cache. These are held in a structure of type: obtained by calling object->cookie->def->get_aux()/get_attr(). + (*) Invalidate data object [mandatory]: + + int (*invalidate_object)(struct fscache_operation *op) + + This is called to invalidate a data object (as pointed to by op->object). + All the data stored for this object should be discarded and an + attr_changed operation should be performed. The caller will follow up + with an object update operation. + + fscache_op_complete() must be called on op before returning. + + (*) Discard object [mandatory]: void (*drop_object)(struct fscache_object *object) diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 7cc6bf2871eb..97e6c0ecc5ef 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -35,8 +35,9 @@ This document contains the following sections: (12) Index and data file update (13) Miscellaneous cookie operations (14) Cookie unregistration - (15) Index and data file invalidation - (16) FS-Cache specific page flags. + (15) Index invalidation + (16) Data file invalidation + (17) FS-Cache specific page flags. ============================= @@ -767,13 +768,42 @@ the cookies for "child" indices, objects and pages have been relinquished first. -================================ -INDEX AND DATA FILE INVALIDATION -================================ +================== +INDEX INVALIDATION +================== + +There is no direct way to invalidate an index subtree. To do this, the caller +should relinquish and retire the cookie they have, and then acquire a new one. + + +====================== +DATA FILE INVALIDATION +====================== + +Sometimes it will be necessary to invalidate an object that contains data. +Typically this will be necessary when the server tells the netfs of a foreign +change - at which point the netfs has to throw away all the state it had for an +inode and reload from the server. + +To indicate that a cache object should be invalidated, the following function +can be called: + + void fscache_invalidate(struct fscache_cookie *cookie); + +This can be called with spinlocks held as it defers the work to a thread pool. +All extant storage, retrieval and attribute change ops at this point are +cancelled and discarded. Some future operations will be rejected until the +cache has had a chance to insert a barrier in the operations queue. After +that, operations will be queued again behind the invalidation operation. + +The invalidation operation will perform an attribute change operation and an +auxiliary data update operation as it is very likely these will have changed. + +Using the following function, the netfs can wait for the invalidation operation +to have reached a point at which it can start submitting ordinary operations +once again: -There is no direct way to invalidate an index subtree or a data file. To do -this, the caller should relinquish and retire the cookie they have, and then -acquire a new one. + void fscache_wait_on_invalidate(struct fscache_cookie *cookie); =========================== diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt index 58313348da87..100ff41127e4 100644 --- a/Documentation/filesystems/caching/object.txt +++ b/Documentation/filesystems/caching/object.txt @@ -216,7 +216,14 @@ servicing netfs requests: The normal running state. In this state, requests the netfs makes will be passed on to the cache. - (6) State FSCACHE_OBJECT_UPDATING. + (6) State FSCACHE_OBJECT_INVALIDATING. + + The object is undergoing invalidation. When the state comes here, it + discards all pending read, write and attribute change operations as it is + going to clear out the cache entirely and reinitialise it. It will then + continue to the FSCACHE_OBJECT_UPDATING state. + + (7) State FSCACHE_OBJECT_UPDATING. The state machine comes here to update the object in the cache from the netfs's records. This involves updating the auxiliary data that is used @@ -225,13 +232,13 @@ servicing netfs requests: And there are terminal states in which an object cleans itself up, deallocates memory and potentially deletes stuff from disk: - (7) State FSCACHE_OBJECT_LC_DYING. + (8) State FSCACHE_OBJECT_LC_DYING. The object comes here if it is dying because of a lookup or creation error. This would be due to a disk error or system error of some sort. Temporary data is cleaned up, and the parent is released. - (8) State FSCACHE_OBJECT_DYING. + (9) State FSCACHE_OBJECT_DYING. The object comes here if it is dying due to an error, because its parent cookie has been relinquished by the netfs or because the cache is being @@ -241,27 +248,27 @@ memory and potentially deletes stuff from disk: can destroy themselves. This object waits for all its children to go away before advancing to the next state. - (9) State FSCACHE_OBJECT_ABORT_INIT. +(10) State FSCACHE_OBJECT_ABORT_INIT. The object comes to this state if it was waiting on its parent in FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself so that the parent may proceed from the FSCACHE_OBJECT_DYING state. -(10) State FSCACHE_OBJECT_RELEASING. -(11) State FSCACHE_OBJECT_RECYCLING. +(11) State FSCACHE_OBJECT_RELEASING. +(12) State FSCACHE_OBJECT_RECYCLING. The object comes to one of these two states when dying once it is rid of all its children, if it is dying because the netfs relinquished its cookie. In the first state, the cached data is expected to persist, and in the second it will be deleted. -(12) State FSCACHE_OBJECT_WITHDRAWING. +(13) State FSCACHE_OBJECT_WITHDRAWING. The object transits to this state if the cache decides it wants to withdraw the object from service, perhaps to make space, but also due to error or just because the whole cache is being withdrawn. -(13) State FSCACHE_OBJECT_DEAD. +(14) State FSCACHE_OBJECT_DEAD. The object transits to this state when the in-memory object record is ready to be deleted. The object processor shouldn't ever see an object in diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 66be9eccede0..8dcb114758e3 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -369,6 +369,66 @@ cant_attach_object: return ret; } +/* + * Invalidate an object. Callable with spinlocks held. + */ +void __fscache_invalidate(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + + _enter("{%s}", cookie->def->name); + + fscache_stat(&fscache_n_invalidates); + + /* Only permit invalidation of data files. Invalidating an index will + * require the caller to release all its attachments to the tree rooted + * there, and if it's doing that, it may as well just retire the + * cookie. + */ + ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); + + /* We will be updating the cookie too. */ + BUG_ON(!cookie->def->get_aux); + + /* If there's an object, we tell the object state machine to handle the + * invalidation on our behalf, otherwise there's nothing to do. + */ + if (!hlist_empty(&cookie->backing_objects)) { + spin_lock(&cookie->lock); + + if (!hlist_empty(&cookie->backing_objects) && + !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING, + &cookie->flags)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, + cookie_link); + if (object->state < FSCACHE_OBJECT_DYING) + fscache_raise_event( + object, FSCACHE_OBJECT_EV_INVALIDATE); + } + + spin_unlock(&cookie->lock); + } + + _leave(""); +} +EXPORT_SYMBOL(__fscache_invalidate); + +/* + * Wait for object invalidation to complete. + */ +void __fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ + _enter("%p", cookie); + + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, + fscache_wait_bit_interruptible, + TASK_UNINTERRUPTIBLE); + + _leave(""); +} +EXPORT_SYMBOL(__fscache_wait_on_invalidate); + /* * update the index entries backing a cookie */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f6aad48d38a8..c81179303930 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -122,10 +122,16 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); extern int fscache_cancel_op(struct fscache_operation *); +extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); +/* + * page.c + */ +extern void fscache_invalidate_writes(struct fscache_cookie *); + /* * proc.c */ @@ -205,6 +211,9 @@ extern atomic_t fscache_n_acquires_ok; extern atomic_t fscache_n_acquires_nobufs; extern atomic_t fscache_n_acquires_oom; +extern atomic_t fscache_n_invalidates; +extern atomic_t fscache_n_invalidates_run; + extern atomic_t fscache_n_updates; extern atomic_t fscache_n_updates_null; extern atomic_t fscache_n_updates_run; @@ -237,6 +246,7 @@ extern atomic_t fscache_n_cop_alloc_object; extern atomic_t fscache_n_cop_lookup_object; extern atomic_t fscache_n_cop_lookup_complete; extern atomic_t fscache_n_cop_grab_object; +extern atomic_t fscache_n_cop_invalidate_object; extern atomic_t fscache_n_cop_update_object; extern atomic_t fscache_n_cop_drop_object; extern atomic_t fscache_n_cop_put_object; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 773bc798a416..80b549141ea6 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -14,6 +14,7 @@ #define FSCACHE_DEBUG_LEVEL COOKIE #include +#include #include "internal.h" const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { @@ -22,6 +23,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", + [FSCACHE_OBJECT_INVALIDATING] = "OBJECT_INVALIDATING", [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", @@ -39,6 +41,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { [FSCACHE_OBJECT_CREATING] = "CRTN", [FSCACHE_OBJECT_AVAILABLE] = "AVBL", [FSCACHE_OBJECT_ACTIVE] = "ACTV", + [FSCACHE_OBJECT_INVALIDATING] = "INVL", [FSCACHE_OBJECT_UPDATING] = "UPDT", [FSCACHE_OBJECT_DYING] = "DYNG", [FSCACHE_OBJECT_LC_DYING] = "LCDY", @@ -54,6 +57,7 @@ static void fscache_put_object(struct fscache_object *); static void fscache_initialise_object(struct fscache_object *); static void fscache_lookup_object(struct fscache_object *); static void fscache_object_available(struct fscache_object *); +static void fscache_invalidate_object(struct fscache_object *); static void fscache_release_object(struct fscache_object *); static void fscache_withdraw_object(struct fscache_object *); static void fscache_enqueue_dependents(struct fscache_object *); @@ -78,6 +82,15 @@ static inline void fscache_done_parent_op(struct fscache_object *object) spin_unlock(&parent->lock); } +/* + * Notify netfs of invalidation completion. + */ +static inline void fscache_invalidation_complete(struct fscache_cookie *cookie) +{ + if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); +} + /* * process events that have been sent to an object's state machine * - initiates parent lookup @@ -125,6 +138,16 @@ static void fscache_object_state_machine(struct fscache_object *object) case FSCACHE_OBJECT_ACTIVE: goto active_transit; + /* Invalidate an object on disk */ + case FSCACHE_OBJECT_INVALIDATING: + clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events); + fscache_stat(&fscache_n_invalidates_run); + fscache_stat(&fscache_n_cop_invalidate_object); + fscache_invalidate_object(object); + fscache_stat_d(&fscache_n_cop_invalidate_object); + fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); + goto active_transit; + /* update the object metadata on disk */ case FSCACHE_OBJECT_UPDATING: clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); @@ -275,6 +298,9 @@ active_transit: case FSCACHE_OBJECT_EV_ERROR: new_state = FSCACHE_OBJECT_DYING; goto change_state; + case FSCACHE_OBJECT_EV_INVALIDATE: + new_state = FSCACHE_OBJECT_INVALIDATING; + goto change_state; case FSCACHE_OBJECT_EV_UPDATE: new_state = FSCACHE_OBJECT_UPDATING; goto change_state; @@ -679,6 +705,7 @@ static void fscache_withdraw_object(struct fscache_object *object) if (object->cookie == cookie) { hlist_del_init(&object->cookie_link); object->cookie = NULL; + fscache_invalidation_complete(cookie); detached = true; } spin_unlock(&cookie->lock); @@ -888,3 +915,48 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object, return result; } EXPORT_SYMBOL(fscache_check_aux); + +/* + * Asynchronously invalidate an object. + */ +static void fscache_invalidate_object(struct fscache_object *object) +{ + struct fscache_operation *op; + struct fscache_cookie *cookie = object->cookie; + + _enter("{OBJ%x}", object->debug_id); + + /* Reject any new read/write ops and abort any that are pending. */ + fscache_invalidate_writes(cookie); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + fscache_cancel_all_ops(object); + + /* Now we have to wait for in-progress reads and writes */ + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); + _leave(" [ENOMEM]"); + return; + } + + fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); + op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); + + spin_lock(&cookie->lock); + if (fscache_submit_exclusive_op(object, op) < 0) + BUG(); + spin_unlock(&cookie->lock); + fscache_put_operation(op); + + /* Once we've completed the invalidation, we know there will be no data + * stored in the cache and thus we can reinstate the data-check-skip + * optimisation. + */ + set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + /* We can allow read and write requests to come in once again. They'll + * queue up behind our exclusive invalidation operation. + */ + fscache_invalidation_complete(cookie); + _leave(""); +} diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 748f9553c2cb..c58dbe613266 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -323,6 +323,38 @@ int fscache_cancel_op(struct fscache_operation *op) return ret; } +/* + * Cancel all pending operations on an object + */ +void fscache_cancel_all_ops(struct fscache_object *object) +{ + struct fscache_operation *op; + + _enter("OBJ%x", object->debug_id); + + spin_lock(&object->lock); + + while (!list_empty(&object->pending_ops)) { + op = list_entry(object->pending_ops.next, + struct fscache_operation, pend_link); + fscache_stat(&fscache_n_op_cancelled); + list_del_init(&op->pend_link); + + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + op->state = FSCACHE_OP_ST_CANCELLED; + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + fscache_put_operation(op); + cond_resched_lock(&object->lock); + } + + spin_unlock(&object->lock); + _leave(""); +} + /* * Record the completion of an in-progress operation. */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index b38b13d2a555..7bf9d2557052 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -361,6 +361,11 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, if (hlist_empty(&cookie->backing_objects)) goto nobufs; + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(page, !=, NULL); @@ -483,6 +488,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, if (hlist_empty(&cookie->backing_objects)) goto nobufs; + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(*nr_pages, >, 0); ASSERT(!list_empty(pages)); @@ -591,6 +601,11 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(page, !=, NULL); + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + if (fscache_wait_for_deferred_lookup(cookie) < 0) return -ERESTARTSYS; @@ -730,6 +745,37 @@ superseded: _leave(""); } +/* + * Clear the pages pending writing for invalidation + */ +void fscache_invalidate_writes(struct fscache_cookie *cookie) +{ + struct page *page; + void *results[16]; + int n, i; + + _enter(""); + + while (spin_lock(&cookie->stores_lock), + n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, + ARRAY_SIZE(results), + FSCACHE_COOKIE_PENDING_TAG), + n > 0) { + for (i = n - 1; i >= 0; i--) { + page = results[i]; + radix_tree_delete(&cookie->stores, page->index); + } + + spin_unlock(&cookie->stores_lock); + + for (i = n - 1; i >= 0; i--) + page_cache_release(results[i]); + } + + spin_unlock(&cookie->stores_lock); + _leave(""); +} + /* * request a page be stored in the cache * - returns: @@ -776,6 +822,11 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_stores); + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY); if (!op) goto nomem; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190d537f..51cdaee14109 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -80,6 +80,9 @@ atomic_t fscache_n_acquires_ok; atomic_t fscache_n_acquires_nobufs; atomic_t fscache_n_acquires_oom; +atomic_t fscache_n_invalidates; +atomic_t fscache_n_invalidates_run; + atomic_t fscache_n_updates; atomic_t fscache_n_updates_null; atomic_t fscache_n_updates_run; @@ -112,6 +115,7 @@ atomic_t fscache_n_cop_alloc_object; atomic_t fscache_n_cop_lookup_object; atomic_t fscache_n_cop_lookup_complete; atomic_t fscache_n_cop_grab_object; +atomic_t fscache_n_cop_invalidate_object; atomic_t fscache_n_cop_update_object; atomic_t fscache_n_cop_drop_object; atomic_t fscache_n_cop_put_object; @@ -168,6 +172,10 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_object_created), atomic_read(&fscache_n_object_lookups_timed_out)); + seq_printf(m, "Invals : n=%u run=%u\n", + atomic_read(&fscache_n_invalidates), + atomic_read(&fscache_n_invalidates_run)); + seq_printf(m, "Updates: n=%u nul=%u run=%u\n", atomic_read(&fscache_n_updates), atomic_read(&fscache_n_updates_null), @@ -246,7 +254,8 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cop_lookup_object), atomic_read(&fscache_n_cop_lookup_complete), atomic_read(&fscache_n_cop_grab_object)); - seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n", + seq_printf(m, "CacheOp: inv=%d upo=%d dro=%d pto=%d atc=%d syn=%d\n", + atomic_read(&fscache_n_cop_invalidate_object), atomic_read(&fscache_n_cop_update_object), atomic_read(&fscache_n_cop_drop_object), atomic_read(&fscache_n_cop_put_object), diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index f5facd1d333f..1e454ad7a832 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -254,6 +254,9 @@ struct fscache_cache_ops { /* store the updated auxiliary data on an object */ void (*update_object)(struct fscache_object *object); + /* Invalidate an object */ + void (*invalidate_object)(struct fscache_operation *op); + /* discard the resources pinned by an object and effect retirement if * necessary */ void (*drop_object)(struct fscache_object *object); @@ -329,6 +332,7 @@ struct fscache_cookie { #define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ #define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ #define FSCACHE_COOKIE_WAITING_ON_READS 6 /* T if cookie is waiting on reads */ +#define FSCACHE_COOKIE_INVALIDATING 7 /* T if cookie is being invalidated */ }; extern struct fscache_cookie fscache_fsdef_index; @@ -345,6 +349,7 @@ struct fscache_object { /* active states */ FSCACHE_OBJECT_AVAILABLE, /* cleaning up object after creation */ FSCACHE_OBJECT_ACTIVE, /* object is usable */ + FSCACHE_OBJECT_INVALIDATING, /* object is invalidating */ FSCACHE_OBJECT_UPDATING, /* object is updating */ /* terminal states */ @@ -378,7 +383,8 @@ struct fscache_object { #define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ #define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ #define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ -#define FSCACHE_OBJECT_EVENTS_MASK 0x7f /* mask of all events*/ +#define FSCACHE_OBJECT_EV_INVALIDATE 7 /* T if cache requested object invalidation */ +#define FSCACHE_OBJECT_EVENTS_MASK 0xff /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4b6353543bf..7a086235da4b 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -185,6 +185,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie( extern void __fscache_relinquish_cookie(struct fscache_cookie *, int); extern void __fscache_update_cookie(struct fscache_cookie *); extern int __fscache_attr_changed(struct fscache_cookie *); +extern void __fscache_invalidate(struct fscache_cookie *); +extern void __fscache_wait_on_invalidate(struct fscache_cookie *); extern int __fscache_read_or_alloc_page(struct fscache_cookie *, struct page *, fscache_rw_complete_t, @@ -389,6 +391,42 @@ int fscache_attr_changed(struct fscache_cookie *cookie) return -ENOBUFS; } +/** + * fscache_invalidate - Notify cache that an object needs invalidation + * @cookie: The cookie representing the cache object + * + * Notify the cache that an object is needs to be invalidated and that it + * should abort any retrievals or stores it is doing on the cache. The object + * is then marked non-caching until such time as the invalidation is complete. + * + * This can be called with spinlocks held. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_invalidate(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_invalidate(cookie); +} + +/** + * fscache_wait_on_invalidate - Wait for invalidation to complete + * @cookie: The cookie representing the cache object + * + * Wait for the invalidation of an object to complete. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_wait_on_invalidate(cookie); +} + /** * fscache_reserve_space - Reserve data space for a cached object * @cookie: The cookie representing the cache object -- cgit v1.2.3-59-g8ed1b From a02de9608595c8ef649ef03ae735b0b45e3d4396 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: VFS: Make more complete truncate operation available to CacheFiles Make a more complete truncate operation available to CacheFiles (including security checks and suchlike) so that it can use this to clear invalidated cache files. Signed-off-by: David Howells Acked-by: Al Viro --- fs/open.c | 50 +++++++++++++++++++++++++++----------------------- include/linux/fs.h | 1 + 2 files changed, 28 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/open.c b/fs/open.c index 182d8667b7bd..c819bbdab47f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -61,33 +61,22 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, return ret; } -static long do_sys_truncate(const char __user *pathname, loff_t length) +long vfs_truncate(struct path *path, loff_t length) { - struct path path; struct inode *inode; - int error; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; + long error; - error = user_path(pathname, &path); - if (error) - goto out; - inode = path.dentry->d_inode; + inode = path->dentry->d_inode; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ - error = -EISDIR; if (S_ISDIR(inode->i_mode)) - goto dput_and_out; - - error = -EINVAL; + return -EISDIR; if (!S_ISREG(inode->i_mode)) - goto dput_and_out; + return -EINVAL; - error = mnt_want_write(path.mnt); + error = mnt_want_write(path->mnt); if (error) - goto dput_and_out; + goto out; error = inode_permission(inode, MAY_WRITE); if (error) @@ -111,19 +100,34 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) - error = security_path_truncate(&path); + error = security_path_truncate(path); if (!error) - error = do_truncate(path.dentry, length, 0, NULL); + error = do_truncate(path->dentry, length, 0, NULL); put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: - mnt_drop_write(path.mnt); -dput_and_out: - path_put(&path); + mnt_drop_write(path->mnt); out: return error; } +EXPORT_SYMBOL_GPL(vfs_truncate); + +static long do_sys_truncate(const char __user *pathname, loff_t length) +{ + struct path path; + int error; + + if (length < 0) /* sorry, but loff_t says... */ + return -EINVAL; + + error = user_path(pathname, &path); + if (!error) { + error = vfs_truncate(&path, length); + path_put(&path); + } + return error; +} SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) { diff --git a/include/linux/fs.h b/include/linux/fs.h index a823d4be38e7..017a15b707e2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1999,6 +1999,7 @@ struct filename { bool separate; /* should "name" be freed? */ }; +extern long vfs_truncate(struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int do_fallocate(struct file *file, int mode, loff_t offset, -- cgit v1.2.3-59-g8ed1b From 36a02de5d7981435931d4608ee3e510b752e072b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:46 +0000 Subject: FS-Cache: Convert the object event ID #defines into an enum Convert the fscache_object event IDs from #defines into an enum. Also add an extra label to the enum to carry the event count and redefine the event mask in terms of that. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 1e454ad7a832..73e68c8d5df4 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -337,6 +337,23 @@ struct fscache_cookie { extern struct fscache_cookie fscache_fsdef_index; +/* + * Event list for fscache_object::{event_mask,events} + */ +enum { + FSCACHE_OBJECT_EV_REQUEUE, /* T if object should be requeued */ + FSCACHE_OBJECT_EV_UPDATE, /* T if object should be updated */ + FSCACHE_OBJECT_EV_INVALIDATE, /* T if cache requested object invalidation */ + FSCACHE_OBJECT_EV_CLEARED, /* T if accessors all gone */ + FSCACHE_OBJECT_EV_ERROR, /* T if fatal error occurred during processing */ + FSCACHE_OBJECT_EV_RELEASE, /* T if netfs requested object release */ + FSCACHE_OBJECT_EV_RETIRE, /* T if netfs requested object retirement */ + FSCACHE_OBJECT_EV_WITHDRAW, /* T if cache requested object withdrawal */ + NR_FSCACHE_OBJECT_EVENTS +}; + +#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1) + /* * on-disk cache file or index handle */ @@ -376,15 +393,6 @@ struct fscache_object { unsigned long event_mask; /* events this object is interested in */ unsigned long events; /* events to be processed by this object * (order is important - using fls) */ -#define FSCACHE_OBJECT_EV_REQUEUE 0 /* T if object should be requeued */ -#define FSCACHE_OBJECT_EV_UPDATE 1 /* T if object should be updated */ -#define FSCACHE_OBJECT_EV_CLEARED 2 /* T if accessors all gone */ -#define FSCACHE_OBJECT_EV_ERROR 3 /* T if fatal error occurred during processing */ -#define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ -#define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ -#define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ -#define FSCACHE_OBJECT_EV_INVALIDATE 7 /* T if cache requested object invalidation */ -#define FSCACHE_OBJECT_EVENTS_MASK 0xff /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ -- cgit v1.2.3-59-g8ed1b From 1f372dff1da37e2b36ae9085368fa46896398598 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Dec 2012 20:03:13 +0000 Subject: FS-Cache: Mark cancellation of in-progress operation Mark as cancelled an operation that is in progress rather than pending at the time it is cancelled, and call fscache_complete_op() to cancel an operation so that blocked ops can be started. Signed-off-by: David Howells --- fs/cachefiles/interface.c | 2 +- fs/fscache/operation.c | 7 ++++--- fs/fscache/page.c | 10 +++++----- include/linux/fscache-cache.h | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 7a9d574b961c..746ce532e130 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -484,7 +484,7 @@ static void cachefiles_invalidate_object(struct fscache_operation *op) } } - fscache_op_complete(op); + fscache_op_complete(op, true); _leave(""); } diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 9e6b7d232bb1..36c59604130d 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -363,9 +363,9 @@ void fscache_cancel_all_ops(struct fscache_object *object) } /* - * Record the completion of an in-progress operation. + * Record the completion or cancellation of an in-progress operation. */ -void fscache_op_complete(struct fscache_operation *op) +void fscache_op_complete(struct fscache_operation *op, bool cancelled) { struct fscache_object *object = op->object; @@ -380,7 +380,8 @@ void fscache_op_complete(struct fscache_operation *op) spin_lock(&object->lock); - op->state = FSCACHE_OP_ST_COMPLETE; + op->state = cancelled ? + FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index ef0218f5080d..8a92b9fabe83 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -171,7 +171,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_abort_object(object); } - fscache_op_complete(op); + fscache_op_complete(op, true); _leave(""); } @@ -704,7 +704,7 @@ static void fscache_write_op(struct fscache_operation *_op) * exists, so we should just cancel this write operation. */ spin_unlock(&object->lock); - op->op.state = FSCACHE_OP_ST_CANCELLED; + fscache_op_complete(&op->op, false); _leave(" [inactive]"); return; } @@ -717,7 +717,7 @@ static void fscache_write_op(struct fscache_operation *_op) * cancel this write operation. */ spin_unlock(&object->lock); - op->op.state = FSCACHE_OP_ST_CANCELLED; + fscache_op_complete(&op->op, false); _leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", _op->flags, _op->state, object->state, object->flags); return; @@ -755,7 +755,7 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_end_page_write(object, page); if (ret < 0) { fscache_abort_object(object); - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); } else { fscache_enqueue_operation(&op->op); } @@ -770,7 +770,7 @@ superseded: spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); _leave(""); } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 73e68c8d5df4..5dfa0aa216b6 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -116,7 +116,7 @@ extern atomic_t fscache_op_debug_id; extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); -extern void fscache_op_complete(struct fscache_operation *); +extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); /** @@ -196,7 +196,7 @@ static inline void fscache_retrieval_complete(struct fscache_retrieval *op, { op->n_pages -= n_pages; if (op->n_pages <= 0) - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); } /** -- cgit v1.2.3-59-g8ed1b From d30357f2f0ec0bfb67fd39f8f76d22d02d78631e Mon Sep 17 00:00:00 2001 From: Marco Stornelli Date: Sat, 15 Dec 2012 11:59:20 +0100 Subject: vfs: drop vmtruncate Removed vmtruncate Signed-off-by: Marco Stornelli Signed-off-by: Al Viro --- fs/libfs.c | 2 -- include/linux/fs.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/fs/libfs.c b/fs/libfs.c index 35fc6e74cd88..916da8c4158b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -369,8 +369,6 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr) struct inode *inode = dentry->d_inode; int error; - WARN_ON_ONCE(inode->i_op->truncate); - error = inode_change_ok(inode, iattr); if (error) return error; diff --git a/include/linux/fs.h b/include/linux/fs.h index a823d4be38e7..a0c5ba57ffc5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1565,7 +1565,6 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); - void (*truncate) (struct inode *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); -- cgit v1.2.3-59-g8ed1b From 7898575fc81bd707ce0844cb06874d48e39bbe09 Mon Sep 17 00:00:00 2001 From: Marco Stornelli Date: Sat, 15 Dec 2012 12:00:02 +0100 Subject: mm: drop vmtruncate Removed vmtruncate Signed-off-by: Marco Stornelli Signed-off-by: Al Viro --- include/linux/mm.h | 1 - mm/truncate.c | 23 ----------------------- 2 files changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7f4f906190bd..63204078f72b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1007,7 +1007,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); -extern int vmtruncate(struct inode *inode, loff_t offset); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); diff --git a/mm/truncate.c b/mm/truncate.c index d51ce92d6e83..c75b736e54b7 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -576,29 +576,6 @@ void truncate_setsize(struct inode *inode, loff_t newsize) } EXPORT_SYMBOL(truncate_setsize); -/** - * vmtruncate - unmap mappings "freed" by truncate() syscall - * @inode: inode of the file used - * @newsize: file offset to start truncating - * - * This function is deprecated and truncate_setsize or truncate_pagecache - * should be used instead, together with filesystem specific block truncation. - */ -int vmtruncate(struct inode *inode, loff_t newsize) -{ - int error; - - error = inode_newsize_ok(inode, newsize); - if (error) - return error; - - truncate_setsize(inode, newsize); - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; -} -EXPORT_SYMBOL(vmtruncate); - /** * truncate_pagecache_range - unmap and remove pagecache that is hole-punched * @inode: inode -- cgit v1.2.3-59-g8ed1b From 471667391a92bf7bf2cd4ff31a3ad88e5dec934b Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Thu, 13 Dec 2012 12:22:39 +0100 Subject: vfs: Remove useless function prototypes Commit 8e22cc88d68ca1a46d7d582938f979eb640ed30f removes the (un)lock_super function definitions but forgets to remove their prototypes. Signed-off-by: Alessio Igor Bogani Signed-off-by: Al Viro --- include/linux/fs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index a0c5ba57ffc5..05cd238ad941 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1445,10 +1445,6 @@ static inline void sb_start_intwrite(struct super_block *sb) extern bool inode_owner_or_capable(const struct inode *inode); -/* not quite ready to be deprecated, but... */ -extern void lock_super(struct super_block *); -extern void unlock_super(struct super_block *); - /* * VFS helper functions.. */ -- cgit v1.2.3-59-g8ed1b From b9d6ba94b875192ef5e2dab92d72beea33b83c3d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Dec 2012 14:59:40 -0500 Subject: vfs: add a retry_estale helper function to handle retries on ESTALE This function is expected to be called from path-based syscalls to help them decide whether to try the lookup and call again in the event that they got an -ESTALE return back on an earier try. Currently, we only retry the call once on an ESTALE error, but in the event that we decide that that's not enough in the future, we should be able to change the logic in this helper without too much effort. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/namei.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 4bf19d8174ed..66542b644804 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -98,4 +98,20 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) ((char *) name)[min(len, maxlen)] = '\0'; } +/** + * retry_estale - determine whether the caller should retry an operation + * @error: the error that would currently be returned + * @flags: flags being used for next lookup attempt + * + * Check to see if the error code was -ESTALE, and then determine whether + * to retry the call based on whether "flags" already has LOOKUP_REVAL set. + * + * Returns true if the caller should try the operation again. + */ +static inline bool +retry_estale(const long error, const unsigned int flags) +{ + return error == -ESTALE && !(flags & LOOKUP_REVAL); +} + #endif /* _LINUX_NAMEI_H */ -- cgit v1.2.3-59-g8ed1b From 1ac12b4b6d707937f9de6d09622823b2fd0c93ef Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:06 -0500 Subject: vfs: turn is_dir argument to kern_path_create into a lookup_flags arg Where we can pass in LOOKUP_DIRECTORY or LOOKUP_REVAL. Any other flags passed in here are currently ignored. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/syscalls.c | 2 +- drivers/base/devtmpfs.c | 2 +- fs/namei.c | 21 ++++++++++++++++----- include/linux/namei.h | 4 ++-- 4 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 5b7d8ffbf890..baee994fe810 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -66,7 +66,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags, struct dentry *dentry; int ret; - dentry = user_path_create(AT_FDCWD, pathname, &path, 1); + dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY); ret = PTR_ERR(dentry); if (!IS_ERR(dentry)) { ret = spufs_create(&path, dentry, flags, mode, neighbor); diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 147d1a4dd269..17cf7cad601e 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -148,7 +148,7 @@ static int dev_mkdir(const char *name, umode_t mode) struct path path; int err; - dentry = kern_path_create(AT_FDCWD, name, &path, 1); + dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/fs/namei.c b/fs/namei.c index 25a41e02984b..8f8e41f6eb52 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3030,12 +3030,22 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, return file; } -struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir) +struct dentry *kern_path_create(int dfd, const char *pathname, + struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); struct nameidata nd; int err2; - int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); + int error; + bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); + + /* + * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any + * other flags passed in are ignored! + */ + lookup_flags &= LOOKUP_REVAL; + + error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd); if (error) return ERR_PTR(error); @@ -3099,13 +3109,14 @@ void done_path_create(struct path *path, struct dentry *dentry) } EXPORT_SYMBOL(done_path_create); -struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) +struct dentry *user_path_create(int dfd, const char __user *pathname, + struct path *path, unsigned int lookup_flags) { struct filename *tmp = getname(pathname); struct dentry *res; if (IS_ERR(tmp)) return ERR_CAST(tmp); - res = kern_path_create(dfd, tmp->name, path, is_dir); + res = kern_path_create(dfd, tmp->name, path, lookup_flags); putname(tmp); return res; } @@ -3228,7 +3239,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) struct path path; int error; - dentry = user_path_create(dfd, pathname, &path, 1); + dentry = user_path_create(dfd, pathname, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/include/linux/namei.h b/include/linux/namei.h index 66542b644804..e998c030061d 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -65,8 +65,8 @@ extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, extern int kern_path(const char *, unsigned, struct path *); -extern struct dentry *kern_path_create(int, const char *, struct path *, int); -extern struct dentry *user_path_create(int, const char __user *, struct path *, int); +extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); +extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, -- cgit v1.2.3-59-g8ed1b