From 06b3db1b9bccdc8c2c743122a89745279e5ecc46 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:36 +0100 Subject: FS-Cache: Add main configuration option, module entry points and debugging Add the main configuration option, allowing FS-Cache to be selected; the module entry and exit functions and the debugging stuff used by these patches. The two configuration options added are: CONFIG_FSCACHE CONFIG_FSCACHE_DEBUG The first enables the facility, and the second makes the debugging statements enableable through the "debug" module parameter. The value of this parameter is a bitmask as described in: Documentation/filesystems/caching/fscache.txt The module can be loaded at this point, but all it will do at this point in the patch series is to start up the slow work facility and shut it down again. Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- fs/fscache/internal.h | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 fs/fscache/internal.h (limited to 'fs/fscache/internal.h') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h new file mode 100644 index 000000000000..95dc92da7152 --- /dev/null +++ b/fs/fscache/internal.h @@ -0,0 +1,165 @@ +/* Internal definitions for FS-Cache + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Lock order, in the order in which multiple locks should be obtained: + * - fscache_addremove_sem + * - cookie->lock + * - cookie->parent->lock + * - cache->object_list_lock + * - object->lock + * - object->parent->lock + * - fscache_thread_lock + * + */ + +#include +#include + +#define FSCACHE_MIN_THREADS 4 +#define FSCACHE_MAX_THREADS 32 + +/* + * fsc-main.c + */ +extern unsigned fscache_defer_lookup; +extern unsigned fscache_defer_create; +extern unsigned fscache_debug; +extern struct kobject *fscache_root; + +/*****************************************************************************/ +/* + * debug tracing + */ +#define dbgprintk(FMT, ...) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + +/* make sure we maintain the format strings, even when debugging is disabled */ +static inline __attribute__((format(printf, 1, 2))) +void _dbprintk(const char *fmt, ...) +{ +} + +#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) + +#define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) + +#ifdef __KDEBUG +#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) +#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) +#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) + +#elif defined(CONFIG_FSCACHE_DEBUG) +#define _enter(FMT, ...) \ +do { \ + if (__do_kdebug(ENTER)) \ + kenter(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT, ...) \ +do { \ + if (__do_kdebug(LEAVE)) \ + kleave(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT, ...) \ +do { \ + if (__do_kdebug(DEBUG)) \ + kdebug(FMT, ##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) +#endif + +/* + * determine whether a particular optional debugging point should be logged + * - we need to go through three steps to persuade cpp to correctly join the + * shorthand in FSCACHE_DEBUG_LEVEL with its prefix + */ +#define ____do_kdebug(LEVEL, POINT) \ + unlikely((fscache_debug & \ + (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3)))) +#define ___do_kdebug(LEVEL, POINT) \ + ____do_kdebug(LEVEL, POINT) +#define __do_kdebug(POINT) \ + ___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT) + +#define FSCACHE_DEBUG_CACHE 0 +#define FSCACHE_DEBUG_COOKIE 1 +#define FSCACHE_DEBUG_PAGE 2 +#define FSCACHE_DEBUG_OPERATION 3 + +#define FSCACHE_POINT_ENTER 1 +#define FSCACHE_POINT_LEAVE 2 +#define FSCACHE_POINT_DEBUG 4 + +#ifndef FSCACHE_DEBUG_LEVEL +#define FSCACHE_DEBUG_LEVEL CACHE +#endif + +/* + * assertions + */ +#if 1 /* defined(__KDEBUGALL) */ + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#else + +#define ASSERT(X) do {} while (0) +#define ASSERTCMP(X, OP, Y) do {} while (0) +#define ASSERTIF(C, X) do {} while (0) +#define ASSERTIFCMP(C, X, OP, Y) do {} while (0) + +#endif /* assert or not */ -- cgit v1.2.3-59-g8ed1b From 7394daa8c61dfda4baa687f133748fa0b599b017 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:37 +0100 Subject: FS-Cache: Add use of /proc and presentation of statistics Make FS-Cache create its /proc interface and present various statistical information through it. Also provide the functions for updating this information. These features are enabled by: CONFIG_FSCACHE_PROC CONFIG_FSCACHE_STATS CONFIG_FSCACHE_HISTOGRAM The /proc directory for FS-Cache is also exported so that caching modules can add their own statistics there too. The FS-Cache module is loadable at this point, and the statistics files can be examined by userspace: cat /proc/fs/fscache/stats cat /proc/fs/fscache/histogram Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- Documentation/filesystems/caching/backend-api.txt | 6 - Documentation/filesystems/caching/fscache.txt | 12 +- fs/fscache/Kconfig | 34 ++++ fs/fscache/Makefile | 4 + fs/fscache/histogram.c | 109 +++++++++++ fs/fscache/internal.h | 127 +++++++++++++ fs/fscache/main.c | 7 + fs/fscache/proc.c | 68 +++++++ fs/fscache/stats.c | 212 ++++++++++++++++++++++ include/linux/fscache-cache.h | 4 - 10 files changed, 566 insertions(+), 17 deletions(-) create mode 100644 fs/fscache/histogram.c create mode 100644 fs/fscache/proc.c create mode 100644 fs/fscache/stats.c (limited to 'fs/fscache/internal.h') diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index 17723053aa91..382d52cdaf2d 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -100,12 +100,6 @@ A sysfs directory called /sys/fs/fscache// is created if CONFIG_SYSFS is enabled. This is accessible through the kobject struct fscache_cache::kobj and is for use by the cache as it sees fit. -The cache driver may create itself a directory named for the cache type in the -/proc/fs/fscache/ directory. This is available if CONFIG_FSCACHE_PROC is -enabled and is accessible through: - - struct proc_dir_entry *proc_fscache; - ======================== RELEVANT DATA STRUCTURES diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index a759d916273e..0a751f3c2c70 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -195,7 +195,6 @@ STATISTICAL INFORMATION If FS-Cache is compiled with the following options enabled: - CONFIG_FSCACHE_PROC=y (implied by the following two) CONFIG_FSCACHE_STATS=y CONFIG_FSCACHE_HISTOGRAM=y @@ -275,7 +274,7 @@ proc files. (*) /proc/fs/fscache/histogram cat /proc/fs/fscache/histogram - +HZ +TIME OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS + JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS ===== ===== ========= ========= ========= ========= ========= This shows the breakdown of the number of times each amount of time @@ -291,16 +290,16 @@ proc files. RETRIEVLS Time between beginning and end of a retrieval Each row shows the number of events that took a particular range of times. - Each step is 1 jiffy in size. The +HZ column indicates the particular - jiffy range covered, and the +TIME field the equivalent number of seconds. + Each step is 1 jiffy in size. The JIFS column indicates the particular + jiffy range covered, and the SECS field the equivalent number of seconds. ========= DEBUGGING ========= -The FS-Cache facility can have runtime debugging enabled by adjusting the value -in: +If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime +debugging enabled by adjusting the value in: /sys/module/fscache/parameters/debug @@ -327,4 +326,3 @@ the control file. For example: echo $((1|8|64)) >/sys/module/fscache/parameters/debug will turn on all function entry debugging. - diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 7c7bccd5eee4..9bbb8ce7bea0 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -11,6 +11,40 @@ config FSCACHE See Documentation/filesystems/caching/fscache.txt for more information. +config FSCACHE_STATS + bool "Gather statistical information on local caching" + depends on FSCACHE && PROC_FS + help + This option causes statistical information to be gathered on local + caching and exported through file: + + /proc/fs/fscache/stats + + The gathering of statistics adds a certain amount of overhead to + execution as there are a quite a few stats gathered, and on a + multi-CPU system these may be on cachelines that keep bouncing + between CPUs. On the other hand, the stats are very useful for + debugging purposes. Saying 'Y' here is recommended. + + See Documentation/filesystems/caching/fscache.txt for more information. + +config FSCACHE_HISTOGRAM + bool "Gather latency information on local caching" + depends on FSCACHE && PROC_FS + help + This option causes latency information to be gathered on local + caching and exported through file: + + /proc/fs/fscache/histogram + + The generation of this histogram adds a certain amount of overhead to + execution as there are a number of points at which data is gathered, + and on a multi-CPU system these may be on cachelines that keep + bouncing between CPUs. On the other hand, the histogram may be + useful for debugging purposes. Saying 'N' here is recommended. + + See Documentation/filesystems/caching/fscache.txt for more information. + config FSCACHE_DEBUG bool "Debug FS-Cache" depends on FSCACHE diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index f8038b83e0ef..1384823a160c 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -5,4 +5,8 @@ fscache-y := \ main.o +fscache-$(CONFIG_PROC_FS) += proc.o +fscache-$(CONFIG_FSCACHE_STATS) += stats.o +fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o + obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c new file mode 100644 index 000000000000..bad496748a59 --- /dev/null +++ b/fs/fscache/histogram.c @@ -0,0 +1,109 @@ +/* FS-Cache latency histogram + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL THREAD +#include +#include +#include +#include "internal.h" + +atomic_t fscache_obj_instantiate_histogram[HZ]; +atomic_t fscache_objs_histogram[HZ]; +atomic_t fscache_ops_histogram[HZ]; +atomic_t fscache_retrieval_delay_histogram[HZ]; +atomic_t fscache_retrieval_histogram[HZ]; + +/* + * display the time-taken histogram + */ +static int fscache_histogram_show(struct seq_file *m, void *v) +{ + unsigned long index; + unsigned n[5], t; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS " + " RETRV DLY RETRIEVLS\n"); + return 0; + case 2: + seq_puts(m, "===== ===== ========= ========= =========" + " ========= =========\n"); + return 0; + default: + index = (unsigned long) v - 3; + n[0] = atomic_read(&fscache_obj_instantiate_histogram[index]); + n[1] = atomic_read(&fscache_ops_histogram[index]); + n[2] = atomic_read(&fscache_objs_histogram[index]); + n[3] = atomic_read(&fscache_retrieval_delay_histogram[index]); + n[4] = atomic_read(&fscache_retrieval_histogram[index]); + if (!(n[0] | n[1] | n[2] | n[3] | n[4])) + return 0; + + t = (index * 1000) / HZ; + + seq_printf(m, "%4lu 0.%03u %9u %9u %9u %9u %9u\n", + index, t, n[0], n[1], n[2], n[3], n[4]); + return 0; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *fscache_histogram_start(struct seq_file *m, loff_t *_pos) +{ + if ((unsigned long long)*_pos >= HZ + 2) + return NULL; + if (*_pos == 0) + *_pos = 1; + return (void *)(unsigned long) *_pos; +} + +/* + * move to the next line + */ +static void *fscache_histogram_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return (unsigned long long)*pos > HZ + 2 ? + NULL : (void *)(unsigned long) *pos; +} + +/* + * clean up after reading + */ +static void fscache_histogram_stop(struct seq_file *m, void *v) +{ +} + +static const struct seq_operations fscache_histogram_ops = { + .start = fscache_histogram_start, + .stop = fscache_histogram_stop, + .next = fscache_histogram_next, + .show = fscache_histogram_show, +}; + +/* + * open "/proc/fs/fscache/histogram" to provide latency data + */ +static int fscache_histogram_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &fscache_histogram_ops); +} + +const struct file_operations fscache_histogram_fops = { + .owner = THIS_MODULE, + .open = fscache_histogram_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 95dc92da7152..16f9f1f46e4d 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -27,6 +27,30 @@ #define FSCACHE_MIN_THREADS 4 #define FSCACHE_MAX_THREADS 32 +/* + * fsc-histogram.c + */ +#ifdef CONFIG_FSCACHE_HISTOGRAM +extern atomic_t fscache_obj_instantiate_histogram[HZ]; +extern atomic_t fscache_objs_histogram[HZ]; +extern atomic_t fscache_ops_histogram[HZ]; +extern atomic_t fscache_retrieval_delay_histogram[HZ]; +extern atomic_t fscache_retrieval_histogram[HZ]; + +static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif) +{ + unsigned long jif = jiffies - start_jif; + if (jif >= HZ) + jif = HZ - 1; + atomic_inc(&histogram[jif]); +} + +extern const struct file_operations fscache_histogram_fops; + +#else +#define fscache_hist(hist, start_jif) do {} while (0) +#endif + /* * fsc-main.c */ @@ -35,6 +59,109 @@ extern unsigned fscache_defer_create; extern unsigned fscache_debug; extern struct kobject *fscache_root; +/* + * fsc-proc.c + */ +#ifdef CONFIG_PROC_FS +extern int __init fscache_proc_init(void); +extern void fscache_proc_cleanup(void); +#else +#define fscache_proc_init() (0) +#define fscache_proc_cleanup() do {} while (0) +#endif + +/* + * fsc-stats.c + */ +#ifdef CONFIG_FSCACHE_STATS +extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; +extern atomic_t fscache_n_objs_processed[FSCACHE_MAX_THREADS]; + +extern atomic_t fscache_n_op_pend; +extern atomic_t fscache_n_op_run; +extern atomic_t fscache_n_op_enqueue; +extern atomic_t fscache_n_op_deferred_release; +extern atomic_t fscache_n_op_release; +extern atomic_t fscache_n_op_gc; + +extern atomic_t fscache_n_attr_changed; +extern atomic_t fscache_n_attr_changed_ok; +extern atomic_t fscache_n_attr_changed_nobufs; +extern atomic_t fscache_n_attr_changed_nomem; +extern atomic_t fscache_n_attr_changed_calls; + +extern atomic_t fscache_n_allocs; +extern atomic_t fscache_n_allocs_ok; +extern atomic_t fscache_n_allocs_wait; +extern atomic_t fscache_n_allocs_nobufs; +extern atomic_t fscache_n_alloc_ops; +extern atomic_t fscache_n_alloc_op_waits; + +extern atomic_t fscache_n_retrievals; +extern atomic_t fscache_n_retrievals_ok; +extern atomic_t fscache_n_retrievals_wait; +extern atomic_t fscache_n_retrievals_nodata; +extern atomic_t fscache_n_retrievals_nobufs; +extern atomic_t fscache_n_retrievals_intr; +extern atomic_t fscache_n_retrievals_nomem; +extern atomic_t fscache_n_retrieval_ops; +extern atomic_t fscache_n_retrieval_op_waits; + +extern atomic_t fscache_n_stores; +extern atomic_t fscache_n_stores_ok; +extern atomic_t fscache_n_stores_again; +extern atomic_t fscache_n_stores_nobufs; +extern atomic_t fscache_n_stores_oom; +extern atomic_t fscache_n_store_ops; +extern atomic_t fscache_n_store_calls; + +extern atomic_t fscache_n_marks; +extern atomic_t fscache_n_uncaches; + +extern atomic_t fscache_n_acquires; +extern atomic_t fscache_n_acquires_null; +extern atomic_t fscache_n_acquires_no_cache; +extern atomic_t fscache_n_acquires_ok; +extern atomic_t fscache_n_acquires_nobufs; +extern atomic_t fscache_n_acquires_oom; + +extern atomic_t fscache_n_updates; +extern atomic_t fscache_n_updates_null; +extern atomic_t fscache_n_updates_run; + +extern atomic_t fscache_n_relinquishes; +extern atomic_t fscache_n_relinquishes_null; +extern atomic_t fscache_n_relinquishes_waitcrt; + +extern atomic_t fscache_n_cookie_index; +extern atomic_t fscache_n_cookie_data; +extern atomic_t fscache_n_cookie_special; + +extern atomic_t fscache_n_object_alloc; +extern atomic_t fscache_n_object_no_alloc; +extern atomic_t fscache_n_object_lookups; +extern atomic_t fscache_n_object_lookups_negative; +extern atomic_t fscache_n_object_lookups_positive; +extern atomic_t fscache_n_object_created; +extern atomic_t fscache_n_object_avail; +extern atomic_t fscache_n_object_dead; + +extern atomic_t fscache_n_checkaux_none; +extern atomic_t fscache_n_checkaux_okay; +extern atomic_t fscache_n_checkaux_update; +extern atomic_t fscache_n_checkaux_obsolete; + +static inline void fscache_stat(atomic_t *stat) +{ + atomic_inc(stat); +} + +extern const struct file_operations fscache_stats_fops; +#else + +#define fscache_stat(stat) do {} while (0) +#endif + /*****************************************************************************/ /* * debug tracing diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 76f7c69079c0..7c734b7fb18e 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -52,9 +52,15 @@ static int __init fscache_init(void) if (ret < 0) goto error_slow_work; + ret = fscache_proc_init(); + if (ret < 0) + goto error_proc; + printk(KERN_NOTICE "FS-Cache: Loaded\n"); return 0; +error_proc: + slow_work_unregister_user(); error_slow_work: return ret; } @@ -68,6 +74,7 @@ static void __exit fscache_exit(void) { _enter(""); + fscache_proc_cleanup(); slow_work_unregister_user(); printk(KERN_NOTICE "FS-Cache: Unloaded\n"); } diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c new file mode 100644 index 000000000000..beeab44bc31a --- /dev/null +++ b/fs/fscache/proc.c @@ -0,0 +1,68 @@ +/* FS-Cache statistics viewing interface + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL OPERATION +#include +#include +#include +#include "internal.h" + +/* + * initialise the /proc/fs/fscache/ directory + */ +int __init fscache_proc_init(void) +{ + _enter(""); + + if (!proc_mkdir("fs/fscache", NULL)) + goto error_dir; + +#ifdef CONFIG_FSCACHE_STATS + if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL, + &fscache_stats_fops)) + goto error_stats; +#endif + +#ifdef CONFIG_FSCACHE_HISTOGRAM + if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL, + &fscache_histogram_fops)) + goto error_histogram; +#endif + + _leave(" = 0"); + return 0; + +#ifdef CONFIG_FSCACHE_HISTOGRAM +error_histogram: +#endif +#ifdef CONFIG_FSCACHE_STATS + remove_proc_entry("fs/fscache/stats", NULL); +error_stats: +#endif + remove_proc_entry("fs/fscache", NULL); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * clean up the /proc/fs/fscache/ directory + */ +void fscache_proc_cleanup(void) +{ +#ifdef CONFIG_FSCACHE_HISTOGRAM + remove_proc_entry("fs/fscache/histogram", NULL); +#endif +#ifdef CONFIG_FSCACHE_STATS + remove_proc_entry("fs/fscache/stats", NULL); +#endif + remove_proc_entry("fs/fscache", NULL); +} diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c new file mode 100644 index 000000000000..65deb99e756b --- /dev/null +++ b/fs/fscache/stats.c @@ -0,0 +1,212 @@ +/* FS-Cache statistics + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL THREAD +#include +#include +#include +#include "internal.h" + +/* + * operation counters + */ +atomic_t fscache_n_op_pend; +atomic_t fscache_n_op_run; +atomic_t fscache_n_op_enqueue; +atomic_t fscache_n_op_requeue; +atomic_t fscache_n_op_deferred_release; +atomic_t fscache_n_op_release; +atomic_t fscache_n_op_gc; + +atomic_t fscache_n_attr_changed; +atomic_t fscache_n_attr_changed_ok; +atomic_t fscache_n_attr_changed_nobufs; +atomic_t fscache_n_attr_changed_nomem; +atomic_t fscache_n_attr_changed_calls; + +atomic_t fscache_n_allocs; +atomic_t fscache_n_allocs_ok; +atomic_t fscache_n_allocs_wait; +atomic_t fscache_n_allocs_nobufs; +atomic_t fscache_n_alloc_ops; +atomic_t fscache_n_alloc_op_waits; + +atomic_t fscache_n_retrievals; +atomic_t fscache_n_retrievals_ok; +atomic_t fscache_n_retrievals_wait; +atomic_t fscache_n_retrievals_nodata; +atomic_t fscache_n_retrievals_nobufs; +atomic_t fscache_n_retrievals_intr; +atomic_t fscache_n_retrievals_nomem; +atomic_t fscache_n_retrieval_ops; +atomic_t fscache_n_retrieval_op_waits; + +atomic_t fscache_n_stores; +atomic_t fscache_n_stores_ok; +atomic_t fscache_n_stores_again; +atomic_t fscache_n_stores_nobufs; +atomic_t fscache_n_stores_oom; +atomic_t fscache_n_store_ops; +atomic_t fscache_n_store_calls; + +atomic_t fscache_n_marks; +atomic_t fscache_n_uncaches; + +atomic_t fscache_n_acquires; +atomic_t fscache_n_acquires_null; +atomic_t fscache_n_acquires_no_cache; +atomic_t fscache_n_acquires_ok; +atomic_t fscache_n_acquires_nobufs; +atomic_t fscache_n_acquires_oom; + +atomic_t fscache_n_updates; +atomic_t fscache_n_updates_null; +atomic_t fscache_n_updates_run; + +atomic_t fscache_n_relinquishes; +atomic_t fscache_n_relinquishes_null; +atomic_t fscache_n_relinquishes_waitcrt; + +atomic_t fscache_n_cookie_index; +atomic_t fscache_n_cookie_data; +atomic_t fscache_n_cookie_special; + +atomic_t fscache_n_object_alloc; +atomic_t fscache_n_object_no_alloc; +atomic_t fscache_n_object_lookups; +atomic_t fscache_n_object_lookups_negative; +atomic_t fscache_n_object_lookups_positive; +atomic_t fscache_n_object_created; +atomic_t fscache_n_object_avail; +atomic_t fscache_n_object_dead; + +atomic_t fscache_n_checkaux_none; +atomic_t fscache_n_checkaux_okay; +atomic_t fscache_n_checkaux_update; +atomic_t fscache_n_checkaux_obsolete; + +/* + * display the general statistics + */ +static int fscache_stats_show(struct seq_file *m, void *v) +{ + seq_puts(m, "FS-Cache statistics\n"); + + seq_printf(m, "Cookies: idx=%u dat=%u spc=%u\n", + atomic_read(&fscache_n_cookie_index), + atomic_read(&fscache_n_cookie_data), + atomic_read(&fscache_n_cookie_special)); + + seq_printf(m, "Objects: alc=%u nal=%u avl=%u ded=%u\n", + atomic_read(&fscache_n_object_alloc), + atomic_read(&fscache_n_object_no_alloc), + atomic_read(&fscache_n_object_avail), + atomic_read(&fscache_n_object_dead)); + seq_printf(m, "ChkAux : non=%u ok=%u upd=%u obs=%u\n", + atomic_read(&fscache_n_checkaux_none), + atomic_read(&fscache_n_checkaux_okay), + atomic_read(&fscache_n_checkaux_update), + atomic_read(&fscache_n_checkaux_obsolete)); + + seq_printf(m, "Pages : mrk=%u unc=%u\n", + atomic_read(&fscache_n_marks), + atomic_read(&fscache_n_uncaches)); + + seq_printf(m, "Acquire: n=%u nul=%u noc=%u ok=%u nbf=%u" + " oom=%u\n", + atomic_read(&fscache_n_acquires), + atomic_read(&fscache_n_acquires_null), + atomic_read(&fscache_n_acquires_no_cache), + atomic_read(&fscache_n_acquires_ok), + atomic_read(&fscache_n_acquires_nobufs), + atomic_read(&fscache_n_acquires_oom)); + + seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n", + atomic_read(&fscache_n_object_lookups), + atomic_read(&fscache_n_object_lookups_negative), + atomic_read(&fscache_n_object_lookups_positive), + atomic_read(&fscache_n_object_created)); + + seq_printf(m, "Updates: n=%u nul=%u run=%u\n", + atomic_read(&fscache_n_updates), + atomic_read(&fscache_n_updates_null), + atomic_read(&fscache_n_updates_run)); + + seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u\n", + atomic_read(&fscache_n_relinquishes), + atomic_read(&fscache_n_relinquishes_null), + atomic_read(&fscache_n_relinquishes_waitcrt)); + + seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n", + atomic_read(&fscache_n_attr_changed), + atomic_read(&fscache_n_attr_changed_ok), + atomic_read(&fscache_n_attr_changed_nobufs), + atomic_read(&fscache_n_attr_changed_nomem), + atomic_read(&fscache_n_attr_changed_calls)); + + seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u\n", + atomic_read(&fscache_n_allocs), + atomic_read(&fscache_n_allocs_ok), + atomic_read(&fscache_n_allocs_wait), + atomic_read(&fscache_n_allocs_nobufs)); + seq_printf(m, "Allocs : ops=%u owt=%u\n", + atomic_read(&fscache_n_alloc_ops), + atomic_read(&fscache_n_alloc_op_waits)); + + seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u" + " int=%u oom=%u\n", + atomic_read(&fscache_n_retrievals), + atomic_read(&fscache_n_retrievals_ok), + atomic_read(&fscache_n_retrievals_wait), + atomic_read(&fscache_n_retrievals_nodata), + atomic_read(&fscache_n_retrievals_nobufs), + atomic_read(&fscache_n_retrievals_intr), + atomic_read(&fscache_n_retrievals_nomem)); + seq_printf(m, "Retrvls: ops=%u owt=%u\n", + atomic_read(&fscache_n_retrieval_ops), + atomic_read(&fscache_n_retrieval_op_waits)); + + seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n", + atomic_read(&fscache_n_stores), + atomic_read(&fscache_n_stores_ok), + atomic_read(&fscache_n_stores_again), + atomic_read(&fscache_n_stores_nobufs), + atomic_read(&fscache_n_stores_oom)); + seq_printf(m, "Stores : ops=%u run=%u\n", + atomic_read(&fscache_n_store_ops), + atomic_read(&fscache_n_store_calls)); + + seq_printf(m, "Ops : pend=%u run=%u enq=%u\n", + atomic_read(&fscache_n_op_pend), + atomic_read(&fscache_n_op_run), + atomic_read(&fscache_n_op_enqueue)); + seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", + atomic_read(&fscache_n_op_deferred_release), + atomic_read(&fscache_n_op_release), + atomic_read(&fscache_n_op_gc)); + return 0; +} + +/* + * open "/proc/fs/fscache/stats" allowing provision of a statistical summary + */ +static int fscache_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, fscache_stats_show, NULL); +} + +const struct file_operations fscache_stats_fops = { + .owner = THIS_MODULE, + .open = fscache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index b2a9a484c4cf..84d3532dd3ea 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -29,10 +29,6 @@ struct fscache_cache_ops; struct fscache_object; struct fscache_operation; -#ifdef CONFIG_FSCACHE_PROC -extern struct proc_dir_entry *proc_fscache; -#endif - /* * cache tag definition */ -- cgit v1.2.3-59-g8ed1b From a6891645cf2ddd4778096848a864580e7258faba Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:37 +0100 Subject: FS-Cache: Root index definition Add a description of the root index of the cache for later patches to make use of. The root index is owned by FS-Cache itself. When a netfs requests caching facilities, FS-Cache will, if one doesn't already exist, create an entry in the root index with the key being the name of the netfs ("AFS" for example), and the auxiliary data holding the index structure version supplied by the netfs: FSDEF | +-----------+ | | NFS AFS [v=1] [v=1] If an entry with the appropriate name does already exist, the version is compared. If the version is different, the entire subtree from that entry will be discarded and a new entry created. The new entry will be an index, and a cookie referring to it will be passed to the netfs. This is then the root handle by which the netfs accesses the cache. It can create whatever objects it likes in that index, including further indices. Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- fs/fscache/Makefile | 1 + fs/fscache/fsdef.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/fscache/internal.h | 6 +++ 3 files changed, 151 insertions(+) create mode 100644 fs/fscache/fsdef.c (limited to 'fs/fscache/internal.h') diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 1384823a160c..bc1f3b9d811a 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -3,6 +3,7 @@ # fscache-y := \ + fsdef.o \ main.o fscache-$(CONFIG_PROC_FS) += proc.o diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c new file mode 100644 index 000000000000..f5b4baee7352 --- /dev/null +++ b/fs/fscache/fsdef.c @@ -0,0 +1,144 @@ +/* Filesystem index definition + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL CACHE +#include +#include "internal.h" + +static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax); + +static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax); + +static +enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + +/* + * The root index is owned by FS-Cache itself. + * + * When a netfs requests caching facilities, FS-Cache will, if one doesn't + * already exist, create an entry in the root index with the key being the name + * of the netfs ("AFS" for example), and the auxiliary data holding the index + * structure version supplied by the netfs: + * + * FSDEF + * | + * +-----------+ + * | | + * NFS AFS + * [v=1] [v=1] + * + * If an entry with the appropriate name does already exist, the version is + * compared. If the version is different, the entire subtree from that entry + * will be discarded and a new entry created. + * + * The new entry will be an index, and a cookie referring to it will be passed + * to the netfs. This is then the root handle by which the netfs accesses the + * cache. It can create whatever objects it likes in that index, including + * further indices. + */ +static struct fscache_cookie_def fscache_fsdef_index_def = { + .name = ".FS-Cache", + .type = FSCACHE_COOKIE_TYPE_INDEX, +}; + +struct fscache_cookie fscache_fsdef_index = { + .usage = ATOMIC_INIT(1), + .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), + .backing_objects = HLIST_HEAD_INIT, + .def = &fscache_fsdef_index_def, +}; +EXPORT_SYMBOL(fscache_fsdef_index); + +/* + * Definition of an entry in the root index. Each entry is an index, keyed to + * a specific netfs and only applicable to a particular version of the index + * structure used by that netfs. + */ +struct fscache_cookie_def fscache_fsdef_netfs_def = { + .name = "FSDEF.netfs", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = fscache_fsdef_netfs_get_key, + .get_aux = fscache_fsdef_netfs_get_aux, + .check_aux = fscache_fsdef_netfs_check_aux, +}; + +/* + * get the key data for an FSDEF index record - this is the name of the netfs + * for which this entry is created + */ +static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct fscache_netfs *netfs = cookie_netfs_data; + unsigned klen; + + _enter("{%s.%u},", netfs->name, netfs->version); + + klen = strlen(netfs->name); + if (klen > bufmax) + return 0; + + memcpy(buffer, netfs->name, klen); + return klen; +} + +/* + * get the auxiliary data for an FSDEF index record - this is the index + * structure version number of the netfs for which this version is created + */ +static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct fscache_netfs *netfs = cookie_netfs_data; + unsigned dlen; + + _enter("{%s.%u},", netfs->name, netfs->version); + + dlen = sizeof(uint32_t); + if (dlen > bufmax) + return 0; + + memcpy(buffer, &netfs->version, dlen); + return dlen; +} + +/* + * check that the index structure version number stored in the auxiliary data + * matches the one the netfs gave us + */ +static enum fscache_checkaux fscache_fsdef_netfs_check_aux( + void *cookie_netfs_data, + const void *data, + uint16_t datalen) +{ + struct fscache_netfs *netfs = cookie_netfs_data; + uint32_t version; + + _enter("{%s},,%hu", netfs->name, datalen); + + if (datalen != sizeof(version)) { + _leave(" = OBSOLETE [dl=%d v=%zu]", datalen, sizeof(version)); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + memcpy(&version, data, sizeof(version)); + if (version != netfs->version) { + _leave(" = OBSOLETE [ver=%x net=%x]", version, netfs->version); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; +} diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 16f9f1f46e4d..4113af8d1660 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -27,6 +27,12 @@ #define FSCACHE_MIN_THREADS 4 #define FSCACHE_MAX_THREADS 32 +/* + * fsc-fsdef.c + */ +extern struct fscache_cookie fscache_fsdef_index; +extern struct fscache_cookie_def fscache_fsdef_netfs_def; + /* * fsc-histogram.c */ -- cgit v1.2.3-59-g8ed1b From 0e04d4cefcf4d8fbbdb2c50e93ad541582933fd2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:37 +0100 Subject: FS-Cache: Add cache tag handling Implement two features of FS-Cache: (1) The ability to request and release cache tags - names by which a cache may be known to a netfs, and thus selected for use. (2) An internal function by which a cache is selected by consulting the netfs, if the netfs wishes to be consulted. Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- fs/fscache/Makefile | 1 + fs/fscache/cache.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/fscache/internal.h | 20 ++++++ include/linux/fscache.h | 9 ++- 4 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 fs/fscache/cache.c (limited to 'fs/fscache/internal.h') diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index bc1f3b9d811a..556708bb9796 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -3,6 +3,7 @@ # fscache-y := \ + cache.o \ fsdef.o \ main.o diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c new file mode 100644 index 000000000000..1a28df36dd93 --- /dev/null +++ b/fs/fscache/cache.c @@ -0,0 +1,166 @@ +/* FS-Cache cache handling + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL CACHE +#include +#include +#include "internal.h" + +LIST_HEAD(fscache_cache_list); +DECLARE_RWSEM(fscache_addremove_sem); + +static LIST_HEAD(fscache_cache_tag_list); + +/* + * look up a cache tag + */ +struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name) +{ + struct fscache_cache_tag *tag, *xtag; + + /* firstly check for the existence of the tag under read lock */ + down_read(&fscache_addremove_sem); + + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, name) == 0) { + atomic_inc(&tag->usage); + up_read(&fscache_addremove_sem); + return tag; + } + } + + up_read(&fscache_addremove_sem); + + /* the tag does not exist - create a candidate */ + xtag = kzalloc(sizeof(*xtag) + strlen(name) + 1, GFP_KERNEL); + if (!xtag) + /* return a dummy tag if out of memory */ + return ERR_PTR(-ENOMEM); + + atomic_set(&xtag->usage, 1); + strcpy(xtag->name, name); + + /* write lock, search again and add if still not present */ + down_write(&fscache_addremove_sem); + + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, name) == 0) { + atomic_inc(&tag->usage); + up_write(&fscache_addremove_sem); + kfree(xtag); + return tag; + } + } + + list_add_tail(&xtag->link, &fscache_cache_tag_list); + up_write(&fscache_addremove_sem); + return xtag; +} + +/* + * release a reference to a cache tag + */ +void __fscache_release_cache_tag(struct fscache_cache_tag *tag) +{ + if (tag != ERR_PTR(-ENOMEM)) { + down_write(&fscache_addremove_sem); + + if (atomic_dec_and_test(&tag->usage)) + list_del_init(&tag->link); + else + tag = NULL; + + up_write(&fscache_addremove_sem); + + kfree(tag); + } +} + +/* + * select a cache in which to store an object + * - the cache addremove semaphore must be at least read-locked by the caller + * - the object will never be an index + */ +struct fscache_cache *fscache_select_cache_for_object( + struct fscache_cookie *cookie) +{ + struct fscache_cache_tag *tag; + struct fscache_object *object; + struct fscache_cache *cache; + + _enter(""); + + if (list_empty(&fscache_cache_list)) { + _leave(" = NULL [no cache]"); + return NULL; + } + + /* we check the parent to determine the cache to use */ + spin_lock(&cookie->lock); + + /* the first in the parent's backing list should be the preferred + * cache */ + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + cache = object->cache; + if (object->state >= FSCACHE_OBJECT_DYING || + test_bit(FSCACHE_IOERROR, &cache->flags)) + cache = NULL; + + spin_unlock(&cookie->lock); + _leave(" = %p [parent]", cache); + return cache; + } + + /* the parent is unbacked */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + /* cookie not an index and is unbacked */ + spin_unlock(&cookie->lock); + _leave(" = NULL [cookie ub,ni]"); + return NULL; + } + + spin_unlock(&cookie->lock); + + if (!cookie->def->select_cache) + goto no_preference; + + /* ask the netfs for its preference */ + tag = cookie->def->select_cache(cookie->parent->netfs_data, + cookie->netfs_data); + if (!tag) + goto no_preference; + + if (tag == ERR_PTR(-ENOMEM)) { + _leave(" = NULL [nomem tag]"); + return NULL; + } + + if (!tag->cache) { + _leave(" = NULL [unbacked tag]"); + return NULL; + } + + if (test_bit(FSCACHE_IOERROR, &tag->cache->flags)) + return NULL; + + _leave(" = %p [specific]", tag->cache); + return tag->cache; + +no_preference: + /* netfs has no preference - just select first cache */ + cache = list_entry(fscache_cache_list.next, + struct fscache_cache, link); + _leave(" = %p [first]", cache); + return cache; +} diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 4113af8d1660..0a2069afa417 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -27,6 +27,15 @@ #define FSCACHE_MIN_THREADS 4 #define FSCACHE_MAX_THREADS 32 +/* + * fsc-cache.c + */ +extern struct list_head fscache_cache_list; +extern struct rw_semaphore fscache_addremove_sem; + +extern struct fscache_cache *fscache_select_cache_for_object( + struct fscache_cookie *); + /* * fsc-fsdef.c */ @@ -168,6 +177,17 @@ extern const struct file_operations fscache_stats_fops; #define fscache_stat(stat) do {} while (0) #endif +/* + * raise an event on an object + * - if the event is not masked for that object, then the object is + * queued for attention by the thread pool. + */ +static inline void fscache_raise_event(struct fscache_object *object, + unsigned event) +{ + BUG(); // TODO +} + /*****************************************************************************/ /* * debug tracing diff --git a/include/linux/fscache.h b/include/linux/fscache.h index feb3b0e0af4d..9584c094d69f 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -173,6 +173,8 @@ struct fscache_netfs { * - these are undefined symbols when FS-Cache is not configured and the * optimiser takes care of not using them */ +extern struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *); +extern void __fscache_release_cache_tag(struct fscache_cache_tag *); /** * fscache_register_netfs - Register a filesystem as desiring caching services @@ -218,7 +220,10 @@ void fscache_unregister_netfs(struct fscache_netfs *netfs) static inline struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) { - return NULL; + if (fscache_available()) + return __fscache_lookup_cache_tag(name); + else + return NULL; } /** @@ -233,6 +238,8 @@ struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) static inline void fscache_release_cache_tag(struct fscache_cache_tag *tag) { + if (fscache_available()) + __fscache_release_cache_tag(tag); } /** -- cgit v1.2.3-59-g8ed1b From 955d00917f0c094e0f2fb88df967e980ab66b8ca Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:38 +0100 Subject: FS-Cache: Provide a slab for cookie allocation Provide a slab from which can be allocated the FS-Cache cookies that will be presented to the netfs. Also provide a slab constructor and a function to recursively discard a cookie and its ancestor chain. Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- fs/fscache/Makefile | 1 + fs/fscache/cookie.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/fscache/internal.h | 8 ++++++++ fs/fscache/main.c | 15 ++++++++++++++ 4 files changed, 80 insertions(+) create mode 100644 fs/fscache/cookie.c (limited to 'fs/fscache/internal.h') diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 556708bb9796..f88ac1764ce3 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -4,6 +4,7 @@ fscache-y := \ cache.o \ + cookie.o \ fsdef.o \ main.o diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c new file mode 100644 index 000000000000..47fd75b832e1 --- /dev/null +++ b/fs/fscache/cookie.c @@ -0,0 +1,56 @@ +/* netfs cookie management + * + * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL COOKIE +#include +#include +#include "internal.h" + +struct kmem_cache *fscache_cookie_jar; + +/* + * initialise an cookie jar slab element prior to any use + */ +void fscache_cookie_init_once(void *_cookie) +{ + struct fscache_cookie *cookie = _cookie; + + memset(cookie, 0, sizeof(*cookie)); + spin_lock_init(&cookie->lock); + INIT_HLIST_HEAD(&cookie->backing_objects); +} + +/* + * destroy a cookie + */ +void __fscache_cookie_put(struct fscache_cookie *cookie) +{ + struct fscache_cookie *parent; + + _enter("%p", cookie); + + for (;;) { + _debug("FREE COOKIE %p", cookie); + parent = cookie->parent; + BUG_ON(!hlist_empty(&cookie->backing_objects)); + kmem_cache_free(fscache_cookie_jar, cookie); + + if (!parent) + break; + + cookie = parent; + BUG_ON(atomic_read(&cookie->usage) <= 0); + if (!atomic_dec_and_test(&cookie->usage)) + break; + } + + _leave(""); +} diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 0a2069afa417..4c6ba561e966 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -36,6 +36,14 @@ extern struct rw_semaphore fscache_addremove_sem; extern struct fscache_cache *fscache_select_cache_for_object( struct fscache_cookie *); +/* + * fsc-cookie.c + */ +extern struct kmem_cache *fscache_cookie_jar; + +extern void fscache_cookie_init_once(void *); +extern void __fscache_cookie_put(struct fscache_cookie *); + /* * fsc-fsdef.c */ diff --git a/fs/fscache/main.c b/fs/fscache/main.c index c2f3e637725d..48b79d2deac1 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -56,6 +56,18 @@ static int __init fscache_init(void) if (ret < 0) goto error_proc; + fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", + sizeof(struct fscache_cookie), + 0, + 0, + fscache_cookie_init_once); + if (!fscache_cookie_jar) { + printk(KERN_NOTICE + "FS-Cache: Failed to allocate a cookie jar\n"); + ret = -ENOMEM; + goto error_cookie_jar; + } + fscache_root = kobject_create_and_add("fscache", kernel_kobj); if (!fscache_root) goto error_kobj; @@ -64,6 +76,8 @@ static int __init fscache_init(void) return 0; error_kobj: + kmem_cache_destroy(fscache_cookie_jar); +error_cookie_jar: fscache_proc_cleanup(); error_proc: slow_work_unregister_user(); @@ -81,6 +95,7 @@ static void __exit fscache_exit(void) _enter(""); kobject_put(fscache_root); + kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); slow_work_unregister_user(); printk(KERN_NOTICE "FS-Cache: Unloaded\n"); -- cgit v1.2.3-59-g8ed1b From 2868cbea72dc89ae0eb17693596b1dedaafff1c5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:38 +0100 Subject: FS-Cache: Bit waiting helpers Add helpers for use with wait_on_bit(). Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- fs/fscache/internal.h | 3 +++ fs/fscache/main.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'fs/fscache/internal.h') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 4c6ba561e966..16389942a54e 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -82,6 +82,9 @@ extern unsigned fscache_defer_create; extern unsigned fscache_debug; extern struct kobject *fscache_root; +extern int fscache_wait_bit(void *); +extern int fscache_wait_bit_interruptible(void *); + /* * fsc-proc.c */ diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 48b79d2deac1..4de41b597499 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -102,3 +102,23 @@ static void __exit fscache_exit(void) } module_exit(fscache_exit); + +/* + * wait_on_bit() sleep function for uninterruptible waiting + */ +int fscache_wait_bit(void *flags) +{ + schedule(); + return 0; +} +EXPORT_SYMBOL(fscache_wait_bit); + +/* + * wait_on_bit() sleep function for interruptible waiting + */ +int fscache_wait_bit_interruptible(void *flags) +{ + schedule(); + return signal_pending(current); +} +EXPORT_SYMBOL(fscache_wait_bit_interruptible); -- cgit v1.2.3-59-g8ed1b From 36c9559022850f919269564a74bf17fdabf4bb30 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:38 +0100 Subject: FS-Cache: Object management state machine Implement the cache object management state machine. The following documentation is added to illuminate the working of this state machine. It will also be added as: Documentation/filesystems/caching/object.txt ==================================================== IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT ==================================================== ============== REPRESENTATION ============== FS-Cache maintains an in-kernel representation of each object that a netfs is currently interested in. Such objects are represented by the fscache_cookie struct and are referred to as cookies. FS-Cache also maintains a separate in-kernel representation of the objects that a cache backend is currently actively caching. Such objects are represented by the fscache_object struct. The cache backends allocate these upon request, and are expected to embed them in their own representations. These are referred to as objects. There is a 1:N relationship between cookies and objects. A cookie may be represented by multiple objects - an index may exist in more than one cache - or even by no objects (it may not be cached). Furthermore, both cookies and objects are hierarchical. The two hierarchies correspond, but the cookies tree is a superset of the union of the object trees of multiple caches: NETFS INDEX TREE : CACHE 1 : CACHE 2 : : : +-----------+ : +----------->| IObject | : +-----------+ | : +-----------+ : | ICookie |-------+ : | : +-----------+ | : | : +-----------+ | +------------------------------>| IObject | | : | : +-----------+ | : V : | | : +-----------+ : | V +----------->| IObject | : | +-----------+ | : +-----------+ : | | ICookie |-------+ : | : V +-----------+ | : | : +-----------+ | +------------------------------>| IObject | +-----+-----+ : | : +-----------+ | | : | : | V | : V : | +-----------+ | : +-----------+ : | | ICookie |------------------------->| IObject | : | +-----------+ | : +-----------+ : | | V : | : V | +-----------+ : | : +-----------+ | | ICookie |-------------------------------->| IObject | | +-----------+ : | : +-----------+ V | : V : | +-----------+ | : +-----------+ : | | DCookie |------------------------->| DObject | : | +-----------+ | : +-----------+ : | | : : | +-------+-------+ : : | | | : : | V V : : V +-----------+ +-----------+ : : +-----------+ | DCookie | | DCookie |------------------------>| DObject | +-----------+ +-----------+ : : +-----------+ : : In the above illustration, ICookie and IObject represent indices and DCookie and DObject represent data storage objects. Indices may have representation in multiple caches, but currently, non-index objects may not. Objects of any type may also be entirely unrepresented. As far as the netfs API goes, the netfs is only actually permitted to see pointers to the cookies. The cookies themselves and any objects attached to those cookies are hidden from it. =============================== OBJECT MANAGEMENT STATE MACHINE =============================== Within FS-Cache, each active object is managed by its own individual state machine. The state for an object is kept in the fscache_object struct, in object->state. A cookie may point to a set of objects that are in different states. Each state has an action associated with it that is invoked when the machine wakes up in that state. There are four logical sets of states: (1) Preparation: states that wait for the parent objects to become ready. The representations are hierarchical, and it is expected that an object must be created or accessed with respect to its parent object. (2) Initialisation: states that perform lookups in the cache and validate what's found and that create on disk any missing metadata. (3) Normal running: states that allow netfs operations on objects to proceed and that update the state of objects. (4) Termination: states that detach objects from their netfs cookies, that delete objects from disk, that handle disk and system errors and that free up in-memory resources. In most cases, transitioning between states is in response to signalled events. When a state has finished processing, it will usually set the mask of events in which it is interested (object->event_mask) and relinquish the worker thread. Then when an event is raised (by calling fscache_raise_event()), if the event is not masked, the object will be queued for processing (by calling fscache_enqueue_object()). PROVISION OF CPU TIME --------------------- The work to be done by the various states is given CPU time by the threads of the slow work facility (see Documentation/slow-work.txt). This is used in preference to the workqueue facility because: (1) Threads may be completely occupied for very long periods of time by a particular work item. These state actions may be doing sequences of synchronous, journalled disk accesses (lookup, mkdir, create, setxattr, getxattr, truncate, unlink, rmdir, rename). (2) Threads may do little actual work, but may rather spend a lot of time sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded workqueues don't necessarily have the right numbers of threads. LOCKING SIMPLIFICATION ---------------------- Because only one worker thread may be operating on any particular object's state machine at once, this simplifies the locking, particularly with respect to disconnecting the netfs's representation of a cache object (fscache_cookie) from the cache backend's representation (fscache_object) - which may be requested from either end. ================= THE SET OF STATES ================= The object state machine has a set of states that it can be in. There are preparation states in which the object sets itself up and waits for its parent object to transit to a state that allows access to its children: (1) State FSCACHE_OBJECT_INIT. Initialise the object and wait for the parent object to become active. In the cache, it is expected that it will not be possible to look an object up from the parent object, until that parent object itself has been looked up. There are initialisation states in which the object sets itself up and accesses disk for the object metadata: (2) State FSCACHE_OBJECT_LOOKING_UP. Look up the object on disk, using the parent as a starting point. FS-Cache expects the cache backend to probe the cache to see whether this object is represented there, and if it is, to see if it's valid (coherency management). The cache should call fscache_object_lookup_negative() to indicate lookup failure for whatever reason, and should call fscache_obtained_object() to indicate success. At the completion of lookup, FS-Cache will let the netfs go ahead with read operations, no matter whether the file is yet cached. If not yet cached, read operations will be immediately rejected with ENODATA until the first known page is uncached - as to that point there can be no data to be read out of the cache for that file that isn't currently also held in the pagecache. (3) State FSCACHE_OBJECT_CREATING. Create an object on disk, using the parent as a starting point. This happens if the lookup failed to find the object, or if the object's coherency data indicated what's on disk is out of date. In this state, FS-Cache expects the cache to create The cache should call fscache_obtained_object() if creation completes successfully, fscache_object_lookup_negative() otherwise. At the completion of creation, FS-Cache will start processing write operations the netfs has queued for an object. If creation failed, the write ops will be transparently discarded, and nothing recorded in the cache. There are some normal running states in which the object spends its time servicing netfs requests: (4) State FSCACHE_OBJECT_AVAILABLE. A transient state in which pending operations are started, child objects are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary lookup data is freed. (5) State FSCACHE_OBJECT_ACTIVE. The normal running state. In this state, requests the netfs makes will be passed on to the cache. (6) State FSCACHE_OBJECT_UPDATING. The state machine comes here to update the object in the cache from the netfs's records. This involves updating the auxiliary data that is used to maintain coherency. And there are terminal states in which an object cleans itself up, deallocates memory and potentially deletes stuff from disk: (7) State FSCACHE_OBJECT_LC_DYING. The object comes here if it is dying because of a lookup or creation error. This would be due to a disk error or system error of some sort. Temporary data is cleaned up, and the parent is released. (8) State FSCACHE_OBJECT_DYING. The object comes here if it is dying due to an error, because its parent cookie has been relinquished by the netfs or because the cache is being withdrawn. Any child objects waiting on this one are given CPU time so that they too can destroy themselves. This object waits for all its children to go away before advancing to the next state. (9) State FSCACHE_OBJECT_ABORT_INIT. The object comes to this state if it was waiting on its parent in FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself so that the parent may proceed from the FSCACHE_OBJECT_DYING state. (10) State FSCACHE_OBJECT_RELEASING. (11) State FSCACHE_OBJECT_RECYCLING. The object comes to one of these two states when dying once it is rid of all its children, if it is dying because the netfs relinquished its cookie. In the first state, the cached data is expected to persist, and in the second it will be deleted. (12) State FSCACHE_OBJECT_WITHDRAWING. The object transits to this state if the cache decides it wants to withdraw the object from service, perhaps to make space, but also due to error or just because the whole cache is being withdrawn. (13) State FSCACHE_OBJECT_DEAD. The object transits to this state when the in-memory object record is ready to be deleted. The object processor shouldn't ever see an object in this state. THE SET OF EVENTS ----------------- There are a number of events that can be raised to an object state machine: (*) FSCACHE_OBJECT_EV_UPDATE The netfs requested that an object be updated. The state machine will ask the cache backend to update the object, and the cache backend will ask the netfs for details of the change through its cookie definition ops. (*) FSCACHE_OBJECT_EV_CLEARED This is signalled in two circumstances: (a) when an object's last child object is dropped and (b) when the last operation outstanding on an object is completed. This is used to proceed from the dying state. (*) FSCACHE_OBJECT_EV_ERROR This is signalled when an I/O error occurs during the processing of some object. (*) FSCACHE_OBJECT_EV_RELEASE (*) FSCACHE_OBJECT_EV_RETIRE These are signalled when the netfs relinquishes a cookie it was using. The event selected depends on whether the netfs asks for the backing object to be retired (deleted) or retained. (*) FSCACHE_OBJECT_EV_WITHDRAW This is signalled when the cache backend wants to withdraw an object. This means that the object will have to be detached from the netfs's cookie. Because the withdrawing releasing/retiring events are all handled by the object state machine, it doesn't matter if there's a collision with both ends trying to sever the connection at the same time. The state machine can just pick which one it wants to honour, and that effects the other. Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- Documentation/filesystems/caching/fscache.txt | 5 + Documentation/filesystems/caching/object.txt | 313 ++++++++++ fs/fscache/Makefile | 3 +- fs/fscache/internal.h | 26 +- fs/fscache/object.c | 810 ++++++++++++++++++++++++++ 5 files changed, 1155 insertions(+), 2 deletions(-) create mode 100644 Documentation/filesystems/caching/object.txt create mode 100644 fs/fscache/object.c (limited to 'fs/fscache/internal.h') diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 0a751f3c2c70..9e94b9491d89 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -188,6 +188,11 @@ The cache backend API to FS-Cache can be found in: Documentation/filesystems/caching/backend-api.txt +A description of the internal representations and object state machine can be +found in: + + Documentation/filesystems/caching/object.txt + ======================= STATISTICAL INFORMATION diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt new file mode 100644 index 000000000000..e8b0a35d8fe5 --- /dev/null +++ b/Documentation/filesystems/caching/object.txt @@ -0,0 +1,313 @@ + ==================================================== + IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT + ==================================================== + +By: David Howells + +Contents: + + (*) Representation + + (*) Object management state machine. + + - Provision of cpu time. + - Locking simplification. + + (*) The set of states. + + (*) The set of events. + + +============== +REPRESENTATION +============== + +FS-Cache maintains an in-kernel representation of each object that a netfs is +currently interested in. Such objects are represented by the fscache_cookie +struct and are referred to as cookies. + +FS-Cache also maintains a separate in-kernel representation of the objects that +a cache backend is currently actively caching. Such objects are represented by +the fscache_object struct. The cache backends allocate these upon request, and +are expected to embed them in their own representations. These are referred to +as objects. + +There is a 1:N relationship between cookies and objects. A cookie may be +represented by multiple objects - an index may exist in more than one cache - +or even by no objects (it may not be cached). + +Furthermore, both cookies and objects are hierarchical. The two hierarchies +correspond, but the cookies tree is a superset of the union of the object trees +of multiple caches: + + NETFS INDEX TREE : CACHE 1 : CACHE 2 + : : + : +-----------+ : + +----------->| IObject | : + +-----------+ | : +-----------+ : + | ICookie |-------+ : | : + +-----------+ | : | : +-----------+ + | +------------------------------>| IObject | + | : | : +-----------+ + | : V : | + | : +-----------+ : | + V +----------->| IObject | : | + +-----------+ | : +-----------+ : | + | ICookie |-------+ : | : V + +-----------+ | : | : +-----------+ + | +------------------------------>| IObject | + +-----+-----+ : | : +-----------+ + | | : | : | + V | : V : | + +-----------+ | : +-----------+ : | + | ICookie |------------------------->| IObject | : | + +-----------+ | : +-----------+ : | + | V : | : V + | +-----------+ : | : +-----------+ + | | ICookie |-------------------------------->| IObject | + | +-----------+ : | : +-----------+ + V | : V : | + +-----------+ | : +-----------+ : | + | DCookie |------------------------->| DObject | : | + +-----------+ | : +-----------+ : | + | : : | + +-------+-------+ : : | + | | : : | + V V : : V + +-----------+ +-----------+ : : +-----------+ + | DCookie | | DCookie |------------------------>| DObject | + +-----------+ +-----------+ : : +-----------+ + : : + +In the above illustration, ICookie and IObject represent indices and DCookie +and DObject represent data storage objects. Indices may have representation in +multiple caches, but currently, non-index objects may not. Objects of any type +may also be entirely unrepresented. + +As far as the netfs API goes, the netfs is only actually permitted to see +pointers to the cookies. The cookies themselves and any objects attached to +those cookies are hidden from it. + + +=============================== +OBJECT MANAGEMENT STATE MACHINE +=============================== + +Within FS-Cache, each active object is managed by its own individual state +machine. The state for an object is kept in the fscache_object struct, in +object->state. A cookie may point to a set of objects that are in different +states. + +Each state has an action associated with it that is invoked when the machine +wakes up in that state. There are four logical sets of states: + + (1) Preparation: states that wait for the parent objects to become ready. The + representations are hierarchical, and it is expected that an object must + be created or accessed with respect to its parent object. + + (2) Initialisation: states that perform lookups in the cache and validate + what's found and that create on disk any missing metadata. + + (3) Normal running: states that allow netfs operations on objects to proceed + and that update the state of objects. + + (4) Termination: states that detach objects from their netfs cookies, that + delete objects from disk, that handle disk and system errors and that free + up in-memory resources. + + +In most cases, transitioning between states is in response to signalled events. +When a state has finished processing, it will usually set the mask of events in +which it is interested (object->event_mask) and relinquish the worker thread. +Then when an event is raised (by calling fscache_raise_event()), if the event +is not masked, the object will be queued for processing (by calling +fscache_enqueue_object()). + + +PROVISION OF CPU TIME +--------------------- + +The work to be done by the various states is given CPU time by the threads of +the slow work facility (see Documentation/slow-work.txt). This is used in +preference to the workqueue facility because: + + (1) Threads may be completely occupied for very long periods of time by a + particular work item. These state actions may be doing sequences of + synchronous, journalled disk accesses (lookup, mkdir, create, setxattr, + getxattr, truncate, unlink, rmdir, rename). + + (2) Threads may do little actual work, but may rather spend a lot of time + sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded + workqueues don't necessarily have the right numbers of threads. + + +LOCKING SIMPLIFICATION +---------------------- + +Because only one worker thread may be operating on any particular object's +state machine at once, this simplifies the locking, particularly with respect +to disconnecting the netfs's representation of a cache object (fscache_cookie) +from the cache backend's representation (fscache_object) - which may be +requested from either end. + + +================= +THE SET OF STATES +================= + +The object state machine has a set of states that it can be in. There are +preparation states in which the object sets itself up and waits for its parent +object to transit to a state that allows access to its children: + + (1) State FSCACHE_OBJECT_INIT. + + Initialise the object and wait for the parent object to become active. In + the cache, it is expected that it will not be possible to look an object + up from the parent object, until that parent object itself has been looked + up. + +There are initialisation states in which the object sets itself up and accesses +disk for the object metadata: + + (2) State FSCACHE_OBJECT_LOOKING_UP. + + Look up the object on disk, using the parent as a starting point. + FS-Cache expects the cache backend to probe the cache to see whether this + object is represented there, and if it is, to see if it's valid (coherency + management). + + The cache should call fscache_object_lookup_negative() to indicate lookup + failure for whatever reason, and should call fscache_obtained_object() to + indicate success. + + At the completion of lookup, FS-Cache will let the netfs go ahead with + read operations, no matter whether the file is yet cached. If not yet + cached, read operations will be immediately rejected with ENODATA until + the first known page is uncached - as to that point there can be no data + to be read out of the cache for that file that isn't currently also held + in the pagecache. + + (3) State FSCACHE_OBJECT_CREATING. + + Create an object on disk, using the parent as a starting point. This + happens if the lookup failed to find the object, or if the object's + coherency data indicated what's on disk is out of date. In this state, + FS-Cache expects the cache to create + + The cache should call fscache_obtained_object() if creation completes + successfully, fscache_object_lookup_negative() otherwise. + + At the completion of creation, FS-Cache will start processing write + operations the netfs has queued for an object. If creation failed, the + write ops will be transparently discarded, and nothing recorded in the + cache. + +There are some normal running states in which the object spends its time +servicing netfs requests: + + (4) State FSCACHE_OBJECT_AVAILABLE. + + A transient state in which pending operations are started, child objects + are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary + lookup data is freed. + + (5) State FSCACHE_OBJECT_ACTIVE. + + The normal running state. In this state, requests the netfs makes will be + passed on to the cache. + + (6) State FSCACHE_OBJECT_UPDATING. + + The state machine comes here to update the object in the cache from the + netfs's records. This involves updating the auxiliary data that is used + to maintain coherency. + +And there are terminal states in which an object cleans itself up, deallocates +memory and potentially deletes stuff from disk: + + (7) State FSCACHE_OBJECT_LC_DYING. + + The object comes here if it is dying because of a lookup or creation + error. This would be due to a disk error or system error of some sort. + Temporary data is cleaned up, and the parent is released. + + (8) State FSCACHE_OBJECT_DYING. + + The object comes here if it is dying due to an error, because its parent + cookie has been relinquished by the netfs or because the cache is being + withdrawn. + + Any child objects waiting on this one are given CPU time so that they too + can destroy themselves. This object waits for all its children to go away + before advancing to the next state. + + (9) State FSCACHE_OBJECT_ABORT_INIT. + + The object comes to this state if it was waiting on its parent in + FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself + so that the parent may proceed from the FSCACHE_OBJECT_DYING state. + +(10) State FSCACHE_OBJECT_RELEASING. +(11) State FSCACHE_OBJECT_RECYCLING. + + The object comes to one of these two states when dying once it is rid of + all its children, if it is dying because the netfs relinquished its + cookie. In the first state, the cached data is expected to persist, and + in the second it will be deleted. + +(12) State FSCACHE_OBJECT_WITHDRAWING. + + The object transits to this state if the cache decides it wants to + withdraw the object from service, perhaps to make space, but also due to + error or just because the whole cache is being withdrawn. + +(13) State FSCACHE_OBJECT_DEAD. + + The object transits to this state when the in-memory object record is + ready to be deleted. The object processor shouldn't ever see an object in + this state. + + +THE SET OF EVENTS +----------------- + +There are a number of events that can be raised to an object state machine: + + (*) FSCACHE_OBJECT_EV_UPDATE + + The netfs requested that an object be updated. The state machine will ask + the cache backend to update the object, and the cache backend will ask the + netfs for details of the change through its cookie definition ops. + + (*) FSCACHE_OBJECT_EV_CLEARED + + This is signalled in two circumstances: + + (a) when an object's last child object is dropped and + + (b) when the last operation outstanding on an object is completed. + + This is used to proceed from the dying state. + + (*) FSCACHE_OBJECT_EV_ERROR + + This is signalled when an I/O error occurs during the processing of some + object. + + (*) FSCACHE_OBJECT_EV_RELEASE + (*) FSCACHE_OBJECT_EV_RETIRE + + These are signalled when the netfs relinquishes a cookie it was using. + The event selected depends on whether the netfs asks for the backing + object to be retired (deleted) or retained. + + (*) FSCACHE_OBJECT_EV_WITHDRAW + + This is signalled when the cache backend wants to withdraw an object. + This means that the object will have to be detached from the netfs's + cookie. + +Because the withdrawing releasing/retiring events are all handled by the object +state machine, it doesn't matter if there's a collision with both ends trying +to sever the connection at the same time. The state machine can just pick +which one it wants to honour, and that effects the other. diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index ecf6946eaeb3..4420ac6ea10d 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -7,7 +7,8 @@ fscache-y := \ cookie.o \ fsdef.o \ main.o \ - netfs.o + netfs.o \ + object.o fscache-$(CONFIG_PROC_FS) += proc.o fscache-$(CONFIG_FSCACHE_STATS) += stats.o diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 16389942a54e..529f4de328c2 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -85,6 +85,18 @@ extern struct kobject *fscache_root; extern int fscache_wait_bit(void *); extern int fscache_wait_bit_interruptible(void *); +/* + * fsc-object.c + */ +extern void fscache_withdrawing_object(struct fscache_cache *, + struct fscache_object *); +extern void fscache_enqueue_object(struct fscache_object *); + +/* + * fsc-operation.c + */ +#define fscache_start_operations(obj) BUG() + /* * fsc-proc.c */ @@ -196,7 +208,19 @@ extern const struct file_operations fscache_stats_fops; static inline void fscache_raise_event(struct fscache_object *object, unsigned event) { - BUG(); // TODO + if (!test_and_set_bit(event, &object->events) && + test_bit(event, &object->event_mask)) + fscache_enqueue_object(object); +} + +/* + * drop a reference to a cookie + */ +static inline void fscache_cookie_put(struct fscache_cookie *cookie) +{ + BUG_ON(atomic_read(&cookie->usage) <= 0); + if (atomic_dec_and_test(&cookie->usage)) + __fscache_cookie_put(cookie); } /*****************************************************************************/ diff --git a/fs/fscache/object.c b/fs/fscache/object.c new file mode 100644 index 000000000000..392a41b1b79d --- /dev/null +++ b/fs/fscache/object.c @@ -0,0 +1,810 @@ +/* FS-Cache object state machine handler + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * See Documentation/filesystems/caching/object.txt for a description of the + * object state machine and the in-kernel representations. + */ + +#define FSCACHE_DEBUG_LEVEL COOKIE +#include +#include "internal.h" + +const char *fscache_object_states[] = { + [FSCACHE_OBJECT_INIT] = "OBJECT_INIT", + [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP", + [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", + [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", + [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", + [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", + [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", + [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", + [FSCACHE_OBJECT_ABORT_INIT] = "OBJECT_ABORT_INIT", + [FSCACHE_OBJECT_RELEASING] = "OBJECT_RELEASING", + [FSCACHE_OBJECT_RECYCLING] = "OBJECT_RECYCLING", + [FSCACHE_OBJECT_WITHDRAWING] = "OBJECT_WITHDRAWING", + [FSCACHE_OBJECT_DEAD] = "OBJECT_DEAD", +}; +EXPORT_SYMBOL(fscache_object_states); + +static void fscache_object_slow_work_put_ref(struct slow_work *); +static int fscache_object_slow_work_get_ref(struct slow_work *); +static void fscache_object_slow_work_execute(struct slow_work *); +static void fscache_initialise_object(struct fscache_object *); +static void fscache_lookup_object(struct fscache_object *); +static void fscache_object_available(struct fscache_object *); +static void fscache_release_object(struct fscache_object *); +static void fscache_withdraw_object(struct fscache_object *); +static void fscache_enqueue_dependents(struct fscache_object *); +static void fscache_dequeue_object(struct fscache_object *); + +const struct slow_work_ops fscache_object_slow_work_ops = { + .get_ref = fscache_object_slow_work_get_ref, + .put_ref = fscache_object_slow_work_put_ref, + .execute = fscache_object_slow_work_execute, +}; +EXPORT_SYMBOL(fscache_object_slow_work_ops); + +/* + * we need to notify the parent when an op completes that we had outstanding + * upon it + */ +static inline void fscache_done_parent_op(struct fscache_object *object) +{ + struct fscache_object *parent = object->parent; + + _enter("OBJ%x {OBJ%x,%x}", + object->debug_id, parent->debug_id, parent->n_ops); + + spin_lock_nested(&parent->lock, 1); + parent->n_ops--; + parent->n_obj_ops--; + if (parent->n_ops == 0) + fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); + spin_unlock(&parent->lock); +} + +/* + * process events that have been sent to an object's state machine + * - initiates parent lookup + * - does object lookup + * - does object creation + * - does object recycling and retirement + * - does object withdrawal + */ +static void fscache_object_state_machine(struct fscache_object *object) +{ + enum fscache_object_state new_state; + + ASSERT(object != NULL); + + _enter("{OBJ%x,%s,%lx}", + object->debug_id, fscache_object_states[object->state], + object->events); + + switch (object->state) { + /* wait for the parent object to become ready */ + case FSCACHE_OBJECT_INIT: + object->event_mask = + ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); + fscache_initialise_object(object); + goto done; + + /* look up the object metadata on disk */ + case FSCACHE_OBJECT_LOOKING_UP: + fscache_lookup_object(object); + goto lookup_transit; + + /* create the object metadata on disk */ + case FSCACHE_OBJECT_CREATING: + fscache_lookup_object(object); + goto lookup_transit; + + /* handle an object becoming available; start pending + * operations and queue dependent operations for processing */ + case FSCACHE_OBJECT_AVAILABLE: + fscache_object_available(object); + goto active_transit; + + /* normal running state */ + case FSCACHE_OBJECT_ACTIVE: + goto active_transit; + + /* update the object metadata on disk */ + case FSCACHE_OBJECT_UPDATING: + clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); + fscache_stat(&fscache_n_updates_run); + object->cache->ops->update_object(object); + goto active_transit; + + /* handle an object dying during lookup or creation */ + case FSCACHE_OBJECT_LC_DYING: + object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); + object->cache->ops->lookup_complete(object); + + spin_lock(&object->lock); + object->state = FSCACHE_OBJECT_DYING; + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, + &object->cookie->flags)) + wake_up_bit(&object->cookie->flags, + FSCACHE_COOKIE_CREATING); + spin_unlock(&object->lock); + + fscache_done_parent_op(object); + + /* wait for completion of all active operations on this object + * and the death of all child objects of this object */ + case FSCACHE_OBJECT_DYING: + dying: + clear_bit(FSCACHE_OBJECT_EV_CLEARED, &object->events); + spin_lock(&object->lock); + _debug("dying OBJ%x {%d,%d}", + object->debug_id, object->n_ops, object->n_children); + if (object->n_ops == 0 && object->n_children == 0) { + object->event_mask &= + ~(1 << FSCACHE_OBJECT_EV_CLEARED); + object->event_mask |= + (1 << FSCACHE_OBJECT_EV_WITHDRAW) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_ERROR); + } else { + object->event_mask &= + ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_ERROR)); + object->event_mask |= + 1 << FSCACHE_OBJECT_EV_CLEARED; + } + spin_unlock(&object->lock); + fscache_enqueue_dependents(object); + goto terminal_transit; + + /* handle an abort during initialisation */ + case FSCACHE_OBJECT_ABORT_INIT: + _debug("handle abort init %lx", object->events); + object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); + + spin_lock(&object->lock); + fscache_dequeue_object(object); + + object->state = FSCACHE_OBJECT_DYING; + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, + &object->cookie->flags)) + wake_up_bit(&object->cookie->flags, + FSCACHE_COOKIE_CREATING); + spin_unlock(&object->lock); + goto dying; + + /* handle the netfs releasing an object and possibly marking it + * obsolete too */ + case FSCACHE_OBJECT_RELEASING: + case FSCACHE_OBJECT_RECYCLING: + object->event_mask &= + ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_ERROR)); + fscache_release_object(object); + spin_lock(&object->lock); + object->state = FSCACHE_OBJECT_DEAD; + spin_unlock(&object->lock); + fscache_stat(&fscache_n_object_dead); + goto terminal_transit; + + /* handle the parent cache of this object being withdrawn from + * active service */ + case FSCACHE_OBJECT_WITHDRAWING: + object->event_mask &= + ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_ERROR)); + fscache_withdraw_object(object); + spin_lock(&object->lock); + object->state = FSCACHE_OBJECT_DEAD; + spin_unlock(&object->lock); + fscache_stat(&fscache_n_object_dead); + goto terminal_transit; + + /* complain about the object being woken up once it is + * deceased */ + case FSCACHE_OBJECT_DEAD: + printk(KERN_ERR "FS-Cache:" + " Unexpected event in dead state %lx\n", + object->events & object->event_mask); + BUG(); + + default: + printk(KERN_ERR "FS-Cache: Unknown object state %u\n", + object->state); + BUG(); + } + + /* determine the transition from a lookup state */ +lookup_transit: + switch (fls(object->events & object->event_mask) - 1) { + case FSCACHE_OBJECT_EV_WITHDRAW: + case FSCACHE_OBJECT_EV_RETIRE: + case FSCACHE_OBJECT_EV_RELEASE: + case FSCACHE_OBJECT_EV_ERROR: + new_state = FSCACHE_OBJECT_LC_DYING; + goto change_state; + case FSCACHE_OBJECT_EV_REQUEUE: + goto done; + case -1: + goto done; /* sleep until event */ + default: + goto unsupported_event; + } + + /* determine the transition from an active state */ +active_transit: + switch (fls(object->events & object->event_mask) - 1) { + case FSCACHE_OBJECT_EV_WITHDRAW: + case FSCACHE_OBJECT_EV_RETIRE: + case FSCACHE_OBJECT_EV_RELEASE: + case FSCACHE_OBJECT_EV_ERROR: + new_state = FSCACHE_OBJECT_DYING; + goto change_state; + case FSCACHE_OBJECT_EV_UPDATE: + new_state = FSCACHE_OBJECT_UPDATING; + goto change_state; + case -1: + new_state = FSCACHE_OBJECT_ACTIVE; + goto change_state; /* sleep until event */ + default: + goto unsupported_event; + } + + /* determine the transition from a terminal state */ +terminal_transit: + switch (fls(object->events & object->event_mask) - 1) { + case FSCACHE_OBJECT_EV_WITHDRAW: + new_state = FSCACHE_OBJECT_WITHDRAWING; + goto change_state; + case FSCACHE_OBJECT_EV_RETIRE: + new_state = FSCACHE_OBJECT_RECYCLING; + goto change_state; + case FSCACHE_OBJECT_EV_RELEASE: + new_state = FSCACHE_OBJECT_RELEASING; + goto change_state; + case FSCACHE_OBJECT_EV_ERROR: + new_state = FSCACHE_OBJECT_WITHDRAWING; + goto change_state; + case FSCACHE_OBJECT_EV_CLEARED: + new_state = FSCACHE_OBJECT_DYING; + goto change_state; + case -1: + goto done; /* sleep until event */ + default: + goto unsupported_event; + } + +change_state: + spin_lock(&object->lock); + object->state = new_state; + spin_unlock(&object->lock); + +done: + _leave(" [->%s]", fscache_object_states[object->state]); + return; + +unsupported_event: + printk(KERN_ERR "FS-Cache:" + " Unsupported event %lx [mask %lx] in state %s\n", + object->events, object->event_mask, + fscache_object_states[object->state]); + BUG(); +} + +/* + * execute an object + */ +static void fscache_object_slow_work_execute(struct slow_work *work) +{ + struct fscache_object *object = + container_of(work, struct fscache_object, work); + unsigned long start; + + _enter("{OBJ%x}", object->debug_id); + + clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + + start = jiffies; + fscache_object_state_machine(object); + fscache_hist(fscache_objs_histogram, start); + if (object->events & object->event_mask) + fscache_enqueue_object(object); +} + +/* + * initialise an object + * - check the specified object's parent to see if we can make use of it + * immediately to do a creation + * - we may need to start the process of creating a parent and we need to wait + * for the parent's lookup and creation to complete if it's not there yet + * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the + * leaf-most cookies of the object and all its children + */ +static void fscache_initialise_object(struct fscache_object *object) +{ + struct fscache_object *parent; + + _enter(""); + ASSERT(object->cookie != NULL); + ASSERT(object->cookie->parent != NULL); + ASSERT(list_empty(&object->work.link)); + + if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | + (1 << FSCACHE_OBJECT_EV_RELEASE) | + (1 << FSCACHE_OBJECT_EV_RETIRE) | + (1 << FSCACHE_OBJECT_EV_WITHDRAW))) { + _debug("abort init %lx", object->events); + spin_lock(&object->lock); + object->state = FSCACHE_OBJECT_ABORT_INIT; + spin_unlock(&object->lock); + return; + } + + spin_lock(&object->cookie->lock); + spin_lock_nested(&object->cookie->parent->lock, 1); + + parent = object->parent; + if (!parent) { + _debug("no parent"); + set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); + } else { + spin_lock(&object->lock); + spin_lock_nested(&parent->lock, 1); + _debug("parent %s", fscache_object_states[parent->state]); + + if (parent->state >= FSCACHE_OBJECT_DYING) { + _debug("bad parent"); + set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); + } else if (parent->state < FSCACHE_OBJECT_AVAILABLE) { + _debug("wait"); + + /* we may get woken up in this state by child objects + * binding on to us, so we need to make sure we don't + * add ourself to the list multiple times */ + if (list_empty(&object->dep_link)) { + object->cache->ops->grab_object(object); + list_add(&object->dep_link, + &parent->dependents); + + /* fscache_acquire_non_index_cookie() uses this + * to wake the chain up */ + if (parent->state == FSCACHE_OBJECT_INIT) + fscache_enqueue_object(parent); + } + } else { + _debug("go"); + parent->n_ops++; + parent->n_obj_ops++; + object->lookup_jif = jiffies; + object->state = FSCACHE_OBJECT_LOOKING_UP; + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } + + spin_unlock(&parent->lock); + spin_unlock(&object->lock); + } + + spin_unlock(&object->cookie->parent->lock); + spin_unlock(&object->cookie->lock); + _leave(""); +} + +/* + * look an object up in the cache from which it was allocated + * - we hold an "access lock" on the parent object, so the parent object cannot + * be withdrawn by either party till we've finished + * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the + * leaf-most cookies of the object and all its children + */ +static void fscache_lookup_object(struct fscache_object *object) +{ + struct fscache_cookie *cookie = object->cookie; + struct fscache_object *parent; + + _enter(""); + + parent = object->parent; + ASSERT(parent != NULL); + ASSERTCMP(parent->n_ops, >, 0); + ASSERTCMP(parent->n_obj_ops, >, 0); + + /* make sure the parent is still available */ + ASSERTCMP(parent->state, >=, FSCACHE_OBJECT_AVAILABLE); + + if (parent->state >= FSCACHE_OBJECT_DYING || + test_bit(FSCACHE_IOERROR, &object->cache->flags)) { + _debug("unavailable"); + set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); + _leave(""); + return; + } + + _debug("LOOKUP \"%s/%s\" in \"%s\"", + parent->cookie->def->name, cookie->def->name, + object->cache->tag->name); + + fscache_stat(&fscache_n_object_lookups); + object->cache->ops->lookup_object(object); + + if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) + set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); + + _leave(""); +} + +/** + * fscache_object_lookup_negative - Note negative cookie lookup + * @object: Object pointing to cookie to mark + * + * Note negative lookup, permitting those waiting to read data from an already + * existing backing object to continue as there's no data for them to read. + */ +void fscache_object_lookup_negative(struct fscache_object *object) +{ + struct fscache_cookie *cookie = object->cookie; + + _enter("{OBJ%x,%s}", + object->debug_id, fscache_object_states[object->state]); + + spin_lock(&object->lock); + if (object->state == FSCACHE_OBJECT_LOOKING_UP) { + fscache_stat(&fscache_n_object_lookups_negative); + + /* transit here to allow write requests to begin stacking up + * and read requests to begin returning ENODATA */ + object->state = FSCACHE_OBJECT_CREATING; + spin_unlock(&object->lock); + + set_bit(FSCACHE_COOKIE_PENDING_FILL, &cookie->flags); + set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + _debug("wake up lookup %p", &cookie->flags); + smp_mb__before_clear_bit(); + clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } else { + ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); + spin_unlock(&object->lock); + } + + _leave(""); +} +EXPORT_SYMBOL(fscache_object_lookup_negative); + +/** + * fscache_obtained_object - Note successful object lookup or creation + * @object: Object pointing to cookie to mark + * + * Note successful lookup and/or creation, permitting those waiting to write + * data to a backing object to continue. + * + * Note that after calling this, an object's cookie may be relinquished by the + * netfs, and so must be accessed with object lock held. + */ +void fscache_obtained_object(struct fscache_object *object) +{ + struct fscache_cookie *cookie = object->cookie; + + _enter("{OBJ%x,%s}", + object->debug_id, fscache_object_states[object->state]); + + /* if we were still looking up, then we must have a positive lookup + * result, in which case there may be data available */ + spin_lock(&object->lock); + if (object->state == FSCACHE_OBJECT_LOOKING_UP) { + fscache_stat(&fscache_n_object_lookups_positive); + + clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + object->state = FSCACHE_OBJECT_AVAILABLE; + spin_unlock(&object->lock); + + smp_mb__before_clear_bit(); + clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + } else { + ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); + fscache_stat(&fscache_n_object_created); + + object->state = FSCACHE_OBJECT_AVAILABLE; + spin_unlock(&object->lock); + set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + smp_wmb(); + } + + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_CREATING); + + _leave(""); +} +EXPORT_SYMBOL(fscache_obtained_object); + +/* + * handle an object that has just become available + */ +static void fscache_object_available(struct fscache_object *object) +{ + _enter("{OBJ%x}", object->debug_id); + + spin_lock(&object->lock); + + if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) + wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING); + + fscache_done_parent_op(object); + if (object->n_in_progress == 0) { + if (object->n_ops > 0) { + ASSERTCMP(object->n_ops, >=, object->n_obj_ops); + ASSERTIF(object->n_ops > object->n_obj_ops, + !list_empty(&object->pending_ops)); + fscache_start_operations(object); + } else { + ASSERT(list_empty(&object->pending_ops)); + } + } + spin_unlock(&object->lock); + + object->cache->ops->lookup_complete(object); + fscache_enqueue_dependents(object); + + fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); + fscache_stat(&fscache_n_object_avail); + + _leave(""); +} + +/* + * drop an object's attachments + */ +static void fscache_drop_object(struct fscache_object *object) +{ + struct fscache_object *parent = object->parent; + struct fscache_cache *cache = object->cache; + + _enter("{OBJ%x,%d}", object->debug_id, object->n_children); + + spin_lock(&cache->object_list_lock); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); + + cache->ops->drop_object(object); + + if (parent) { + _debug("release parent OBJ%x {%d}", + parent->debug_id, parent->n_children); + + spin_lock(&parent->lock); + parent->n_children--; + if (parent->n_children == 0) + fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); + spin_unlock(&parent->lock); + object->parent = NULL; + } + + /* this just shifts the object release to the slow work processor */ + object->cache->ops->put_object(object); + + _leave(""); +} + +/* + * release or recycle an object that the netfs has discarded + */ +static void fscache_release_object(struct fscache_object *object) +{ + _enter(""); + + fscache_drop_object(object); +} + +/* + * withdraw an object from active service + */ +static void fscache_withdraw_object(struct fscache_object *object) +{ + struct fscache_cookie *cookie; + bool detached; + + _enter(""); + + spin_lock(&object->lock); + cookie = object->cookie; + if (cookie) { + /* need to get the cookie lock before the object lock, starting + * from the object pointer */ + atomic_inc(&cookie->usage); + spin_unlock(&object->lock); + + detached = false; + spin_lock(&cookie->lock); + spin_lock(&object->lock); + + if (object->cookie == cookie) { + hlist_del_init(&object->cookie_link); + object->cookie = NULL; + detached = true; + } + spin_unlock(&cookie->lock); + fscache_cookie_put(cookie); + if (detached) + fscache_cookie_put(cookie); + } + + spin_unlock(&object->lock); + + fscache_drop_object(object); +} + +/* + * withdraw an object from active service at the behest of the cache + * - need break the links to a cached object cookie + * - called under two situations: + * (1) recycler decides to reclaim an in-use object + * (2) a cache is unmounted + * - have to take care as the cookie can be being relinquished by the netfs + * simultaneously + * - the object is pinned by the caller holding a refcount on it + */ +void fscache_withdrawing_object(struct fscache_cache *cache, + struct fscache_object *object) +{ + bool enqueue = false; + + _enter(",OBJ%x", object->debug_id); + + spin_lock(&object->lock); + if (object->state < FSCACHE_OBJECT_WITHDRAWING) { + object->state = FSCACHE_OBJECT_WITHDRAWING; + enqueue = true; + } + spin_unlock(&object->lock); + + if (enqueue) + fscache_enqueue_object(object); + + _leave(""); +} + +/* + * allow the slow work item processor to get a ref on an object + */ +static int fscache_object_slow_work_get_ref(struct slow_work *work) +{ + struct fscache_object *object = + container_of(work, struct fscache_object, work); + + return object->cache->ops->grab_object(object) ? 0 : -EAGAIN; +} + +/* + * allow the slow work item processor to discard a ref on a work item + */ +static void fscache_object_slow_work_put_ref(struct slow_work *work) +{ + struct fscache_object *object = + container_of(work, struct fscache_object, work); + + return object->cache->ops->put_object(object); +} + +/* + * enqueue an object for metadata-type processing + */ +void fscache_enqueue_object(struct fscache_object *object) +{ + _enter("{OBJ%x}", object->debug_id); + + slow_work_enqueue(&object->work); +} + +/* + * enqueue the dependents of an object for metadata-type processing + * - the caller must hold the object's lock + * - this may cause an already locked object to wind up being processed again + */ +static void fscache_enqueue_dependents(struct fscache_object *object) +{ + struct fscache_object *dep; + + _enter("{OBJ%x}", object->debug_id); + + if (list_empty(&object->dependents)) + return; + + spin_lock(&object->lock); + + while (!list_empty(&object->dependents)) { + dep = list_entry(object->dependents.next, + struct fscache_object, dep_link); + list_del_init(&dep->dep_link); + + + /* sort onto appropriate lists */ + fscache_enqueue_object(dep); + dep->cache->ops->put_object(dep); + + if (!list_empty(&object->dependents)) + cond_resched_lock(&object->lock); + } + + spin_unlock(&object->lock); +} + +/* + * remove an object from whatever queue it's waiting on + * - the caller must hold object->lock + */ +void fscache_dequeue_object(struct fscache_object *object) +{ + _enter("{OBJ%x}", object->debug_id); + + if (!list_empty(&object->dep_link)) { + spin_lock(&object->parent->lock); + list_del_init(&object->dep_link); + spin_unlock(&object->parent->lock); + } + + _leave(""); +} + +/** + * fscache_check_aux - Ask the netfs whether an object on disk is still valid + * @object: The object to ask about + * @data: The auxiliary data for the object + * @datalen: The size of the auxiliary data + * + * This function consults the netfs about the coherency state of an object + */ +enum fscache_checkaux fscache_check_aux(struct fscache_object *object, + const void *data, uint16_t datalen) +{ + enum fscache_checkaux result; + + if (!object->cookie->def->check_aux) { + fscache_stat(&fscache_n_checkaux_none); + return FSCACHE_CHECKAUX_OKAY; + } + + result = object->cookie->def->check_aux(object->cookie->netfs_data, + data, datalen); + switch (result) { + /* entry okay as is */ + case FSCACHE_CHECKAUX_OKAY: + fscache_stat(&fscache_n_checkaux_okay); + break; + + /* entry requires update */ + case FSCACHE_CHECKAUX_NEEDS_UPDATE: + fscache_stat(&fscache_n_checkaux_update); + break; + + /* entry requires deletion */ + case FSCACHE_CHECKAUX_OBSOLETE: + fscache_stat(&fscache_n_checkaux_obsolete); + break; + + default: + BUG(); + } + + return result; +} +EXPORT_SYMBOL(fscache_check_aux); -- cgit v1.2.3-59-g8ed1b From 952efe7b7840e1c726ae88222245e4efe6bd88f3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:39 +0100 Subject: FS-Cache: Add and document asynchronous operation handling Add and document asynchronous operation handling for use by FS-Cache's data storage and retrieval routines. The following documentation is added to: Documentation/filesystems/caching/operations.txt ================================ ASYNCHRONOUS OPERATIONS HANDLING ================================ ======== OVERVIEW ======== FS-Cache has an asynchronous operations handling facility that it uses for its data storage and retrieval routines. Its operations are represented by fscache_operation structs, though these are usually embedded into some other structure. This facility is available to and expected to be be used by the cache backends, and FS-Cache will create operations and pass them off to the appropriate cache backend for completion. To make use of this facility, should be #included. =============================== OPERATION RECORD INITIALISATION =============================== An operation is recorded in an fscache_operation struct: struct fscache_operation { union { struct work_struct fast_work; struct slow_work slow_work; }; unsigned long flags; fscache_operation_processor_t processor; ... }; Someone wanting to issue an operation should allocate something with this struct embedded in it. They should initialise it by calling: void fscache_operation_init(struct fscache_operation *op, fscache_operation_release_t release); with the operation to be initialised and the release function to use. The op->flags parameter should be set to indicate the CPU time provision and the exclusivity (see the Parameters section). The op->fast_work, op->slow_work and op->processor flags should be set as appropriate for the CPU time provision (see the Parameters section). FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the operation and waited for afterwards. ========== PARAMETERS ========== There are a number of parameters that can be set in the operation record's flag parameter. There are three options for the provision of CPU time in these operations: (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread may decide it wants to handle an operation itself without deferring it to another thread. This is, for example, used in read operations for calling readpages() on the backing filesystem in CacheFiles. Although readpages() does an asynchronous data fetch, the determination of whether pages exist is done synchronously - and the netfs does not proceed until this has been determined. If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags before submitting the operation, and the operating thread must wait for it to be cleared before proceeding: wait_on_bit(&op->flags, FSCACHE_OP_WAITING, fscache_wait_bit, TASK_UNINTERRUPTIBLE); (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it will be given to keventd to process. Such an operation is not permitted to sleep on I/O. This is, for example, used by CacheFiles to copy data from a backing fs page to a netfs page after the backing fs has read the page in. If this option is used, op->fast_work and op->processor must be initialised before submitting the operation: INIT_WORK(&op->fast_work, do_some_work); (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it will be given to the slow work facility to process. Such an operation is permitted to sleep on I/O. This is, for example, used by FS-Cache to handle background writes of pages that have just been fetched from a remote server. If this option is used, op->slow_work and op->processor must be initialised before submitting the operation: fscache_operation_init_slow(op, processor) Furthermore, operations may be one of two types: (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in conjunction with any other operation on the object being operated upon. An example of this is the attribute change operation, in which the file being written to may need truncation. (2) Shareable. Operations of this type may be running simultaneously. It's up to the operation implementation to prevent interference between other operations running at the same time. ========= PROCEDURE ========= Operations are used through the following procedure: (1) The submitting thread must allocate the operation and initialise it itself. Normally this would be part of a more specific structure with the generic op embedded within. (2) The submitting thread must then submit the operation for processing using one of the following two functions: int fscache_submit_op(struct fscache_object *object, struct fscache_operation *op); int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op); The first function should be used to submit non-exclusive ops and the second to submit exclusive ones. The caller must still set the FSCACHE_OP_EXCLUSIVE flag. If successful, both functions will assign the operation to the specified object and return 0. -ENOBUFS will be returned if the object specified is permanently unavailable. The operation manager will defer operations on an object that is still undergoing lookup or creation. The operation will also be deferred if an operation of conflicting exclusivity is in progress on the object. If the operation is asynchronous, the manager will retain a reference to it, so the caller should put their reference to it by passing it to: void fscache_put_operation(struct fscache_operation *op); (3) If the submitting thread wants to do the work itself, and has marked the operation with FSCACHE_OP_MYTHREAD, then it should monitor FSCACHE_OP_WAITING as described above and check the state of the object if necessary (the object might have died whilst the thread was waiting). When it has finished doing its processing, it should call fscache_put_operation() on it. (4) The operation holds an effective lock upon the object, preventing other exclusive ops conflicting until it is released. The operation can be enqueued for further immediate asynchronous processing by adjusting the CPU time provisioning option if necessary, eg: op->flags &= ~FSCACHE_OP_TYPE; op->flags |= ~FSCACHE_OP_FAST; and calling: void fscache_enqueue_operation(struct fscache_operation *op) This can be used to allow other things to have use of the worker thread pools. ===================== ASYNCHRONOUS CALLBACK ===================== When used in asynchronous mode, the worker thread pool will invoke the processor method with a pointer to the operation. This should then get at the container struct by using container_of(): static void fscache_write_op(struct fscache_operation *_op) { struct fscache_storage *op = container_of(_op, struct fscache_storage, op); ... } The caller holds a reference on the operation, and will invoke fscache_put_operation() when the processor function returns. The processor function is at liberty to call fscache_enqueue_operation() or to take extra references. Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- Documentation/filesystems/caching/operations.txt | 213 +++++++++++ fs/fscache/Makefile | 3 +- fs/fscache/cache.c | 2 +- fs/fscache/internal.h | 8 +- fs/fscache/operation.c | 459 +++++++++++++++++++++++ 5 files changed, 682 insertions(+), 3 deletions(-) create mode 100644 Documentation/filesystems/caching/operations.txt create mode 100644 fs/fscache/operation.c (limited to 'fs/fscache/internal.h') diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt new file mode 100644 index 000000000000..b6b070c57cbf --- /dev/null +++ b/Documentation/filesystems/caching/operations.txt @@ -0,0 +1,213 @@ + ================================ + ASYNCHRONOUS OPERATIONS HANDLING + ================================ + +By: David Howells + +Contents: + + (*) Overview. + + (*) Operation record initialisation. + + (*) Parameters. + + (*) Procedure. + + (*) Asynchronous callback. + + +======== +OVERVIEW +======== + +FS-Cache has an asynchronous operations handling facility that it uses for its +data storage and retrieval routines. Its operations are represented by +fscache_operation structs, though these are usually embedded into some other +structure. + +This facility is available to and expected to be be used by the cache backends, +and FS-Cache will create operations and pass them off to the appropriate cache +backend for completion. + +To make use of this facility, should be #included. + + +=============================== +OPERATION RECORD INITIALISATION +=============================== + +An operation is recorded in an fscache_operation struct: + + struct fscache_operation { + union { + struct work_struct fast_work; + struct slow_work slow_work; + }; + unsigned long flags; + fscache_operation_processor_t processor; + ... + }; + +Someone wanting to issue an operation should allocate something with this +struct embedded in it. They should initialise it by calling: + + void fscache_operation_init(struct fscache_operation *op, + fscache_operation_release_t release); + +with the operation to be initialised and the release function to use. + +The op->flags parameter should be set to indicate the CPU time provision and +the exclusivity (see the Parameters section). + +The op->fast_work, op->slow_work and op->processor flags should be set as +appropriate for the CPU time provision (see the Parameters section). + +FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the +operation and waited for afterwards. + + +========== +PARAMETERS +========== + +There are a number of parameters that can be set in the operation record's flag +parameter. There are three options for the provision of CPU time in these +operations: + + (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread + may decide it wants to handle an operation itself without deferring it to + another thread. + + This is, for example, used in read operations for calling readpages() on + the backing filesystem in CacheFiles. Although readpages() does an + asynchronous data fetch, the determination of whether pages exist is done + synchronously - and the netfs does not proceed until this has been + determined. + + If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags + before submitting the operation, and the operating thread must wait for it + to be cleared before proceeding: + + wait_on_bit(&op->flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + + + (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it + will be given to keventd to process. Such an operation is not permitted + to sleep on I/O. + + This is, for example, used by CacheFiles to copy data from a backing fs + page to a netfs page after the backing fs has read the page in. + + If this option is used, op->fast_work and op->processor must be + initialised before submitting the operation: + + INIT_WORK(&op->fast_work, do_some_work); + + + (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it + will be given to the slow work facility to process. Such an operation is + permitted to sleep on I/O. + + This is, for example, used by FS-Cache to handle background writes of + pages that have just been fetched from a remote server. + + If this option is used, op->slow_work and op->processor must be + initialised before submitting the operation: + + fscache_operation_init_slow(op, processor) + + +Furthermore, operations may be one of two types: + + (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in + conjunction with any other operation on the object being operated upon. + + An example of this is the attribute change operation, in which the file + being written to may need truncation. + + (2) Shareable. Operations of this type may be running simultaneously. It's + up to the operation implementation to prevent interference between other + operations running at the same time. + + +========= +PROCEDURE +========= + +Operations are used through the following procedure: + + (1) The submitting thread must allocate the operation and initialise it + itself. Normally this would be part of a more specific structure with the + generic op embedded within. + + (2) The submitting thread must then submit the operation for processing using + one of the following two functions: + + int fscache_submit_op(struct fscache_object *object, + struct fscache_operation *op); + + int fscache_submit_exclusive_op(struct fscache_object *object, + struct fscache_operation *op); + + The first function should be used to submit non-exclusive ops and the + second to submit exclusive ones. The caller must still set the + FSCACHE_OP_EXCLUSIVE flag. + + If successful, both functions will assign the operation to the specified + object and return 0. -ENOBUFS will be returned if the object specified is + permanently unavailable. + + The operation manager will defer operations on an object that is still + undergoing lookup or creation. The operation will also be deferred if an + operation of conflicting exclusivity is in progress on the object. + + If the operation is asynchronous, the manager will retain a reference to + it, so the caller should put their reference to it by passing it to: + + void fscache_put_operation(struct fscache_operation *op); + + (3) If the submitting thread wants to do the work itself, and has marked the + operation with FSCACHE_OP_MYTHREAD, then it should monitor + FSCACHE_OP_WAITING as described above and check the state of the object if + necessary (the object might have died whilst the thread was waiting). + + When it has finished doing its processing, it should call + fscache_put_operation() on it. + + (4) The operation holds an effective lock upon the object, preventing other + exclusive ops conflicting until it is released. The operation can be + enqueued for further immediate asynchronous processing by adjusting the + CPU time provisioning option if necessary, eg: + + op->flags &= ~FSCACHE_OP_TYPE; + op->flags |= ~FSCACHE_OP_FAST; + + and calling: + + void fscache_enqueue_operation(struct fscache_operation *op) + + This can be used to allow other things to have use of the worker thread + pools. + + +===================== +ASYNCHRONOUS CALLBACK +===================== + +When used in asynchronous mode, the worker thread pool will invoke the +processor method with a pointer to the operation. This should then get at the +container struct by using container_of(): + + static void fscache_write_op(struct fscache_operation *_op) + { + struct fscache_storage *op = + container_of(_op, struct fscache_storage, op); + ... + } + +The caller holds a reference on the operation, and will invoke +fscache_put_operation() when the processor function returns. The processor +function is at liberty to call fscache_enqueue_operation() or to take extra +references. diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 4420ac6ea10d..6f82da2aa9d1 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -8,7 +8,8 @@ fscache-y := \ fsdef.o \ main.o \ netfs.o \ - object.o + object.o \ + operation.o fscache-$(CONFIG_PROC_FS) += proc.o fscache-$(CONFIG_FSCACHE_STATS) += stats.o diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 355172f785fb..e21985bbb1fb 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -194,7 +194,7 @@ void fscache_init_cache(struct fscache_cache *cache, vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va); va_end(va); - INIT_WORK(&cache->op_gc, NULL); + INIT_WORK(&cache->op_gc, fscache_operation_gc); INIT_LIST_HEAD(&cache->link); INIT_LIST_HEAD(&cache->object_list); INIT_LIST_HEAD(&cache->op_gc_list); diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 529f4de328c2..014a830c8b37 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -95,7 +95,13 @@ extern void fscache_enqueue_object(struct fscache_object *); /* * fsc-operation.c */ -#define fscache_start_operations(obj) BUG() +extern int fscache_submit_exclusive_op(struct fscache_object *, + struct fscache_operation *); +extern int fscache_submit_op(struct fscache_object *, + struct fscache_operation *); +extern void fscache_abort_object(struct fscache_object *); +extern void fscache_start_operations(struct fscache_object *); +extern void fscache_operation_gc(struct work_struct *); /* * fsc-proc.c diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c new file mode 100644 index 000000000000..e7f8d53b8b6b --- /dev/null +++ b/fs/fscache/operation.c @@ -0,0 +1,459 @@ +/* FS-Cache worker operation management routines + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * See Documentation/filesystems/caching/operations.txt + */ + +#define FSCACHE_DEBUG_LEVEL OPERATION +#include +#include "internal.h" + +atomic_t fscache_op_debug_id; +EXPORT_SYMBOL(fscache_op_debug_id); + +/** + * fscache_enqueue_operation - Enqueue an operation for processing + * @op: The operation to enqueue + * + * Enqueue an operation for processing by the FS-Cache thread pool. + * + * This will get its own ref on the object. + */ +void fscache_enqueue_operation(struct fscache_operation *op) +{ + _enter("{OBJ%x OP%x,%u}", + op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + + ASSERT(op->processor != NULL); + ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); + ASSERTCMP(atomic_read(&op->usage), >, 0); + + if (list_empty(&op->pend_link)) { + switch (op->flags & FSCACHE_OP_TYPE) { + case FSCACHE_OP_FAST: + _debug("queue fast"); + atomic_inc(&op->usage); + if (!schedule_work(&op->fast_work)) + fscache_put_operation(op); + break; + case FSCACHE_OP_SLOW: + _debug("queue slow"); + slow_work_enqueue(&op->slow_work); + break; + case FSCACHE_OP_MYTHREAD: + _debug("queue for caller's attention"); + break; + default: + printk(KERN_ERR "FS-Cache: Unexpected op type %lx", + op->flags); + BUG(); + break; + } + fscache_stat(&fscache_n_op_enqueue); + } +} +EXPORT_SYMBOL(fscache_enqueue_operation); + +/* + * start an op running + */ +static void fscache_run_op(struct fscache_object *object, + struct fscache_operation *op) +{ + object->n_in_progress++; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + if (op->processor) + fscache_enqueue_operation(op); + fscache_stat(&fscache_n_op_run); +} + +/* + * submit an exclusive operation for an object + * - other ops are excluded from running simultaneously with this one + * - this gets any extra refs it needs on an op + */ +int fscache_submit_exclusive_op(struct fscache_object *object, + struct fscache_operation *op) +{ + int ret; + + _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + + spin_lock(&object->lock); + ASSERTCMP(object->n_ops, >=, object->n_in_progress); + ASSERTCMP(object->n_ops, >=, object->n_exclusive); + + ret = -ENOBUFS; + if (fscache_object_is_active(object)) { + op->object = object; + object->n_ops++; + object->n_exclusive++; /* reads and writes must wait */ + + if (object->n_ops > 0) { + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + } else if (!list_empty(&object->pending_ops)) { + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + fscache_start_operations(object); + } else { + ASSERTCMP(object->n_in_progress, ==, 0); + fscache_run_op(object, op); + } + + /* need to issue a new write op after this */ + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + ret = 0; + } else if (object->state == FSCACHE_OBJECT_CREATING) { + op->object = object; + object->n_ops++; + object->n_exclusive++; /* reads and writes must wait */ + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + ret = 0; + } else { + /* not allowed to submit ops in any other state */ + BUG(); + } + + spin_unlock(&object->lock); + return ret; +} + +/* + * report an unexpected submission + */ +static void fscache_report_unexpected_submission(struct fscache_object *object, + struct fscache_operation *op, + unsigned long ostate) +{ + static bool once_only; + struct fscache_operation *p; + unsigned n; + + if (once_only) + return; + once_only = true; + + kdebug("unexpected submission OP%x [OBJ%x %s]", + op->debug_id, object->debug_id, + fscache_object_states[object->state]); + kdebug("objstate=%s [%s]", + fscache_object_states[object->state], + fscache_object_states[ostate]); + kdebug("objflags=%lx", object->flags); + kdebug("objevent=%lx [%lx]", object->events, object->event_mask); + kdebug("ops=%u inp=%u exc=%u", + object->n_ops, object->n_in_progress, object->n_exclusive); + + if (!list_empty(&object->pending_ops)) { + n = 0; + list_for_each_entry(p, &object->pending_ops, pend_link) { + ASSERTCMP(p->object, ==, object); + kdebug("%p %p", op->processor, op->release); + n++; + } + + kdebug("n=%u", n); + } + + dump_stack(); +} + +/* + * submit an operation for an object + * - objects may be submitted only in the following states: + * - during object creation (write ops may be submitted) + * - whilst the object is active + * - after an I/O error incurred in one of the two above states (op rejected) + * - this gets any extra refs it needs on an op + */ +int fscache_submit_op(struct fscache_object *object, + struct fscache_operation *op) +{ + unsigned long ostate; + int ret; + + _enter("{OBJ%x OP%x},{%u}", + object->debug_id, op->debug_id, atomic_read(&op->usage)); + + ASSERTCMP(atomic_read(&op->usage), >, 0); + + spin_lock(&object->lock); + ASSERTCMP(object->n_ops, >=, object->n_in_progress); + ASSERTCMP(object->n_ops, >=, object->n_exclusive); + + ostate = object->state; + smp_rmb(); + + if (fscache_object_is_active(object)) { + op->object = object; + object->n_ops++; + + if (object->n_exclusive > 0) { + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + } else if (!list_empty(&object->pending_ops)) { + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + fscache_start_operations(object); + } else { + ASSERTCMP(object->n_exclusive, ==, 0); + fscache_run_op(object, op); + } + ret = 0; + } else if (object->state == FSCACHE_OBJECT_CREATING) { + op->object = object; + object->n_ops++; + atomic_inc(&op->usage); + list_add_tail(&op->pend_link, &object->pending_ops); + fscache_stat(&fscache_n_op_pend); + ret = 0; + } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { + fscache_report_unexpected_submission(object, op, ostate); + ASSERT(!fscache_object_is_active(object)); + ret = -ENOBUFS; + } else { + ret = -ENOBUFS; + } + + spin_unlock(&object->lock); + return ret; +} + +/* + * queue an object for withdrawal on error, aborting all following asynchronous + * operations + */ +void fscache_abort_object(struct fscache_object *object) +{ + _enter("{OBJ%x}", object->debug_id); + + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); +} + +/* + * jump start the operation processing on an object + * - caller must hold object->lock + */ +void fscache_start_operations(struct fscache_object *object) +{ + struct fscache_operation *op; + bool stop = false; + + while (!list_empty(&object->pending_ops) && !stop) { + op = list_entry(object->pending_ops.next, + struct fscache_operation, pend_link); + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { + if (object->n_in_progress > 0) + break; + stop = true; + } + list_del_init(&op->pend_link); + object->n_in_progress++; + + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + if (op->processor) + fscache_enqueue_operation(op); + + /* the pending queue was holding a ref on the object */ + fscache_put_operation(op); + } + + ASSERTCMP(object->n_in_progress, <=, object->n_ops); + + _debug("woke %d ops on OBJ%x", + object->n_in_progress, object->debug_id); +} + +/* + * release an operation + * - queues pending ops if this is the last in-progress op + */ +void fscache_put_operation(struct fscache_operation *op) +{ + struct fscache_object *object; + struct fscache_cache *cache; + + _enter("{OBJ%x OP%x,%d}", + op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + + ASSERTCMP(atomic_read(&op->usage), >, 0); + + if (!atomic_dec_and_test(&op->usage)) + return; + + _debug("PUT OP"); + if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) + BUG(); + + fscache_stat(&fscache_n_op_release); + + if (op->release) { + op->release(op); + op->release = NULL; + } + + object = op->object; + + /* now... we may get called with the object spinlock held, so we + * complete the cleanup here only if we can immediately acquire the + * lock, and defer it otherwise */ + if (!spin_trylock(&object->lock)) { + _debug("defer put"); + fscache_stat(&fscache_n_op_deferred_release); + + cache = object->cache; + spin_lock(&cache->op_gc_list_lock); + list_add_tail(&op->pend_link, &cache->op_gc_list); + spin_unlock(&cache->op_gc_list_lock); + schedule_work(&cache->op_gc); + _leave(" [defer]"); + return; + } + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { + ASSERTCMP(object->n_exclusive, >, 0); + object->n_exclusive--; + } + + ASSERTCMP(object->n_in_progress, >, 0); + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + ASSERTCMP(object->n_ops, >, 0); + object->n_ops--; + if (object->n_ops == 0) + fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); + + spin_unlock(&object->lock); + + kfree(op); + _leave(" [done]"); +} +EXPORT_SYMBOL(fscache_put_operation); + +/* + * garbage collect operations that have had their release deferred + */ +void fscache_operation_gc(struct work_struct *work) +{ + struct fscache_operation *op; + struct fscache_object *object; + struct fscache_cache *cache = + container_of(work, struct fscache_cache, op_gc); + int count = 0; + + _enter(""); + + do { + spin_lock(&cache->op_gc_list_lock); + if (list_empty(&cache->op_gc_list)) { + spin_unlock(&cache->op_gc_list_lock); + break; + } + + op = list_entry(cache->op_gc_list.next, + struct fscache_operation, pend_link); + list_del(&op->pend_link); + spin_unlock(&cache->op_gc_list_lock); + + object = op->object; + + _debug("GC DEFERRED REL OBJ%x OP%x", + object->debug_id, op->debug_id); + fscache_stat(&fscache_n_op_gc); + + ASSERTCMP(atomic_read(&op->usage), ==, 0); + + spin_lock(&object->lock); + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { + ASSERTCMP(object->n_exclusive, >, 0); + object->n_exclusive--; + } + + ASSERTCMP(object->n_in_progress, >, 0); + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + ASSERTCMP(object->n_ops, >, 0); + object->n_ops--; + if (object->n_ops == 0) + fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); + + spin_unlock(&object->lock); + + } while (count++ < 20); + + if (!list_empty(&cache->op_gc_list)) + schedule_work(&cache->op_gc); + + _leave(""); +} + +/* + * allow the slow work item processor to get a ref on an operation + */ +static int fscache_op_get_ref(struct slow_work *work) +{ + struct fscache_operation *op = + container_of(work, struct fscache_operation, slow_work); + + atomic_inc(&op->usage); + return 0; +} + +/* + * allow the slow work item processor to discard a ref on an operation + */ +static void fscache_op_put_ref(struct slow_work *work) +{ + struct fscache_operation *op = + container_of(work, struct fscache_operation, slow_work); + + fscache_put_operation(op); +} + +/* + * execute an operation using the slow thread pool to provide processing context + * - the caller holds a ref to this object, so we don't need to hold one + */ +static void fscache_op_execute(struct slow_work *work) +{ + struct fscache_operation *op = + container_of(work, struct fscache_operation, slow_work); + unsigned long start; + + _enter("{OBJ%x OP%x,%d}", + op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + + ASSERT(op->processor != NULL); + start = jiffies; + op->processor(op); + fscache_hist(fscache_ops_histogram, start); + + _leave(""); +} + +const struct slow_work_ops fscache_op_slow_work_ops = { + .get_ref = fscache_op_get_ref, + .put_ref = fscache_op_put_ref, + .execute = fscache_op_execute, +}; -- cgit v1.2.3-59-g8ed1b From b510882281d56873e1194021643b7c325336f84f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 Apr 2009 16:42:39 +0100 Subject: FS-Cache: Implement data I/O part of netfs API Implement the data I/O part of the FS-Cache netfs API. The documentation and API header file were added in a previous patch. This patch implements the following functions for the netfs to call: (*) fscache_attr_changed(). Indicate that the object has changed its attributes. The only attribute currently recorded is the file size. Only pages within the set file size will be stored in the cache. This operation is submitted for asynchronous processing, and will return immediately. It will return -ENOMEM if an out of memory error is encountered, -ENOBUFS if the object is not actually cached, or 0 if the operation is successfully queued. (*) fscache_read_or_alloc_page(). (*) fscache_read_or_alloc_pages(). Request data be fetched from the disk, and allocate internal metadata to track the netfs pages and reserve disk space for unknown pages. These operations perform semi-asynchronous data reads. Upon returning they will indicate which pages they think can be retrieved from disk, and will have set in progress attempts to retrieve those pages. These will return, in order of preference, -ENOMEM on memory allocation error, -ERESTARTSYS if a signal interrupted proceedings, -ENODATA if one or more requested pages are not yet cached, -ENOBUFS if the object is not actually cached or if there isn't space for future pages to be cached on this object, or 0 if successful. In the case of the multipage function, the pages for which reads are set in progress will be removed from the list and the page count decreased appropriately. If any read operations should fail, the completion function will be given an error, and will also be passed contextual information to allow the netfs to fall back to querying the server for the absent pages. For each successful read, the page completion function will also be called. Any pages subsequently tracked by the cache will have PG_fscache set upon them on return. fscache_uncache_page() must be called for such pages. If supplied by the netfs, the mark_pages_cached() cookie op will be invoked for any pages now tracked. (*) fscache_alloc_page(). Allocate internal metadata to track a netfs page and reserve disk space. This will return -ENOMEM on memory allocation error, -ERESTARTSYS on signal, -ENOBUFS if the object isn't cached, or there isn't enough space in the cache, or 0 if successful. Any pages subsequently tracked by the cache will have PG_fscache set upon them on return. fscache_uncache_page() must be called for such pages. If supplied by the netfs, the mark_pages_cached() cookie op will be invoked for any pages now tracked. (*) fscache_write_page(). Request data be stored to disk. This may only be called on pages that have been read or alloc'd by the above three functions and have not yet been uncached. This will return -ENOMEM on memory allocation error, -ERESTARTSYS on signal, -ENOBUFS if the object isn't cached, or there isn't immediately enough space in the cache, or 0 if successful. On a successful return, this operation will have queued the page for asynchronous writing to the cache. The page will be returned with PG_fscache_write set until the write completes one way or another. The caller will not be notified if the write fails due to an I/O error. If that happens, the object will become available and all pending writes will be aborted. Note that the cache may batch up page writes, and so it may take a while to get around to writing them out. The caller must assume that until PG_fscache_write is cleared the page is use by the cache. Any changes made to the page may be reflected on disk. The page may even be under DMA. (*) fscache_uncache_page(). Indicate that the cache should stop tracking a page previously read or alloc'd from the cache. If the page was alloc'd only, but unwritten, it will not appear on disk. Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Trond Myklebust Acked-by: Al Viro Tested-by: Daire Byrne --- fs/fscache/Makefile | 3 +- fs/fscache/internal.h | 21 ++ fs/fscache/page.c | 816 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fscache.h | 52 ++- 4 files changed, 886 insertions(+), 6 deletions(-) create mode 100644 fs/fscache/page.c (limited to 'fs/fscache/internal.h') diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 6f82da2aa9d1..91571b95aacc 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -9,7 +9,8 @@ fscache-y := \ main.o \ netfs.o \ object.o \ - operation.o + operation.o \ + page.o fscache-$(CONFIG_PROC_FS) += proc.o fscache-$(CONFIG_FSCACHE_STATS) += stats.o diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 014a830c8b37..e0cbd16f6dc9 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -229,6 +229,27 @@ static inline void fscache_cookie_put(struct fscache_cookie *cookie) __fscache_cookie_put(cookie); } +/* + * get an extra reference to a netfs retrieval context + */ +static inline +void *fscache_get_context(struct fscache_cookie *cookie, void *context) +{ + if (cookie->def->get_context) + cookie->def->get_context(cookie->netfs_data, context); + return context; +} + +/* + * release a reference to a netfs retrieval context + */ +static inline +void fscache_put_context(struct fscache_cookie *cookie, void *context) +{ + if (cookie->def->put_context) + cookie->def->put_context(cookie->netfs_data, context); +} + /*****************************************************************************/ /* * debug tracing diff --git a/fs/fscache/page.c b/fs/fscache/page.c new file mode 100644 index 000000000000..2568e0eb644f --- /dev/null +++ b/fs/fscache/page.c @@ -0,0 +1,816 @@ +/* Cache page management and data I/O routines + * + * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define FSCACHE_DEBUG_LEVEL PAGE +#include +#include +#include +#include +#include "internal.h" + +/* + * check to see if a page is being written to the cache + */ +bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page) +{ + void *val; + + rcu_read_lock(); + val = radix_tree_lookup(&cookie->stores, page->index); + rcu_read_unlock(); + + return val != NULL; +} +EXPORT_SYMBOL(__fscache_check_page_write); + +/* + * wait for a page to finish being written to the cache + */ +void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *page) +{ + wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0); + + wait_event(*wq, !__fscache_check_page_write(cookie, page)); +} +EXPORT_SYMBOL(__fscache_wait_on_page_write); + +/* + * note that a page has finished being written to the cache + */ +static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page) +{ + struct page *xpage; + + spin_lock(&cookie->lock); + xpage = radix_tree_delete(&cookie->stores, page->index); + spin_unlock(&cookie->lock); + ASSERT(xpage != NULL); + + wake_up_bit(&cookie->flags, 0); +} + +/* + * actually apply the changed attributes to a cache object + */ +static void fscache_attr_changed_op(struct fscache_operation *op) +{ + struct fscache_object *object = op->object; + + _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id); + + fscache_stat(&fscache_n_attr_changed_calls); + + if (fscache_object_is_active(object) && + object->cache->ops->attr_changed(object) < 0) + fscache_abort_object(object); + + _leave(""); +} + +/* + * notification that the attributes on an object have changed + */ +int __fscache_attr_changed(struct fscache_cookie *cookie) +{ + struct fscache_operation *op; + struct fscache_object *object; + + _enter("%p", cookie); + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + + fscache_stat(&fscache_n_attr_changed); + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + fscache_stat(&fscache_n_attr_changed_nomem); + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + fscache_operation_init(op, NULL); + fscache_operation_init_slow(op, fscache_attr_changed_op); + op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE); + + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (fscache_submit_exclusive_op(object, op) < 0) + goto nobufs; + spin_unlock(&cookie->lock); + fscache_stat(&fscache_n_attr_changed_ok); + fscache_put_operation(op); + _leave(" = 0"); + return 0; + +nobufs: + spin_unlock(&cookie->lock); + kfree(op); + fscache_stat(&fscache_n_attr_changed_nobufs); + _leave(" = %d", -ENOBUFS); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_attr_changed); + +/* + * handle secondary execution given to a retrieval op on behalf of the + * cache + */ +static void fscache_retrieval_work(struct work_struct *work) +{ + struct fscache_retrieval *op = + container_of(work, struct fscache_retrieval, op.fast_work); + unsigned long start; + + _enter("{OP%x}", op->op.debug_id); + + start = jiffies; + op->op.processor(&op->op); + fscache_hist(fscache_ops_histogram, start); + fscache_put_operation(&op->op); +} + +/* + * release a retrieval op reference + */ +static void fscache_release_retrieval_op(struct fscache_operation *_op) +{ + struct fscache_retrieval *op = + container_of(_op, struct fscache_retrieval, op); + + _enter("{OP%x}", op->op.debug_id); + + fscache_hist(fscache_retrieval_histogram, op->start_time); + if (op->context) + fscache_put_context(op->op.object->cookie, op->context); + + _leave(""); +} + +/* + * allocate a retrieval op + */ +static struct fscache_retrieval *fscache_alloc_retrieval( + struct address_space *mapping, + fscache_rw_complete_t end_io_func, + void *context) +{ + struct fscache_retrieval *op; + + /* allocate a retrieval operation and attempt to submit it */ + op = kzalloc(sizeof(*op), GFP_NOIO); + if (!op) { + fscache_stat(&fscache_n_retrievals_nomem); + return NULL; + } + + fscache_operation_init(&op->op, fscache_release_retrieval_op); + op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); + op->mapping = mapping; + op->end_io_func = end_io_func; + op->context = context; + op->start_time = jiffies; + INIT_WORK(&op->op.fast_work, fscache_retrieval_work); + INIT_LIST_HEAD(&op->to_do); + return op; +} + +/* + * wait for a deferred lookup to complete + */ +static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) +{ + unsigned long jif; + + _enter(""); + + if (!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) { + _leave(" = 0 [imm]"); + return 0; + } + + fscache_stat(&fscache_n_retrievals_wait); + + jif = jiffies; + if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, + fscache_wait_bit_interruptible, + TASK_INTERRUPTIBLE) != 0) { + fscache_stat(&fscache_n_retrievals_intr); + _leave(" = -ERESTARTSYS"); + return -ERESTARTSYS; + } + + ASSERT(!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)); + + smp_rmb(); + fscache_hist(fscache_retrieval_delay_histogram, jif); + _leave(" = 0 [dly]"); + return 0; +} + +/* + * read a page from the cache or allocate a block in which to store it + * - we return: + * -ENOMEM - out of memory, nothing done + * -ERESTARTSYS - interrupted + * -ENOBUFS - no backing object available in which to cache the block + * -ENODATA - no data available in the backing object for this block + * 0 - dispatched a read - it'll call end_io_func() when finished + */ +int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + struct fscache_retrieval *op; + struct fscache_object *object; + int ret; + + _enter("%p,%p,,,", cookie, page); + + fscache_stat(&fscache_n_retrievals); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERTCMP(page, !=, NULL); + + if (fscache_wait_for_deferred_lookup(cookie) < 0) + return -ERESTARTSYS; + + op = fscache_alloc_retrieval(page->mapping, end_io_func, context); + if (!op) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs_unlock; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); + + if (fscache_submit_op(object, &op->op) < 0) + goto nobufs_unlock; + spin_unlock(&cookie->lock); + + fscache_stat(&fscache_n_retrieval_ops); + + /* pin the netfs read context in case we need to do the actual netfs + * read because we've encountered a cache read failure */ + fscache_get_context(object->cookie, op->context); + + /* we wait for the operation to become active, and then process it + * *here*, in this thread, and not in the thread pool */ + if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { + _debug(">>> WT"); + fscache_stat(&fscache_n_retrieval_op_waits); + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + _debug("<<< GO"); + } + + /* ask the cache to honour the operation */ + if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { + ret = object->cache->ops->allocate_page(op, page, gfp); + if (ret == 0) + ret = -ENODATA; + } else { + ret = object->cache->ops->read_or_alloc_page(op, page, gfp); + } + + if (ret == -ENOMEM) + fscache_stat(&fscache_n_retrievals_nomem); + else if (ret == -ERESTARTSYS) + fscache_stat(&fscache_n_retrievals_intr); + else if (ret == -ENODATA) + fscache_stat(&fscache_n_retrievals_nodata); + else if (ret < 0) + fscache_stat(&fscache_n_retrievals_nobufs); + else + fscache_stat(&fscache_n_retrievals_ok); + + fscache_put_retrieval(op); + _leave(" = %d", ret); + return ret; + +nobufs_unlock: + spin_unlock(&cookie->lock); + kfree(op); +nobufs: + fscache_stat(&fscache_n_retrievals_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_read_or_alloc_page); + +/* + * read a list of page from the cache or allocate a block in which to store + * them + * - we return: + * -ENOMEM - out of memory, some pages may be being read + * -ERESTARTSYS - interrupted, some pages may be being read + * -ENOBUFS - no backing object or space available in which to cache any + * pages not being read + * -ENODATA - no data available in the backing object for some or all of + * the pages + * 0 - dispatched a read on all pages + * + * end_io_func() will be called for each page read from the cache as it is + * finishes being read + * + * any pages for which a read is dispatched will be removed from pages and + * nr_pages + */ +int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + fscache_pages_retrieval_func_t func; + struct fscache_retrieval *op; + struct fscache_object *object; + int ret; + + _enter("%p,,%d,,,", cookie, *nr_pages); + + fscache_stat(&fscache_n_retrievals); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERTCMP(*nr_pages, >, 0); + ASSERT(!list_empty(pages)); + + if (fscache_wait_for_deferred_lookup(cookie) < 0) + return -ERESTARTSYS; + + op = fscache_alloc_retrieval(mapping, end_io_func, context); + if (!op) + return -ENOMEM; + + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs_unlock; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (fscache_submit_op(object, &op->op) < 0) + goto nobufs_unlock; + spin_unlock(&cookie->lock); + + fscache_stat(&fscache_n_retrieval_ops); + + /* pin the netfs read context in case we need to do the actual netfs + * read because we've encountered a cache read failure */ + fscache_get_context(object->cookie, op->context); + + /* we wait for the operation to become active, and then process it + * *here*, in this thread, and not in the thread pool */ + if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { + _debug(">>> WT"); + fscache_stat(&fscache_n_retrieval_op_waits); + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + _debug("<<< GO"); + } + + /* ask the cache to honour the operation */ + if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) + func = object->cache->ops->allocate_pages; + else + func = object->cache->ops->read_or_alloc_pages; + ret = func(op, pages, nr_pages, gfp); + + if (ret == -ENOMEM) + fscache_stat(&fscache_n_retrievals_nomem); + else if (ret == -ERESTARTSYS) + fscache_stat(&fscache_n_retrievals_intr); + else if (ret == -ENODATA) + fscache_stat(&fscache_n_retrievals_nodata); + else if (ret < 0) + fscache_stat(&fscache_n_retrievals_nobufs); + else + fscache_stat(&fscache_n_retrievals_ok); + + fscache_put_retrieval(op); + _leave(" = %d", ret); + return ret; + +nobufs_unlock: + spin_unlock(&cookie->lock); + kfree(op); +nobufs: + fscache_stat(&fscache_n_retrievals_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_read_or_alloc_pages); + +/* + * allocate a block in the cache on which to store a page + * - we return: + * -ENOMEM - out of memory, nothing done + * -ERESTARTSYS - interrupted + * -ENOBUFS - no backing object available in which to cache the block + * 0 - block allocated + */ +int __fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + struct fscache_retrieval *op; + struct fscache_object *object; + int ret; + + _enter("%p,%p,,,", cookie, page); + + fscache_stat(&fscache_n_allocs); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERTCMP(page, !=, NULL); + + if (fscache_wait_for_deferred_lookup(cookie) < 0) + return -ERESTARTSYS; + + op = fscache_alloc_retrieval(page->mapping, NULL, NULL); + if (!op) + return -ENOMEM; + + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs_unlock; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (fscache_submit_op(object, &op->op) < 0) + goto nobufs_unlock; + spin_unlock(&cookie->lock); + + fscache_stat(&fscache_n_alloc_ops); + + if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { + _debug(">>> WT"); + fscache_stat(&fscache_n_alloc_op_waits); + wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + _debug("<<< GO"); + } + + /* ask the cache to honour the operation */ + ret = object->cache->ops->allocate_page(op, page, gfp); + + if (ret < 0) + fscache_stat(&fscache_n_allocs_nobufs); + else + fscache_stat(&fscache_n_allocs_ok); + + fscache_put_retrieval(op); + _leave(" = %d", ret); + return ret; + +nobufs_unlock: + spin_unlock(&cookie->lock); + kfree(op); +nobufs: + fscache_stat(&fscache_n_allocs_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} +EXPORT_SYMBOL(__fscache_alloc_page); + +/* + * release a write op reference + */ +static void fscache_release_write_op(struct fscache_operation *_op) +{ + _enter("{OP%x}", _op->debug_id); +} + +/* + * perform the background storage of a page into the cache + */ +static void fscache_write_op(struct fscache_operation *_op) +{ + struct fscache_storage *op = + container_of(_op, struct fscache_storage, op); + struct fscache_object *object = op->op.object; + struct fscache_cookie *cookie = object->cookie; + struct page *page; + unsigned n; + void *results[1]; + int ret; + + _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); + + spin_lock(&cookie->lock); + spin_lock(&object->lock); + + if (!fscache_object_is_active(object)) { + spin_unlock(&object->lock); + spin_unlock(&cookie->lock); + _leave(""); + return; + } + + fscache_stat(&fscache_n_store_calls); + + /* find a page to store */ + page = NULL; + n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1, + FSCACHE_COOKIE_PENDING_TAG); + if (n != 1) + goto superseded; + page = results[0]; + _debug("gang %d [%lx]", n, page->index); + if (page->index > op->store_limit) + goto superseded; + + radix_tree_tag_clear(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG); + + spin_unlock(&object->lock); + spin_unlock(&cookie->lock); + + if (page) { + ret = object->cache->ops->write_page(op, page); + fscache_end_page_write(cookie, page); + page_cache_release(page); + if (ret < 0) + fscache_abort_object(object); + else + fscache_enqueue_operation(&op->op); + } + + _leave(""); + return; + +superseded: + /* this writer is going away and there aren't any more things to + * write */ + _debug("cease"); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + spin_unlock(&object->lock); + spin_unlock(&cookie->lock); + _leave(""); +} + +/* + * request a page be stored in the cache + * - returns: + * -ENOMEM - out of memory, nothing done + * -ENOBUFS - no backing object available in which to cache the page + * 0 - dispatched a write - it'll call end_io_func() when finished + * + * if the cookie still has a backing object at this point, that object can be + * in one of a few states with respect to storage processing: + * + * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is + * set) + * + * (a) no writes yet (set FSCACHE_COOKIE_PENDING_FILL and queue deferred + * fill op) + * + * (b) writes deferred till post-creation (mark page for writing and + * return immediately) + * + * (2) negative lookup, object created, initial fill being made from netfs + * (FSCACHE_COOKIE_INITIAL_FILL is set) + * + * (a) fill point not yet reached this page (mark page for writing and + * return) + * + * (b) fill point passed this page (queue op to store this page) + * + * (3) object extant (queue op to store this page) + * + * any other state is invalid + */ +int __fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + struct fscache_storage *op; + struct fscache_object *object; + int ret; + + _enter("%p,%x,", cookie, (u32) page->flags); + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERT(PageFsCache(page)); + + fscache_stat(&fscache_n_stores); + + op = kzalloc(sizeof(*op), GFP_NOIO); + if (!op) + goto nomem; + + fscache_operation_init(&op->op, fscache_release_write_op); + fscache_operation_init_slow(&op->op, fscache_write_op); + op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); + + ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); + if (ret < 0) + goto nomem_free; + + ret = -ENOBUFS; + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) + goto nobufs; + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto nobufs; + + /* add the page to the pending-storage radix tree on the backing + * object */ + spin_lock(&object->lock); + + _debug("store limit %llx", (unsigned long long) object->store_limit); + + ret = radix_tree_insert(&cookie->stores, page->index, page); + if (ret < 0) { + if (ret == -EEXIST) + goto already_queued; + _debug("insert failed %d", ret); + goto nobufs_unlock_obj; + } + + radix_tree_tag_set(&cookie->stores, page->index, + FSCACHE_COOKIE_PENDING_TAG); + page_cache_get(page); + + /* we only want one writer at a time, but we do need to queue new + * writers after exclusive ops */ + if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags)) + goto already_pending; + + spin_unlock(&object->lock); + + op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); + op->store_limit = object->store_limit; + + if (fscache_submit_op(object, &op->op) < 0) + goto submit_failed; + + spin_unlock(&cookie->lock); + radix_tree_preload_end(); + fscache_stat(&fscache_n_store_ops); + fscache_stat(&fscache_n_stores_ok); + + /* the slow work queue now carries its own ref on the object */ + fscache_put_operation(&op->op); + _leave(" = 0"); + return 0; + +already_queued: + fscache_stat(&fscache_n_stores_again); +already_pending: + spin_unlock(&object->lock); + spin_unlock(&cookie->lock); + radix_tree_preload_end(); + kfree(op); + fscache_stat(&fscache_n_stores_ok); + _leave(" = 0"); + return 0; + +submit_failed: + radix_tree_delete(&cookie->stores, page->index); + page_cache_release(page); + ret = -ENOBUFS; + goto nobufs; + +nobufs_unlock_obj: + spin_unlock(&object->lock); +nobufs: + spin_unlock(&cookie->lock); + radix_tree_preload_end(); + kfree(op); + fscache_stat(&fscache_n_stores_nobufs); + _leave(" = -ENOBUFS"); + return -ENOBUFS; + +nomem_free: + kfree(op); +nomem: + fscache_stat(&fscache_n_stores_oom); + _leave(" = -ENOMEM"); + return -ENOMEM; +} +EXPORT_SYMBOL(__fscache_write_page); + +/* + * remove a page from the cache + */ +void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) +{ + struct fscache_object *object; + + _enter(",%p", page); + + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); + ASSERTCMP(page, !=, NULL); + + fscache_stat(&fscache_n_uncaches); + + /* cache withdrawal may beat us to it */ + if (!PageFsCache(page)) + goto done; + + /* get the object */ + spin_lock(&cookie->lock); + + if (hlist_empty(&cookie->backing_objects)) { + ClearPageFsCache(page); + goto done_unlock; + } + + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + /* there might now be stuff on disk we could read */ + clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + /* only invoke the cache backend if we managed to mark the page + * uncached here; this deals with synchronisation vs withdrawal */ + if (TestClearPageFsCache(page) && + object->cache->ops->uncache_page) { + /* the cache backend releases the cookie lock */ + object->cache->ops->uncache_page(object, page); + goto done; + } + +done_unlock: + spin_unlock(&cookie->lock); +done: + _leave(""); +} +EXPORT_SYMBOL(__fscache_uncache_page); + +/** + * fscache_mark_pages_cached - Mark pages as being cached + * @op: The retrieval op pages are being marked for + * @pagevec: The pages to be marked + * + * Mark a bunch of netfs pages as being cached. After this is called, + * the netfs must call fscache_uncache_page() to remove the mark. + */ +void fscache_mark_pages_cached(struct fscache_retrieval *op, + struct pagevec *pagevec) +{ + struct fscache_cookie *cookie = op->op.object->cookie; + unsigned long loop; + +#ifdef CONFIG_FSCACHE_STATS + atomic_add(pagevec->nr, &fscache_n_marks); +#endif + + for (loop = 0; loop < pagevec->nr; loop++) { + struct page *page = pagevec->pages[loop]; + + _debug("- mark %p{%lx}", page, page->index); + if (TestSetPageFsCache(page)) { + static bool once_only; + if (!once_only) { + once_only = true; + printk(KERN_WARNING "FS-Cache:" + " Cookie type %s marked page %lx" + " multiple times\n", + cookie->def->name, page->index); + } + } + } + + if (cookie->def->mark_pages_cached) + cookie->def->mark_pages_cached(cookie->netfs_data, + op->mapping, pagevec); + pagevec_reinit(pagevec); +} +EXPORT_SYMBOL(fscache_mark_pages_cached); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 245b48646efa..6d8ee466e0a0 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -184,6 +184,24 @@ extern struct fscache_cookie *__fscache_acquire_cookie( void *); extern void __fscache_relinquish_cookie(struct fscache_cookie *, int); extern void __fscache_update_cookie(struct fscache_cookie *); +extern int __fscache_attr_changed(struct fscache_cookie *); +extern int __fscache_read_or_alloc_page(struct fscache_cookie *, + struct page *, + fscache_rw_complete_t, + void *, + gfp_t); +extern int __fscache_read_or_alloc_pages(struct fscache_cookie *, + struct address_space *, + struct list_head *, + unsigned *, + fscache_rw_complete_t, + void *, + gfp_t); +extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t); +extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t); +extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); +extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); +extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); /** * fscache_register_netfs - Register a filesystem as desiring caching services @@ -361,7 +379,10 @@ void fscache_unpin_cookie(struct fscache_cookie *cookie) static inline int fscache_attr_changed(struct fscache_cookie *cookie) { - return -ENOBUFS; + if (fscache_cookie_valid(cookie)) + return __fscache_attr_changed(cookie); + else + return -ENOBUFS; } /** @@ -418,7 +439,11 @@ int fscache_read_or_alloc_page(struct fscache_cookie *cookie, void *context, gfp_t gfp) { - return -ENOBUFS; + if (fscache_cookie_valid(cookie)) + return __fscache_read_or_alloc_page(cookie, page, end_io_func, + context, gfp); + else + return -ENOBUFS; } /** @@ -464,7 +489,12 @@ int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, void *context, gfp_t gfp) { - return -ENOBUFS; + if (fscache_cookie_valid(cookie)) + return __fscache_read_or_alloc_pages(cookie, mapping, pages, + nr_pages, end_io_func, + context, gfp); + else + return -ENOBUFS; } /** @@ -490,7 +520,10 @@ int fscache_alloc_page(struct fscache_cookie *cookie, struct page *page, gfp_t gfp) { - return -ENOBUFS; + if (fscache_cookie_valid(cookie)) + return __fscache_alloc_page(cookie, page, gfp); + else + return -ENOBUFS; } /** @@ -516,7 +549,10 @@ int fscache_write_page(struct fscache_cookie *cookie, struct page *page, gfp_t gfp) { - return -ENOBUFS; + if (fscache_cookie_valid(cookie)) + return __fscache_write_page(cookie, page, gfp); + else + return -ENOBUFS; } /** @@ -537,6 +573,8 @@ static inline void fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) { + if (fscache_cookie_valid(cookie)) + __fscache_uncache_page(cookie, page); } /** @@ -553,6 +591,8 @@ static inline bool fscache_check_page_write(struct fscache_cookie *cookie, struct page *page) { + if (fscache_cookie_valid(cookie)) + return __fscache_check_page_write(cookie, page); return false; } @@ -571,6 +611,8 @@ static inline void fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *page) { + if (fscache_cookie_valid(cookie)) + __fscache_wait_on_page_write(cookie, page); } #endif /* _LINUX_FSCACHE_H */ -- cgit v1.2.3-59-g8ed1b