aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/Locking43
-rw-r--r--Documentation/filesystems/f2fs.txt9
-rw-r--r--Documentation/filesystems/jfs.txt2
-rw-r--r--Documentation/filesystems/porting6
-rw-r--r--Documentation/filesystems/proc.txt7
-rw-r--r--Documentation/filesystems/qnx6.txt2
-rw-r--r--Documentation/filesystems/vfat.txt2
-rw-r--r--Documentation/filesystems/vfs.txt73
8 files changed, 92 insertions, 52 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 0706d32a61e6..fe7afe225381 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -11,10 +11,8 @@ be able to use diff(1).
prototypes:
int (*d_revalidate)(struct dentry *, unsigned int);
int (*d_weak_revalidate)(struct dentry *, unsigned int);
- int (*d_hash)(const struct dentry *, const struct inode *,
- struct qstr *);
- int (*d_compare)(const struct dentry *, const struct inode *,
- const struct dentry *, const struct inode *,
+ int (*d_hash)(const struct dentry *, struct qstr *);
+ int (*d_compare)(const struct dentry *, const struct dentry *,
unsigned int, const char *, const struct qstr *);
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
@@ -66,6 +64,7 @@ prototypes:
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
umode_t create_mode, int *opened);
+ int (*tmpfile) (struct inode *, struct dentry *, umode_t);
locking rules:
all may block
@@ -93,6 +92,7 @@ removexattr: yes
fiemap: no
update_time: no
atomic_open: yes
+tmpfile: no
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
victim.
@@ -189,7 +189,7 @@ prototypes:
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
- int (*invalidatepage) (struct page *, unsigned long);
+ void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
@@ -310,8 +310,8 @@ filesystems and by the swapper. The latter will eventually go away. Please,
keep it that way and don't breed new callers.
->invalidatepage() is called when the filesystem must attempt to drop
-some or all of the buffers from the page when it is being truncated. It
-returns zero on success. If ->invalidatepage is zero, the kernel uses
+some or all of the buffers from the page when it is being truncated. It
+returns zero on success. If ->invalidatepage is zero, the kernel uses
block_invalidatepage() instead.
->releasepage() is called when the kernel is about to try to drop the
@@ -344,25 +344,38 @@ prototypes:
locking rules:
- file_lock_lock may block
+ inode->i_lock may block
fl_copy_lock: yes no
fl_release_private: maybe no
----------------------- lock_manager_operations ---------------------------
prototypes:
int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
+ unsigned long (*lm_owner_key)(struct file_lock *);
void (*lm_notify)(struct file_lock *); /* unblock callback */
int (*lm_grant)(struct file_lock *, struct file_lock *, int);
void (*lm_break)(struct file_lock *); /* break_lease callback */
int (*lm_change)(struct file_lock **, int);
locking rules:
- file_lock_lock may block
-lm_compare_owner: yes no
-lm_notify: yes no
-lm_grant: no no
-lm_break: yes no
-lm_change yes no
+
+ inode->i_lock blocked_lock_lock may block
+lm_compare_owner: yes[1] maybe no
+lm_owner_key yes[1] yes no
+lm_notify: yes yes no
+lm_grant: no no no
+lm_break: yes no no
+lm_change yes no no
+
+[1]: ->lm_compare_owner and ->lm_owner_key are generally called with
+*an* inode->i_lock held. It may not be the i_lock of the inode
+associated with either file_lock argument! This is the case with deadlock
+detection, since the code has to chase down the owners of locks that may
+be entirely unrelated to the one on which the lock is being acquired.
+For deadlock detection however, the blocked_lock_lock is also held. The
+fact that these locks are held ensures that the file_locks do not
+disappear out from under you while doing the comparison or generating an
+owner key.
--------------------------- buffer_head -----------------------------------
prototypes:
@@ -414,7 +427,7 @@ prototypes:
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
- int (*readdir) (struct file *, void *, filldir_t);
+ int (*iterate) (struct file *, struct dir_context *);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index bd3c56c67380..b91e2f26b672 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -98,8 +98,13 @@ Cleaning Overhead
MOUNT OPTIONS
================================================================================
-background_gc_off Turn off cleaning operations, namely garbage collection,
- triggered in background when I/O subsystem is idle.
+background_gc=%s Turn on/off cleaning operations, namely garbage
+ collection, triggered in background when I/O subsystem is
+ idle. If background_gc=on, it will turn on the garbage
+ collection and if background_gc=off, garbage collection
+ will be truned off.
+ Default value for this option is on. So garbage
+ collection is on by default.
disable_roll_forward Disable the roll-forward recovery routine
discard Issue discard/TRIM commands when a segment is cleaned.
no_heap Disable heap-style segment allocation which finds free
diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt
index f7433355394a..41fd757997b3 100644
--- a/Documentation/filesystems/jfs.txt
+++ b/Documentation/filesystems/jfs.txt
@@ -42,7 +42,7 @@ nodiscard(*) block device when blocks are freed. This is useful for SSD
devices and sparse/thinly-provisioned LUNs. The FITRIM ioctl
command is also available together with the nodiscard option.
The value of minlen specifies the minimum blockcount, when
- a TRIM command to the block device is considered usefull.
+ a TRIM command to the block device is considered useful.
When no value is given to the discard option, it defaults to
64 blocks, which means 256KiB in JFS.
The minlen value of discard overrides the minlen value given
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 4db22f6491e0..206a1bdc7321 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -445,3 +445,9 @@ object doesn't exist. It's remote/distributed ones that might care...
[mandatory]
FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate()
in your dentry operations instead.
+--
+[mandatory]
+ vfs_readdir() is gone; switch to iterate_dir() instead
+--
+[mandatory]
+ ->readdir() is gone now; switch to ->iterate()
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index fd8d0d594fc7..fcc22c982a25 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -473,7 +473,8 @@ This file is only present if the CONFIG_MMU kernel configuration option is
enabled.
The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
-bits on both physical and virtual pages associated with a process.
+bits on both physical and virtual pages associated with a process, and the
+soft-dirty bit on pte (see Documentation/vm/soft-dirty.txt for details).
To clear the bits for all the pages associated with the process
> echo 1 > /proc/PID/clear_refs
@@ -482,6 +483,10 @@ To clear the bits for the anonymous pages associated with the process
To clear the bits for the file mapped pages associated with the process
> echo 3 > /proc/PID/clear_refs
+
+To clear the soft-dirty bit
+ > echo 4 > /proc/PID/clear_refs
+
Any other value written to /proc/PID/clear_refs will have no effect.
The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.txt
index e59f2f09f56e..99e90184a72f 100644
--- a/Documentation/filesystems/qnx6.txt
+++ b/Documentation/filesystems/qnx6.txt
@@ -148,7 +148,7 @@ smaller than addressing space in the bitmap.
Bitmap system area
------------------
-The bitmap itself is devided into three parts.
+The bitmap itself is divided into three parts.
First the system area, that is split into two halfs.
Then userspace.
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 4a93e98b290a..aa1f459fa6cf 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -307,7 +307,7 @@ the following:
<proceeding files...>
<slot #3, id = 0x43, characters = "h is long">
- <slot #2, id = 0x02, characters = "xtension whic">
+ <slot #2, id = 0x02, characters = "xtension which">
<slot #1, id = 0x01, characters = "My Big File.E">
<directory entry, name = "MYBIGFIL.EXT">
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index bc4b06b3160a..f93a88250a44 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -360,6 +360,8 @@ struct inode_operations {
int (*removexattr) (struct dentry *, const char *);
void (*update_time)(struct inode *, struct timespec *, int);
int (*atomic_open)(struct inode *, struct dentry *,
+ int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+} ____cacheline_aligned;
struct file *, unsigned open_flag,
umode_t create_mode, int *opened);
};
@@ -472,6 +474,9 @@ otherwise noted.
component is negative or needs lookup. Cached positive dentries are
still handled by f_op->open().
+ tmpfile: called in the end of O_TMPFILE open(). Optional, equivalent to
+ atomically creating, opening and unlinking a file in given directory.
+
The Address Space Object
========================
@@ -549,12 +554,11 @@ struct address_space_operations
-------------------------------
This describes how the VFS can manipulate mapping of a file to page cache in
-your filesystem. As of kernel 2.6.22, the following members are defined:
+your filesystem. The following members are defined:
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
- int (*sync_page)(struct page *);
int (*writepages)(struct address_space *, struct writeback_control *);
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
@@ -566,7 +570,7 @@ struct address_space_operations {
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
- int (*invalidatepage) (struct page *, unsigned long);
+ void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
@@ -576,6 +580,9 @@ struct address_space_operations {
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
int (*launder_page) (struct page *);
+ int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+ unsigned long);
+ void (*is_dirty_writeback) (struct page *, bool *, bool *);
int (*error_remove_page) (struct mapping *mapping, struct page *page);
int (*swap_activate)(struct file *);
int (*swap_deactivate)(struct file *);
@@ -607,13 +614,6 @@ struct address_space_operations {
In this case, the page will be relocated, relocked and if
that all succeeds, ->readpage will be called again.
- sync_page: called by the VM to notify the backing store to perform all
- queued I/O operations for a page. I/O operations for other pages
- associated with this address_space object may also be performed.
-
- This function is optional and is called only for pages with
- PG_Writeback set while waiting for the writeback to complete.
-
writepages: called by the VM to write out pages associated with the
address_space object. If wbc->sync_mode is WBC_SYNC_ALL, then
the writeback_control will specify a range of pages that must be
@@ -685,14 +685,14 @@ struct address_space_operations {
invalidatepage: If a page has PagePrivate set, then invalidatepage
will be called when part or all of the page is to be removed
from the address space. This generally corresponds to either a
- truncation or a complete invalidation of the address space
- (in the latter case 'offset' will always be 0).
- Any private data associated with the page should be updated
- to reflect this truncation. If offset is 0, then
- the private data should be released, because the page
- must be able to be completely discarded. This may be done by
- calling the ->releasepage function, but in this case the
- release MUST succeed.
+ truncation, punch hole or a complete invalidation of the address
+ space (in the latter case 'offset' will always be 0 and 'length'
+ will be PAGE_CACHE_SIZE). Any private data associated with the page
+ should be updated to reflect this truncation. If offset is 0 and
+ length is PAGE_CACHE_SIZE, then the private data should be released,
+ because the page must be able to be completely discarded. This may
+ be done by calling the ->releasepage function, but in this case the
+ release MUST succeed.
releasepage: releasepage is called on PagePrivate pages to indicate
that the page should be freed if possible. ->releasepage
@@ -742,6 +742,20 @@ struct address_space_operations {
prevent redirtying the page, it is kept locked during the whole
operation.
+ is_partially_uptodate: Called by the VM when reading a file through the
+ pagecache when the underlying blocksize != pagesize. If the required
+ block is up to date then the read can complete without needing the IO
+ to bring the whole page up to date.
+
+ is_dirty_writeback: Called by the VM when attempting to reclaim a page.
+ The VM uses dirty and writeback information to determine if it needs
+ to stall to allow flushers a chance to complete some IO. Ordinarily
+ it can use PageDirty and PageWriteback but some filesystems have
+ more complex state (unstable pages in NFS prevent reclaim) or
+ do not set those flags due to locking problems (jbd). This callback
+ allows a filesystem to indicate to the VM if a page should be
+ treated as dirty or writeback for the purposes of stalling.
+
error_remove_page: normally set to generic_error_remove_page if truncation
is ok for this address space. Used for memory failure handling.
Setting this implies you deal with pages going away under you,
@@ -777,7 +791,7 @@ struct file_operations {
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
- int (*readdir) (struct file *, void *, filldir_t);
+ int (*iterate) (struct file *, struct dir_context *);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
@@ -815,7 +829,7 @@ otherwise noted.
aio_write: called by io_submit(2) and other asynchronous I/O operations
- readdir: called when the VFS needs to read the directory contents
+ iterate: called when the VFS needs to read the directory contents
poll: called by the VFS when a process wants to check if there is
activity on this file and (optionally) go to sleep until there
@@ -901,10 +915,8 @@ defined:
struct dentry_operations {
int (*d_revalidate)(struct dentry *, unsigned int);
int (*d_weak_revalidate)(struct dentry *, unsigned int);
- int (*d_hash)(const struct dentry *, const struct inode *,
- struct qstr *);
- int (*d_compare)(const struct dentry *, const struct inode *,
- const struct dentry *, const struct inode *,
+ int (*d_hash)(const struct dentry *, struct qstr *);
+ int (*d_compare)(const struct dentry *, const struct dentry *,
unsigned int, const char *, const struct qstr *);
int (*d_delete)(const struct dentry *);
void (*d_release)(struct dentry *);
@@ -949,25 +961,24 @@ struct dentry_operations {
d_hash: called when the VFS adds a dentry to the hash table. The first
dentry passed to d_hash is the parent directory that the name is
- to be hashed into. The inode is the dentry's inode.
+ to be hashed into.
Same locking and synchronisation rules as d_compare regarding
what is safe to dereference etc.
d_compare: called to compare a dentry name with a given name. The first
dentry is the parent of the dentry to be compared, the second is
- the parent's inode, then the dentry and inode (may be NULL) of the
- child dentry. len and name string are properties of the dentry to be
- compared. qstr is the name to compare it with.
+ the child dentry. len and name string are properties of the dentry
+ to be compared. qstr is the name to compare it with.
Must be constant and idempotent, and should not take locks if
- possible, and should not or store into the dentry or inodes.
- Should not dereference pointers outside the dentry or inodes without
+ possible, and should not or store into the dentry.
+ Should not dereference pointers outside the dentry without
lots of care (eg. d_parent, d_inode, d_name should not be used).
However, our vfsmount is pinned, and RCU held, so the dentries and
inodes won't disappear, neither will our sb or filesystem module.
- ->i_sb and ->d_sb may be used.
+ ->d_sb may be used.
It is a tricky calling convention because it needs to be called under
"rcu-walk", ie. without any locks or references on things.