8 files changed, 92 insertions, 52 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 0706d32a61e6..fe7afe225381 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -11,10 +11,8 @@ be able to use diff(1).
 prototypes:
 	int (*d_revalidate)(struct dentry *, unsigned int);
 	int (*d_weak_revalidate)(struct dentry *, unsigned int);
-	int (*d_hash)(const struct dentry *, const struct inode *,
-			struct qstr *);
-	int (*d_compare)(const struct dentry *, const struct inode *,
-			const struct dentry *, const struct inode *,
+	int (*d_hash)(const struct dentry *, struct qstr *);
+	int (*d_compare)(const struct dentry *, const struct dentry *,
 			unsigned int, const char *, const struct qstr *);
 	int (*d_delete)(struct dentry *);
 	void (*d_release)(struct dentry *);
@@ -66,6 +64,7 @@ prototypes:
 	int (*atomic_open)(struct inode *, struct dentry *,
 				struct file *, unsigned open_flag,
 				umode_t create_mode, int *opened);
+	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 
 locking rules:
 	all may block
@@ -93,6 +92,7 @@ removexattr:	yes
 fiemap:		no
 update_time:	no
 atomic_open:	yes
+tmpfile:	no
 
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
@@ -189,7 +189,7 @@ prototypes:
 				loff_t pos, unsigned len, unsigned copied,
 				struct page *page, void *fsdata);
 	sector_t (*bmap)(struct address_space *, sector_t);
-	int (*invalidatepage) (struct page *, unsigned long);
+	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
 	int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
@@ -310,8 +310,8 @@ filesystems and by the swapper. The latter will eventually go away.  Please,
 keep it that way and don't breed new callers.
 
 	->invalidatepage() is called when the filesystem must attempt to drop
-some or all of the buffers from the page when it is being truncated.  It
-returns zero on success.  If ->invalidatepage is zero, the kernel uses
+some or all of the buffers from the page when it is being truncated. It
+returns zero on success. If ->invalidatepage is zero, the kernel uses
 block_invalidatepage() instead.
 
 	->releasepage() is called when the kernel is about to try to drop the
@@ -344,25 +344,38 @@ prototypes:
 
 
 locking rules:
-			file_lock_lock	may block
+			inode->i_lock	may block
 fl_copy_lock:		yes		no
 fl_release_private:	maybe		no
 
 ----------------------- lock_manager_operations ---------------------------
 prototypes:
 	int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
+	unsigned long (*lm_owner_key)(struct file_lock *);
 	void (*lm_notify)(struct file_lock *);  /* unblock callback */
 	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
 	void (*lm_break)(struct file_lock *); /* break_lease callback */
 	int (*lm_change)(struct file_lock **, int);
 
 locking rules:
-			file_lock_lock	may block
-lm_compare_owner:	yes		no
-lm_notify:		yes		no
-lm_grant:		no		no
-lm_break:		yes		no
-lm_change		yes		no
+
+			inode->i_lock	blocked_lock_lock	may block
+lm_compare_owner:	yes[1]		maybe			no
+lm_owner_key		yes[1]		yes			no
+lm_notify:		yes		yes			no
+lm_grant:		no		no			no
+lm_break:		yes		no			no
+lm_change		yes		no			no
+
+[1]:	->lm_compare_owner and ->lm_owner_key are generally called with
+*an* inode->i_lock held. It may not be the i_lock of the inode
+associated with either file_lock argument! This is the case with deadlock
+detection, since the code has to chase down the owners of locks that may
+be entirely unrelated to the one on which the lock is being acquired.
+For deadlock detection however, the blocked_lock_lock is also held. The
+fact that these locks are held ensures that the file_locks do not
+disappear out from under you while doing the comparison or generating an
+owner key.
 
 --------------------------- buffer_head -----------------------------------
 prototypes:
@@ -414,7 +427,7 @@ prototypes:
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
-	int (*readdir) (struct file *, void *, filldir_t);
+	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index bd3c56c67380..b91e2f26b672 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -98,8 +98,13 @@ Cleaning Overhead
 MOUNT OPTIONS
 ================================================================================
 
-background_gc_off      Turn off cleaning operations, namely garbage collection,
-		       triggered in background when I/O subsystem is idle.
+background_gc=%s       Turn on/off cleaning operations, namely garbage
+                       collection, triggered in background when I/O subsystem is
+                       idle. If background_gc=on, it will turn on the garbage
+                       collection and if background_gc=off, garbage collection
+                       will be truned off.
+                       Default value for this option is on. So garbage
+                       collection is on by default.
 disable_roll_forward   Disable the roll-forward recovery routine
 discard                Issue discard/TRIM commands when a segment is cleaned.
 no_heap                Disable heap-style segment allocation which finds free
diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt
index f7433355394a..41fd757997b3 100644
--- a/Documentation/filesystems/jfs.txt
+++ b/Documentation/filesystems/jfs.txt
@@ -42,7 +42,7 @@ nodiscard(*)	block device when blocks are freed. This is useful for SSD
 		devices and sparse/thinly-provisioned LUNs.  The FITRIM ioctl
 		command is also available together with the nodiscard option.
 		The value of minlen specifies the minimum blockcount, when
-		a TRIM command to the block device is considered usefull.
+		a TRIM command to the block device is considered useful.
 		When no value is given to the discard option, it defaults to
 		64 blocks, which means 256KiB in JFS.
 		The minlen value of discard overrides the minlen value given
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 4db22f6491e0..206a1bdc7321 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -445,3 +445,9 @@ object doesn't exist.  It's remote/distributed ones that might care...
 [mandatory]
 	FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate()
 in your dentry operations instead.
+--
+[mandatory]
+	vfs_readdir() is gone; switch to iterate_dir() instead
+--
+[mandatory]
+	->readdir() is gone now; switch to ->iterate()
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index fd8d0d594fc7..fcc22c982a25 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -473,7 +473,8 @@ This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.
 
 The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
-bits on both physical and virtual pages associated with a process.
+bits on both physical and virtual pages associated with a process, and the
+soft-dirty bit on pte (see Documentation/vm/soft-dirty.txt for details).
 To clear the bits for all the pages associated with the process
     > echo 1 > /proc/PID/clear_refs
 
@@ -482,6 +483,10 @@ To clear the bits for the anonymous pages associated with the process
 
 To clear the bits for the file mapped pages associated with the process
     > echo 3 > /proc/PID/clear_refs
+
+To clear the soft-dirty bit
+    > echo 4 > /proc/PID/clear_refs
+
 Any other value written to /proc/PID/clear_refs will have no effect.
 
 The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.txt
index e59f2f09f56e..99e90184a72f 100644
--- a/Documentation/filesystems/qnx6.txt
+++ b/Documentation/filesystems/qnx6.txt
@@ -148,7 +148,7 @@ smaller than addressing space in the bitmap.
 Bitmap system area
 ------------------
 
-The bitmap itself is devided into three parts.
+The bitmap itself is divided into three parts.
 First the system area, that is split into two halfs.
 Then userspace.
 
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 4a93e98b290a..aa1f459fa6cf 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -307,7 +307,7 @@ the following:
 
                 <proceeding files...>
                 <slot #3, id = 0x43, characters = "h is long">
-                <slot #2, id = 0x02, characters = "xtension whic">
+                <slot #2, id = 0x02, characters = "xtension which">
                 <slot #1, id = 0x01, characters = "My Big File.E">
                 <directory entry, name = "MYBIGFIL.EXT">
 
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index bc4b06b3160a..f93a88250a44 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -360,6 +360,8 @@ struct inode_operations {
 	int (*removexattr) (struct dentry *, const char *);
 	void (*update_time)(struct inode *, struct timespec *, int);
 	int (*atomic_open)(struct inode *, struct dentry *,
+	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+} ____cacheline_aligned;
 				struct file *, unsigned open_flag,
 				umode_t create_mode, int *opened);
 };
@@ -472,6 +474,9 @@ otherwise noted.
   	component is negative or needs lookup.  Cached positive dentries are
   	still handled by f_op->open().
 
+  tmpfile: called in the end of O_TMPFILE open().  Optional, equivalent to
+	atomically creating, opening and unlinking a file in given directory.
+
 The Address Space Object
 ========================
 
@@ -549,12 +554,11 @@ struct address_space_operations
 -------------------------------
 
 This describes how the VFS can manipulate mapping of a file to page cache in
-your filesystem. As of kernel 2.6.22, the following members are defined:
+your filesystem. The following members are defined:
 
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*readpage)(struct file *, struct page *);
-	int (*sync_page)(struct page *);
 	int (*writepages)(struct address_space *, struct writeback_control *);
 	int (*set_page_dirty)(struct page *page);
 	int (*readpages)(struct file *filp, struct address_space *mapping,
@@ -566,7 +570,7 @@ struct address_space_operations {
 				loff_t pos, unsigned len, unsigned copied,
 				struct page *page, void *fsdata);
 	sector_t (*bmap)(struct address_space *, sector_t);
-	int (*invalidatepage) (struct page *, unsigned long);
+	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
@@ -576,6 +580,9 @@ struct address_space_operations {
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct page *, struct page *);
 	int (*launder_page) (struct page *);
+	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+					unsigned long);
+	void (*is_dirty_writeback) (struct page *, bool *, bool *);
 	int (*error_remove_page) (struct mapping *mapping, struct page *page);
 	int (*swap_activate)(struct file *);
 	int (*swap_deactivate)(struct file *);
@@ -607,13 +614,6 @@ struct address_space_operations {
        In this case, the page will be relocated, relocked and if
        that all succeeds, ->readpage will be called again.
 
-  sync_page: called by the VM to notify the backing store to perform all
-  	queued I/O operations for a page. I/O operations for other pages
-	associated with this address_space object may also be performed.
-
-	This function is optional and is called only for pages with
-  	PG_Writeback set while waiting for the writeback to complete.
-
   writepages: called by the VM to write out pages associated with the
   	address_space object.  If wbc->sync_mode is WBC_SYNC_ALL, then
   	the writeback_control will specify a range of pages that must be
@@ -685,14 +685,14 @@ struct address_space_operations {
   invalidatepage: If a page has PagePrivate set, then invalidatepage
         will be called when part or all of the page is to be removed
 	from the address space.  This generally corresponds to either a
-	truncation or a complete invalidation of the address space
-	(in the latter case 'offset' will always be 0).
-	Any private data associated with the page should be updated
-	to reflect this truncation.  If offset is 0, then
-	the private data should be released, because the page
-	must be able to be completely discarded.  This may be done by
-        calling the ->releasepage function, but in this case the
-        release MUST succeed.
+	truncation, punch hole  or a complete invalidation of the address
+	space (in the latter case 'offset' will always be 0 and 'length'
+	will be PAGE_CACHE_SIZE). Any private data associated with the page
+	should be updated to reflect this truncation.  If offset is 0 and
+	length is PAGE_CACHE_SIZE, then the private data should be released,
+	because the page must be able to be completely discarded.  This may
+	be done by calling the ->releasepage function, but in this case the
+	release MUST succeed.
 
   releasepage: releasepage is called on PagePrivate pages to indicate
         that the page should be freed if possible.  ->releasepage
@@ -742,6 +742,20 @@ struct address_space_operations {
   	prevent redirtying the page, it is kept locked during the whole
 	operation.
 
+  is_partially_uptodate: Called by the VM when reading a file through the
+	pagecache when the underlying blocksize != pagesize. If the required
+	block is up to date then the read can complete without needing the IO
+	to bring the whole page up to date.
+
+  is_dirty_writeback: Called by the VM when attempting to reclaim a page.
+	The VM uses dirty and writeback information to determine if it needs
+	to stall to allow flushers a chance to complete some IO. Ordinarily
+	it can use PageDirty and PageWriteback but some filesystems have
+	more complex state (unstable pages in NFS prevent reclaim) or
+	do not set those flags due to locking problems (jbd). This callback
+	allows a filesystem to indicate to the VM if a page should be
+	treated as dirty or writeback for the purposes of stalling.
+
   error_remove_page: normally set to generic_error_remove_page if truncation
 	is ok for this address space. Used for memory failure handling.
 	Setting this implies you deal with pages going away under you,
@@ -777,7 +791,7 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
-	int (*readdir) (struct file *, void *, filldir_t);
+	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
@@ -815,7 +829,7 @@ otherwise noted.
 
   aio_write: called by io_submit(2) and other asynchronous I/O operations
 
-  readdir: called when the VFS needs to read the directory contents
+  iterate: called when the VFS needs to read the directory contents
 
   poll: called by the VFS when a process wants to check if there is
 	activity on this file and (optionally) go to sleep until there
@@ -901,10 +915,8 @@ defined:
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, unsigned int);
 	int (*d_weak_revalidate)(struct dentry *, unsigned int);
-	int (*d_hash)(const struct dentry *, const struct inode *,
-			struct qstr *);
-	int (*d_compare)(const struct dentry *, const struct inode *,
-			const struct dentry *, const struct inode *,
+	int (*d_hash)(const struct dentry *, struct qstr *);
+	int (*d_compare)(const struct dentry *, const struct dentry *,
 			unsigned int, const char *, const struct qstr *);
 	int (*d_delete)(const struct dentry *);
 	void (*d_release)(struct dentry *);
@@ -949,25 +961,24 @@ struct dentry_operations {
 
   d_hash: called when the VFS adds a dentry to the hash table. The first
 	dentry passed to d_hash is the parent directory that the name is
-	to be hashed into. The inode is the dentry's inode.
+	to be hashed into.
 
 	Same locking and synchronisation rules as d_compare regarding
 	what is safe to dereference etc.
 
   d_compare: called to compare a dentry name with a given name. The first
 	dentry is the parent of the dentry to be compared, the second is
-	the parent's inode, then the dentry and inode (may be NULL) of the
-	child dentry. len and name string are properties of the dentry to be
-	compared. qstr is the name to compare it with.
+	the child dentry. len and name string are properties of the dentry
+	to be compared. qstr is the name to compare it with.
 
 	Must be constant and idempotent, and should not take locks if
-	possible, and should not or store into the dentry or inodes.
-	Should not dereference pointers outside the dentry or inodes without
+	possible, and should not or store into the dentry.
+	Should not dereference pointers outside the dentry without
 	lots of care (eg.  d_parent, d_inode, d_name should not be used).
 
 	However, our vfsmount is pinned, and RCU held, so the dentries and
 	inodes won't disappear, neither will our sb or filesystem module.
-	->i_sb and ->d_sb may be used.
+	->d_sb may be used.
 
 	It is a tricky calling convention because it needs to be called under
 	"rcu-walk", ie. without any locks or references on things.