Diffstat (limited to 'fs/xfs')
-rw-r--r--  fs/xfs/xfs_file.c    2
-rw-r--r--  fs/xfs/xfs_inode.c  18
-rw-r--r--  fs/xfs/xfs_ioctl.c 108
-rw-r--r--  fs/xfs/xfs_iops.c    5
4 files changed, 34 insertions(+), 99 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 403c90309a8f..00db81eac80d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1173,7 +1173,7 @@ xfs_file_llseek(
* Locking for serialisation of IO during page faults. This results in a lock
* ordering of:
*
- * mmap_sem (MM)
+ * mmap_lock (MM)
* sb_start_pagefault(vfs, freeze)
* i_mmaplock (XFS - truncate serialisation)
* page_lock (MM)
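For illustration, a minimal sketch of a fault path honouring this ordering, loosely modelled on xfs_filemap_fault(); the function name here is hypothetical, while sb_start_pagefault(), xfs_ilock() and the XFS_MMAPLOCK_* flags are the real helpers:

/* Hypothetical sketch: take locks in the documented fault order. */
static vm_fault_t xfs_filemap_fault_sketch(struct vm_fault *vmf)
{
	struct inode		*inode = file_inode(vmf->vma->vm_file);
	struct xfs_inode	*ip = XFS_I(inode);
	vm_fault_t		ret;

	/* mmap_lock is already held by the MM core at this point */
	sb_start_pagefault(inode->i_sb);	/* freeze protection */
	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);	/* truncate serialisation */
	ret = filemap_fault(vmf);		/* may take page_lock */
	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
	sb_end_pagefault(inode->i_sb);
	return ret;
}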
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 64f5f9a440ae..9aea7d68d8ab 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -145,17 +145,17 @@ xfs_ilock_attr_map_shared(
*
* i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
*
- * mmap_sem locking order:
+ * mmap_lock locking order:
*
- * i_rwsem -> page lock -> mmap_sem
- * mmap_sem -> i_mmap_lock -> page_lock
+ * i_rwsem -> page lock -> mmap_lock
+ * mmap_lock -> i_mmap_lock -> page_lock
*
- * The difference in mmap_sem locking order mean that we cannot hold the
+ * The difference in mmap_lock locking order means that we cannot hold the
* i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
- * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
+ * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
* in get_user_pages() to map the user pages into the kernel address space for
* direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
- * page faults already hold the mmap_sem.
+ * page faults already hold the mmap_lock.
*
* Hence to serialise fully against both syscall and mmap based IO, we need to
* take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
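A minimal sketch of the "take both" pattern this comment describes, assuming a truncate-like operation that must block both IO paths; the function name is hypothetical, and the flag combination matches the one used in xfs_ioctl.c below:

/* Hypothetical sketch: lock out both syscall IO and page faults. */
static void xfs_block_all_io_sketch(struct xfs_inode *ip)
{
	/* i_rwsem (IOLOCK) and i_mmap_lock (MMAPLOCK), both exclusive */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
	/* ... flush and invalidate the page cache here ... */
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
}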
@@ -1630,7 +1630,7 @@ xfs_release(
return 0;
/*
* If we can't get the iolock just skip truncating the blocks
- * past EOF because we could deadlock with the mmap_sem
+ * past EOF because we could deadlock with the mmap_lock
* otherwise. We'll get another chance to drop them once the
* last reference to the inode is dropped, so we'll never leak
* blocks permanently.
@@ -2634,8 +2634,10 @@ xfs_ifree_cluster(
error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
mp->m_bsize * igeo->blocks_per_cluster,
XBF_UNMAPPED, &bp);
- if (error)
+ if (error) {
+ xfs_perag_put(pag);
return error;
+ }
/*
* This buffer may not have been correctly initialised as we
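The two added lines above fix a reference leak: xfs_ifree_cluster() takes a per-AG reference earlier in the loop, and the early return on buffer allocation failure previously skipped the release. A condensed sketch of the pairing, with the loop body and buffer length shortened from the real function:

/* Hypothetical sketch of the get/put pairing xfs_ifree_cluster() must keep. */
static int xfs_ifree_cluster_sketch(struct xfs_mount *mp, struct xfs_trans *tp,
				    xfs_agnumber_t agno, xfs_daddr_t blkno)
{
	struct xfs_perag	*pag;
	struct xfs_buf		*bp;
	int			error;

	pag = xfs_perag_get(mp, agno);	/* reference taken here ... */
	error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
				  mp->m_bsize, XBF_UNMAPPED, &bp);
	if (error) {
		xfs_perag_put(pag);	/* ... so every exit must drop it */
		return error;
	}
	/* ... initialise and log the buffer ... */
	xfs_perag_put(pag);
	return 0;
}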
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index a40f88cf3ab7..a190212ca85d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1236,64 +1236,26 @@ xfs_ioctl_setattr_xflags(
return 0;
}
-/*
- * If we are changing DAX flags, we have to ensure the file is clean and any
- * cached objects in the address space are invalidated and removed. This
- * requires us to lock out other IO and page faults similar to a truncate
- * operation. The locks need to be held until the transaction has been committed
- * so that the cache invalidation is atomic with respect to the DAX flag
- * manipulation.
- */
-static int
-xfs_ioctl_setattr_dax_invalidate(
+static void
+xfs_ioctl_setattr_prepare_dax(
struct xfs_inode *ip,
- struct fsxattr *fa,
- int *join_flags)
+ struct fsxattr *fa)
{
- struct inode *inode = VFS_I(ip);
- struct super_block *sb = inode->i_sb;
- int error;
-
- *join_flags = 0;
-
- /*
- * It is only valid to set the DAX flag on regular files and
- * directories on filesystems where the block size is equal to the page
- * size. On directories it serves as an inherited hint so we don't
- * have to check the device for dax support or flush pagecache.
- */
- if (fa->fsx_xflags & FS_XFLAG_DAX) {
- struct xfs_buftarg *target = xfs_inode_buftarg(ip);
-
- if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
- return -EINVAL;
- }
-
- /* If the DAX state is not changing, we have nothing to do here. */
- if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
- return 0;
- if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
- return 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct inode *inode = VFS_I(ip);
if (S_ISDIR(inode->i_mode))
- return 0;
-
- /* lock, flush and invalidate mapping in preparation for flag change */
- xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
- error = filemap_write_and_wait(inode->i_mapping);
- if (error)
- goto out_unlock;
- error = invalidate_inode_pages2(inode->i_mapping);
- if (error)
- goto out_unlock;
-
- *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
- return 0;
+ return;
-out_unlock:
- xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
- return error;
+ if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
+ (mp->m_flags & XFS_MOUNT_DAX_NEVER))
+ return;
+ if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
+ !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) ||
+ (!(fa->fsx_xflags & FS_XFLAG_DAX) &&
+ (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)))
+ d_mark_dontcache(inode);
}
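The final condition fires exactly when the requested FS_XFLAG_DAX state differs from the on-disk XFS_DIFLAG2_DAX state; a minimal equivalent predicate, with a hypothetical helper name:

/* Hypothetical sketch: true when the ioctl would flip the DAX flag. */
static bool xfs_dax_flag_changing(struct xfs_inode *ip, struct fsxattr *fa)
{
	bool want_dax = (fa->fsx_xflags & FS_XFLAG_DAX) != 0;
	bool have_dax = (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX) != 0;

	return want_dax != have_dax;
}

When it does fire, d_mark_dontcache() arranges for the inode to be evicted once its last reference is dropped, so it is re-instantiated with the new DAX mode rather than having its mapping invalidated in place.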
/*
@@ -1301,17 +1263,10 @@ out_unlock:
* have permission to do so. On success, return a clean transaction and the
* inode locked exclusively ready for further operation specific checks. On
* failure, return an error without modifying or locking the inode.
- *
- * The inode might already be IO locked on call. If this is the case, it is
- * indicated in @join_flags and we take full responsibility for ensuring they
- * are unlocked from now on. Hence if we have an error here, we still have to
- * unlock them. Otherwise, once they are joined to the transaction, they will
- * be unlocked on commit/cancel.
*/
static struct xfs_trans *
xfs_ioctl_setattr_get_trans(
- struct xfs_inode *ip,
- int join_flags)
+ struct xfs_inode *ip)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
@@ -1328,8 +1283,7 @@ xfs_ioctl_setattr_get_trans(
goto out_unlock;
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
- join_flags = 0;
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
* CAP_FOWNER overrides the following restrictions:
@@ -1350,8 +1304,6 @@ xfs_ioctl_setattr_get_trans(
out_cancel:
xfs_trans_cancel(tp);
out_unlock:
- if (join_flags)
- xfs_iunlock(ip, join_flags);
return ERR_PTR(error);
}
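On success the helper returns a clean transaction with the inode joined and ILOCKed, and commit or cancel now releases the lock. A condensed sketch of the calling convention, mirroring the callers later in this patch:

	tp = xfs_ioctl_setattr_get_trans(ip);
	if (IS_ERR(tp))
		return PTR_ERR(tp);
	/* ... operation-specific checks and inode changes ... */
	error = xfs_trans_commit(tp);	/* drops XFS_ILOCK_EXCL on commit */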
@@ -1476,7 +1428,6 @@ xfs_ioctl_setattr(
struct xfs_dquot *pdqp = NULL;
struct xfs_dquot *olddquot = NULL;
int code;
- int join_flags = 0;
trace_xfs_ioctl_setattr(ip);
@@ -1500,18 +1451,9 @@ xfs_ioctl_setattr(
return code;
}
- /*
- * Changing DAX config may require inode locking for mapping
- * invalidation. These need to be held all the way to transaction commit
- * or cancel time, so need to be passed through to
- * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
- * appropriately.
- */
- code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
- if (code)
- goto error_free_dquots;
+ xfs_ioctl_setattr_prepare_dax(ip, fa);
- tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
+ tp = xfs_ioctl_setattr_get_trans(ip);
if (IS_ERR(tp)) {
code = PTR_ERR(tp);
goto error_free_dquots;
@@ -1639,7 +1581,6 @@ xfs_ioc_setxflags(
struct fsxattr fa;
struct fsxattr old_fa;
unsigned int flags;
- int join_flags = 0;
int error;
if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1656,18 +1597,9 @@ xfs_ioc_setxflags(
if (error)
return error;
- /*
- * Changing DAX config may require inode locking for mapping
- * invalidation. These need to be held all the way to transaction commit
- * or cancel time, so need to be passed through to
- * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
- * appropriately.
- */
- error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
- if (error)
- goto out_drop_write;
+ xfs_ioctl_setattr_prepare_dax(ip, &fa);
- tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
+ tp = xfs_ioctl_setattr_get_trans(ip);
if (IS_ERR(tp)) {
error = PTR_ERR(tp);
goto out_drop_write;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d66528fa3657..80a13c8561d8 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -25,13 +25,14 @@
#include <linux/posix_acl.h>
#include <linux/security.h>
#include <linux/iversion.h>
+#include <linux/fiemap.h>
/*
- * Directories have different lock order w.r.t. mmap_sem compared to regular
+ * Directories have different lock order w.r.t. mmap_lock compared to regular
* files. This is due to readdir potentially triggering page faults on a user
* buffer inside filldir(), and this happens with the ilock on the directory
* held. For regular files, the lock order is the other way around - the
- * mmap_sem is taken during the page fault, and then we lock the ilock to do
+ * mmap_lock is taken during the page fault, and then we lock the ilock to do
* block mapping. Hence we need a different class for the directory ilock so
* that lockdep can tell them apart.
*/
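A minimal sketch of how that separation is expressed, assuming the mrlock-based ilock of this kernel generation; the key and function names are hypothetical, while lockdep_set_class() is the generic lockdep API:

/* Hypothetical sketch: give directory ilocks their own lockdep class. */
static struct lock_class_key xfs_dir_ilock_class_sketch;

static void xfs_setup_dir_ilock_class(struct xfs_inode *ip)
{
	if (S_ISDIR(VFS_I(ip)->i_mode))
		lockdep_set_class(&ip->i_lock.mr_lock,
				  &xfs_dir_ilock_class_sketch);
}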