aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
authorDaniel Lezcano <daniel.lezcano@linaro.org>2021-12-09 13:57:57 +0100
committerDaniel Lezcano <daniel.lezcano@linaro.org>2021-12-09 13:57:57 +0100
commit8632987380765dee716d460640aa58d58d52998e (patch)
treef8f4a55fc95c1060f97d64ee4d6db5c9693bb794 /fs/xfs/xfs_inode.c
parentclocksource/drivers/exynos_mct: Fix silly typo resulting in checkpatch warning (diff)
parentreset: Add of_reset_control_get_optional_exclusive() (diff)
downloadwireguard-linux-8632987380765dee716d460640aa58d58d52998e.tar.xz
wireguard-linux-8632987380765dee716d460640aa58d58d52998e.zip
Merge branch 'reset/of-get-optional-exclusive' of git://git.pengutronix.de/pza/linux into timers/drivers/next
"Add optional variant of of_reset_control_get_exclusive(). If the requested reset is not specified in the device tree, this function returns NULL instead of an error." This dependency is needed for the Generic Timer Module (a.k.a OSTM) support for RZ/G2L. Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c235
1 files changed, 143 insertions, 92 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 990b72ae3635..64b9bf334806 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -36,7 +36,7 @@
#include "xfs_reflink.h"
#include "xfs_ag.h"
-kmem_zone_t *xfs_inode_zone;
+struct kmem_cache *xfs_inode_cache;
/*
* Used in xfs_itruncate_extents(). This is the maximum number of extents
@@ -132,7 +132,7 @@ xfs_ilock_attr_map_shared(
/*
* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
- * multi-reader locks: i_mmap_lock and the i_lock. This routine allows
+ * multi-reader locks: invalidate_lock and the i_lock. This routine allows
* various combinations of the locks to be obtained.
*
* The 3 locks should always be ordered so that the IO lock is obtained first,
@@ -140,23 +140,23 @@ xfs_ilock_attr_map_shared(
*
* Basic locking order:
*
- * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
+ * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
*
* mmap_lock locking order:
*
* i_rwsem -> page lock -> mmap_lock
- * mmap_lock -> i_mmap_lock -> page_lock
+ * mmap_lock -> invalidate_lock -> page_lock
*
* The difference in mmap_lock locking order mean that we cannot hold the
- * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
- * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
- * in get_user_pages() to map the user pages into the kernel address space for
- * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
- * page faults already hold the mmap_lock.
+ * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
+ * can fault in pages during copy in/out (for buffered IO) or require the
+ * mmap_lock in get_user_pages() to map the user pages into the kernel address
+ * space for direct IO. Similarly the i_rwsem cannot be taken inside a page
+ * fault because page faults already hold the mmap_lock.
*
* Hence to serialise fully against both syscall and mmap based IO, we need to
- * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
- * taken in places where we need to invalidate the page cache in a race
+ * take both the i_rwsem and the invalidate_lock. These locks should *only* be
+ * both taken in places where we need to invalidate the page cache in a race
* free manner (e.g. truncate, hole punch and other extent manipulation
* functions).
*/
@@ -188,10 +188,13 @@ xfs_ilock(
XFS_IOLOCK_DEP(lock_flags));
}
- if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
- else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+ if (lock_flags & XFS_MMAPLOCK_EXCL) {
+ down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+ XFS_MMAPLOCK_DEP(lock_flags));
+ } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+ down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+ XFS_MMAPLOCK_DEP(lock_flags));
+ }
if (lock_flags & XFS_ILOCK_EXCL)
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
@@ -240,10 +243,10 @@ xfs_ilock_nowait(
}
if (lock_flags & XFS_MMAPLOCK_EXCL) {
- if (!mrtryupdate(&ip->i_mmaplock))
+ if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
goto out_undo_iolock;
} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
- if (!mrtryaccess(&ip->i_mmaplock))
+ if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
goto out_undo_iolock;
}
@@ -258,9 +261,9 @@ xfs_ilock_nowait(
out_undo_mmaplock:
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrunlock_excl(&ip->i_mmaplock);
+ up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mrunlock_shared(&ip->i_mmaplock);
+ up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
out_undo_iolock:
if (lock_flags & XFS_IOLOCK_EXCL)
up_write(&VFS_I(ip)->i_rwsem);
@@ -307,9 +310,9 @@ xfs_iunlock(
up_read(&VFS_I(ip)->i_rwsem);
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrunlock_excl(&ip->i_mmaplock);
+ up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mrunlock_shared(&ip->i_mmaplock);
+ up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
if (lock_flags & XFS_ILOCK_EXCL)
mrunlock_excl(&ip->i_lock);
@@ -335,7 +338,7 @@ xfs_ilock_demote(
if (lock_flags & XFS_ILOCK_EXCL)
mrdemote(&ip->i_lock);
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrdemote(&ip->i_mmaplock);
+ downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock);
if (lock_flags & XFS_IOLOCK_EXCL)
downgrade_write(&VFS_I(ip)->i_rwsem);
@@ -343,9 +346,29 @@ xfs_ilock_demote(
}
#if defined(DEBUG) || defined(XFS_WARN)
-int
+static inline bool
+__xfs_rwsem_islocked(
+ struct rw_semaphore *rwsem,
+ bool shared)
+{
+ if (!debug_locks)
+ return rwsem_is_locked(rwsem);
+
+ if (!shared)
+ return lockdep_is_held_type(rwsem, 0);
+
+ /*
+ * We are checking that the lock is held at least in shared
+ * mode but don't care that it might be held exclusively
+ * (i.e. shared | excl). Hence we check if the lock is held
+ * in any mode rather than an explicit shared mode.
+ */
+ return lockdep_is_held_type(rwsem, -1);
+}
+
+bool
xfs_isilocked(
- xfs_inode_t *ip,
+ struct xfs_inode *ip,
uint lock_flags)
{
if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
@@ -355,20 +378,17 @@ xfs_isilocked(
}
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
- if (!(lock_flags & XFS_MMAPLOCK_SHARED))
- return !!ip->i_mmaplock.mr_writer;
- return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
+ (lock_flags & XFS_IOLOCK_SHARED));
}
- if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
- if (!(lock_flags & XFS_IOLOCK_SHARED))
- return !debug_locks ||
- lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
- return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
+ if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
+ (lock_flags & XFS_IOLOCK_SHARED));
}
ASSERT(0);
- return 0;
+ return false;
}
#endif
@@ -532,12 +552,10 @@ again:
}
/*
- * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
- * the mmaplock or the ilock, but not more than one type at a time. If we lock
- * more than one at a time, lockdep will report false positives saying we have
- * violated locking orders. The iolock must be double-locked separately since
- * we use i_rwsem for that. We now support taking one lock EXCL and the other
- * SHARED.
+ * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and
+ * mmaplock must be double-locked separately since we use i_rwsem and
+ * invalidate_lock for that. We now support taking one lock EXCL and the
+ * other SHARED.
*/
void
xfs_lock_two_inodes(
@@ -546,8 +564,6 @@ xfs_lock_two_inodes(
struct xfs_inode *ip1,
uint ip1_mode)
{
- struct xfs_inode *temp;
- uint mode_temp;
int attempts = 0;
struct xfs_log_item *lp;
@@ -555,24 +571,13 @@ xfs_lock_two_inodes(
ASSERT(hweight32(ip1_mode) == 1);
ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
- ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
-
+ ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
ASSERT(ip0->i_ino != ip1->i_ino);
if (ip0->i_ino > ip1->i_ino) {
- temp = ip0;
- ip0 = ip1;
- ip1 = temp;
- mode_temp = ip0_mode;
- ip0_mode = ip1_mode;
- ip1_mode = mode_temp;
+ swap(ip0, ip1);
+ swap(ip0_mode, ip1_mode);
}
again:
@@ -663,7 +668,7 @@ xfs_lookup(
trace_xfs_lookup(dp, name);
- if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+ if (xfs_is_shutdown(dp->i_mount))
return -EIO;
error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
@@ -705,7 +710,7 @@ xfs_inode_inherit_flags(
di_flags |= XFS_DIFLAG_PROJINHERIT;
} else if (S_ISREG(mode)) {
if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
+ xfs_has_realtime(ip->i_mount))
di_flags |= XFS_DIFLAG_REALTIME;
if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
di_flags |= XFS_DIFLAG_EXTSIZE;
@@ -826,8 +831,7 @@ xfs_init_new_inode(
inode->i_rdev = rdev;
ip->i_projid = prid;
- if (dir && !(dir->i_mode & S_ISGID) &&
- (mp->m_flags & XFS_MOUNT_GRPID)) {
+ if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
inode_fsuid_set(inode, mnt_userns);
inode->i_gid = dir->i_gid;
inode->i_mode = mode;
@@ -857,7 +861,7 @@ xfs_init_new_inode(
ip->i_extsize = 0;
ip->i_diflags = 0;
- if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+ if (xfs_has_v3inodes(mp)) {
inode_set_iversion(inode, 1);
ip->i_cowextsize = 0;
ip->i_crtime = tv;
@@ -897,7 +901,7 @@ xfs_init_new_inode(
* this saves us from needing to run a separate transaction to set the
* fork offset in the immediate future.
*/
- if (init_xattrs && xfs_sb_version_hasattr(&mp->m_sb)) {
+ if (init_xattrs && xfs_has_attr(mp)) {
ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
}
@@ -976,7 +980,7 @@ xfs_create(
trace_xfs_create(dp, name);
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (xfs_is_shutdown(mp))
return -EIO;
prid = xfs_get_initial_prid(dp);
@@ -1068,7 +1072,7 @@ xfs_create(
* create transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+ if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
xfs_trans_set_sync(tp);
/*
@@ -1130,7 +1134,7 @@ xfs_create_tmpfile(
uint resblks;
xfs_ino_t ino;
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (xfs_is_shutdown(mp))
return -EIO;
prid = xfs_get_initial_prid(dp);
@@ -1160,7 +1164,7 @@ xfs_create_tmpfile(
if (error)
goto out_trans_cancel;
- if (mp->m_flags & XFS_MOUNT_WSYNC)
+ if (xfs_has_wsync(mp))
xfs_trans_set_sync(tp);
/*
@@ -1220,7 +1224,7 @@ xfs_link(
ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (xfs_is_shutdown(mp))
return -EIO;
error = xfs_qm_dqattach(sip);
@@ -1294,7 +1298,7 @@ xfs_link(
* link transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+ if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
xfs_trans_set_sync(tp);
return xfs_trans_commit(tp);
@@ -1435,10 +1439,10 @@ xfs_release(
return 0;
/* If this is a read-only mount, don't do this (would generate I/O) */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
+ if (xfs_is_readonly(mp))
return 0;
- if (!XFS_FORCED_SHUTDOWN(mp)) {
+ if (!xfs_is_shutdown(mp)) {
int truncated;
/*
@@ -1521,7 +1525,7 @@ xfs_inactive_truncate(
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(xfs_is_shutdown(mp));
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1592,7 +1596,7 @@ xfs_inactive_ifree(
"Failed to remove inode(s) from unlinked list. "
"Please free space, unmount and run xfs_repair.");
} else {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(xfs_is_shutdown(mp));
}
return error;
}
@@ -1628,7 +1632,7 @@ xfs_inactive_ifree(
* might do that, we need to make sure. Otherwise the
* inode might be lost for a long time or forever.
*/
- if (!XFS_FORCED_SHUTDOWN(mp)) {
+ if (!xfs_is_shutdown(mp)) {
xfs_notice(mp, "%s: xfs_ifree returned error %d",
__func__, error);
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
@@ -1655,6 +1659,59 @@ xfs_inactive_ifree(
}
/*
+ * Returns true if we need to update the on-disk metadata before we can free
+ * the memory used by this inode. Updates include freeing post-eof
+ * preallocations; freeing COW staging extents; and marking the inode free in
+ * the inobt if it is on the unlinked list.
+ */
+bool
+xfs_inode_needs_inactive(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+
+ /*
+ * If the inode is already free, then there can be nothing
+ * to clean up here.
+ */
+ if (VFS_I(ip)->i_mode == 0)
+ return false;
+
+ /* If this is a read-only mount, don't do this (would generate I/O) */
+ if (xfs_is_readonly(mp))
+ return false;
+
+ /* If the log isn't running, push inodes straight to reclaim. */
+ if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp))
+ return false;
+
+ /* Metadata inodes require explicit resource cleanup. */
+ if (xfs_is_metadata_inode(ip))
+ return false;
+
+ /* Want to clean out the cow blocks if there are any. */
+ if (cow_ifp && cow_ifp->if_bytes > 0)
+ return true;
+
+ /* Unlinked files must be freed. */
+ if (VFS_I(ip)->i_nlink == 0)
+ return true;
+
+ /*
+ * This file isn't being freed, so check if there are post-eof blocks
+ * to free. @force is true because we are evicting an inode from the
+ * cache. Post-eof blocks must be freed, lest we end up with broken
+ * free space accounting.
+ *
+ * Note: don't bother with iolock here since lockdep complains about
+ * acquiring it in reclaim context. We have the only reference to the
+ * inode at this point anyways.
+ */
+ return xfs_can_free_eofblocks(ip, true);
+}
+
+/*
* xfs_inactive
*
* This is called when the vnode reference count for the vnode
@@ -1683,7 +1740,7 @@ xfs_inactive(
ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
/* If this is a read-only mount, don't do this (would generate I/O) */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
+ if (xfs_is_readonly(mp))
goto out;
/* Metadata inodes require explicit resource cleanup. */
@@ -1958,7 +2015,7 @@ xfs_iunlink_destroy(
rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
xfs_iunlink_free_item, &freed_anything);
- ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount));
+ ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount));
}
/*
@@ -2703,7 +2760,7 @@ xfs_remove(
trace_xfs_remove(dp, name);
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (xfs_is_shutdown(mp))
return -EIO;
error = xfs_qm_dqattach(dp);
@@ -2802,7 +2859,7 @@ xfs_remove(
* remove transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+ if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp);
@@ -2879,7 +2936,7 @@ xfs_finish_rename(
* If this is a synchronous mount, make sure that the rename transaction
* goes to disk before returning to the user.
*/
- if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+ if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
xfs_trans_set_sync(tp);
return xfs_trans_commit(tp);
@@ -3462,7 +3519,7 @@ xfs_iflush(
* happen but we need to still do it to ensure backwards compatibility
* with old kernels that predate logging all inode changes.
*/
- if (!xfs_sb_version_has_v3inode(&mp->m_sb))
+ if (!xfs_has_v3inodes(mp))
ip->i_flushiter++;
/*
@@ -3484,7 +3541,7 @@ xfs_iflush(
xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
/* Wrap, we never let the log put out DI_MAX_FLUSH */
- if (!xfs_sb_version_has_v3inode(&mp->m_sb)) {
+ if (!xfs_has_v3inodes(mp)) {
if (ip->i_flushiter == DI_MAX_FLUSH)
ip->i_flushiter = 0;
}
@@ -3603,7 +3660,7 @@ xfs_iflush_cluster(
* AIL, leaving a dirty/unpinned inode attached to the buffer
* that otherwise looks like it should be flushed.
*/
- if (XFS_FORCED_SHUTDOWN(mp)) {
+ if (xfs_is_shutdown(mp)) {
xfs_iunpin_wait(ip);
xfs_iflush_abort(ip);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -3741,11 +3798,8 @@ xfs_ilock2_io_mmap(
ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
if (ret)
return ret;
- if (ip1 == ip2)
- xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
- else
- xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
- ip2, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping,
+ VFS_I(ip2)->i_mapping);
return 0;
}
@@ -3755,12 +3809,9 @@ xfs_iunlock2_io_mmap(
struct xfs_inode *ip1,
struct xfs_inode *ip2)
{
- bool same_inode = (ip1 == ip2);
-
- xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
- if (!same_inode)
- xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping,
+ VFS_I(ip2)->i_mapping);
inode_unlock(VFS_I(ip2));
- if (!same_inode)
+ if (ip1 != ip2)
inode_unlock(VFS_I(ip1));
}