aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2022-01-15 12:09:44 -0800
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2022-01-15 12:09:44 -0800
commit762f99f4f3cb41a775b5157dd761217beba65873 (patch)
tree2e93410a577dfff1202ea5b0d109460b97650e58 /fs/xfs/xfs_inode.c
parentInput: zinitix - make sure the IRQ is allocated before it gets enabled (diff)
parentInput: ti_am335x_tsc - fix a typo in a comment (diff)
downloadwireguard-linux-762f99f4f3cb41a775b5157dd761217beba65873.tar.xz
wireguard-linux-762f99f4f3cb41a775b5157dd761217beba65873.zip
Merge branch 'next' into for-linus
Prepare input updates for 5.17 merge window.
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c472
1 files changed, 258 insertions, 214 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e4c2da4566f1..a4f6f034fb81 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -11,7 +11,6 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
@@ -35,6 +34,7 @@
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
+#include "xfs_ag.h"
kmem_zone_t *xfs_inode_zone;
@@ -45,7 +45,8 @@ kmem_zone_t *xfs_inode_zone;
#define XFS_ITRUNC_MAX_EXTENTS 2
STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
-STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);
+STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
+ struct xfs_inode *);
/*
* helper function to extract extent size hint from inode
@@ -131,7 +132,7 @@ xfs_ilock_attr_map_shared(
/*
* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
- * multi-reader locks: i_mmap_lock and the i_lock. This routine allows
+ * multi-reader locks: invalidate_lock and the i_lock. This routine allows
* various combinations of the locks to be obtained.
*
* The 3 locks should always be ordered so that the IO lock is obtained first,
@@ -139,23 +140,23 @@ xfs_ilock_attr_map_shared(
*
* Basic locking order:
*
- * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
+ * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
*
* mmap_lock locking order:
*
* i_rwsem -> page lock -> mmap_lock
- * mmap_lock -> i_mmap_lock -> page_lock
+ * mmap_lock -> invalidate_lock -> page_lock
*
* The difference in mmap_lock locking order mean that we cannot hold the
- * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
- * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
- * in get_user_pages() to map the user pages into the kernel address space for
- * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
- * page faults already hold the mmap_lock.
+ * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
+ * can fault in pages during copy in/out (for buffered IO) or require the
+ * mmap_lock in get_user_pages() to map the user pages into the kernel address
+ * space for direct IO. Similarly the i_rwsem cannot be taken inside a page
+ * fault because page faults already hold the mmap_lock.
*
* Hence to serialise fully against both syscall and mmap based IO, we need to
- * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
- * taken in places where we need to invalidate the page cache in a race
+ * take both the i_rwsem and the invalidate_lock. These locks should *only* be
+ * both taken in places where we need to invalidate the page cache in a race
* free manner (e.g. truncate, hole punch and other extent manipulation
* functions).
*/
@@ -187,10 +188,13 @@ xfs_ilock(
XFS_IOLOCK_DEP(lock_flags));
}
- if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
- else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+ if (lock_flags & XFS_MMAPLOCK_EXCL) {
+ down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+ XFS_MMAPLOCK_DEP(lock_flags));
+ } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+ down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+ XFS_MMAPLOCK_DEP(lock_flags));
+ }
if (lock_flags & XFS_ILOCK_EXCL)
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
@@ -239,10 +243,10 @@ xfs_ilock_nowait(
}
if (lock_flags & XFS_MMAPLOCK_EXCL) {
- if (!mrtryupdate(&ip->i_mmaplock))
+ if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
goto out_undo_iolock;
} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
- if (!mrtryaccess(&ip->i_mmaplock))
+ if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
goto out_undo_iolock;
}
@@ -257,9 +261,9 @@ xfs_ilock_nowait(
out_undo_mmaplock:
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrunlock_excl(&ip->i_mmaplock);
+ up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mrunlock_shared(&ip->i_mmaplock);
+ up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
out_undo_iolock:
if (lock_flags & XFS_IOLOCK_EXCL)
up_write(&VFS_I(ip)->i_rwsem);
@@ -306,9 +310,9 @@ xfs_iunlock(
up_read(&VFS_I(ip)->i_rwsem);
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrunlock_excl(&ip->i_mmaplock);
+ up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mrunlock_shared(&ip->i_mmaplock);
+ up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
if (lock_flags & XFS_ILOCK_EXCL)
mrunlock_excl(&ip->i_lock);
@@ -334,7 +338,7 @@ xfs_ilock_demote(
if (lock_flags & XFS_ILOCK_EXCL)
mrdemote(&ip->i_lock);
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrdemote(&ip->i_mmaplock);
+ downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock);
if (lock_flags & XFS_IOLOCK_EXCL)
downgrade_write(&VFS_I(ip)->i_rwsem);
@@ -342,9 +346,29 @@ xfs_ilock_demote(
}
#if defined(DEBUG) || defined(XFS_WARN)
-int
+static inline bool
+__xfs_rwsem_islocked(
+ struct rw_semaphore *rwsem,
+ bool shared)
+{
+ if (!debug_locks)
+ return rwsem_is_locked(rwsem);
+
+ if (!shared)
+ return lockdep_is_held_type(rwsem, 0);
+
+ /*
+ * We are checking that the lock is held at least in shared
+ * mode but don't care that it might be held exclusively
+ * (i.e. shared | excl). Hence we check if the lock is held
+ * in any mode rather than an explicit shared mode.
+ */
+ return lockdep_is_held_type(rwsem, -1);
+}
+
+bool
xfs_isilocked(
- xfs_inode_t *ip,
+ struct xfs_inode *ip,
uint lock_flags)
{
if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
@@ -354,20 +378,17 @@ xfs_isilocked(
}
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
- if (!(lock_flags & XFS_MMAPLOCK_SHARED))
- return !!ip->i_mmaplock.mr_writer;
- return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
+ (lock_flags & XFS_IOLOCK_SHARED));
}
- if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
- if (!(lock_flags & XFS_IOLOCK_SHARED))
- return !debug_locks ||
- lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
- return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
+ if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
+ (lock_flags & XFS_IOLOCK_SHARED));
}
ASSERT(0);
- return 0;
+ return false;
}
#endif
@@ -531,12 +552,10 @@ again:
}
/*
- * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
- * the mmaplock or the ilock, but not more than one type at a time. If we lock
- * more than one at a time, lockdep will report false positives saying we have
- * violated locking orders. The iolock must be double-locked separately since
- * we use i_rwsem for that. We now support taking one lock EXCL and the other
- * SHARED.
+ * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and
+ * mmaplock must be double-locked separately since we use i_rwsem and
+ * invalidate_lock for that. We now support taking one lock EXCL and the
+ * other SHARED.
*/
void
xfs_lock_two_inodes(
@@ -554,15 +573,8 @@ xfs_lock_two_inodes(
ASSERT(hweight32(ip1_mode) == 1);
ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
- ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
-
+ ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
ASSERT(ip0->i_ino != ip1->i_ino);
if (ip0->i_ino > ip1->i_ino) {
@@ -662,7 +674,7 @@ xfs_lookup(
trace_xfs_lookup(dp, name);
- if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+ if (xfs_is_shutdown(dp->i_mount))
return -EIO;
error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
@@ -704,7 +716,7 @@ xfs_inode_inherit_flags(
di_flags |= XFS_DIFLAG_PROJINHERIT;
} else if (S_ISREG(mode)) {
if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
+ xfs_has_realtime(ip->i_mount))
di_flags |= XFS_DIFLAG_REALTIME;
if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
di_flags |= XFS_DIFLAG_EXTSIZE;
@@ -778,7 +790,7 @@ xfs_inode_inherit_flags2(
* Initialise a newly allocated inode and return the in-core inode to the
* caller locked exclusively.
*/
-static int
+int
xfs_init_new_inode(
struct user_namespace *mnt_userns,
struct xfs_trans *tp,
@@ -825,8 +837,7 @@ xfs_init_new_inode(
inode->i_rdev = rdev;
ip->i_projid = prid;
- if (dir && !(dir->i_mode & S_ISGID) &&
- (mp->m_flags & XFS_MOUNT_GRPID)) {
+ if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
inode_fsuid_set(inode, mnt_userns);
inode->i_gid = dir->i_gid;
inode->i_mode = mode;
@@ -856,7 +867,7 @@ xfs_init_new_inode(
ip->i_extsize = 0;
ip->i_diflags = 0;
- if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+ if (xfs_has_v3inodes(mp)) {
inode_set_iversion(inode, 1);
ip->i_cowextsize = 0;
ip->i_crtime = tv;
@@ -877,7 +888,7 @@ xfs_init_new_inode(
xfs_inode_inherit_flags(ip, pip);
if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
xfs_inode_inherit_flags2(ip, pip);
- /* FALLTHROUGH */
+ fallthrough;
case S_IFLNK:
ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
ip->i_df.if_bytes = 0;
@@ -896,7 +907,7 @@ xfs_init_new_inode(
* this saves us from needing to run a separate transaction to set the
* fork offset in the immediate future.
*/
- if (init_xattrs && xfs_sb_version_hasattr(&mp->m_sb)) {
+ if (init_xattrs && xfs_has_attr(mp)) {
ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
}
@@ -915,57 +926,6 @@ xfs_init_new_inode(
}
/*
- * Allocates a new inode from disk and return a pointer to the incore copy. This
- * routine will internally commit the current transaction and allocate a new one
- * if we needed to allocate more on-disk free inodes to perform the requested
- * operation.
- *
- * If we are allocating quota inodes, we do not have a parent inode to attach to
- * or associate with (i.e. dp == NULL) because they are not linked into the
- * directory structure - they are attached directly to the superblock - and so
- * have no parent.
- */
-int
-xfs_dir_ialloc(
- struct user_namespace *mnt_userns,
- struct xfs_trans **tpp,
- struct xfs_inode *dp,
- umode_t mode,
- xfs_nlink_t nlink,
- dev_t rdev,
- prid_t prid,
- bool init_xattrs,
- struct xfs_inode **ipp)
-{
- struct xfs_buf *agibp;
- xfs_ino_t parent_ino = dp ? dp->i_ino : 0;
- xfs_ino_t ino;
- int error;
-
- ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
-
- /*
- * Call the space management code to pick the on-disk inode to be
- * allocated.
- */
- error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp);
- if (error)
- return error;
-
- if (!agibp)
- return -ENOSPC;
-
- /* Allocate an inode from the selected AG */
- error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino);
- if (error)
- return error;
- ASSERT(ino != NULLFSINO);
-
- return xfs_init_new_inode(mnt_userns, *tpp, dp, ino, mode, nlink, rdev,
- prid, init_xattrs, ipp);
-}
-
-/*
* Decrement the link count on an inode & log the change. If this causes the
* link count to go to zero, move the inode to AGI unlinked list so that it can
* be freed when the last active reference goes away via xfs_inactive().
@@ -1022,10 +982,11 @@ xfs_create(
struct xfs_dquot *pdqp = NULL;
struct xfs_trans_res *tres;
uint resblks;
+ xfs_ino_t ino;
trace_xfs_create(dp, name);
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (xfs_is_shutdown(mp))
return -EIO;
prid = xfs_get_initial_prid(dp);
@@ -1078,14 +1039,16 @@ xfs_create(
* entry pointing to them, but a directory also the "." entry
* pointing to itself.
*/
- error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, is_dir ? 2 : 1, rdev,
- prid, init_xattrs, &ip);
+ error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+ if (!error)
+ error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
+ is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip);
if (error)
goto out_trans_cancel;
/*
* Now we join the directory inode to the transaction. We do not do it
- * earlier because xfs_dir_ialloc might commit the previous transaction
+ * earlier because xfs_dialloc might commit the previous transaction
* (and release all the locks). An error from here on will result in
* the transaction cancel unlocking dp so don't do it explicitly in the
* error path.
@@ -1115,7 +1078,7 @@ xfs_create(
* create transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+ if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
xfs_trans_set_sync(tp);
/*
@@ -1175,8 +1138,9 @@ xfs_create_tmpfile(
struct xfs_dquot *pdqp = NULL;
struct xfs_trans_res *tres;
uint resblks;
+ xfs_ino_t ino;
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (xfs_is_shutdown(mp))
return -EIO;
prid = xfs_get_initial_prid(dp);
@@ -1199,12 +1163,14 @@ xfs_create_tmpfile(
if (error)
goto out_release_dquots;
- error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, 0, 0, prid,
- false, &ip);
+ error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+ if (!error)
+ error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
+ 0, 0, prid, false, &ip);
if (error)
goto out_trans_cancel;
- if (mp->m_flags & XFS_MOUNT_WSYNC)
+ if (xfs_has_wsync(mp))
xfs_trans_set_sync(tp);
/*
@@ -1264,7 +1230,7 @@ xfs_link(
ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (xfs_is_shutdown(mp))
return -EIO;
error = xfs_qm_dqattach(sip);
@@ -1315,7 +1281,11 @@ xfs_link(
* Handle initial link state of O_TMPFILE inode
*/
if (VFS_I(sip)->i_nlink == 0) {
- error = xfs_iunlink_remove(tp, sip);
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino));
+ error = xfs_iunlink_remove(tp, pag, sip);
+ xfs_perag_put(pag);
if (error)
goto error_return;
}
@@ -1334,7 +1304,7 @@ xfs_link(
* link transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+ if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
xfs_trans_set_sync(tp);
return xfs_trans_commit(tp);
@@ -1475,10 +1445,10 @@ xfs_release(
return 0;
/* If this is a read-only mount, don't do this (would generate I/O) */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
+ if (xfs_is_readonly(mp))
return 0;
- if (!XFS_FORCED_SHUTDOWN(mp)) {
+ if (!xfs_is_shutdown(mp)) {
int truncated;
/*
@@ -1561,7 +1531,7 @@ xfs_inactive_truncate(
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(xfs_is_shutdown(mp));
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1632,7 +1602,7 @@ xfs_inactive_ifree(
"Failed to remove inode(s) from unlinked list. "
"Please free space, unmount and run xfs_repair.");
} else {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(xfs_is_shutdown(mp));
}
return error;
}
@@ -1668,7 +1638,7 @@ xfs_inactive_ifree(
* might do that, we need to make sure. Otherwise the
* inode might be lost for a long time or forever.
*/
- if (!XFS_FORCED_SHUTDOWN(mp)) {
+ if (!xfs_is_shutdown(mp)) {
xfs_notice(mp, "%s: xfs_ifree returned error %d",
__func__, error);
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
@@ -1695,6 +1665,59 @@ xfs_inactive_ifree(
}
/*
+ * Returns true if we need to update the on-disk metadata before we can free
+ * the memory used by this inode. Updates include freeing post-eof
+ * preallocations; freeing COW staging extents; and marking the inode free in
+ * the inobt if it is on the unlinked list.
+ */
+bool
+xfs_inode_needs_inactive(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+
+ /*
+ * If the inode is already free, then there can be nothing
+ * to clean up here.
+ */
+ if (VFS_I(ip)->i_mode == 0)
+ return false;
+
+ /* If this is a read-only mount, don't do this (would generate I/O) */
+ if (xfs_is_readonly(mp))
+ return false;
+
+ /* If the log isn't running, push inodes straight to reclaim. */
+ if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp))
+ return false;
+
+ /* Metadata inodes require explicit resource cleanup. */
+ if (xfs_is_metadata_inode(ip))
+ return false;
+
+ /* Want to clean out the cow blocks if there are any. */
+ if (cow_ifp && cow_ifp->if_bytes > 0)
+ return true;
+
+ /* Unlinked files must be freed. */
+ if (VFS_I(ip)->i_nlink == 0)
+ return true;
+
+ /*
+ * This file isn't being freed, so check if there are post-eof blocks
+ * to free. @force is true because we are evicting an inode from the
+ * cache. Post-eof blocks must be freed, lest we end up with broken
+ * free space accounting.
+ *
+ * Note: don't bother with iolock here since lockdep complains about
+ * acquiring it in reclaim context. We have the only reference to the
+ * inode at this point anyways.
+ */
+ return xfs_can_free_eofblocks(ip, true);
+}
+
+/*
* xfs_inactive
*
* This is called when the vnode reference count for the vnode
@@ -1716,19 +1739,19 @@ xfs_inactive(
*/
if (VFS_I(ip)->i_mode == 0) {
ASSERT(ip->i_df.if_broot_bytes == 0);
- return;
+ goto out;
}
mp = ip->i_mount;
ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
/* If this is a read-only mount, don't do this (would generate I/O) */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return;
+ if (xfs_is_readonly(mp))
+ goto out;
/* Metadata inodes require explicit resource cleanup. */
if (xfs_is_metadata_inode(ip))
- return;
+ goto out;
/* Try to clean out the cow blocks if there are any. */
if (xfs_inode_has_cow_data(ip))
@@ -1747,7 +1770,7 @@ xfs_inactive(
if (xfs_can_free_eofblocks(ip, true))
xfs_free_eofblocks(ip);
- return;
+ goto out;
}
if (S_ISREG(VFS_I(ip)->i_mode) &&
@@ -1757,14 +1780,14 @@ xfs_inactive(
error = xfs_qm_dqattach(ip);
if (error)
- return;
+ goto out;
if (S_ISLNK(VFS_I(ip)->i_mode))
error = xfs_inactive_symlink(ip);
else if (truncate)
error = xfs_inactive_truncate(ip);
if (error)
- return;
+ goto out;
/*
* If there are attributes associated with the file then blow them away
@@ -1774,7 +1797,7 @@ xfs_inactive(
if (XFS_IFORK_Q(ip)) {
error = xfs_attr_inactive(ip);
if (error)
- return;
+ goto out;
}
ASSERT(!ip->i_afp);
@@ -1783,12 +1806,12 @@ xfs_inactive(
/*
* Free the inode.
*/
- error = xfs_inactive_ifree(ip);
- if (error)
- return;
+ xfs_inactive_ifree(ip);
+out:
/*
- * Release the dquots held by inode, if any.
+ * We're done making metadata updates for this inode, so we can release
+ * the attached dquots.
*/
xfs_qm_dqdetach(ip);
}
@@ -1998,7 +2021,7 @@ xfs_iunlink_destroy(
rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
xfs_iunlink_free_item, &freed_anything);
- ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount));
+ ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount));
}
/*
@@ -2008,7 +2031,7 @@ xfs_iunlink_destroy(
STATIC int
xfs_iunlink_update_bucket(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
+ struct xfs_perag *pag,
struct xfs_buf *agibp,
unsigned int bucket_index,
xfs_agino_t new_agino)
@@ -2017,10 +2040,10 @@ xfs_iunlink_update_bucket(
xfs_agino_t old_value;
int offset;
- ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino));
+ ASSERT(xfs_verify_agino_or_null(tp->t_mountp, pag->pag_agno, new_agino));
old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index,
+ trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
old_value, new_agino);
/*
@@ -2044,7 +2067,7 @@ xfs_iunlink_update_bucket(
STATIC void
xfs_iunlink_update_dinode(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
+ struct xfs_perag *pag,
xfs_agino_t agino,
struct xfs_buf *ibp,
struct xfs_dinode *dip,
@@ -2054,9 +2077,9 @@ xfs_iunlink_update_dinode(
struct xfs_mount *mp = tp->t_mountp;
int offset;
- ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
+ ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino));
- trace_xfs_iunlink_update_dinode(mp, agno, agino,
+ trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, agino,
be32_to_cpu(dip->di_next_unlinked), next_agino);
dip->di_next_unlinked = cpu_to_be32(next_agino);
@@ -2074,7 +2097,7 @@ STATIC int
xfs_iunlink_update_inode(
struct xfs_trans *tp,
struct xfs_inode *ip,
- xfs_agnumber_t agno,
+ struct xfs_perag *pag,
xfs_agino_t next_agino,
xfs_agino_t *old_next_agino)
{
@@ -2084,7 +2107,7 @@ xfs_iunlink_update_inode(
xfs_agino_t old_value;
int error;
- ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
+ ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino));
error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp);
if (error)
@@ -2093,7 +2116,7 @@ xfs_iunlink_update_inode(
/* Make sure the old pointer isn't garbage. */
old_value = be32_to_cpu(dip->di_next_unlinked);
- if (!xfs_verify_agino_or_null(mp, agno, old_value)) {
+ if (!xfs_verify_agino_or_null(mp, pag->pag_agno, old_value)) {
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
sizeof(*dip), __this_address);
error = -EFSCORRUPTED;
@@ -2116,7 +2139,7 @@ xfs_iunlink_update_inode(
}
/* Ok, update the new pointer. */
- xfs_iunlink_update_dinode(tp, agno, XFS_INO_TO_AGINO(mp, ip->i_ino),
+ xfs_iunlink_update_dinode(tp, pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
ibp, dip, &ip->i_imap, next_agino);
return 0;
out:
@@ -2137,10 +2160,10 @@ xfs_iunlink(
struct xfs_inode *ip)
{
struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_perag *pag;
struct xfs_agi *agi;
struct xfs_buf *agibp;
xfs_agino_t next_agino;
- xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
int error;
@@ -2149,10 +2172,12 @@ xfs_iunlink(
ASSERT(VFS_I(ip)->i_mode != 0);
trace_xfs_iunlink(ip);
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+
/* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(mp, tp, agno, &agibp);
+ error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp);
if (error)
- return error;
+ goto out;
agi = agibp->b_addr;
/*
@@ -2162,9 +2187,10 @@ xfs_iunlink(
*/
next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
if (next_agino == agino ||
- !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+ !xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)) {
xfs_buf_mark_corrupt(agibp);
- return -EFSCORRUPTED;
+ error = -EFSCORRUPTED;
+ goto out;
}
if (next_agino != NULLAGINO) {
@@ -2174,23 +2200,26 @@ xfs_iunlink(
* There is already another inode in the bucket, so point this
* inode to the current head of the list.
*/
- error = xfs_iunlink_update_inode(tp, ip, agno, next_agino,
+ error = xfs_iunlink_update_inode(tp, ip, pag, next_agino,
&old_agino);
if (error)
- return error;
+ goto out;
ASSERT(old_agino == NULLAGINO);
/*
* agino has been unlinked, add a backref from the next inode
* back to agino.
*/
- error = xfs_iunlink_add_backref(agibp->b_pag, agino, next_agino);
+ error = xfs_iunlink_add_backref(pag, agino, next_agino);
if (error)
- return error;
+ goto out;
}
/* Point the head of the list to point to this inode. */
- return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino);
+ error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
+out:
+ xfs_perag_put(pag);
+ return error;
}
/* Return the imap, dinode pointer, and buffer for an inode. */
@@ -2238,14 +2267,13 @@ xfs_iunlink_map_ino(
STATIC int
xfs_iunlink_map_prev(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
+ struct xfs_perag *pag,
xfs_agino_t head_agino,
xfs_agino_t target_agino,
xfs_agino_t *agino,
struct xfs_imap *imap,
struct xfs_dinode **dipp,
- struct xfs_buf **bpp,
- struct xfs_perag *pag)
+ struct xfs_buf **bpp)
{
struct xfs_mount *mp = tp->t_mountp;
xfs_agino_t next_agino;
@@ -2257,7 +2285,8 @@ xfs_iunlink_map_prev(
/* See if our backref cache can find it faster. */
*agino = xfs_iunlink_lookup_backref(pag, target_agino);
if (*agino != NULLAGINO) {
- error = xfs_iunlink_map_ino(tp, agno, *agino, imap, dipp, bpp);
+ error = xfs_iunlink_map_ino(tp, pag->pag_agno, *agino, imap,
+ dipp, bpp);
if (error)
return error;
@@ -2273,7 +2302,7 @@ xfs_iunlink_map_prev(
WARN_ON_ONCE(1);
}
- trace_xfs_iunlink_map_prev_fallback(mp, agno);
+ trace_xfs_iunlink_map_prev_fallback(mp, pag->pag_agno);
/* Otherwise, walk the entire bucket until we find it. */
next_agino = head_agino;
@@ -2284,8 +2313,8 @@ xfs_iunlink_map_prev(
xfs_trans_brelse(tp, *bpp);
*agino = next_agino;
- error = xfs_iunlink_map_ino(tp, agno, next_agino, imap, dipp,
- bpp);
+ error = xfs_iunlink_map_ino(tp, pag->pag_agno, next_agino, imap,
+ dipp, bpp);
if (error)
return error;
@@ -2294,7 +2323,7 @@ xfs_iunlink_map_prev(
* Make sure this pointer is valid and isn't an obvious
* infinite loop.
*/
- if (!xfs_verify_agino(mp, agno, unlinked_agino) ||
+ if (!xfs_verify_agino(mp, pag->pag_agno, unlinked_agino) ||
next_agino == unlinked_agino) {
XFS_CORRUPTION_ERROR(__func__,
XFS_ERRLEVEL_LOW, mp,
@@ -2314,6 +2343,7 @@ xfs_iunlink_map_prev(
STATIC int
xfs_iunlink_remove(
struct xfs_trans *tp,
+ struct xfs_perag *pag,
struct xfs_inode *ip)
{
struct xfs_mount *mp = tp->t_mountp;
@@ -2321,7 +2351,6 @@ xfs_iunlink_remove(
struct xfs_buf *agibp;
struct xfs_buf *last_ibp;
struct xfs_dinode *last_dip = NULL;
- xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
xfs_agino_t next_agino;
xfs_agino_t head_agino;
@@ -2331,7 +2360,7 @@ xfs_iunlink_remove(
trace_xfs_iunlink_remove(ip);
/* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(mp, tp, agno, &agibp);
+ error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp);
if (error)
return error;
agi = agibp->b_addr;
@@ -2341,7 +2370,7 @@ xfs_iunlink_remove(
* go on. Make sure the head pointer isn't garbage.
*/
head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- if (!xfs_verify_agino(mp, agno, head_agino)) {
+ if (!xfs_verify_agino(mp, pag->pag_agno, head_agino)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
agi, sizeof(*agi));
return -EFSCORRUPTED;
@@ -2352,7 +2381,7 @@ xfs_iunlink_remove(
* the old pointer value so that we can update whatever was previous
* to us in the list to point to whatever was next in the list.
*/
- error = xfs_iunlink_update_inode(tp, ip, agno, NULLAGINO, &next_agino);
+ error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino);
if (error)
return error;
@@ -2364,8 +2393,7 @@ xfs_iunlink_remove(
* this inode's backref to point from the next inode.
*/
if (next_agino != NULLAGINO) {
- error = xfs_iunlink_change_backref(agibp->b_pag, next_agino,
- NULLAGINO);
+ error = xfs_iunlink_change_backref(pag, next_agino, NULLAGINO);
if (error)
return error;
}
@@ -2375,14 +2403,13 @@ xfs_iunlink_remove(
xfs_agino_t prev_agino;
/* We need to search the list for the inode being freed. */
- error = xfs_iunlink_map_prev(tp, agno, head_agino, agino,
- &prev_agino, &imap, &last_dip, &last_ibp,
- agibp->b_pag);
+ error = xfs_iunlink_map_prev(tp, pag, head_agino, agino,
+ &prev_agino, &imap, &last_dip, &last_ibp);
if (error)
return error;
/* Point the previous inode on the list to the next inode. */
- xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp,
+ xfs_iunlink_update_dinode(tp, pag, prev_agino, last_ibp,
last_dip, &imap, next_agino);
/*
@@ -2398,7 +2425,7 @@ xfs_iunlink_remove(
}
/* Point the head of the list to the next unlinked inode. */
- return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index,
+ return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
next_agino);
}
@@ -2409,12 +2436,11 @@ xfs_iunlink_remove(
*/
static void
xfs_ifree_mark_inode_stale(
- struct xfs_buf *bp,
+ struct xfs_perag *pag,
struct xfs_inode *free_ip,
xfs_ino_t inum)
{
- struct xfs_mount *mp = bp->b_mount;
- struct xfs_perag *pag = bp->b_pag;
+ struct xfs_mount *mp = pag->pag_mount;
struct xfs_inode_log_item *iip;
struct xfs_inode *ip;
@@ -2504,10 +2530,11 @@ out_iflags_unlock:
* inodes that are in memory - they all must be marked stale and attached to
* the cluster buffer.
*/
-STATIC int
+static int
xfs_ifree_cluster(
- struct xfs_inode *free_ip,
struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_inode *free_ip,
struct xfs_icluster *xic)
{
struct xfs_mount *mp = free_ip->i_mount;
@@ -2569,7 +2596,7 @@ xfs_ifree_cluster(
* already marked XFS_ISTALE.
*/
for (i = 0; i < igeo->inodes_per_cluster; i++)
- xfs_ifree_mark_inode_stale(bp, free_ip, inum + i);
+ xfs_ifree_mark_inode_stale(pag, free_ip, inum + i);
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
@@ -2592,9 +2619,11 @@ xfs_ifree(
struct xfs_trans *tp,
struct xfs_inode *ip)
{
- int error;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
struct xfs_icluster xic = { 0 };
struct xfs_inode_log_item *iip = ip->i_itemp;
+ int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(VFS_I(ip)->i_nlink == 0);
@@ -2602,16 +2631,18 @@ xfs_ifree(
ASSERT(ip->i_disk_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
ASSERT(ip->i_nblocks == 0);
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+
/*
* Pull the on-disk inode from the AGI unlinked list.
*/
- error = xfs_iunlink_remove(tp, ip);
+ error = xfs_iunlink_remove(tp, pag, ip);
if (error)
- return error;
+ goto out;
- error = xfs_difree(tp, ip->i_ino, &xic);
+ error = xfs_difree(tp, pag, ip->i_ino, &xic);
if (error)
- return error;
+ goto out;
/*
* Free any local-format data sitting around before we reset the
@@ -2626,7 +2657,7 @@ xfs_ifree(
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_diflags = 0;
- ip->i_diflags2 = ip->i_mount->m_ino_geo.new_diflags2;
+ ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
ip->i_forkoff = 0; /* mark the attr fork not in use */
ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
@@ -2645,8 +2676,9 @@ xfs_ifree(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (xic.deleted)
- error = xfs_ifree_cluster(ip, tp, &xic);
-
+ error = xfs_ifree_cluster(tp, pag, ip, &xic);
+out:
+ xfs_perag_put(pag);
return error;
}
@@ -2664,7 +2696,7 @@ xfs_iunpin(
trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
/* Give the log a push to start the unpinning I/O */
- xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
+ xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
}
@@ -2734,7 +2766,7 @@ xfs_remove(
trace_xfs_remove(dp, name);
- if (XFS_FORCED_SHUTDOWN(mp))
+ if (xfs_is_shutdown(mp))
return -EIO;
error = xfs_qm_dqattach(dp);
@@ -2794,6 +2826,19 @@ xfs_remove(
error = xfs_droplink(tp, ip);
if (error)
goto out_trans_cancel;
+
+ /*
+ * Point the unlinked child directory's ".." entry to the root
+ * directory to eliminate back-references to inodes that may
+ * get freed before the child directory is closed. If the fs
+ * gets shrunk, this can lead to dirent inode validation errors.
+ */
+ if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
+ error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
+ tp->t_mountp->m_sb.sb_rootino, 0);
+ if (error)
+ return error;
+ }
} else {
/*
* When removing a non-directory we need to log the parent
@@ -2820,7 +2865,7 @@ xfs_remove(
* remove transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+ if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp);
@@ -2897,7 +2942,7 @@ xfs_finish_rename(
* If this is a synchronous mount, make sure that the rename transaction
* goes to disk before returning to the user.
*/
- if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+ if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
xfs_trans_set_sync(tp);
return xfs_trans_commit(tp);
@@ -3250,8 +3295,13 @@ xfs_rename(
* in future.
*/
if (wip) {
+ struct xfs_perag *pag;
+
ASSERT(VFS_I(wip)->i_nlink == 0);
- error = xfs_iunlink_remove(tp, wip);
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino));
+ error = xfs_iunlink_remove(tp, pag, wip);
+ xfs_perag_put(pag);
if (error)
goto out_trans_cancel;
@@ -3475,7 +3525,7 @@ xfs_iflush(
* happen but we need to still do it to ensure backwards compatibility
* with old kernels that predate logging all inode changes.
*/
- if (!xfs_sb_version_has_v3inode(&mp->m_sb))
+ if (!xfs_has_v3inodes(mp))
ip->i_flushiter++;
/*
@@ -3497,7 +3547,7 @@ xfs_iflush(
xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
/* Wrap, we never let the log put out DI_MAX_FLUSH */
- if (!xfs_sb_version_has_v3inode(&mp->m_sb)) {
+ if (!xfs_has_v3inodes(mp)) {
if (ip->i_flushiter == DI_MAX_FLUSH)
ip->i_flushiter = 0;
}
@@ -3616,7 +3666,7 @@ xfs_iflush_cluster(
* AIL, leaving a dirty/unpinned inode attached to the buffer
* that otherwise looks like it should be flushed.
*/
- if (XFS_FORCED_SHUTDOWN(mp)) {
+ if (xfs_is_shutdown(mp)) {
xfs_iunpin_wait(ip);
xfs_iflush_abort(ip);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -3673,16 +3723,16 @@ int
xfs_log_force_inode(
struct xfs_inode *ip)
{
- xfs_lsn_t lsn = 0;
+ xfs_csn_t seq = 0;
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip))
- lsn = ip->i_itemp->ili_last_lsn;
+ seq = ip->i_itemp->ili_commit_seq;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (!lsn)
+ if (!seq)
return 0;
- return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
+ return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
}
/*
@@ -3754,11 +3804,8 @@ xfs_ilock2_io_mmap(
ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
if (ret)
return ret;
- if (ip1 == ip2)
- xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
- else
- xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
- ip2, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping,
+ VFS_I(ip2)->i_mapping);
return 0;
}
@@ -3768,12 +3815,9 @@ xfs_iunlock2_io_mmap(
struct xfs_inode *ip1,
struct xfs_inode *ip2)
{
- bool same_inode = (ip1 == ip2);
-
- xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
- if (!same_inode)
- xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping,
+ VFS_I(ip2)->i_mapping);
inode_unlock(VFS_I(ip2));
- if (!same_inode)
+ if (ip1 != ip2)
inode_unlock(VFS_I(ip1));
}