diff options
author | 2022-01-15 12:09:44 -0800 | |
---|---|---|
committer | 2022-01-15 12:09:44 -0800 | |
commit | 762f99f4f3cb41a775b5157dd761217beba65873 (patch) | |
tree | 2e93410a577dfff1202ea5b0d109460b97650e58 /fs/xfs/xfs_inode.c | |
parent | Input: zinitix - make sure the IRQ is allocated before it gets enabled (diff) | |
parent | Input: ti_am335x_tsc - fix a typo in a comment (diff) | |
download | wireguard-linux-762f99f4f3cb41a775b5157dd761217beba65873.tar.xz wireguard-linux-762f99f4f3cb41a775b5157dd761217beba65873.zip |
Merge branch 'next' into for-linus
Prepare input updates for 5.17 merge window.
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 472 |
1 files changed, 258 insertions, 214 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e4c2da4566f1..a4f6f034fb81 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -11,7 +11,6 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" @@ -35,6 +34,7 @@ #include "xfs_log.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" +#include "xfs_ag.h" kmem_zone_t *xfs_inode_zone; @@ -45,7 +45,8 @@ kmem_zone_t *xfs_inode_zone; #define XFS_ITRUNC_MAX_EXTENTS 2 STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *); -STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *); +STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_inode *); /* * helper function to extract extent size hint from inode @@ -131,7 +132,7 @@ xfs_ilock_attr_map_shared( /* * In addition to i_rwsem in the VFS inode, the xfs inode contains 2 - * multi-reader locks: i_mmap_lock and the i_lock. This routine allows + * multi-reader locks: invalidate_lock and the i_lock. This routine allows * various combinations of the locks to be obtained. * * The 3 locks should always be ordered so that the IO lock is obtained first, @@ -139,23 +140,23 @@ xfs_ilock_attr_map_shared( * * Basic locking order: * - * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock + * i_rwsem -> invalidate_lock -> page_lock -> i_ilock * * mmap_lock locking order: * * i_rwsem -> page lock -> mmap_lock - * mmap_lock -> i_mmap_lock -> page_lock + * mmap_lock -> invalidate_lock -> page_lock * * The difference in mmap_lock locking order mean that we cannot hold the - * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can - * fault in pages during copy in/out (for buffered IO) or require the mmap_lock - * in get_user_pages() to map the user pages into the kernel address space for - * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because - * page faults already hold the mmap_lock. + * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths + * can fault in pages during copy in/out (for buffered IO) or require the + * mmap_lock in get_user_pages() to map the user pages into the kernel address + * space for direct IO. Similarly the i_rwsem cannot be taken inside a page + * fault because page faults already hold the mmap_lock. * * Hence to serialise fully against both syscall and mmap based IO, we need to - * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both - * taken in places where we need to invalidate the page cache in a race + * take both the i_rwsem and the invalidate_lock. These locks should *only* be + * both taken in places where we need to invalidate the page cache in a race * free manner (e.g. truncate, hole punch and other extent manipulation * functions). */ @@ -187,10 +188,13 @@ xfs_ilock( XFS_IOLOCK_DEP(lock_flags)); } - if (lock_flags & XFS_MMAPLOCK_EXCL) - mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); - else if (lock_flags & XFS_MMAPLOCK_SHARED) - mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); + if (lock_flags & XFS_MMAPLOCK_EXCL) { + down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock, + XFS_MMAPLOCK_DEP(lock_flags)); + } else if (lock_flags & XFS_MMAPLOCK_SHARED) { + down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock, + XFS_MMAPLOCK_DEP(lock_flags)); + } if (lock_flags & XFS_ILOCK_EXCL) mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); @@ -239,10 +243,10 @@ xfs_ilock_nowait( } if (lock_flags & XFS_MMAPLOCK_EXCL) { - if (!mrtryupdate(&ip->i_mmaplock)) + if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock)) goto out_undo_iolock; } else if (lock_flags & XFS_MMAPLOCK_SHARED) { - if (!mrtryaccess(&ip->i_mmaplock)) + if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock)) goto out_undo_iolock; } @@ -257,9 +261,9 @@ xfs_ilock_nowait( out_undo_mmaplock: if (lock_flags & XFS_MMAPLOCK_EXCL) - mrunlock_excl(&ip->i_mmaplock); + up_write(&VFS_I(ip)->i_mapping->invalidate_lock); else if (lock_flags & XFS_MMAPLOCK_SHARED) - mrunlock_shared(&ip->i_mmaplock); + up_read(&VFS_I(ip)->i_mapping->invalidate_lock); out_undo_iolock: if (lock_flags & XFS_IOLOCK_EXCL) up_write(&VFS_I(ip)->i_rwsem); @@ -306,9 +310,9 @@ xfs_iunlock( up_read(&VFS_I(ip)->i_rwsem); if (lock_flags & XFS_MMAPLOCK_EXCL) - mrunlock_excl(&ip->i_mmaplock); + up_write(&VFS_I(ip)->i_mapping->invalidate_lock); else if (lock_flags & XFS_MMAPLOCK_SHARED) - mrunlock_shared(&ip->i_mmaplock); + up_read(&VFS_I(ip)->i_mapping->invalidate_lock); if (lock_flags & XFS_ILOCK_EXCL) mrunlock_excl(&ip->i_lock); @@ -334,7 +338,7 @@ xfs_ilock_demote( if (lock_flags & XFS_ILOCK_EXCL) mrdemote(&ip->i_lock); if (lock_flags & XFS_MMAPLOCK_EXCL) - mrdemote(&ip->i_mmaplock); + downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock); if (lock_flags & XFS_IOLOCK_EXCL) downgrade_write(&VFS_I(ip)->i_rwsem); @@ -342,9 +346,29 @@ xfs_ilock_demote( } #if defined(DEBUG) || defined(XFS_WARN) -int +static inline bool +__xfs_rwsem_islocked( + struct rw_semaphore *rwsem, + bool shared) +{ + if (!debug_locks) + return rwsem_is_locked(rwsem); + + if (!shared) + return lockdep_is_held_type(rwsem, 0); + + /* + * We are checking that the lock is held at least in shared + * mode but don't care that it might be held exclusively + * (i.e. shared | excl). Hence we check if the lock is held + * in any mode rather than an explicit shared mode. + */ + return lockdep_is_held_type(rwsem, -1); +} + +bool xfs_isilocked( - xfs_inode_t *ip, + struct xfs_inode *ip, uint lock_flags) { if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { @@ -354,20 +378,17 @@ xfs_isilocked( } if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { - if (!(lock_flags & XFS_MMAPLOCK_SHARED)) - return !!ip->i_mmaplock.mr_writer; - return rwsem_is_locked(&ip->i_mmaplock.mr_lock); + return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem, + (lock_flags & XFS_IOLOCK_SHARED)); } - if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { - if (!(lock_flags & XFS_IOLOCK_SHARED)) - return !debug_locks || - lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0); - return rwsem_is_locked(&VFS_I(ip)->i_rwsem); + if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) { + return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem, + (lock_flags & XFS_IOLOCK_SHARED)); } ASSERT(0); - return 0; + return false; } #endif @@ -531,12 +552,10 @@ again: } /* - * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - - * the mmaplock or the ilock, but not more than one type at a time. If we lock - * more than one at a time, lockdep will report false positives saying we have - * violated locking orders. The iolock must be double-locked separately since - * we use i_rwsem for that. We now support taking one lock EXCL and the other - * SHARED. + * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and + * mmaplock must be double-locked separately since we use i_rwsem and + * invalidate_lock for that. We now support taking one lock EXCL and the + * other SHARED. */ void xfs_lock_two_inodes( @@ -554,15 +573,8 @@ xfs_lock_two_inodes( ASSERT(hweight32(ip1_mode) == 1); ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); - ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { @@ -662,7 +674,7 @@ xfs_lookup( trace_xfs_lookup(dp, name); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + if (xfs_is_shutdown(dp->i_mount)) return -EIO; error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); @@ -704,7 +716,7 @@ xfs_inode_inherit_flags( di_flags |= XFS_DIFLAG_PROJINHERIT; } else if (S_ISREG(mode)) { if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && - xfs_sb_version_hasrealtime(&ip->i_mount->m_sb)) + xfs_has_realtime(ip->i_mount)) di_flags |= XFS_DIFLAG_REALTIME; if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { di_flags |= XFS_DIFLAG_EXTSIZE; @@ -778,7 +790,7 @@ xfs_inode_inherit_flags2( * Initialise a newly allocated inode and return the in-core inode to the * caller locked exclusively. */ -static int +int xfs_init_new_inode( struct user_namespace *mnt_userns, struct xfs_trans *tp, @@ -825,8 +837,7 @@ xfs_init_new_inode( inode->i_rdev = rdev; ip->i_projid = prid; - if (dir && !(dir->i_mode & S_ISGID) && - (mp->m_flags & XFS_MOUNT_GRPID)) { + if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { inode_fsuid_set(inode, mnt_userns); inode->i_gid = dir->i_gid; inode->i_mode = mode; @@ -856,7 +867,7 @@ xfs_init_new_inode( ip->i_extsize = 0; ip->i_diflags = 0; - if (xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (xfs_has_v3inodes(mp)) { inode_set_iversion(inode, 1); ip->i_cowextsize = 0; ip->i_crtime = tv; @@ -877,7 +888,7 @@ xfs_init_new_inode( xfs_inode_inherit_flags(ip, pip); if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) xfs_inode_inherit_flags2(ip, pip); - /* FALLTHROUGH */ + fallthrough; case S_IFLNK: ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; ip->i_df.if_bytes = 0; @@ -896,7 +907,7 @@ xfs_init_new_inode( * this saves us from needing to run a separate transaction to set the * fork offset in the immediate future. */ - if (init_xattrs && xfs_sb_version_hasattr(&mp->m_sb)) { + if (init_xattrs && xfs_has_attr(mp)) { ip->i_forkoff = xfs_default_attroffset(ip) >> 3; ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0); } @@ -915,57 +926,6 @@ xfs_init_new_inode( } /* - * Allocates a new inode from disk and return a pointer to the incore copy. This - * routine will internally commit the current transaction and allocate a new one - * if we needed to allocate more on-disk free inodes to perform the requested - * operation. - * - * If we are allocating quota inodes, we do not have a parent inode to attach to - * or associate with (i.e. dp == NULL) because they are not linked into the - * directory structure - they are attached directly to the superblock - and so - * have no parent. - */ -int -xfs_dir_ialloc( - struct user_namespace *mnt_userns, - struct xfs_trans **tpp, - struct xfs_inode *dp, - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, - bool init_xattrs, - struct xfs_inode **ipp) -{ - struct xfs_buf *agibp; - xfs_ino_t parent_ino = dp ? dp->i_ino : 0; - xfs_ino_t ino; - int error; - - ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES); - - /* - * Call the space management code to pick the on-disk inode to be - * allocated. - */ - error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp); - if (error) - return error; - - if (!agibp) - return -ENOSPC; - - /* Allocate an inode from the selected AG */ - error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino); - if (error) - return error; - ASSERT(ino != NULLFSINO); - - return xfs_init_new_inode(mnt_userns, *tpp, dp, ino, mode, nlink, rdev, - prid, init_xattrs, ipp); -} - -/* * Decrement the link count on an inode & log the change. If this causes the * link count to go to zero, move the inode to AGI unlinked list so that it can * be freed when the last active reference goes away via xfs_inactive(). @@ -1022,10 +982,11 @@ xfs_create( struct xfs_dquot *pdqp = NULL; struct xfs_trans_res *tres; uint resblks; + xfs_ino_t ino; trace_xfs_create(dp, name); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; prid = xfs_get_initial_prid(dp); @@ -1078,14 +1039,16 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, is_dir ? 2 : 1, rdev, - prid, init_xattrs, &ip); + error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + if (!error) + error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode, + is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip); if (error) goto out_trans_cancel; /* * Now we join the directory inode to the transaction. We do not do it - * earlier because xfs_dir_ialloc might commit the previous transaction + * earlier because xfs_dialloc might commit the previous transaction * (and release all the locks). An error from here on will result in * the transaction cancel unlocking dp so don't do it explicitly in the * error path. @@ -1115,7 +1078,7 @@ xfs_create( * create transaction goes to disk before returning to * the user. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); /* @@ -1175,8 +1138,9 @@ xfs_create_tmpfile( struct xfs_dquot *pdqp = NULL; struct xfs_trans_res *tres; uint resblks; + xfs_ino_t ino; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; prid = xfs_get_initial_prid(dp); @@ -1199,12 +1163,14 @@ xfs_create_tmpfile( if (error) goto out_release_dquots; - error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, 0, 0, prid, - false, &ip); + error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + if (!error) + error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode, + 0, 0, prid, false, &ip); if (error) goto out_trans_cancel; - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); /* @@ -1264,7 +1230,7 @@ xfs_link( ASSERT(!S_ISDIR(VFS_I(sip)->i_mode)); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; error = xfs_qm_dqattach(sip); @@ -1315,7 +1281,11 @@ xfs_link( * Handle initial link state of O_TMPFILE inode */ if (VFS_I(sip)->i_nlink == 0) { - error = xfs_iunlink_remove(tp, sip); + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino)); + error = xfs_iunlink_remove(tp, pag, sip); + xfs_perag_put(pag); if (error) goto error_return; } @@ -1334,7 +1304,7 @@ xfs_link( * link transaction goes to disk before returning to * the user. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); return xfs_trans_commit(tp); @@ -1475,10 +1445,10 @@ xfs_release( return 0; /* If this is a read-only mount, don't do this (would generate I/O) */ - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) return 0; - if (!XFS_FORCED_SHUTDOWN(mp)) { + if (!xfs_is_shutdown(mp)) { int truncated; /* @@ -1561,7 +1531,7 @@ xfs_inactive_truncate( error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + ASSERT(xfs_is_shutdown(mp)); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1632,7 +1602,7 @@ xfs_inactive_ifree( "Failed to remove inode(s) from unlinked list. " "Please free space, unmount and run xfs_repair."); } else { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + ASSERT(xfs_is_shutdown(mp)); } return error; } @@ -1668,7 +1638,7 @@ xfs_inactive_ifree( * might do that, we need to make sure. Otherwise the * inode might be lost for a long time or forever. */ - if (!XFS_FORCED_SHUTDOWN(mp)) { + if (!xfs_is_shutdown(mp)) { xfs_notice(mp, "%s: xfs_ifree returned error %d", __func__, error); xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); @@ -1695,6 +1665,59 @@ xfs_inactive_ifree( } /* + * Returns true if we need to update the on-disk metadata before we can free + * the memory used by this inode. Updates include freeing post-eof + * preallocations; freeing COW staging extents; and marking the inode free in + * the inobt if it is on the unlinked list. + */ +bool +xfs_inode_needs_inactive( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + + /* + * If the inode is already free, then there can be nothing + * to clean up here. + */ + if (VFS_I(ip)->i_mode == 0) + return false; + + /* If this is a read-only mount, don't do this (would generate I/O) */ + if (xfs_is_readonly(mp)) + return false; + + /* If the log isn't running, push inodes straight to reclaim. */ + if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp)) + return false; + + /* Metadata inodes require explicit resource cleanup. */ + if (xfs_is_metadata_inode(ip)) + return false; + + /* Want to clean out the cow blocks if there are any. */ + if (cow_ifp && cow_ifp->if_bytes > 0) + return true; + + /* Unlinked files must be freed. */ + if (VFS_I(ip)->i_nlink == 0) + return true; + + /* + * This file isn't being freed, so check if there are post-eof blocks + * to free. @force is true because we are evicting an inode from the + * cache. Post-eof blocks must be freed, lest we end up with broken + * free space accounting. + * + * Note: don't bother with iolock here since lockdep complains about + * acquiring it in reclaim context. We have the only reference to the + * inode at this point anyways. + */ + return xfs_can_free_eofblocks(ip, true); +} + +/* * xfs_inactive * * This is called when the vnode reference count for the vnode @@ -1716,19 +1739,19 @@ xfs_inactive( */ if (VFS_I(ip)->i_mode == 0) { ASSERT(ip->i_df.if_broot_bytes == 0); - return; + goto out; } mp = ip->i_mount; ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); /* If this is a read-only mount, don't do this (would generate I/O) */ - if (mp->m_flags & XFS_MOUNT_RDONLY) - return; + if (xfs_is_readonly(mp)) + goto out; /* Metadata inodes require explicit resource cleanup. */ if (xfs_is_metadata_inode(ip)) - return; + goto out; /* Try to clean out the cow blocks if there are any. */ if (xfs_inode_has_cow_data(ip)) @@ -1747,7 +1770,7 @@ xfs_inactive( if (xfs_can_free_eofblocks(ip, true)) xfs_free_eofblocks(ip); - return; + goto out; } if (S_ISREG(VFS_I(ip)->i_mode) && @@ -1757,14 +1780,14 @@ xfs_inactive( error = xfs_qm_dqattach(ip); if (error) - return; + goto out; if (S_ISLNK(VFS_I(ip)->i_mode)) error = xfs_inactive_symlink(ip); else if (truncate) error = xfs_inactive_truncate(ip); if (error) - return; + goto out; /* * If there are attributes associated with the file then blow them away @@ -1774,7 +1797,7 @@ xfs_inactive( if (XFS_IFORK_Q(ip)) { error = xfs_attr_inactive(ip); if (error) - return; + goto out; } ASSERT(!ip->i_afp); @@ -1783,12 +1806,12 @@ xfs_inactive( /* * Free the inode. */ - error = xfs_inactive_ifree(ip); - if (error) - return; + xfs_inactive_ifree(ip); +out: /* - * Release the dquots held by inode, if any. + * We're done making metadata updates for this inode, so we can release + * the attached dquots. */ xfs_qm_dqdetach(ip); } @@ -1998,7 +2021,7 @@ xfs_iunlink_destroy( rhashtable_free_and_destroy(&pag->pagi_unlinked_hash, xfs_iunlink_free_item, &freed_anything); - ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount)); + ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount)); } /* @@ -2008,7 +2031,7 @@ xfs_iunlink_destroy( STATIC int xfs_iunlink_update_bucket( struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, struct xfs_buf *agibp, unsigned int bucket_index, xfs_agino_t new_agino) @@ -2017,10 +2040,10 @@ xfs_iunlink_update_bucket( xfs_agino_t old_value; int offset; - ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino)); + ASSERT(xfs_verify_agino_or_null(tp->t_mountp, pag->pag_agno, new_agino)); old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); - trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index, + trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, old_value, new_agino); /* @@ -2044,7 +2067,7 @@ xfs_iunlink_update_bucket( STATIC void xfs_iunlink_update_dinode( struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_agino_t agino, struct xfs_buf *ibp, struct xfs_dinode *dip, @@ -2054,9 +2077,9 @@ xfs_iunlink_update_dinode( struct xfs_mount *mp = tp->t_mountp; int offset; - ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino)); + ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)); - trace_xfs_iunlink_update_dinode(mp, agno, agino, + trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, agino, be32_to_cpu(dip->di_next_unlinked), next_agino); dip->di_next_unlinked = cpu_to_be32(next_agino); @@ -2074,7 +2097,7 @@ STATIC int xfs_iunlink_update_inode( struct xfs_trans *tp, struct xfs_inode *ip, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_agino_t next_agino, xfs_agino_t *old_next_agino) { @@ -2084,7 +2107,7 @@ xfs_iunlink_update_inode( xfs_agino_t old_value; int error; - ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino)); + ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)); error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp); if (error) @@ -2093,7 +2116,7 @@ xfs_iunlink_update_inode( /* Make sure the old pointer isn't garbage. */ old_value = be32_to_cpu(dip->di_next_unlinked); - if (!xfs_verify_agino_or_null(mp, agno, old_value)) { + if (!xfs_verify_agino_or_null(mp, pag->pag_agno, old_value)) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, sizeof(*dip), __this_address); error = -EFSCORRUPTED; @@ -2116,7 +2139,7 @@ xfs_iunlink_update_inode( } /* Ok, update the new pointer. */ - xfs_iunlink_update_dinode(tp, agno, XFS_INO_TO_AGINO(mp, ip->i_ino), + xfs_iunlink_update_dinode(tp, pag, XFS_INO_TO_AGINO(mp, ip->i_ino), ibp, dip, &ip->i_imap, next_agino); return 0; out: @@ -2137,10 +2160,10 @@ xfs_iunlink( struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; struct xfs_agi *agi; struct xfs_buf *agibp; xfs_agino_t next_agino; - xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; @@ -2149,10 +2172,12 @@ xfs_iunlink( ASSERT(VFS_I(ip)->i_mode != 0); trace_xfs_iunlink(ip); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(mp, tp, agno, &agibp); + error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp); if (error) - return error; + goto out; agi = agibp->b_addr; /* @@ -2162,9 +2187,10 @@ xfs_iunlink( */ next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); if (next_agino == agino || - !xfs_verify_agino_or_null(mp, agno, next_agino)) { + !xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)) { xfs_buf_mark_corrupt(agibp); - return -EFSCORRUPTED; + error = -EFSCORRUPTED; + goto out; } if (next_agino != NULLAGINO) { @@ -2174,23 +2200,26 @@ xfs_iunlink( * There is already another inode in the bucket, so point this * inode to the current head of the list. */ - error = xfs_iunlink_update_inode(tp, ip, agno, next_agino, + error = xfs_iunlink_update_inode(tp, ip, pag, next_agino, &old_agino); if (error) - return error; + goto out; ASSERT(old_agino == NULLAGINO); /* * agino has been unlinked, add a backref from the next inode * back to agino. */ - error = xfs_iunlink_add_backref(agibp->b_pag, agino, next_agino); + error = xfs_iunlink_add_backref(pag, agino, next_agino); if (error) - return error; + goto out; } /* Point the head of the list to point to this inode. */ - return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino); + error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); +out: + xfs_perag_put(pag); + return error; } /* Return the imap, dinode pointer, and buffer for an inode. */ @@ -2238,14 +2267,13 @@ xfs_iunlink_map_ino( STATIC int xfs_iunlink_map_prev( struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_agino_t head_agino, xfs_agino_t target_agino, xfs_agino_t *agino, struct xfs_imap *imap, struct xfs_dinode **dipp, - struct xfs_buf **bpp, - struct xfs_perag *pag) + struct xfs_buf **bpp) { struct xfs_mount *mp = tp->t_mountp; xfs_agino_t next_agino; @@ -2257,7 +2285,8 @@ xfs_iunlink_map_prev( /* See if our backref cache can find it faster. */ *agino = xfs_iunlink_lookup_backref(pag, target_agino); if (*agino != NULLAGINO) { - error = xfs_iunlink_map_ino(tp, agno, *agino, imap, dipp, bpp); + error = xfs_iunlink_map_ino(tp, pag->pag_agno, *agino, imap, + dipp, bpp); if (error) return error; @@ -2273,7 +2302,7 @@ xfs_iunlink_map_prev( WARN_ON_ONCE(1); } - trace_xfs_iunlink_map_prev_fallback(mp, agno); + trace_xfs_iunlink_map_prev_fallback(mp, pag->pag_agno); /* Otherwise, walk the entire bucket until we find it. */ next_agino = head_agino; @@ -2284,8 +2313,8 @@ xfs_iunlink_map_prev( xfs_trans_brelse(tp, *bpp); *agino = next_agino; - error = xfs_iunlink_map_ino(tp, agno, next_agino, imap, dipp, - bpp); + error = xfs_iunlink_map_ino(tp, pag->pag_agno, next_agino, imap, + dipp, bpp); if (error) return error; @@ -2294,7 +2323,7 @@ xfs_iunlink_map_prev( * Make sure this pointer is valid and isn't an obvious * infinite loop. */ - if (!xfs_verify_agino(mp, agno, unlinked_agino) || + if (!xfs_verify_agino(mp, pag->pag_agno, unlinked_agino) || next_agino == unlinked_agino) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, @@ -2314,6 +2343,7 @@ xfs_iunlink_map_prev( STATIC int xfs_iunlink_remove( struct xfs_trans *tp, + struct xfs_perag *pag, struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; @@ -2321,7 +2351,6 @@ xfs_iunlink_remove( struct xfs_buf *agibp; struct xfs_buf *last_ibp; struct xfs_dinode *last_dip = NULL; - xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); xfs_agino_t next_agino; xfs_agino_t head_agino; @@ -2331,7 +2360,7 @@ xfs_iunlink_remove( trace_xfs_iunlink_remove(ip); /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(mp, tp, agno, &agibp); + error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp); if (error) return error; agi = agibp->b_addr; @@ -2341,7 +2370,7 @@ xfs_iunlink_remove( * go on. Make sure the head pointer isn't garbage. */ head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); - if (!xfs_verify_agino(mp, agno, head_agino)) { + if (!xfs_verify_agino(mp, pag->pag_agno, head_agino)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi, sizeof(*agi)); return -EFSCORRUPTED; @@ -2352,7 +2381,7 @@ xfs_iunlink_remove( * the old pointer value so that we can update whatever was previous * to us in the list to point to whatever was next in the list. */ - error = xfs_iunlink_update_inode(tp, ip, agno, NULLAGINO, &next_agino); + error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino); if (error) return error; @@ -2364,8 +2393,7 @@ xfs_iunlink_remove( * this inode's backref to point from the next inode. */ if (next_agino != NULLAGINO) { - error = xfs_iunlink_change_backref(agibp->b_pag, next_agino, - NULLAGINO); + error = xfs_iunlink_change_backref(pag, next_agino, NULLAGINO); if (error) return error; } @@ -2375,14 +2403,13 @@ xfs_iunlink_remove( xfs_agino_t prev_agino; /* We need to search the list for the inode being freed. */ - error = xfs_iunlink_map_prev(tp, agno, head_agino, agino, - &prev_agino, &imap, &last_dip, &last_ibp, - agibp->b_pag); + error = xfs_iunlink_map_prev(tp, pag, head_agino, agino, + &prev_agino, &imap, &last_dip, &last_ibp); if (error) return error; /* Point the previous inode on the list to the next inode. */ - xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp, + xfs_iunlink_update_dinode(tp, pag, prev_agino, last_ibp, last_dip, &imap, next_agino); /* @@ -2398,7 +2425,7 @@ xfs_iunlink_remove( } /* Point the head of the list to the next unlinked inode. */ - return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, + return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, next_agino); } @@ -2409,12 +2436,11 @@ xfs_iunlink_remove( */ static void xfs_ifree_mark_inode_stale( - struct xfs_buf *bp, + struct xfs_perag *pag, struct xfs_inode *free_ip, xfs_ino_t inum) { - struct xfs_mount *mp = bp->b_mount; - struct xfs_perag *pag = bp->b_pag; + struct xfs_mount *mp = pag->pag_mount; struct xfs_inode_log_item *iip; struct xfs_inode *ip; @@ -2504,10 +2530,11 @@ out_iflags_unlock: * inodes that are in memory - they all must be marked stale and attached to * the cluster buffer. */ -STATIC int +static int xfs_ifree_cluster( - struct xfs_inode *free_ip, struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_inode *free_ip, struct xfs_icluster *xic) { struct xfs_mount *mp = free_ip->i_mount; @@ -2569,7 +2596,7 @@ xfs_ifree_cluster( * already marked XFS_ISTALE. */ for (i = 0; i < igeo->inodes_per_cluster; i++) - xfs_ifree_mark_inode_stale(bp, free_ip, inum + i); + xfs_ifree_mark_inode_stale(pag, free_ip, inum + i); xfs_trans_stale_inode_buf(tp, bp); xfs_trans_binval(tp, bp); @@ -2592,9 +2619,11 @@ xfs_ifree( struct xfs_trans *tp, struct xfs_inode *ip) { - int error; + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; struct xfs_icluster xic = { 0 }; struct xfs_inode_log_item *iip = ip->i_itemp; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(VFS_I(ip)->i_nlink == 0); @@ -2602,16 +2631,18 @@ xfs_ifree( ASSERT(ip->i_disk_size == 0 || !S_ISREG(VFS_I(ip)->i_mode)); ASSERT(ip->i_nblocks == 0); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + /* * Pull the on-disk inode from the AGI unlinked list. */ - error = xfs_iunlink_remove(tp, ip); + error = xfs_iunlink_remove(tp, pag, ip); if (error) - return error; + goto out; - error = xfs_difree(tp, ip->i_ino, &xic); + error = xfs_difree(tp, pag, ip->i_ino, &xic); if (error) - return error; + goto out; /* * Free any local-format data sitting around before we reset the @@ -2626,7 +2657,7 @@ xfs_ifree( VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_diflags = 0; - ip->i_diflags2 = ip->i_mount->m_ino_geo.new_diflags2; + ip->i_diflags2 = mp->m_ino_geo.new_diflags2; ip->i_forkoff = 0; /* mark the attr fork not in use */ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS)) @@ -2645,8 +2676,9 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (xic.deleted) - error = xfs_ifree_cluster(ip, tp, &xic); - + error = xfs_ifree_cluster(tp, pag, ip, &xic); +out: + xfs_perag_put(pag); return error; } @@ -2664,7 +2696,7 @@ xfs_iunpin( trace_xfs_inode_unpin_nowait(ip, _RET_IP_); /* Give the log a push to start the unpinning I/O */ - xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL); + xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL); } @@ -2734,7 +2766,7 @@ xfs_remove( trace_xfs_remove(dp, name); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; error = xfs_qm_dqattach(dp); @@ -2794,6 +2826,19 @@ xfs_remove( error = xfs_droplink(tp, ip); if (error) goto out_trans_cancel; + + /* + * Point the unlinked child directory's ".." entry to the root + * directory to eliminate back-references to inodes that may + * get freed before the child directory is closed. If the fs + * gets shrunk, this can lead to dirent inode validation errors. + */ + if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) { + error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, + tp->t_mountp->m_sb.sb_rootino, 0); + if (error) + return error; + } } else { /* * When removing a non-directory we need to log the parent @@ -2820,7 +2865,7 @@ xfs_remove( * remove transaction goes to disk before returning to * the user. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); @@ -2897,7 +2942,7 @@ xfs_finish_rename( * If this is a synchronous mount, make sure that the rename transaction * goes to disk before returning to the user. */ - if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp)) xfs_trans_set_sync(tp); return xfs_trans_commit(tp); @@ -3250,8 +3295,13 @@ xfs_rename( * in future. */ if (wip) { + struct xfs_perag *pag; + ASSERT(VFS_I(wip)->i_nlink == 0); - error = xfs_iunlink_remove(tp, wip); + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino)); + error = xfs_iunlink_remove(tp, pag, wip); + xfs_perag_put(pag); if (error) goto out_trans_cancel; @@ -3475,7 +3525,7 @@ xfs_iflush( * happen but we need to still do it to ensure backwards compatibility * with old kernels that predate logging all inode changes. */ - if (!xfs_sb_version_has_v3inode(&mp->m_sb)) + if (!xfs_has_v3inodes(mp)) ip->i_flushiter++; /* @@ -3497,7 +3547,7 @@ xfs_iflush( xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn); /* Wrap, we never let the log put out DI_MAX_FLUSH */ - if (!xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (!xfs_has_v3inodes(mp)) { if (ip->i_flushiter == DI_MAX_FLUSH) ip->i_flushiter = 0; } @@ -3616,7 +3666,7 @@ xfs_iflush_cluster( * AIL, leaving a dirty/unpinned inode attached to the buffer * that otherwise looks like it should be flushed. */ - if (XFS_FORCED_SHUTDOWN(mp)) { + if (xfs_is_shutdown(mp)) { xfs_iunpin_wait(ip); xfs_iflush_abort(ip); xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -3673,16 +3723,16 @@ int xfs_log_force_inode( struct xfs_inode *ip) { - xfs_lsn_t lsn = 0; + xfs_csn_t seq = 0; xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) - lsn = ip->i_itemp->ili_last_lsn; + seq = ip->i_itemp->ili_commit_seq; xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (!lsn) + if (!seq) return 0; - return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL); + return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL); } /* @@ -3754,11 +3804,8 @@ xfs_ilock2_io_mmap( ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2)); if (ret) return ret; - if (ip1 == ip2) - xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); - else - xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL, - ip2, XFS_MMAPLOCK_EXCL); + filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping, + VFS_I(ip2)->i_mapping); return 0; } @@ -3768,12 +3815,9 @@ xfs_iunlock2_io_mmap( struct xfs_inode *ip1, struct xfs_inode *ip2) { - bool same_inode = (ip1 == ip2); - - xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); - if (!same_inode) - xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); + filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping, + VFS_I(ip2)->i_mapping); inode_unlock(VFS_I(ip2)); - if (!same_inode) + if (ip1 != ip2) inode_unlock(VFS_I(ip1)); } |