aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_bmap_util.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-13 17:49:53 -0600
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-13 17:49:53 -0600
commitdc1cc85133120e49c223f36aa77d398b8abac727 (patch)
tree3c9a823d83144867d13a6ba7b1b0e09fd3ccace8 /fs/xfs/xfs_bmap_util.c
parentMerge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs (diff)
parentMerge branch 'xfs-misc-fixes-3.17-2' into for-next (diff)
downloadlinux-dev-dc1cc85133120e49c223f36aa77d398b8abac727.tar.xz
linux-dev-dc1cc85133120e49c223f36aa77d398b8abac727.zip
Merge tag 'xfs-for-linus-3.17-rc1' of git://oss.sgi.com/xfs/xfs
Pull xfs update from Dave Chinner: "This update contains: - conversion of the XFS core to pass negative error numbers - restructing of core XFS code that is shared with userspace to fs/xfs/libxfs - introduction of sysfs interface for XFS - bulkstat refactoring - demand driven speculative preallocation removal - XFS now always requires 64 bit sectors to be configured - metadata verifier changes to ensure CRCs are calculated during log recovery - various minor code cleanups - miscellaneous bug fixes The diffstat is kind of noisy because of the restructuring of the code to make kernel/userspace code sharing simpler, along with the XFS wide change to use the standard negative error return convention (at last!)" * tag 'xfs-for-linus-3.17-rc1' of git://oss.sgi.com/xfs/xfs: (45 commits) xfs: fix coccinelle warnings xfs: flush both inodes in xfs_swap_extents xfs: fix swapext ilock deadlock xfs: kill xfs_vnode.h xfs: kill VN_MAPPED xfs: kill VN_CACHED xfs: kill VN_DIRTY() xfs: dquot recovery needs verifiers xfs: quotacheck leaves dquot buffers without verifiers xfs: ensure verifiers are attached to recovered buffers xfs: catch buffers written without verifiers attached xfs: avoid false quotacheck after unclean shutdown xfs: fix rounding error of fiemap length parameter xfs: introduce xfs_bulkstat_ag_ichunk xfs: require 64-bit sector_t xfs: fix uflags detection at xfs_fs_rm_xquota xfs: remove XFS_IS_OQUOTA_ON macros xfs: tidy up xfs_set_inode32 xfs: allow inode allocations in post-growfs disk space xfs: mark xfs_qm_quotacheck as static ...
Diffstat (limited to 'fs/xfs/xfs_bmap_util.c')
-rw-r--r--fs/xfs/xfs_bmap_util.c174
1 files changed, 85 insertions, 89 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 64731ef3324d..2f1e30d39a35 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -133,7 +133,7 @@ xfs_bmap_finish(
mp = ntp->t_mountp;
if (!XFS_FORCED_SHUTDOWN(mp))
xfs_force_shutdown(mp,
- (error == EFSCORRUPTED) ?
+ (error == -EFSCORRUPTED) ?
SHUTDOWN_CORRUPT_INCORE :
SHUTDOWN_META_IO_ERROR);
return error;
@@ -365,7 +365,7 @@ xfs_bmap_count_tree(
xfs_trans_brelse(tp, bp);
XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
xfs_trans_brelse(tp, bp);
} else {
@@ -425,14 +425,14 @@ xfs_bmap_count_blocks(
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLDFSBNO);
+ ASSERT(bno != NULLFSBLOCK);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
@@ -524,13 +524,13 @@ xfs_getbmap(
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else if (unlikely(
ip->i_d.di_aformat != 0 &&
ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
prealloced = 0;
@@ -539,7 +539,7 @@ xfs_getbmap(
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if (xfs_get_extsz_hint(ip) ||
ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
@@ -559,26 +559,26 @@ xfs_getbmap(
bmv->bmv_entries = 0;
return 0;
} else if (bmv->bmv_length < 0) {
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
nex = bmv->bmv_count - 1;
if (nex <= 0)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
bmvend = bmv->bmv_offset + bmv->bmv_length;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (whichfork == XFS_DATA_FORK) {
if (!(iflags & BMV_IF_DELALLOC) &&
(ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
- error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
if (error)
goto out_unlock_iolock;
@@ -611,7 +611,7 @@ xfs_getbmap(
/*
* Allocate enough space to handle "subnex" maps at a time.
*/
- error = ENOMEM;
+ error = -ENOMEM;
subnex = 16;
map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
if (!map)
@@ -809,7 +809,7 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
* have speculative prealloc/delalloc blocks to remove.
*/
if (VFS_I(ip)->i_size == 0 &&
- VN_CACHED(VFS_I(ip)) == 0 &&
+ VFS_I(ip)->i_mapping->nrpages == 0 &&
ip->i_delayed_blks == 0)
return false;
@@ -882,7 +882,7 @@ xfs_free_eofblocks(
if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0);
- return EAGAIN;
+ return -EAGAIN;
}
}
@@ -955,14 +955,14 @@ xfs_alloc_file_space(
trace_xfs_alloc_file_space(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
if (len <= 0)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
@@ -1028,7 +1028,7 @@ xfs_alloc_file_space(
/*
* Free the transaction structure.
*/
- ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
break;
}
@@ -1065,7 +1065,7 @@ xfs_alloc_file_space(
allocated_fsb = imapp->br_blockcount;
if (nimaps == 0) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
break;
}
@@ -1126,7 +1126,7 @@ xfs_zero_remaining_bytes(
mp->m_rtdev_targp : mp->m_ddev_targp,
BTOBB(mp->m_sb.sb_blocksize), 0);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
xfs_buf_unlock(bp);
@@ -1158,7 +1158,7 @@ xfs_zero_remaining_bytes(
XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
if (XFS_FORCED_SHUTDOWN(mp)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
break;
}
xfs_buf_iorequest(bp);
@@ -1176,7 +1176,7 @@ xfs_zero_remaining_bytes(
XFS_BUF_WRITE(bp);
if (XFS_FORCED_SHUTDOWN(mp)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
break;
}
xfs_buf_iorequest(bp);
@@ -1234,7 +1234,7 @@ xfs_free_file_space(
rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
ioffset = offset & ~(rounding - 1);
- error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ioffset, -1);
if (error)
goto out;
@@ -1315,7 +1315,7 @@ xfs_free_file_space(
/*
* Free the transaction structure.
*/
- ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
break;
}
@@ -1557,14 +1557,14 @@ xfs_swap_extents_check_format(
/* Should never get a local format */
if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- return EINVAL;
+ return -EINVAL;
/*
* if the target inode has less extents that then temporary inode then
* why did userspace call us?
*/
if (ip->i_d.di_nextents < tip->i_d.di_nextents)
- return EINVAL;
+ return -EINVAL;
/*
* if the target inode is in extent form and the temp inode is in btree
@@ -1573,19 +1573,19 @@ xfs_swap_extents_check_format(
*/
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
- return EINVAL;
+ return -EINVAL;
/* Check temp in extent form to max in target */
if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
/* Check target in extent form to max in temp */
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
/*
* If we are in a btree format, check that the temp root block will fit
@@ -1599,26 +1599,50 @@ xfs_swap_extents_check_format(
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(ip) &&
XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
- return EINVAL;
+ return -EINVAL;
if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
}
/* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(tip) &&
XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
- return EINVAL;
+ return -EINVAL;
if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
}
return 0;
}
int
+xfs_swap_extent_flush(
+ struct xfs_inode *ip)
+{
+ int error;
+
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ if (error)
+ return error;
+ truncate_pagecache_range(VFS_I(ip), 0, -1);
+
+ /* Verify O_DIRECT for ftmp */
+ if (VFS_I(ip)->i_mapping->nrpages)
+ return -EINVAL;
+
+ /*
+ * Don't try to swap extents on mmap()d files because we can't lock
+ * out races against page faults safely.
+ */
+ if (mapping_mapped(VFS_I(ip)->i_mapping))
+ return -EBUSY;
+ return 0;
+}
+
+int
xfs_swap_extents(
xfs_inode_t *ip, /* target inode */
xfs_inode_t *tip, /* tmp inode */
@@ -1633,51 +1657,57 @@ xfs_swap_extents(
int aforkblks = 0;
int taforkblks = 0;
__uint64_t tmp;
+ int lock_flags;
tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
if (!tempifp) {
- error = XFS_ERROR(ENOMEM);
+ error = -ENOMEM;
goto out;
}
/*
- * we have to do two separate lock calls here to keep lockdep
- * happy. If we try to get all the locks in one call, lock will
- * report false positives when we drop the ILOCK and regain them
- * below.
+ * Lock up the inodes against other IO and truncate to begin with.
+ * Then we can ensure the inodes are flushed and have no page cache
+ * safely. Once we have done this we can take the ilocks and do the rest
+ * of the checks.
*/
+ lock_flags = XFS_IOLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
/* Verify that both files have the same format */
if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
/* Verify both files are either real-time or non-realtime */
if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
- error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
+ error = xfs_swap_extent_flush(ip);
+ if (error)
+ goto out_unlock;
+ error = xfs_swap_extent_flush(tip);
if (error)
goto out_unlock;
- truncate_pagecache_range(VFS_I(tip), 0, -1);
- /* Verify O_DIRECT for ftmp */
- if (VN_CACHED(VFS_I(tip)) != 0) {
- error = XFS_ERROR(EINVAL);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
goto out_unlock;
}
+ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
+ lock_flags |= XFS_ILOCK_EXCL;
/* Verify all data are being swapped */
if (sxp->sx_offset != 0 ||
sxp->sx_length != ip->i_d.di_size ||
sxp->sx_length != tip->i_d.di_size) {
- error = XFS_ERROR(EFAULT);
- goto out_unlock;
+ error = -EFAULT;
+ goto out_trans_cancel;
}
trace_xfs_swap_extent_before(ip, 0);
@@ -1689,7 +1719,7 @@ xfs_swap_extents(
xfs_notice(mp,
"%s: inode 0x%llx format is incompatible for exchanging.",
__func__, ip->i_ino);
- goto out_unlock;
+ goto out_trans_cancel;
}
/*
@@ -1703,43 +1733,9 @@ xfs_swap_extents(
(sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
(sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
(sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
- error = XFS_ERROR(EBUSY);
- goto out_unlock;
- }
-
- /* We need to fail if the file is memory mapped. Once we have tossed
- * all existing pages, the page fault will have no option
- * but to go to the filesystem for pages. By making the page fault call
- * vop_read (or write in the case of autogrow) they block on the iolock
- * until we have switched the extents.
- */
- if (VN_MAPPED(VFS_I(ip))) {
- error = XFS_ERROR(EBUSY);
- goto out_unlock;
- }
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_iunlock(tip, XFS_ILOCK_EXCL);
-
- /*
- * There is a race condition here since we gave up the
- * ilock. However, the data fork will not change since
- * we have the iolock (locked for truncation too) so we
- * are safe. We don't really care if non-io related
- * fields change.
- */
- truncate_pagecache_range(VFS_I(ip), 0, -1);
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- xfs_iunlock(tip, XFS_IOLOCK_EXCL);
- xfs_trans_cancel(tp, 0);
- goto out;
+ error = -EBUSY;
+ goto out_trans_cancel;
}
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
-
/*
* Count the number of extended attribute blocks
*/
@@ -1757,8 +1753,8 @@ xfs_swap_extents(
goto out_trans_cancel;
}
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, lock_flags);
+ xfs_trans_ijoin(tp, tip, lock_flags);
/*
* Before we've swapped the forks, lets set the owners of the forks
@@ -1887,8 +1883,8 @@ out:
return error;
out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_iunlock(ip, lock_flags);
+ xfs_iunlock(tip, lock_flags);
goto out;
out_trans_cancel: