aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/kmem.h1
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h1
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c35
-rw-r--r--fs/xfs/libxfs/xfs_attr.c141
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c1
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c32
-rw-r--r--fs/xfs/libxfs/xfs_bit.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c43
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_btree.c58
-rw-r--r--fs/xfs/libxfs/xfs_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c1
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c37
-rw-r--r--fs/xfs/libxfs/xfs_format.h13
-rw-r--r--fs/xfs/libxfs/xfs_fs.h38
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c1
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c27
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c16
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h1
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h2
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/libxfs/xfs_shared.h1
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c1
-rw-r--r--fs/xfs/xfs_acl.c23
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c43
-rw-r--r--fs/xfs/xfs_buf.c27
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_dquot.c13
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_file.c38
-rw-r--r--fs/xfs/xfs_inode.c85
-rw-r--r--fs/xfs/xfs_ioctl.c92
-rw-r--r--fs/xfs/xfs_iomap.c21
-rw-r--r--fs/xfs/xfs_iops.c18
-rw-r--r--fs/xfs/xfs_log.c51
-rw-r--r--fs/xfs/xfs_log_priv.h3
-rw-r--r--fs/xfs/xfs_log_recover.c581
-rw-r--r--fs/xfs/xfs_pnfs.c4
-rw-r--r--fs/xfs/xfs_rtalloc.c3
-rw-r--r--fs/xfs/xfs_super.c6
-rw-r--r--fs/xfs/xfs_symlink.c12
-rw-r--r--fs/xfs/xfs_sysfs.c36
-rw-r--r--fs/xfs/xfs_trace.h26
-rw-r--r--fs/xfs/xfs_trans_ail.c1
-rw-r--r--fs/xfs/xfs_trans_dquot.c14
-rw-r--r--fs/xfs/xfs_xattr.c143
54 files changed, 1048 insertions, 677 deletions
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index cc6b768fc068..d1c66e465ca5 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -84,6 +84,7 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN
#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT
#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
+#define KM_ZONE_ACCOUNT SLAB_ACCOUNT
#define kmem_zone kmem_cache
#define kmem_zone_t struct kmem_cache
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3479294c1d58..a708e38b494c 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -535,6 +535,7 @@ xfs_agfl_write_verify(
}
const struct xfs_buf_ops xfs_agfl_buf_ops = {
+ .name = "xfs_agfl",
.verify_read = xfs_agfl_read_verify,
.verify_write = xfs_agfl_write_verify,
};
@@ -1926,7 +1927,7 @@ xfs_alloc_space_available(
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_fix_freelist(
struct xfs_alloc_arg *args, /* allocation argument structure */
int flags) /* XFS_ALLOC_FLAG_... */
@@ -2339,6 +2340,7 @@ xfs_agf_write_verify(
}
const struct xfs_buf_ops xfs_agf_buf_ops = {
+ .name = "xfs_agf",
.verify_read = xfs_agf_read_verify,
.verify_write = xfs_agf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 0ecde4d5cac8..135eb3d24db7 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -235,5 +235,6 @@ xfs_alloc_get_rec(
int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 90de071dd4c2..444626ddbd1b 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -293,14 +293,7 @@ xfs_allocbt_verify(
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
@@ -311,14 +304,7 @@ xfs_allocbt_verify(
return false;
break;
case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
@@ -332,21 +318,7 @@ xfs_allocbt_verify(
return false;
}
- /* numrecs verification */
- if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
-
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
}
static void
@@ -379,6 +351,7 @@ xfs_allocbt_write_verify(
}
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
+ .name = "xfs_allocbt",
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index f949818fa1c7..fa3b948ef9c2 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -207,7 +207,7 @@ xfs_attr_set(
struct xfs_trans_res tres;
xfs_fsblock_t firstblock;
int rsvd = (flags & ATTR_ROOT) != 0;
- int error, err2, committed, local;
+ int error, err2, local;
XFS_STATS_INC(mp, xs_attr_set);
@@ -334,25 +334,15 @@ xfs_attr_set(
*/
xfs_bmap_init(args.flist, args.firstblock);
error = xfs_attr_shortform_to_leaf(&args);
- if (!error) {
- error = xfs_bmap_finish(&args.trans, args.flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args.trans, args.flist, dp);
if (error) {
- ASSERT(committed);
args.trans = NULL;
xfs_bmap_cancel(&flist);
goto out;
}
/*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args.trans, dp, 0);
-
- /*
* Commit the leaf transformation. We'll need another (linked)
* transaction to add the new attribute to the leaf.
*/
@@ -568,7 +558,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
{
xfs_inode_t *dp;
struct xfs_buf *bp;
- int retval, error, committed, forkoff;
+ int retval, error, forkoff;
trace_xfs_attr_leaf_addname(args);
@@ -628,25 +618,15 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_node(args);
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
-
- /*
* Commit the current trans (including the inode) and start
* a new one.
*/
@@ -729,25 +709,14 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans,
- args->flist,
- &committed);
- }
+ args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
-
- /*
- * bmap_finish() may have committed the last trans
- * and started a new one. We need the inode to be
- * in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
}
/*
@@ -775,7 +744,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
{
xfs_inode_t *dp;
struct xfs_buf *bp;
- int error, committed, forkoff;
+ int error, forkoff;
trace_xfs_attr_leaf_removename(args);
@@ -803,23 +772,13 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
-
- /*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
}
return 0;
}
@@ -877,7 +836,7 @@ xfs_attr_node_addname(xfs_da_args_t *args)
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
xfs_mount_t *mp;
- int committed, retval, error;
+ int retval, error;
trace_xfs_attr_node_addname(args);
@@ -938,27 +897,16 @@ restart:
state = NULL;
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_node(args);
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans,
- args->flist,
- &committed);
- }
+ args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
/*
- * bmap_finish() may have committed the last trans
- * and started a new one. We need the inode to be
- * in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
-
- /*
* Commit the node conversion and start the next
* trans in the chain.
*/
@@ -977,23 +925,13 @@ restart:
*/
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_split(state);
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
-
- /*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
} else {
/*
* Addition succeeded, update Btree hashvals.
@@ -1086,25 +1024,14 @@ restart:
if (retval && (state->path.active > 1)) {
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_join(state);
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans,
- args->flist,
- &committed);
- }
+ args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
-
- /*
- * bmap_finish() may have committed the last trans
- * and started a new one. We need the inode to be
- * in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
}
/*
@@ -1146,7 +1073,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
struct xfs_buf *bp;
- int retval, error, committed, forkoff;
+ int retval, error, forkoff;
trace_xfs_attr_node_removename(args);
@@ -1220,24 +1147,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
if (retval && (state->path.active > 1)) {
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_join(state);
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
-
- /*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
-
/*
* Commit the Btree join operation and start a new trans.
*/
@@ -1265,25 +1181,14 @@ xfs_attr_node_removename(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans,
- args->flist,
- &committed);
- }
+ args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
-
- /*
- * bmap_finish() may have committed the last trans
- * and started a new one. We need the inode to be
- * in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_trans_brelse(args->trans, bp);
}
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index aa187f7ba2dd..01a5ecfedfcf 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -328,6 +328,7 @@ xfs_attr3_leaf_read_verify(
}
const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
+ .name = "xfs_attr3_leaf",
.verify_read = xfs_attr3_leaf_read_verify,
.verify_write = xfs_attr3_leaf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 5ab95ffa4ae9..a572532a55cd 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -201,6 +201,7 @@ xfs_attr3_rmt_write_verify(
}
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
+ .name = "xfs_attr3_rmt",
.verify_read = xfs_attr3_rmt_read_verify,
.verify_write = xfs_attr3_rmt_write_verify,
};
@@ -447,8 +448,6 @@ xfs_attr_rmtval_set(
* Roll through the "value", allocating blocks on disk as required.
*/
while (blkcnt > 0) {
- int committed;
-
/*
* Allocate a single extent, up to the size of the value.
*
@@ -466,24 +465,14 @@ xfs_attr_rmtval_set(
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
args->total, &map, &nmap, args->flist);
- if (!error) {
- error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ if (!error)
+ error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
- /*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, dp, 0);
-
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -614,31 +603,20 @@ xfs_attr_rmtval_remove(
blkcnt = args->rmtblkcnt;
done = 0;
while (!done) {
- int committed;
-
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
XFS_BMAPI_ATTRFORK, 1, args->firstblock,
args->flist, &done);
- if (!error) {
+ if (!error)
error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
- }
+ args->dp);
if (error) {
- ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
- * bmap_finish() may have committed the last trans and started
- * a new one. We need the inode to be in all transactions.
- */
- if (committed)
- xfs_trans_ijoin(args->trans, args->dp, 0);
-
- /*
* Close out trans and start the next one in the chain.
*/
error = xfs_trans_roll(&args->trans, args->dp);
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 0e8885a59646..0a94cce5ea35 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -32,13 +32,13 @@ int
xfs_bitmap_empty(uint *map, uint size)
{
uint i;
- uint ret = 0;
for (i = 0; i < size; i++) {
- ret |= map[i];
+ if (map[i] != 0)
+ return 0;
}
- return (ret == 0);
+ return 1;
}
/*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 119c2422aac7..ef00156f4f96 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -325,9 +325,11 @@ xfs_check_block(
/*
* Check that the extents for the inode ip are in the right order in all
- * btree leaves.
+ * btree leaves. THis becomes prohibitively expensive for large extent count
+ * files, so don't bother with inodes that have more than 10,000 extents in
+ * them. The btree record ordering checks will still be done, so for such large
+ * bmapbt constructs that is going to catch most corruptions.
*/
-
STATIC void
xfs_bmap_check_leaf_extents(
xfs_btree_cur_t *cur, /* btree cursor or null */
@@ -352,6 +354,10 @@ xfs_bmap_check_leaf_extents(
return;
}
+ /* skip large extent count inodes */
+ if (ip->i_d.di_nextents > 10000)
+ return;
+
bno = NULLFSBLOCK;
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -1111,7 +1117,6 @@ xfs_bmap_add_attrfork(
xfs_trans_t *tp; /* transaction pointer */
int blks; /* space reservation */
int version = 1; /* superblock attr version */
- int committed; /* xaction was committed */
int logflags; /* logging flags */
int error; /* error return value */
@@ -1214,7 +1219,7 @@ xfs_bmap_add_attrfork(
xfs_log_sb(tp);
}
- error = xfs_bmap_finish(&tp, &flist, &committed);
+ error = xfs_bmap_finish(&tp, &flist, NULL);
if (error)
goto bmap_cancel;
error = xfs_trans_commit(tp);
@@ -1723,10 +1728,11 @@ xfs_bmap_add_extent_delay_real(
xfs_filblks_t temp=0; /* value for da_new calculations */
xfs_filblks_t temp2=0;/* value for da_new calculations */
int tmp_rval; /* partial logging flags */
+ int whichfork = XFS_DATA_FORK;
struct xfs_mount *mp;
- mp = bma->tp ? bma->tp->t_mountp : NULL;
- ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
+ mp = bma->ip->i_mount;
+ ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
@@ -1785,7 +1791,7 @@ xfs_bmap_add_extent_delay_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+ if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
@@ -2016,10 +2022,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist,
- &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
+ &bma->cur, 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2100,10 +2106,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur, 1,
- &tmp_rval, XFS_DATA_FORK);
+ &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2169,10 +2175,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
- 1, &tmp_rval, XFS_DATA_FORK);
+ 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2215,13 +2221,13 @@ xfs_bmap_add_extent_delay_real(
}
/* convert to a btree if necessary */
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(bma->cur == NULL);
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
- da_old > 0, &tmp_logflags, XFS_DATA_FORK);
+ da_old > 0, &tmp_logflags, whichfork);
bma->logflags |= tmp_logflags;
if (error)
goto done;
@@ -2242,7 +2248,7 @@ xfs_bmap_add_extent_delay_real(
if (bma->cur)
bma->cur->bc_private.b.allocated = 0;
- xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
+ xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
bma->logflags |= rval;
return error;
@@ -2939,7 +2945,7 @@ xfs_bmap_add_extent_hole_real(
int state; /* state bits, accessed thru macros */
struct xfs_mount *mp;
- mp = bma->tp ? bma->tp->t_mountp : NULL;
+ mp = bma->ip->i_mount;
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
@@ -5950,7 +5956,6 @@ xfs_bmap_split_extent(
struct xfs_trans *tp;
struct xfs_bmap_free free_list;
xfs_fsblock_t firstfsb;
- int committed;
int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
@@ -5971,7 +5976,7 @@ xfs_bmap_split_extent(
if (error)
goto out;
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index a160f8a5a3fc..423a34e832bd 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -195,7 +195,7 @@ void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
struct xfs_bmap_free *flist, struct xfs_mount *mp);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
- int *committed);
+ struct xfs_inode *ip);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6b0cf6546a82..1637c37bfbaa 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -720,6 +720,7 @@ xfs_bmbt_write_verify(
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
+ .name = "xfs_bmbt",
.verify_read = xfs_bmbt_read_verify,
.verify_write = xfs_bmbt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index af1bbee5586e..a0eb18ce3ad3 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4080,3 +4080,61 @@ xfs_btree_change_owner(
return 0;
}
+
+/**
+ * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
+ * btree block
+ *
+ * @bp: buffer containing the btree block
+ * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
+ * @pag_max_level: pointer to the per-ag max level field
+ */
+bool
+xfs_btree_sblock_v5hdr_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ return true;
+}
+
+/**
+ * xfs_btree_sblock_verify() -- verify a short-format btree block
+ *
+ * @bp: buffer containing the btree block
+ * @max_recs: maximum records allowed in this btree node
+ */
+bool
+xfs_btree_sblock_verify(
+ struct xfs_buf *bp,
+ unsigned int max_recs)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+ return false;
+
+ /* sibling pointer verification */
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
+}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 992dec0638f3..2e874be70209 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -472,4 +472,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_BTREE_TRACE_ARGR(c, r)
#define XFS_BTREE_TRACE_CURSOR(c, t)
+bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
+bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e89a0f8f827c..097bf7717d80 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -245,6 +245,7 @@ xfs_da3_node_read_verify(
}
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
+ .name = "xfs_da3_node",
.verify_read = xfs_da3_node_read_verify,
.verify_write = xfs_da3_node_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 9c10e2b8cfcb..aa17cb788946 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -123,6 +123,7 @@ xfs_dir3_block_write_verify(
}
const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
+ .name = "xfs_dir3_block",
.verify_read = xfs_dir3_block_read_verify,
.verify_write = xfs_dir3_block_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index af71a84f343c..725fc7841fde 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -305,11 +305,13 @@ xfs_dir3_data_write_verify(
}
const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
+ .name = "xfs_dir3_data",
.verify_read = xfs_dir3_data_read_verify,
.verify_write = xfs_dir3_data_write_verify,
};
static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
+ .name = "xfs_dir3_data_reada",
.verify_read = xfs_dir3_data_reada_verify,
.verify_write = xfs_dir3_data_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 3923e1f94697..b887fb2a2bcf 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -245,11 +245,13 @@ xfs_dir3_leafn_write_verify(
}
const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
+ .name = "xfs_dir3_leaf1",
.verify_read = xfs_dir3_leaf1_read_verify,
.verify_write = xfs_dir3_leaf1_write_verify,
};
const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
+ .name = "xfs_dir3_leafn",
.verify_read = xfs_dir3_leafn_read_verify,
.verify_write = xfs_dir3_leafn_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 70b0cb2fd556..63ee03db796c 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -150,6 +150,7 @@ xfs_dir3_free_write_verify(
}
const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
+ .name = "xfs_dir3_free",
.verify_read = xfs_dir3_free_read_verify,
.verify_write = xfs_dir3_free_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 5331b7f0460c..3cc3cf767474 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -54,7 +54,7 @@ xfs_dqcheck(
xfs_dqid_t id,
uint type, /* used only when IO_dorepair is true */
uint flags,
- char *str)
+ const char *str)
{
xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
int errs = 0;
@@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc(
STATIC bool
xfs_dquot_buf_verify(
struct xfs_mount *mp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ int warn)
{
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
xfs_dqid_t id = 0;
@@ -240,8 +241,7 @@ xfs_dquot_buf_verify(
if (i == 0)
id = be32_to_cpu(ddq->d_id);
- error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
- "xfs_dquot_buf_verify");
+ error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
if (error)
return false;
}
@@ -256,7 +256,7 @@ xfs_dquot_buf_read_verify(
if (!xfs_dquot_buf_verify_crc(mp, bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dquot_buf_verify(mp, bp))
+ else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
@@ -264,6 +264,25 @@ xfs_dquot_buf_read_verify(
}
/*
+ * readahead errors are silent and simply leave the buffer as !done so a real
+ * read will then be run with the xfs_dquot_buf_ops verifier. See
+ * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
+ * reporting the failure.
+ */
+static void
+xfs_dquot_buf_readahead_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (!xfs_dquot_buf_verify_crc(mp, bp) ||
+ !xfs_dquot_buf_verify(mp, bp, 0)) {
+ xfs_buf_ioerror(bp, -EIO);
+ bp->b_flags &= ~XBF_DONE;
+ }
+}
+
+/*
* we don't calculate the CRC here as that is done when the dquot is flushed to
* the buffer after the update is done. This ensures that the dquot in the
* buffer always has an up-to-date CRC value.
@@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if (!xfs_dquot_buf_verify(mp, bp)) {
+ if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
@@ -282,7 +301,13 @@ xfs_dquot_buf_write_verify(
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
+ .name = "xfs_dquot",
.verify_read = xfs_dquot_buf_read_verify,
.verify_write = xfs_dquot_buf_write_verify,
};
+const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
+ .name = "xfs_dquot_ra",
+ .verify_read = xfs_dquot_buf_readahead_verify,
+ .verify_write = xfs_dquot_buf_write_verify,
+};
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 8774498ce0ff..dc97eb21af07 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -786,7 +786,7 @@ typedef struct xfs_agfl {
__be64 agfl_lsn;
__be32 agfl_crc;
__be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
-} xfs_agfl_t;
+} __attribute__((packed)) xfs_agfl_t;
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
@@ -984,8 +984,6 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
/*
* Values for di_flags
- * There should be a one-to-one correspondence between these flags and the
- * XFS_XFLAG_s.
*/
#define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */
#define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */
@@ -1026,6 +1024,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
/*
+ * Values for di_flags2 These start by being exposed to userspace in the upper
+ * 16 bits of the XFS_XFLAG_s range.
+ */
+#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
+#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
+
+#define XFS_DIFLAG2_ANY (XFS_DIFLAG2_DAX)
+
+/*
* Inode number format:
* low inopblog bits - offset in block
* next agblklog bits - block number in ag
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b2b73a998d42..fffe3d01bd9f 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -36,40 +36,6 @@ struct dioattr {
#endif
/*
- * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR.
- */
-#ifndef HAVE_FSXATTR
-struct fsxattr {
- __u32 fsx_xflags; /* xflags field value (get/set) */
- __u32 fsx_extsize; /* extsize field value (get/set)*/
- __u32 fsx_nextents; /* nextents field value (get) */
- __u32 fsx_projid; /* project identifier (get/set) */
- unsigned char fsx_pad[12];
-};
-#endif
-
-/*
- * Flags for the bs_xflags/fsx_xflags field
- * There should be a one-to-one correspondence between these flags and the
- * XFS_DIFLAG_s.
- */
-#define XFS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
-#define XFS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
-#define XFS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
-#define XFS_XFLAG_APPEND 0x00000010 /* all writes append */
-#define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
-#define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */
-#define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
-#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
-#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
-#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
-#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
-#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
-#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
-#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
-#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
-
-/*
* Structure for XFS_IOC_GETBMAP.
* On input, fill in bmv_offset and bmv_length of the first structure
* to indicate the area of interest in the file, and bmv_entries with
@@ -514,8 +480,8 @@ typedef struct xfs_swapext
#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr)
-#define XFS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
-#define XFS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr)
+#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 70c1db99f6a7..66d702e6b9ff 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2572,6 +2572,7 @@ xfs_agi_write_verify(
}
const struct xfs_buf_ops xfs_agi_buf_ops = {
+ .name = "xfs_agi",
.verify_read = xfs_agi_read_verify,
.verify_write = xfs_agi_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index f39b285beb19..c679f3c05b63 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -221,7 +221,6 @@ xfs_inobt_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_perag *pag = bp->b_pag;
unsigned int level;
/*
@@ -237,14 +236,7 @@ xfs_inobt_verify(
switch (block->bb_magic) {
case cpu_to_be32(XFS_IBT_CRC_MAGIC):
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_IBT_MAGIC):
@@ -254,24 +246,12 @@ xfs_inobt_verify(
return 0;
}
- /* numrecs and level verification */
+ /* level verification */
level = be16_to_cpu(block->bb_level);
if (level >= mp->m_in_maxlevels)
return false;
- if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
}
static void
@@ -304,6 +284,7 @@ xfs_inobt_write_verify(
}
const struct xfs_buf_ops xfs_inobt_buf_ops = {
+ .name = "xfs_inobt",
.verify_read = xfs_inobt_read_verify,
.verify_write = xfs_inobt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 268c00f4f83a..1aabfda669b0 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -62,11 +62,14 @@ xfs_inobp_check(
* has not had the inode cores stamped into it. Hence for readahead, the buffer
* may be potentially invalid.
*
- * If the readahead buffer is invalid, we don't want to mark it with an error,
- * but we do want to clear the DONE status of the buffer so that a followup read
- * will re-read it from disk. This will ensure that we don't get an unnecessary
- * warnings during log recovery and we don't get unnecssary panics on debug
- * kernels.
+ * If the readahead buffer is invalid, we need to mark it with an error and
+ * clear the DONE status of the buffer so that a followup read will re-read it
+ * from disk. We don't report the error otherwise to avoid warnings during log
+ * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
+ * because all we want to do is say readahead failed; there is no-one to report
+ * the error to, so this will distinguish it from a non-ra verifier failure.
+ * Changes to this readahead error behavour also need to be reflected in
+ * xfs_dquot_buf_readahead_verify().
*/
static void
xfs_inode_buf_verify(
@@ -93,6 +96,7 @@ xfs_inode_buf_verify(
XFS_RANDOM_ITOBP_INOTOBP))) {
if (readahead) {
bp->b_flags &= ~XBF_DONE;
+ xfs_buf_ioerror(bp, -EIO);
return;
}
@@ -132,11 +136,13 @@ xfs_inode_buf_write_verify(
}
const struct xfs_buf_ops xfs_inode_buf_ops = {
+ .name = "xfs_inode",
.verify_read = xfs_inode_buf_read_verify,
.verify_write = xfs_inode_buf_write_verify,
};
const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
+ .name = "xxfs_inode_ra",
.verify_read = xfs_inode_buf_readahead_verify,
.verify_write = xfs_inode_buf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 1c55ccbb379d..8e385f91d660 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -60,6 +60,7 @@ typedef struct xlog_recover {
*/
#define XLOG_BC_TABLE_SIZE 64
+#define XLOG_RECOVER_CRCPASS 0
#define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 1b0a08379759..f51078f1e92a 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t;
#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
- xfs_dqid_t id, uint type, uint flags, char *str);
+ xfs_dqid_t id, uint type, uint flags, const char *str);
extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a0b071d881a0..8a53eaa349f4 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -679,11 +679,13 @@ xfs_sb_write_verify(
}
const struct xfs_buf_ops xfs_sb_buf_ops = {
+ .name = "xfs_sb",
.verify_read = xfs_sb_read_verify,
.verify_write = xfs_sb_write_verify,
};
const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
+ .name = "xfs_sb_quiet",
.verify_read = xfs_sb_quiet_read_verify,
.verify_write = xfs_sb_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 5be529707903..15c3ceb845b9 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
+extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index cb6fd20a4d3d..2e2c6716b623 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -168,6 +168,7 @@ xfs_symlink_write_verify(
}
const struct xfs_buf_ops xfs_symlink_buf_ops = {
+ .name = "xfs_symlink",
.verify_read = xfs_symlink_read_verify,
.verify_write = xfs_symlink_write_verify,
};
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 6bb470fbb8e8..2d5df1f23bbc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -252,29 +252,6 @@ xfs_set_mode(struct inode *inode, umode_t mode)
return error;
}
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
- int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
-
- return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
- ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
- return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
- if (!S_ISDIR(inode->i_mode))
- return 0;
- return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
int
xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 52f8255d6bdf..286fa89217f5 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -24,16 +24,12 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int posix_acl_access_exists(struct inode *inode);
-extern int posix_acl_default_exists(struct inode *inode);
#else
static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
{
return NULL;
}
# define xfs_set_acl NULL
-# define posix_acl_access_exists(inode) 0
-# define posix_acl_default_exists(inode) 0
#endif /* CONFIG_XFS_POSIX_ACL */
extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29e7e5dd5178..379c089fb051 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1917,6 +1917,7 @@ xfs_vm_readpage(
struct file *unused,
struct page *page)
{
+ trace_xfs_vm_readpage(page->mapping->host, 1);
return mpage_readpage(page, xfs_get_blocks);
}
@@ -1927,6 +1928,7 @@ xfs_vm_readpages(
struct list_head *pages,
unsigned nr_pages)
{
+ trace_xfs_vm_readpages(mapping->host, nr_pages);
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index dbae6490a79a..45ec9e40150c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -91,32 +91,32 @@ xfs_zero_extent(
* last due to locking considerations. We never free any extents in
* the first transaction.
*
- * Return 1 if the given transaction was committed and a new one
- * started, and 0 otherwise in the committed parameter.
+ * If an inode *ip is provided, rejoin it to the transaction if
+ * the transaction was committed.
*/
int /* error */
xfs_bmap_finish(
struct xfs_trans **tp, /* transaction pointer addr */
struct xfs_bmap_free *flist, /* i/o: list extents to free */
- int *committed)/* xact committed or not */
+ struct xfs_inode *ip)
{
struct xfs_efd_log_item *efd; /* extent free data */
struct xfs_efi_log_item *efi; /* extent free intention */
int error; /* error return value */
+ int committed;/* xact committed or not */
struct xfs_bmap_free_item *free; /* free extent item */
struct xfs_bmap_free_item *next; /* next item on free list */
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
- if (flist->xbf_count == 0) {
- *committed = 0;
+ if (flist->xbf_count == 0)
return 0;
- }
+
efi = xfs_trans_get_efi(*tp, flist->xbf_count);
for (free = flist->xbf_first; free; free = free->xbfi_next)
xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
free->xbfi_blockcount);
- error = __xfs_trans_roll(tp, NULL, committed);
+ error = __xfs_trans_roll(tp, ip, &committed);
if (error) {
/*
* If the transaction was committed, drop the EFD reference
@@ -128,16 +128,13 @@ xfs_bmap_finish(
* transaction so we should return committed=1 even though we're
* returning an error.
*/
- if (*committed) {
+ if (committed) {
xfs_efi_release(efi);
xfs_force_shutdown((*tp)->t_mountp,
(error == -EFSCORRUPTED) ?
SHUTDOWN_CORRUPT_INCORE :
SHUTDOWN_META_IO_ERROR);
- } else {
- *committed = 1;
}
-
return error;
}
@@ -969,7 +966,6 @@ xfs_alloc_file_space(
xfs_bmbt_irec_t imaps[1], *imapp;
xfs_bmap_free_t free_list;
uint qblocks, resblks, resrtextents;
- int committed;
int error;
trace_xfs_alloc_file_space(ip);
@@ -1064,23 +1060,20 @@ xfs_alloc_file_space(
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
allocatesize_fsb, alloc_type, &firstfsb,
resblks, imapp, &nimaps, &free_list);
- if (error) {
+ if (error)
goto error0;
- }
/*
* Complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (error) {
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
+ if (error)
goto error0;
- }
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error) {
+ if (error)
break;
- }
allocated_fsb = imapp->br_blockcount;
@@ -1206,7 +1199,6 @@ xfs_free_file_space(
xfs_off_t offset,
xfs_off_t len)
{
- int committed;
int done;
xfs_fileoff_t endoffset_fsb;
int error;
@@ -1346,17 +1338,15 @@ xfs_free_file_space(
error = xfs_bunmapi(tp, ip, startoffset_fsb,
endoffset_fsb - startoffset_fsb,
0, 2, &firstfsb, &free_list, &done);
- if (error) {
+ if (error)
goto error0;
- }
/*
* complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (error) {
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
+ if (error)
goto error0;
- }
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1434,7 +1424,6 @@ xfs_shift_file_space(
int error;
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
- int committed;
xfs_fileoff_t stop_fsb;
xfs_fileoff_t next_fsb;
xfs_fileoff_t shift_fsb;
@@ -1526,7 +1515,7 @@ xfs_shift_file_space(
if (error)
goto out_bmap_cancel;
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 3243cdf97f33..435c7de42e5f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -604,6 +604,13 @@ found:
}
}
+ /*
+ * Clear b_error if this is a lookup from a caller that doesn't expect
+ * valid data to be found in the buffer.
+ */
+ if (!(flags & XBF_READ))
+ xfs_buf_ioerror(bp, 0);
+
XFS_STATS_INC(target->bt_mount, xb_get);
trace_xfs_buf_get(bp, flags, _RET_IP_);
return bp;
@@ -1045,7 +1052,7 @@ xfs_buf_ioend_work(
xfs_buf_ioend(bp);
}
-void
+static void
xfs_buf_ioend_async(
struct xfs_buf *bp)
{
@@ -1520,6 +1527,16 @@ xfs_wait_buftarg(
LIST_HEAD(dispose);
int loop = 0;
+ /*
+ * We need to flush the buffer workqueue to ensure that all IO
+ * completion processing is 100% done. Just waiting on buffer locks is
+ * not sufficient for async IO as the reference count held over IO is
+ * not released until after the buffer lock is dropped. Hence we need to
+ * ensure here that all reference counts have been dropped before we
+ * start walking the LRU list.
+ */
+ drain_workqueue(btp->bt_mount->m_buf_workqueue);
+
/* loop until there is nothing left on the lru list. */
while (list_lru_count(&btp->bt_lru)) {
list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
@@ -1632,13 +1649,9 @@ xfs_setsize_buftarg(
btp->bt_meta_sectormask = sectorsize - 1;
if (set_blocksize(btp->bt_bdev, sectorsize)) {
- char name[BDEVNAME_SIZE];
-
- bdevname(btp->bt_bdev, name);
-
xfs_warn(btp->bt_mount,
- "Cannot set_blocksize to %u on device %s",
- sectorsize, name);
+ "Cannot set_blocksize to %u on device %pg",
+ sectorsize, btp->bt_bdev);
return -EINVAL;
}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c79b717d9b88..c75721acd867 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -132,6 +132,7 @@ struct xfs_buf_map {
struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
struct xfs_buf_ops {
+ char *name;
void (*verify_read)(struct xfs_buf *);
void (*verify_write)(struct xfs_buf *);
};
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7ac6c5c586cb..9c44d38dcd1f 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -306,7 +306,7 @@ xfs_qm_dqalloc(
xfs_fsblock_t firstblock;
xfs_bmap_free_t flist;
xfs_bmbt_irec_t map;
- int nmaps, error, committed;
+ int nmaps, error;
xfs_buf_t *bp;
xfs_trans_t *tp = *tpp;
@@ -379,11 +379,12 @@ xfs_qm_dqalloc(
xfs_trans_bhold(tp, bp);
- if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
+ error = xfs_bmap_finish(tpp, &flist, NULL);
+ if (error)
goto error1;
- }
- if (committed) {
+ /* Transaction was committed? */
+ if (*tpp != tp) {
tp = *tpp;
xfs_trans_bjoin(tp, bp);
} else {
@@ -393,9 +394,9 @@ xfs_qm_dqalloc(
*O_bpp = bp;
return 0;
- error1:
+error1:
xfs_bmap_cancel(&flist);
- error0:
+error0:
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
return error;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 74d0e5966ebc..88693a98fac5 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -164,9 +164,9 @@ xfs_verifier_error(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
+ xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
- __return_address, bp->b_bn);
+ __return_address, bp->b_ops->name, bp->b_bn);
xfs_alert(mp, "Unmount and run xfs_repair");
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f5392ab2def1..52883ac3cf84 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -55,7 +55,7 @@ xfs_rw_ilock(
int type)
{
if (type & XFS_IOLOCK_EXCL)
- mutex_lock(&VFS_I(ip)->i_mutex);
+ inode_lock(VFS_I(ip));
xfs_ilock(ip, type);
}
@@ -66,7 +66,7 @@ xfs_rw_iunlock(
{
xfs_iunlock(ip, type);
if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
+ inode_unlock(VFS_I(ip));
}
static inline void
@@ -76,7 +76,7 @@ xfs_rw_ilock_demote(
{
xfs_ilock_demote(ip, type);
if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
+ inode_unlock(VFS_I(ip));
}
/*
@@ -402,19 +402,26 @@ xfs_file_splice_read(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
- /* for dax, we need to avoid the page cache */
- if (IS_DAX(VFS_I(ip)))
- ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
- else
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
+ /*
+ * DAX inodes cannot ues the page cache for splice, so we have to push
+ * them through the VFS IO path. This means it goes through
+ * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
+ * cannot lock the splice operation at this level for DAX inodes.
+ */
+ if (IS_DAX(VFS_I(ip))) {
+ ret = default_file_splice_read(infilp, ppos, pipe, count,
+ flags);
+ goto out;
+ }
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+out:
+ if (ret > 0)
+ XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
return ret;
}
@@ -1603,9 +1610,8 @@ xfs_filemap_pmd_fault(
/*
* pfn_mkwrite was originally inteneded to ensure we capture time stamp
* updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
*/
static int
xfs_filemap_pfn_mkwrite(
@@ -1628,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else if (IS_DAX(inode))
+ ret = dax_pfn_mkwrite(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8ee393996b7d..ceba1a83cacc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -610,60 +610,69 @@ __xfs_iflock(
STATIC uint
_xfs_dic2xflags(
- __uint16_t di_flags)
+ __uint16_t di_flags,
+ uint64_t di_flags2,
+ bool has_attr)
{
uint flags = 0;
if (di_flags & XFS_DIFLAG_ANY) {
if (di_flags & XFS_DIFLAG_REALTIME)
- flags |= XFS_XFLAG_REALTIME;
+ flags |= FS_XFLAG_REALTIME;
if (di_flags & XFS_DIFLAG_PREALLOC)
- flags |= XFS_XFLAG_PREALLOC;
+ flags |= FS_XFLAG_PREALLOC;
if (di_flags & XFS_DIFLAG_IMMUTABLE)
- flags |= XFS_XFLAG_IMMUTABLE;
+ flags |= FS_XFLAG_IMMUTABLE;
if (di_flags & XFS_DIFLAG_APPEND)
- flags |= XFS_XFLAG_APPEND;
+ flags |= FS_XFLAG_APPEND;
if (di_flags & XFS_DIFLAG_SYNC)
- flags |= XFS_XFLAG_SYNC;
+ flags |= FS_XFLAG_SYNC;
if (di_flags & XFS_DIFLAG_NOATIME)
- flags |= XFS_XFLAG_NOATIME;
+ flags |= FS_XFLAG_NOATIME;
if (di_flags & XFS_DIFLAG_NODUMP)
- flags |= XFS_XFLAG_NODUMP;
+ flags |= FS_XFLAG_NODUMP;
if (di_flags & XFS_DIFLAG_RTINHERIT)
- flags |= XFS_XFLAG_RTINHERIT;
+ flags |= FS_XFLAG_RTINHERIT;
if (di_flags & XFS_DIFLAG_PROJINHERIT)
- flags |= XFS_XFLAG_PROJINHERIT;
+ flags |= FS_XFLAG_PROJINHERIT;
if (di_flags & XFS_DIFLAG_NOSYMLINKS)
- flags |= XFS_XFLAG_NOSYMLINKS;
+ flags |= FS_XFLAG_NOSYMLINKS;
if (di_flags & XFS_DIFLAG_EXTSIZE)
- flags |= XFS_XFLAG_EXTSIZE;
+ flags |= FS_XFLAG_EXTSIZE;
if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
- flags |= XFS_XFLAG_EXTSZINHERIT;
+ flags |= FS_XFLAG_EXTSZINHERIT;
if (di_flags & XFS_DIFLAG_NODEFRAG)
- flags |= XFS_XFLAG_NODEFRAG;
+ flags |= FS_XFLAG_NODEFRAG;
if (di_flags & XFS_DIFLAG_FILESTREAM)
- flags |= XFS_XFLAG_FILESTREAM;
+ flags |= FS_XFLAG_FILESTREAM;
}
+ if (di_flags2 & XFS_DIFLAG2_ANY) {
+ if (di_flags2 & XFS_DIFLAG2_DAX)
+ flags |= FS_XFLAG_DAX;
+ }
+
+ if (has_attr)
+ flags |= FS_XFLAG_HASATTR;
+
return flags;
}
uint
xfs_ip2xflags(
- xfs_inode_t *ip)
+ struct xfs_inode *ip)
{
- xfs_icdinode_t *dic = &ip->i_d;
+ struct xfs_icdinode *dic = &ip->i_d;
- return _xfs_dic2xflags(dic->di_flags) |
- (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
+ return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip));
}
uint
xfs_dic2xflags(
- xfs_dinode_t *dip)
+ struct xfs_dinode *dip)
{
- return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
- (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
+ return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
+ be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip));
}
/*
@@ -862,7 +871,8 @@ xfs_ialloc(
case S_IFREG:
case S_IFDIR:
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
- uint di_flags = 0;
+ uint64_t di_flags2 = 0;
+ uint di_flags = 0;
if (S_ISDIR(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
@@ -898,7 +908,11 @@ xfs_ialloc(
di_flags |= XFS_DIFLAG_NODEFRAG;
if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+
ip->i_d.di_flags |= di_flags;
+ ip->i_d.di_flags2 |= di_flags2;
}
/* FALLTHROUGH */
case S_IFLNK:
@@ -1143,7 +1157,6 @@ xfs_create(
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
- int committed;
prid_t prid;
struct xfs_dquot *udqp = NULL;
struct xfs_dquot *gdqp = NULL;
@@ -1226,7 +1239,7 @@ xfs_create(
* pointing to itself.
*/
error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
- prid, resblks > 0, &ip, &committed);
+ prid, resblks > 0, &ip, NULL);
if (error)
goto out_trans_cancel;
@@ -1275,7 +1288,7 @@ xfs_create(
*/
xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
@@ -1427,7 +1440,6 @@ xfs_link(
int error;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int committed;
int resblks;
trace_xfs_link(tdp, target_name);
@@ -1502,11 +1514,10 @@ xfs_link(
* link transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- }
- error = xfs_bmap_finish (&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error) {
xfs_bmap_cancel(&free_list);
goto error_return;
@@ -1555,7 +1566,6 @@ xfs_itruncate_extents(
xfs_fileoff_t first_unmap_block;
xfs_fileoff_t last_block;
xfs_filblks_t unmap_len;
- int committed;
int error = 0;
int done = 0;
@@ -1601,9 +1611,7 @@ xfs_itruncate_extents(
* Duplicate the transaction that has the permanent
* reservation and commit the old transaction.
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (committed)
- xfs_trans_ijoin(tp, ip, 0);
+ error = xfs_bmap_finish(&tp, &free_list, ip);
if (error)
goto out_bmap_cancel;
@@ -1774,7 +1782,6 @@ xfs_inactive_ifree(
{
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int committed;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error;
@@ -1841,7 +1848,7 @@ xfs_inactive_ifree(
* Just ignore errors at this point. There is nothing we can do except
* to try to keep going. Make sure it's not a silent error.
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error) {
xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
__func__, error);
@@ -2523,7 +2530,6 @@ xfs_remove(
int error = 0;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
- int committed;
uint resblks;
trace_xfs_remove(dp, name);
@@ -2624,7 +2630,7 @@ xfs_remove(
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
@@ -2701,7 +2707,6 @@ xfs_finish_rename(
struct xfs_trans *tp,
struct xfs_bmap_free *free_list)
{
- int committed = 0;
int error;
/*
@@ -2711,7 +2716,7 @@ xfs_finish_rename(
if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- error = xfs_bmap_finish(&tp, free_list, &committed);
+ error = xfs_bmap_finish(&tp, free_list, NULL);
if (error) {
xfs_bmap_cancel(free_list);
xfs_trans_cancel(tp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d42738deec6d..478d04e07f95 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -859,25 +859,25 @@ xfs_merge_ioc_xflags(
unsigned int xflags = start;
if (flags & FS_IMMUTABLE_FL)
- xflags |= XFS_XFLAG_IMMUTABLE;
+ xflags |= FS_XFLAG_IMMUTABLE;
else
- xflags &= ~XFS_XFLAG_IMMUTABLE;
+ xflags &= ~FS_XFLAG_IMMUTABLE;
if (flags & FS_APPEND_FL)
- xflags |= XFS_XFLAG_APPEND;
+ xflags |= FS_XFLAG_APPEND;
else
- xflags &= ~XFS_XFLAG_APPEND;
+ xflags &= ~FS_XFLAG_APPEND;
if (flags & FS_SYNC_FL)
- xflags |= XFS_XFLAG_SYNC;
+ xflags |= FS_XFLAG_SYNC;
else
- xflags &= ~XFS_XFLAG_SYNC;
+ xflags &= ~FS_XFLAG_SYNC;
if (flags & FS_NOATIME_FL)
- xflags |= XFS_XFLAG_NOATIME;
+ xflags |= FS_XFLAG_NOATIME;
else
- xflags &= ~XFS_XFLAG_NOATIME;
+ xflags &= ~FS_XFLAG_NOATIME;
if (flags & FS_NODUMP_FL)
- xflags |= XFS_XFLAG_NODUMP;
+ xflags |= FS_XFLAG_NODUMP;
else
- xflags &= ~XFS_XFLAG_NODUMP;
+ xflags &= ~FS_XFLAG_NODUMP;
return xflags;
}
@@ -945,40 +945,51 @@ xfs_set_diflags(
unsigned int xflags)
{
unsigned int di_flags;
+ uint64_t di_flags2;
/* can't set PREALLOC this way, just preserve it */
di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
- if (xflags & XFS_XFLAG_IMMUTABLE)
+ if (xflags & FS_XFLAG_IMMUTABLE)
di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
+ if (xflags & FS_XFLAG_APPEND)
di_flags |= XFS_DIFLAG_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
+ if (xflags & FS_XFLAG_SYNC)
di_flags |= XFS_DIFLAG_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
+ if (xflags & FS_XFLAG_NOATIME)
di_flags |= XFS_DIFLAG_NOATIME;
- if (xflags & XFS_XFLAG_NODUMP)
+ if (xflags & FS_XFLAG_NODUMP)
di_flags |= XFS_DIFLAG_NODUMP;
- if (xflags & XFS_XFLAG_NODEFRAG)
+ if (xflags & FS_XFLAG_NODEFRAG)
di_flags |= XFS_DIFLAG_NODEFRAG;
- if (xflags & XFS_XFLAG_FILESTREAM)
+ if (xflags & FS_XFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
if (S_ISDIR(ip->i_d.di_mode)) {
- if (xflags & XFS_XFLAG_RTINHERIT)
+ if (xflags & FS_XFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
- if (xflags & XFS_XFLAG_NOSYMLINKS)
+ if (xflags & FS_XFLAG_NOSYMLINKS)
di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (xflags & XFS_XFLAG_EXTSZINHERIT)
+ if (xflags & FS_XFLAG_EXTSZINHERIT)
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- if (xflags & XFS_XFLAG_PROJINHERIT)
+ if (xflags & FS_XFLAG_PROJINHERIT)
di_flags |= XFS_DIFLAG_PROJINHERIT;
} else if (S_ISREG(ip->i_d.di_mode)) {
- if (xflags & XFS_XFLAG_REALTIME)
+ if (xflags & FS_XFLAG_REALTIME)
di_flags |= XFS_DIFLAG_REALTIME;
- if (xflags & XFS_XFLAG_EXTSIZE)
+ if (xflags & FS_XFLAG_EXTSIZE)
di_flags |= XFS_DIFLAG_EXTSIZE;
}
-
ip->i_d.di_flags = di_flags;
+
+ /* diflags2 only valid for v3 inodes. */
+ if (ip->i_d.di_version < 3)
+ return;
+
+ di_flags2 = 0;
+ if (xflags & FS_XFLAG_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+
+ ip->i_d.di_flags2 = di_flags2;
+
}
STATIC void
@@ -988,22 +999,27 @@ xfs_diflags_to_linux(
struct inode *inode = VFS_I(ip);
unsigned int xflags = xfs_ip2xflags(ip);
- if (xflags & XFS_XFLAG_IMMUTABLE)
+ if (xflags & FS_XFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
else
inode->i_flags &= ~S_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
+ if (xflags & FS_XFLAG_APPEND)
inode->i_flags |= S_APPEND;
else
inode->i_flags &= ~S_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
+ if (xflags & FS_XFLAG_SYNC)
inode->i_flags |= S_SYNC;
else
inode->i_flags &= ~S_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
+ if (xflags & FS_XFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
else
inode->i_flags &= ~S_NOATIME;
+ if (xflags & FS_XFLAG_DAX)
+ inode->i_flags |= S_DAX;
+ else
+ inode->i_flags &= ~S_DAX;
+
}
static int
@@ -1016,11 +1032,11 @@ xfs_ioctl_setattr_xflags(
/* Can't change realtime flag if any extents are allocated. */
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
- XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME))
+ XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
return -EINVAL;
/* If realtime flag is set then must have realtime device */
- if (fa->fsx_xflags & XFS_XFLAG_REALTIME) {
+ if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
(ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
return -EINVAL;
@@ -1031,7 +1047,7 @@ xfs_ioctl_setattr_xflags(
* we have appropriate permission.
*/
if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
- (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+ (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
@@ -1095,8 +1111,8 @@ out_cancel:
* extent size hint validation is somewhat cumbersome. Rules are:
*
* 1. extent size hint is only valid for directories and regular files
- * 2. XFS_XFLAG_EXTSIZE is only valid for regular files
- * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
* 4. can only be changed on regular files if no extents are allocated
* 5. can be changed on directories at any time
* 6. extsize hint of 0 turns off hints, clears inode flags.
@@ -1112,10 +1128,10 @@ xfs_ioctl_setattr_check_extsize(
{
struct xfs_mount *mp = ip->i_mount;
- if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
return -EINVAL;
- if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) &&
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
!S_ISDIR(ip->i_d.di_mode))
return -EINVAL;
@@ -1132,7 +1148,7 @@ xfs_ioctl_setattr_check_extsize(
return -EINVAL;
if (XFS_IS_REALTIME_INODE(ip) ||
- (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+ (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
} else {
size = mp->m_sb.sb_blocksize;
@@ -1143,7 +1159,7 @@ xfs_ioctl_setattr_check_extsize(
if (fa->fsx_extsize % size)
return -EINVAL;
} else
- fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT);
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
return 0;
}
@@ -1168,7 +1184,7 @@ xfs_ioctl_setattr_check_projid(
if (xfs_get_projid(ip) != fa->fsx_projid)
return -EINVAL;
- if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) !=
+ if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
return -EINVAL;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f4f5b43cf647..d81bdc080370 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -129,7 +129,6 @@ xfs_iomap_write_direct(
xfs_trans_t *tp;
xfs_bmap_free_t free_list;
uint qblocks, resblks, resrtextents;
- int committed;
int error;
int lockmode;
int bmapi_flags = XFS_BMAPI_PREALLOC;
@@ -203,15 +202,20 @@ xfs_iomap_write_direct(
* this outside the transaction context, but if we commit and then crash
* we may not have zeroed the blocks and this will be exposed on
* recovery of the allocation. Hence we must zero before commit.
+ *
* Further, if we are mapping unwritten extents here, we need to zero
* and convert them to written so that we don't need an unwritten extent
* callback for DAX. This also means that we need to be able to dip into
- * the reserve block pool if there is no space left but we need to do
- * unwritten extent conversion.
+ * the reserve block pool for bmbt block allocation if there is no space
+ * left but we need to do unwritten extent conversion.
*/
+
if (IS_DAX(VFS_I(ip))) {
bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
- tp->t_flags |= XFS_TRANS_RESERVE;
+ if (ISUNWRITTEN(imap)) {
+ tp->t_flags |= XFS_TRANS_RESERVE;
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
+ }
}
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
resblks, resrtextents);
@@ -247,7 +251,7 @@ xfs_iomap_write_direct(
/*
* Complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
@@ -693,7 +697,7 @@ xfs_iomap_write_allocate(
xfs_bmap_free_t free_list;
xfs_filblks_t count_fsb;
xfs_trans_t *tp;
- int nimaps, committed;
+ int nimaps;
int error = 0;
int nres;
@@ -794,7 +798,7 @@ xfs_iomap_write_allocate(
if (error)
goto trans_cancel;
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto trans_cancel;
@@ -852,7 +856,6 @@ xfs_iomap_write_unwritten(
xfs_bmap_free_t free_list;
xfs_fsize_t i_size;
uint resblks;
- int committed;
int error;
trace_xfs_unwritten_convert(ip, offset, count);
@@ -924,7 +927,7 @@ xfs_iomap_write_unwritten(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto error_on_bmapi_transaction;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 245268a0cdf0..76b71a1c6c32 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -414,13 +414,17 @@ xfs_vn_rename(
* uio is kmalloced for this reason...
*/
STATIC const char *
-xfs_vn_follow_link(
+xfs_vn_get_link(
struct dentry *dentry,
- void **cookie)
+ struct inode *inode,
+ struct delayed_call *done)
{
char *link;
int error = -ENOMEM;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
if (!link)
goto out_err;
@@ -429,7 +433,8 @@ xfs_vn_follow_link(
if (unlikely(error))
goto out_kfree;
- return *cookie = link;
+ set_delayed_call(done, kfree_link, link);
+ return link;
out_kfree:
kfree(link);
@@ -1172,8 +1177,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = xfs_vn_follow_link,
- .put_link = kfree_put_link,
+ .get_link = xfs_vn_get_link,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -1201,8 +1205,8 @@ xfs_diflags_to_iflags(
inode->i_flags |= S_SYNC;
if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
- /* XXX: Also needs an on-disk per inode flag! */
- if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+ ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
inode->i_flags |= S_DAX;
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f52c72a1a06f..9c9a1c9bcc7f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1188,10 +1188,16 @@ xlog_iodone(xfs_buf_t *bp)
int aborted = 0;
/*
- * Race to shutdown the filesystem if we see an error.
+ * Race to shutdown the filesystem if we see an error or the iclog is in
+ * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
+ * CRC errors into log recovery.
*/
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
- XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
+ if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
+ XFS_RANDOM_IODONE_IOERR) ||
+ iclog->ic_state & XLOG_STATE_IOABORT) {
+ if (iclog->ic_state & XLOG_STATE_IOABORT)
+ iclog->ic_state &= ~XLOG_STATE_IOABORT;
+
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_stale(bp);
xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
@@ -1838,6 +1844,23 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
iclog->ic_datap, size);
+#ifdef DEBUG
+ /*
+ * Intentionally corrupt the log record CRC based on the error injection
+ * frequency, if defined. This facilitates testing log recovery in the
+ * event of torn writes. Hence, set the IOABORT state to abort the log
+ * write on I/O completion and shutdown the fs. The subsequent mount
+ * detects the bad CRC and attempts to recover.
+ */
+ if (log->l_badcrc_factor &&
+ (prandom_u32() % log->l_badcrc_factor == 0)) {
+ iclog->ic_header.h_crc &= 0xAAAAAAAA;
+ iclog->ic_state |= XLOG_STATE_IOABORT;
+ xfs_warn(log->l_mp,
+ "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
+ be64_to_cpu(iclog->ic_header.h_lsn));
+ }
+#endif
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
@@ -2045,12 +2068,14 @@ xlog_print_tic_res(
"QM_DQCLUSTER",
"QM_QINOCREATE",
"QM_QUOTAOFF_END",
- "SB_UNIT",
"FSYNC_TS",
"GROWFSRT_ALLOC",
"GROWFSRT_ZERO",
"GROWFSRT_FREE",
- "SWAPEXT"
+ "SWAPEXT",
+ "CHECKPOINT",
+ "ICREATE",
+ "CREATE_TMPFILE"
};
xfs_warn(mp, "xlog_write: reservation summary:");
@@ -2791,11 +2816,19 @@ xlog_state_do_callback(
}
} while (!ioerrors && loopdidcallbacks);
+#ifdef DEBUG
/*
- * make one last gasp attempt to see if iclogs are being left in
- * limbo..
+ * Make one last gasp attempt to see if iclogs are being left in limbo.
+ * If the above loop finds an iclog earlier than the current iclog and
+ * in one of the syncing states, the current iclog is put into
+ * DO_CALLBACK and the callbacks are deferred to the completion of the
+ * earlier iclog. Walk the iclogs in order and make sure that no iclog
+ * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
+ * states.
+ *
+ * Note that SYNCING|IOABORT is a valid state so we cannot just check
+ * for ic_state == SYNCING.
*/
-#ifdef DEBUG
if (funcdidcallbacks) {
first_iclog = iclog = log->l_iclog;
do {
@@ -2810,7 +2843,7 @@ xlog_state_do_callback(
* IOERROR - give up hope all ye who enter here
*/
if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
- iclog->ic_state == XLOG_STATE_SYNCING ||
+ iclog->ic_state & XLOG_STATE_SYNCING ||
iclog->ic_state == XLOG_STATE_DONE_SYNC ||
iclog->ic_state == XLOG_STATE_IOERROR )
break;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8daba7491b13..ed8896310c00 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -62,6 +62,7 @@ static inline uint xlog_get_client_id(__be32 i)
#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
+#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
@@ -410,6 +411,8 @@ struct xlog {
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
void *l_iclog_bak[XLOG_MAX_ICLOGS];
+ /* log record crc error injection factor */
+ uint32_t l_badcrc_factor;
#endif
};
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index c5ecaacdd218..da37beb76f6e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -61,6 +61,9 @@ xlog_recover_check_summary(
#else
#define xlog_recover_check_summary(log)
#endif
+STATIC int
+xlog_do_recovery_pass(
+ struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
/*
* This structure is used during recovery to record the buf log items which
@@ -868,6 +871,351 @@ validate_head:
}
/*
+ * Seek backwards in the log for log record headers.
+ *
+ * Given a starting log block, walk backwards until we find the provided number
+ * of records or hit the provided tail block. The return value is the number of
+ * records encountered or a negative error code. The log block and buffer
+ * pointer of the last record seen are returned in rblk and rhead respectively.
+ */
+STATIC int
+xlog_rseek_logrec_hdr(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk,
+ int count,
+ struct xfs_buf *bp,
+ xfs_daddr_t *rblk,
+ struct xlog_rec_header **rhead,
+ bool *wrapped)
+{
+ int i;
+ int error;
+ int found = 0;
+ char *offset = NULL;
+ xfs_daddr_t end_blk;
+
+ *wrapped = false;
+
+ /*
+ * Walk backwards from the head block until we hit the tail or the first
+ * block in the log.
+ */
+ end_blk = head_blk > tail_blk ? tail_blk : 0;
+ for (i = (int) head_blk - 1; i >= end_blk; i--) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+
+ /*
+ * If we haven't hit the tail block or the log record header count,
+ * start looking again from the end of the physical log. Note that
+ * callers can pass head == tail if the tail is not yet known.
+ */
+ if (tail_blk >= head_blk && found != count) {
+ for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *)offset ==
+ cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *wrapped = true;
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+ }
+
+ return found;
+
+out_error:
+ return error;
+}
+
+/*
+ * Seek forward in the log for log record headers.
+ *
+ * Given head and tail blocks, walk forward from the tail block until we find
+ * the provided number of records or hit the head block. The return value is the
+ * number of records encountered or a negative error code. The log block and
+ * buffer pointer of the last record seen are returned in rblk and rhead
+ * respectively.
+ */
+STATIC int
+xlog_seek_logrec_hdr(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk,
+ int count,
+ struct xfs_buf *bp,
+ xfs_daddr_t *rblk,
+ struct xlog_rec_header **rhead,
+ bool *wrapped)
+{
+ int i;
+ int error;
+ int found = 0;
+ char *offset = NULL;
+ xfs_daddr_t end_blk;
+
+ *wrapped = false;
+
+ /*
+ * Walk forward from the tail block until we hit the head or the last
+ * block in the log.
+ */
+ end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
+ for (i = (int) tail_blk; i <= end_blk; i++) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+
+ /*
+ * If we haven't hit the head block or the log record header count,
+ * start looking again from the start of the physical log.
+ */
+ if (tail_blk > head_blk && found != count) {
+ for (i = 0; i < (int) head_blk; i++) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *)offset ==
+ cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *wrapped = true;
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+ }
+
+ return found;
+
+out_error:
+ return error;
+}
+
+/*
+ * Check the log tail for torn writes. This is required when torn writes are
+ * detected at the head and the head had to be walked back to a previous record.
+ * The tail of the previous record must now be verified to ensure the torn
+ * writes didn't corrupt the previous tail.
+ *
+ * Return an error if CRC verification fails as recovery cannot proceed.
+ */
+STATIC int
+xlog_verify_tail(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk)
+{
+ struct xlog_rec_header *thead;
+ struct xfs_buf *bp;
+ xfs_daddr_t first_bad;
+ int count;
+ int error = 0;
+ bool wrapped;
+ xfs_daddr_t tmp_head;
+
+ bp = xlog_get_bp(log, 1);
+ if (!bp)
+ return -ENOMEM;
+
+ /*
+ * Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get
+ * a temporary head block that points after the last possible
+ * concurrently written record of the tail.
+ */
+ count = xlog_seek_logrec_hdr(log, head_blk, tail_blk,
+ XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead,
+ &wrapped);
+ if (count < 0) {
+ error = count;
+ goto out;
+ }
+
+ /*
+ * If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran
+ * into the actual log head. tmp_head points to the start of the record
+ * so update it to the actual head block.
+ */
+ if (count < XLOG_MAX_ICLOGS + 1)
+ tmp_head = head_blk;
+
+ /*
+ * We now have a tail and temporary head block that covers at least
+ * XLOG_MAX_ICLOGS records from the tail. We need to verify that these
+ * records were completely written. Run a CRC verification pass from
+ * tail to head and return the result.
+ */
+ error = xlog_do_recovery_pass(log, tmp_head, tail_blk,
+ XLOG_RECOVER_CRCPASS, &first_bad);
+
+out:
+ xlog_put_bp(bp);
+ return error;
+}
+
+/*
+ * Detect and trim torn writes from the head of the log.
+ *
+ * Storage without sector atomicity guarantees can result in torn writes in the
+ * log in the event of a crash. Our only means to detect this scenario is via
+ * CRC verification. While we can't always be certain that CRC verification
+ * failure is due to a torn write vs. an unrelated corruption, we do know that
+ * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at
+ * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of
+ * the log and treat failures in this range as torn writes as a matter of
+ * policy. In the event of CRC failure, the head is walked back to the last good
+ * record in the log and the tail is updated from that record and verified.
+ */
+STATIC int
+xlog_verify_head(
+ struct xlog *log,
+ xfs_daddr_t *head_blk, /* in/out: unverified head */
+ xfs_daddr_t *tail_blk, /* out: tail block */
+ struct xfs_buf *bp,
+ xfs_daddr_t *rhead_blk, /* start blk of last record */
+ struct xlog_rec_header **rhead, /* ptr to last record */
+ bool *wrapped) /* last rec. wraps phys. log */
+{
+ struct xlog_rec_header *tmp_rhead;
+ struct xfs_buf *tmp_bp;
+ xfs_daddr_t first_bad;
+ xfs_daddr_t tmp_rhead_blk;
+ int found;
+ int error;
+ bool tmp_wrapped;
+
+ /*
+ * Search backwards through the log looking for the log record header
+ * block. This wraps all the way back around to the head so something is
+ * seriously wrong if we can't find it.
+ */
+ found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
+ rhead, wrapped);
+ if (found < 0)
+ return found;
+ if (!found) {
+ xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+ return -EIO;
+ }
+
+ *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
+
+ /*
+ * Now that we have a tail block, check the head of the log for torn
+ * writes. Search again until we hit the tail or the maximum number of
+ * log record I/Os that could have been in flight at one time. Use a
+ * temporary buffer so we don't trash the rhead/bp pointer from the
+ * call above.
+ */
+ tmp_bp = xlog_get_bp(log, 1);
+ if (!tmp_bp)
+ return -ENOMEM;
+ error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
+ XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
+ &tmp_rhead, &tmp_wrapped);
+ xlog_put_bp(tmp_bp);
+ if (error < 0)
+ return error;
+
+ /*
+ * Now run a CRC verification pass over the records starting at the
+ * block found above to the current head. If a CRC failure occurs, the
+ * log block of the first bad record is saved in first_bad.
+ */
+ error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
+ XLOG_RECOVER_CRCPASS, &first_bad);
+ if (error == -EFSBADCRC) {
+ /*
+ * We've hit a potential torn write. Reset the error and warn
+ * about it.
+ */
+ error = 0;
+ xfs_warn(log->l_mp,
+"Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.",
+ first_bad, *head_blk);
+
+ /*
+ * Get the header block and buffer pointer for the last good
+ * record before the bad record.
+ *
+ * Note that xlog_find_tail() clears the blocks at the new head
+ * (i.e., the records with invalid CRC) if the cycle number
+ * matches the the current cycle.
+ */
+ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
+ rhead_blk, rhead, wrapped);
+ if (found < 0)
+ return found;
+ if (found == 0) /* XXX: right thing to do here? */
+ return -EIO;
+
+ /*
+ * Reset the head block to the starting block of the first bad
+ * log record and set the tail block based on the last good
+ * record.
+ *
+ * Bail out if the updated head/tail match as this indicates
+ * possible corruption outside of the acceptable
+ * (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair...
+ */
+ *head_blk = first_bad;
+ *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
+ if (*head_blk == *tail_blk) {
+ ASSERT(0);
+ return 0;
+ }
+
+ /*
+ * Now verify the tail based on the updated head. This is
+ * required because the torn writes trimmed from the head could
+ * have been written over the tail of a previous record. Return
+ * any errors since recovery cannot proceed if the tail is
+ * corrupt.
+ *
+ * XXX: This leaves a gap in truly robust protection from torn
+ * writes in the log. If the head is behind the tail, the tail
+ * pushes forward to create some space and then a crash occurs
+ * causing the writes into the previous record's tail region to
+ * tear, log recovery isn't able to recover.
+ *
+ * How likely is this to occur? If possible, can we do something
+ * more intelligent here? Is it safe to push the tail forward if
+ * we can determine that the tail is within the range of the
+ * torn write (e.g., the kernel can only overwrite the tail if
+ * it has actually been pushed forward)? Alternatively, could we
+ * somehow prevent this condition at runtime?
+ */
+ error = xlog_verify_tail(log, *head_blk, *tail_blk);
+ }
+
+ return error;
+}
+
+/*
* Find the sync block number or the tail of the log.
*
* This will be the block number of the last record to have its
@@ -893,13 +1241,13 @@ xlog_find_tail(
xlog_op_header_t *op_head;
char *offset = NULL;
xfs_buf_t *bp;
- int error, i, found;
+ int error;
xfs_daddr_t umount_data_blk;
xfs_daddr_t after_umount_blk;
+ xfs_daddr_t rhead_blk;
xfs_lsn_t tail_lsn;
int hblks;
-
- found = 0;
+ bool wrapped = false;
/*
* Find previous log record
@@ -923,48 +1271,16 @@ xlog_find_tail(
}
/*
- * Search backwards looking for log record header block
+ * Trim the head block back to skip over torn records. We can have
+ * multiple log I/Os in flight at any time, so we assume CRC failures
+ * back through the previous several records are torn writes and skip
+ * them.
*/
ASSERT(*head_blk < INT_MAX);
- for (i = (int)(*head_blk) - 1; i >= 0; i--) {
- error = xlog_bread(log, i, 1, bp, &offset);
- if (error)
- goto done;
-
- if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
- found = 1;
- break;
- }
- }
- /*
- * If we haven't found the log record header block, start looking
- * again from the end of the physical log. XXXmiken: There should be
- * a check here to make sure we didn't search more than N blocks in
- * the previous code.
- */
- if (!found) {
- for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
- error = xlog_bread(log, i, 1, bp, &offset);
- if (error)
- goto done;
-
- if (*(__be32 *)offset ==
- cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
- found = 2;
- break;
- }
- }
- }
- if (!found) {
- xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
- xlog_put_bp(bp);
- ASSERT(0);
- return -EIO;
- }
-
- /* find blk_no of tail of log */
- rhead = (xlog_rec_header_t *)offset;
- *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
+ error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
+ &rhead, &wrapped);
+ if (error)
+ goto done;
/*
* Reset log values according to the state of the log when we
@@ -976,10 +1292,10 @@ xlog_find_tail(
* written was complete and ended exactly on the end boundary
* of the physical log.
*/
- log->l_prev_block = i;
+ log->l_prev_block = rhead_blk;
log->l_curr_block = (int)*head_blk;
log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
- if (found == 2)
+ if (wrapped)
log->l_curr_cycle++;
atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
@@ -1014,12 +1330,13 @@ xlog_find_tail(
} else {
hblks = 1;
}
- after_umount_blk = (i + hblks + (int)
- BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
+ after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+ after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
tail_lsn = atomic64_read(&log->l_tail_lsn);
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
- umount_data_blk = (i + hblks) % log->l_logBBsize;
+ umount_data_blk = rhead_blk + hblks;
+ umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
if (error)
goto done;
@@ -3204,6 +3521,7 @@ xlog_recover_dquot_ra_pass2(
struct xfs_disk_dquot *recddq;
struct xfs_dq_logformat *dq_f;
uint type;
+ int len;
if (mp->m_qflags == 0)
@@ -3224,8 +3542,12 @@ xlog_recover_dquot_ra_pass2(
ASSERT(dq_f);
ASSERT(dq_f->qlf_len == 1);
- xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
- XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
+ len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
+ if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
+ return;
+
+ xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
+ &xfs_dquot_buf_ra_ops);
}
STATIC void
@@ -4118,26 +4440,69 @@ xlog_recover_process_iunlinks(
mp->m_dmevmask = mp_dmevmask;
}
+STATIC int
+xlog_unpack_data(
+ struct xlog_rec_header *rhead,
+ char *dp,
+ struct xlog *log)
+{
+ int i, j, k;
+
+ for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
+ i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
+ *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
+ dp += BBSIZE;
+ }
+
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+ xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
+ for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
+ j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
+ k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
+ *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
+ dp += BBSIZE;
+ }
+ }
+
+ return 0;
+}
+
/*
- * Upack the log buffer data and crc check it. If the check fails, issue a
- * warning if and only if the CRC in the header is non-zero. This makes the
- * check an advisory warning, and the zero CRC check will prevent failure
- * warnings from being emitted when upgrading the kernel from one that does not
- * add CRCs by default.
- *
- * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
- * corruption failure
+ * CRC check, unpack and process a log record.
*/
STATIC int
-xlog_unpack_data_crc(
+xlog_recover_process(
+ struct xlog *log,
+ struct hlist_head rhash[],
struct xlog_rec_header *rhead,
char *dp,
- struct xlog *log)
+ int pass)
{
+ int error;
__le32 crc;
crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
- if (crc != rhead->h_crc) {
+
+ /*
+ * Nothing else to do if this is a CRC verification pass. Just return
+ * if this a record with a non-zero crc. Unfortunately, mkfs always
+ * sets h_crc to 0 so we must consider this valid even on v5 supers.
+ * Otherwise, return EFSBADCRC on failure so the callers up the stack
+ * know precisely what failed.
+ */
+ if (pass == XLOG_RECOVER_CRCPASS) {
+ if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
+ return -EFSBADCRC;
+ return 0;
+ }
+
+ /*
+ * We're in the normal recovery path. Issue a warning if and only if the
+ * CRC in the header is non-zero. This is an advisory warning and the
+ * zero CRC check prevents warnings from being emitted when upgrading
+ * the kernel from one that does not add CRCs by default.
+ */
+ if (crc != le32_to_cpu(rhead->h_crc)) {
if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
xfs_alert(log->l_mp,
"log record CRC mismatch: found 0x%x, expected 0x%x.",
@@ -4147,47 +4512,18 @@ xlog_unpack_data_crc(
}
/*
- * If we've detected a log record corruption, then we can't
- * recover past this point. Abort recovery if we are enforcing
- * CRC protection by punting an error back up the stack.
+ * If the filesystem is CRC enabled, this mismatch becomes a
+ * fatal log corruption failure.
*/
if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
return -EFSCORRUPTED;
}
- return 0;
-}
-
-STATIC int
-xlog_unpack_data(
- struct xlog_rec_header *rhead,
- char *dp,
- struct xlog *log)
-{
- int i, j, k;
- int error;
-
- error = xlog_unpack_data_crc(rhead, dp, log);
+ error = xlog_unpack_data(rhead, dp, log);
if (error)
return error;
- for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
- i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
- *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
- dp += BBSIZE;
- }
-
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
- for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
- j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
- k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
- *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
- dp += BBSIZE;
- }
- }
-
- return 0;
+ return xlog_recover_process_data(log, rhash, rhead, dp, pass);
}
STATIC int
@@ -4239,18 +4575,21 @@ xlog_do_recovery_pass(
struct xlog *log,
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
- int pass)
+ int pass,
+ xfs_daddr_t *first_bad) /* out: first bad log rec */
{
xlog_rec_header_t *rhead;
xfs_daddr_t blk_no;
+ xfs_daddr_t rhead_blk;
char *offset;
xfs_buf_t *hbp, *dbp;
- int error = 0, h_size;
+ int error = 0, h_size, h_len;
int bblks, split_bblks;
int hblks, split_hblks, wrapped_hblks;
struct hlist_head rhash[XLOG_RHASH_SIZE];
ASSERT(head_blk != tail_blk);
+ rhead_blk = 0;
/*
* Read the header of the tail block and get the iclog buffer size from
@@ -4274,7 +4613,31 @@ xlog_do_recovery_pass(
error = xlog_valid_rec_header(log, rhead, tail_blk);
if (error)
goto bread_err1;
+
+ /*
+ * xfsprogs has a bug where record length is based on lsunit but
+ * h_size (iclog size) is hardcoded to 32k. Now that we
+ * unconditionally CRC verify the unmount record, this means the
+ * log buffer can be too small for the record and cause an
+ * overrun.
+ *
+ * Detect this condition here. Use lsunit for the buffer size as
+ * long as this looks like the mkfs case. Otherwise, return an
+ * error to avoid a buffer overrun.
+ */
h_size = be32_to_cpu(rhead->h_size);
+ h_len = be32_to_cpu(rhead->h_len);
+ if (h_len > h_size) {
+ if (h_len <= log->l_mp->m_logbsize &&
+ be32_to_cpu(rhead->h_num_logops) == 1) {
+ xfs_warn(log->l_mp,
+ "invalid iclog size (%d bytes), using lsunit (%d bytes)",
+ h_size, log->l_mp->m_logbsize);
+ h_size = log->l_mp->m_logbsize;
+ } else
+ return -EFSCORRUPTED;
+ }
+
if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
(h_size > XLOG_HEADER_CYCLE_SIZE)) {
hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -4301,7 +4664,7 @@ xlog_do_recovery_pass(
}
memset(rhash, 0, sizeof(rhash));
- blk_no = tail_blk;
+ blk_no = rhead_blk = tail_blk;
if (tail_blk > head_blk) {
/*
* Perform recovery around the end of the physical log.
@@ -4408,19 +4771,18 @@ xlog_do_recovery_pass(
goto bread_err2;
}
- error = xlog_unpack_data(rhead, offset, log);
+ error = xlog_recover_process(log, rhash, rhead, offset,
+ pass);
if (error)
goto bread_err2;
- error = xlog_recover_process_data(log, rhash,
- rhead, offset, pass);
- if (error)
- goto bread_err2;
blk_no += bblks;
+ rhead_blk = blk_no;
}
ASSERT(blk_no >= log->l_logBBsize);
blk_no -= log->l_logBBsize;
+ rhead_blk = blk_no;
}
/* read first part of physical log */
@@ -4441,21 +4803,22 @@ xlog_do_recovery_pass(
if (error)
goto bread_err2;
- error = xlog_unpack_data(rhead, offset, log);
+ error = xlog_recover_process(log, rhash, rhead, offset, pass);
if (error)
goto bread_err2;
- error = xlog_recover_process_data(log, rhash,
- rhead, offset, pass);
- if (error)
- goto bread_err2;
blk_no += bblks + hblks;
+ rhead_blk = blk_no;
}
bread_err2:
xlog_put_bp(dbp);
bread_err1:
xlog_put_bp(hbp);
+
+ if (error && first_bad)
+ *first_bad = rhead_blk;
+
return error;
}
@@ -4493,7 +4856,7 @@ xlog_do_log_recovery(
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
- XLOG_RECOVER_PASS1);
+ XLOG_RECOVER_PASS1, NULL);
if (error != 0) {
kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
@@ -4504,7 +4867,7 @@ xlog_do_log_recovery(
* When it is complete free the table of buf cancel items.
*/
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
- XLOG_RECOVER_PASS2);
+ XLOG_RECOVER_PASS2, NULL);
#ifdef DEBUG
if (!error) {
int i;
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index dc6221942b85..ade236e90bb3 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -42,11 +42,11 @@ xfs_break_layouts(
while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
xfs_iunlock(ip, *iolock);
if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
error = break_layout(inode, true);
*iolock = XFS_IOLOCK_EXCL;
if (with_imutex)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xfs_ilock(ip, *iolock);
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ab1bac6a3a1c..be02a68b2fe2 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -766,7 +766,6 @@ xfs_growfs_rt_alloc(
{
xfs_fileoff_t bno; /* block number in file */
struct xfs_buf *bp; /* temporary buffer for zeroing */
- int committed; /* transaction committed flag */
xfs_daddr_t d; /* disk block address */
int error; /* error return value */
xfs_fsblock_t firstblock;/* first block allocated in xaction */
@@ -811,7 +810,7 @@ xfs_growfs_rt_alloc(
/*
* Free any blocks freed up in the transaction, then commit.
*/
- error = xfs_bmap_finish(&tp, &flist, &committed);
+ error = xfs_bmap_finish(&tp, &flist, NULL);
if (error)
goto out_bmap_cancel;
error = xfs_trans_commit(tp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 36bd8825bfb0..59c9b7bd958d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -137,7 +137,7 @@ static const match_table_t tokens = {
};
-STATIC unsigned long
+STATIC int
suffix_kstrtoint(char *s, unsigned int base, int *res)
{
int last, shift_left_factor = 0, _res;
@@ -1714,8 +1714,8 @@ xfs_init_zones(void)
xfs_inode_zone =
kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
- xfs_fs_inode_init_once);
+ KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
+ KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
if (!xfs_inode_zone)
goto out_destroy_efi_zone;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 996481eeb491..b44284c1adda 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -178,7 +178,6 @@ xfs_symlink(
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
- int committed;
xfs_fileoff_t first_fsb;
xfs_filblks_t fs_blocks;
int nmaps;
@@ -387,7 +386,7 @@ xfs_symlink(
xfs_trans_set_sync(tp);
}
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
@@ -434,7 +433,6 @@ xfs_inactive_symlink_rmt(
struct xfs_inode *ip)
{
xfs_buf_t *bp;
- int committed;
int done;
int error;
xfs_fsblock_t first_block;
@@ -510,16 +508,10 @@ xfs_inactive_symlink_rmt(
/*
* Commit the first transaction. This logs the EFI and the inode.
*/
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, ip);
if (error)
goto error_bmap_cancel;
/*
- * The transaction must have been committed, since there were
- * actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
- * The new tp has the extent freeing and EFDs.
- */
- ASSERT(committed);
- /*
* The first xact was committed, so add the inode to the new one.
* Mark it dirty so it will be logged and moved forward in the log as
* part of every commit.
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index ee70f5dec9dc..641d625eb334 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -255,11 +255,47 @@ write_grant_head_show(
}
XFS_SYSFS_ATTR_RO(write_grant_head);
+#ifdef DEBUG
+STATIC ssize_t
+log_badcrc_factor_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ struct xlog *log = to_xlog(kobject);
+ int ret;
+ uint32_t val;
+
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ log->l_badcrc_factor = val;
+
+ return count;
+}
+
+STATIC ssize_t
+log_badcrc_factor_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ struct xlog *log = to_xlog(kobject);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
+}
+
+XFS_SYSFS_ATTR_RW(log_badcrc_factor);
+#endif /* DEBUG */
+
static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(log_head_lsn),
ATTR_LIST(log_tail_lsn),
ATTR_LIST(reserve_grant_head),
ATTR_LIST(write_grant_head),
+#ifdef DEBUG
+ ATTR_LIST(log_badcrc_factor),
+#endif
NULL,
};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 877079eb0f8f..391d797cb53f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1222,6 +1222,32 @@ DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
+DECLARE_EVENT_CLASS(xfs_readpage_class,
+ TP_PROTO(struct inode *inode, int nr_pages),
+ TP_ARGS(inode, nr_pages),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(int, nr_pages)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->nr_pages = nr_pages;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->nr_pages)
+)
+
+#define DEFINE_READPAGE_EVENT(name) \
+DEFINE_EVENT(xfs_readpage_class, name, \
+ TP_PROTO(struct inode *inode, int nr_pages), \
+ TP_ARGS(inode, nr_pages))
+DEFINE_READPAGE_EVENT(xfs_vm_readpage);
+DEFINE_READPAGE_EVENT(xfs_vm_readpages);
+
DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int type, struct xfs_bmbt_irec *irec),
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index aa67339b9537..4f18fd92ca13 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -497,7 +497,6 @@ xfsaild(
long tout = 0; /* milliseconds */
current->flags |= PF_MEMALLOC;
- set_freezable();
while (!kthread_should_stop()) {
if (tout && tout <= 20)
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index ce78534a047e..995170194df0 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -572,12 +572,16 @@ xfs_quota_warn(
struct xfs_dquot *dqp,
int type)
{
- /* no warnings for project quotas - we just return ENOSPC later */
+ enum quota_type qtype;
+
if (dqp->dq_flags & XFS_DQ_PROJ)
- return;
- quota_send_warning(make_kqid(&init_user_ns,
- (dqp->dq_flags & XFS_DQ_USER) ?
- USRQUOTA : GRPQUOTA,
+ qtype = PRJQUOTA;
+ else if (dqp->dq_flags & XFS_DQ_USER)
+ qtype = USRQUOTA;
+ else
+ qtype = GRPQUOTA;
+
+ quota_send_warning(make_kqid(&init_user_ns, qtype,
be32_to_cpu(dqp->q_core.d_id)),
mp->m_super->s_dev, type);
}
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 839b35ca21c6..110f1d7d86b0 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -39,9 +39,6 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry,
struct xfs_inode *ip = XFS_I(d_inode(dentry));
int error, asize = size;
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
/* Convert Linux syscall to XFS internal ATTR flags */
if (!size) {
xflags |= ATTR_KERNOVAL;
@@ -84,9 +81,6 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry,
struct xfs_inode *ip = XFS_I(d_inode(dentry));
int error;
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
/* Convert Linux syscall to XFS internal ATTR flags */
if (flags & XATTR_CREATE)
xflags |= ATTR_CREATE;
@@ -135,47 +129,19 @@ const struct xattr_handler *xfs_xattr_handlers[] = {
NULL
};
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return sizeof("security");
- else if (flags & XFS_ATTR_ROOT)
- return sizeof("trusted");
- else
- return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return xfs_xattr_security_handler.prefix;
- else if (flags & XFS_ATTR_ROOT)
- return xfs_xattr_trusted_handler.prefix;
- else
- return xfs_xattr_user_handler.prefix;
-}
-
static int
-xfs_xattr_put_listent(
+__xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
- int flags,
- unsigned char *name,
- int namelen,
- int valuelen,
- unsigned char *value)
+ char *prefix,
+ int prefix_len,
+ unsigned char *name,
+ int namelen)
{
- unsigned int prefix_len = xfs_xattr_prefix_len(flags);
char *offset;
int arraytop;
- ASSERT(context->count >= 0);
-
- /*
- * Only show root namespace entries if we are actually allowed to
- * see them.
- */
- if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
- return 0;
+ if (!context->alist)
+ goto compute_size;
arraytop = context->count + prefix_len + namelen + 1;
if (arraytop > context->firstu) {
@@ -183,17 +149,19 @@ xfs_xattr_put_listent(
return 1;
}
offset = (char *)context->alist + context->count;
- strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+ strncpy(offset, prefix, prefix_len);
offset += prefix_len;
strncpy(offset, (char *)name, namelen); /* real name */
offset += namelen;
*offset = '\0';
+
+compute_size:
context->count += prefix_len + namelen + 1;
return 0;
}
static int
-xfs_xattr_put_listent_sizes(
+xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
int flags,
unsigned char *name,
@@ -201,24 +169,55 @@ xfs_xattr_put_listent_sizes(
int valuelen,
unsigned char *value)
{
- context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
- return 0;
-}
+ char *prefix;
+ int prefix_len;
-static int
-list_one_attr(const char *name, const size_t len, void *data,
- size_t size, ssize_t *result)
-{
- char *p = data + *result;
+ ASSERT(context->count >= 0);
- *result += len;
- if (!size)
- return 0;
- if (*result > size)
- return -ERANGE;
+ if (flags & XFS_ATTR_ROOT) {
+#ifdef CONFIG_XFS_POSIX_ACL
+ if (namelen == SGI_ACL_FILE_SIZE &&
+ strncmp(name, SGI_ACL_FILE,
+ SGI_ACL_FILE_SIZE) == 0) {
+ int ret = __xfs_xattr_put_listent(
+ context, XATTR_SYSTEM_PREFIX,
+ XATTR_SYSTEM_PREFIX_LEN,
+ XATTR_POSIX_ACL_ACCESS,
+ strlen(XATTR_POSIX_ACL_ACCESS));
+ if (ret)
+ return ret;
+ } else if (namelen == SGI_ACL_DEFAULT_SIZE &&
+ strncmp(name, SGI_ACL_DEFAULT,
+ SGI_ACL_DEFAULT_SIZE) == 0) {
+ int ret = __xfs_xattr_put_listent(
+ context, XATTR_SYSTEM_PREFIX,
+ XATTR_SYSTEM_PREFIX_LEN,
+ XATTR_POSIX_ACL_DEFAULT,
+ strlen(XATTR_POSIX_ACL_DEFAULT));
+ if (ret)
+ return ret;
+ }
+#endif
- strcpy(p, name);
- return 0;
+ /*
+ * Only show root namespace entries if we are actually allowed to
+ * see them.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return 0;
+
+ prefix = XATTR_TRUSTED_PREFIX;
+ prefix_len = XATTR_TRUSTED_PREFIX_LEN;
+ } else if (flags & XFS_ATTR_SECURE) {
+ prefix = XATTR_SECURITY_PREFIX;
+ prefix_len = XATTR_SECURITY_PREFIX_LEN;
+ } else {
+ prefix = XATTR_USER_PREFIX;
+ prefix_len = XATTR_USER_PREFIX_LEN;
+ }
+
+ return __xfs_xattr_put_listent(context, prefix, prefix_len, name,
+ namelen);
}
ssize_t
@@ -227,7 +226,6 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
struct xfs_attr_list_context context;
struct attrlist_cursor_kern cursor = { 0 };
struct inode *inode = d_inode(dentry);
- int error;
/*
* First read the regular on-disk attributes.
@@ -236,37 +234,14 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
context.dp = XFS_I(inode);
context.cursor = &cursor;
context.resynch = 1;
- context.alist = data;
+ context.alist = size ? data : NULL;
context.bufsize = size;
context.firstu = context.bufsize;
-
- if (size)
- context.put_listent = xfs_xattr_put_listent;
- else
- context.put_listent = xfs_xattr_put_listent_sizes;
+ context.put_listent = xfs_xattr_put_listent;
xfs_attr_list_int(&context);
if (context.count < 0)
return -ERANGE;
- /*
- * Then add the two synthetic ACL attributes.
- */
- if (posix_acl_access_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
- if (posix_acl_default_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
return context.count;
}