aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c10
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c8
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h4
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c336
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h9
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c3
-rw-r--r--fs/xfs/libxfs/xfs_btree.c20
-rw-r--r--fs/xfs/libxfs/xfs_btree.h43
-rw-r--r--fs/xfs/libxfs/xfs_cksum.h26
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h5
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c26
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h1
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c18
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c16
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h4
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c77
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h7
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h4
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h2
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c1
-rw-r--r--fs/xfs/libxfs/xfs_sb.c13
-rw-r--r--fs/xfs/libxfs/xfs_types.h4
-rw-r--r--fs/xfs/xfs_aops.c309
-rw-r--r--fs/xfs/xfs_aops.h9
-rw-r--r--fs/xfs/xfs_attr.h4
-rw-r--r--fs/xfs/xfs_attr_list.c59
-rw-r--r--fs/xfs/xfs_bmap_util.c45
-rw-r--r--fs/xfs/xfs_buf.c125
-rw-r--r--fs/xfs/xfs_buf.h3
-rw-r--r--fs/xfs/xfs_dir2_readdir.c2
-rw-r--r--fs/xfs/xfs_file.c293
-rw-r--r--fs/xfs/xfs_icache.c40
-rw-r--r--fs/xfs/xfs_icreate_item.c2
-rw-r--r--fs/xfs/xfs_inode.c84
-rw-r--r--fs/xfs/xfs_inode.h18
-rw-r--r--fs/xfs/xfs_inode_item.c4
-rw-r--r--fs/xfs/xfs_ioctl.c12
-rw-r--r--fs/xfs/xfs_iomap.c104
-rw-r--r--fs/xfs/xfs_iops.c16
-rw-r--r--fs/xfs/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_log.c41
-rw-r--r--fs/xfs/xfs_log_recover.c16
-rw-r--r--fs/xfs/xfs_mount.c7
-rw-r--r--fs/xfs/xfs_mount.h7
-rw-r--r--fs/xfs/xfs_pnfs.c7
-rw-r--r--fs/xfs/xfs_pnfs.h4
-rw-r--r--fs/xfs/xfs_qm.c2
-rw-r--r--fs/xfs/xfs_reflink.c396
-rw-r--r--fs/xfs/xfs_reflink.h6
-rw-r--r--fs/xfs/xfs_stats.c10
-rw-r--r--fs/xfs/xfs_stats.h200
-rw-r--r--fs/xfs/xfs_super.c27
-rw-r--r--fs/xfs/xfs_symlink.c7
-rw-r--r--fs/xfs/xfs_trace.h109
-rw-r--r--fs/xfs/xfs_xattr.c23
60 files changed, 943 insertions, 1700 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index effb64cf714f..5050056a0b06 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2455,12 +2455,15 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
return false;
- if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
+ if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
return false;
if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
+ (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
return false;
/*
@@ -2477,7 +2480,8 @@ xfs_agf_verify(
return false;
if (xfs_sb_version_hasreflink(&mp->m_sb) &&
- be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)
+ (be32_to_cpu(agf->agf_refcount_level) < 1 ||
+ be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
return false;
return true;;
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 5ba2dac5e67c..efb467b10a71 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -421,13 +421,17 @@ xfs_allocbt_init_cursor(
ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
- cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
cur->bc_tp = tp;
cur->bc_mp = mp;
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_allocbt_ops;
+ if (btnum == XFS_BTNUM_BNO)
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
+ else
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
if (btnum == XFS_BTNUM_CNT) {
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 8ea91f363093..2852521fc8ec 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -253,6 +253,7 @@ xfs_attr3_leaf_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_attr_leafblock *leaf = bp->b_addr;
+ struct xfs_perag *pag = bp->b_pag;
struct xfs_attr3_icleaf_hdr ichdr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
@@ -273,7 +274,12 @@ xfs_attr3_leaf_verify(
if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
return false;
}
- if (ichdr.count == 0)
+ /*
+ * In recovery there is a transient state where count == 0 is valid
+ * because we may have transitioned an empty shortform attr to a leaf
+ * if the attr didn't fit in shortform.
+ */
+ if (pag && pag->pagf_init && ichdr.count == 0)
return false;
/* XXX: need to range check rest of attr header values */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 4f2aed04f827..f7dda0c237b0 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -51,7 +51,7 @@ int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
int xfs_attr_shortform_remove(struct xfs_da_args *args);
int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
-int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
+int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
/*
@@ -77,7 +77,7 @@ int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
-int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
+void xfs_attr3_leaf_list_int(struct xfs_buf *bp,
struct xfs_attr_list_context *context);
/*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c6eb21940783..2760bc3b2536 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -49,6 +49,8 @@
#include "xfs_rmap.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_icache.h"
kmem_zone_t *xfs_bmap_free_item_zone;
@@ -190,8 +192,12 @@ xfs_bmap_worst_indlen(
int maxrecs; /* maximum record count at this level */
xfs_mount_t *mp; /* mount structure */
xfs_filblks_t rval; /* return value */
+ xfs_filblks_t orig_len;
mp = ip->i_mount;
+
+ /* Calculate the worst-case size of the bmbt. */
+ orig_len = len;
maxrecs = mp->m_bmap_dmxr[0];
for (level = 0, rval = 0;
level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@@ -199,12 +205,20 @@ xfs_bmap_worst_indlen(
len += maxrecs - 1;
do_div(len, maxrecs);
rval += len;
- if (len == 1)
- return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+ if (len == 1) {
+ rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
level - 1;
+ break;
+ }
if (level == 0)
maxrecs = mp->m_bmap_dmxr[1];
}
+
+ /* Calculate the worst-case size of the rmapbt. */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
+ mp->m_rmap_maxlevels;
+
return rval;
}
@@ -504,7 +518,7 @@ void
xfs_bmap_trace_exlist(
xfs_inode_t *ip, /* incore inode pointer */
xfs_extnum_t cnt, /* count of entries in the list */
- int whichfork, /* data or attr fork */
+ int whichfork, /* data or attr or cow fork */
unsigned long caller_ip)
{
xfs_extnum_t idx; /* extent record index */
@@ -513,11 +527,13 @@ xfs_bmap_trace_exlist(
if (whichfork == XFS_ATTR_FORK)
state |= BMAP_ATTRFORK;
+ else if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
+ ASSERT(cnt == xfs_iext_count(ifp));
for (idx = 0; idx < cnt; idx++)
- trace_xfs_extlist(ip, idx, whichfork, caller_ip);
+ trace_xfs_extlist(ip, idx, state, caller_ip);
}
/*
@@ -811,7 +827,7 @@ try_another_ag:
XFS_BTREE_LONG_PTRS);
arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
for (cnt = i = 0; i < nextents; i++) {
ep = xfs_iext_get_ext(ifp, i);
if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
@@ -1137,6 +1153,10 @@ xfs_bmap_add_attrfork(
goto trans_cancel;
if (XFS_IFORK_Q(ip))
goto trans_cancel;
+ if (ip->i_d.di_anextents != 0) {
+ error = -EFSCORRUPTED;
+ goto trans_cancel;
+ }
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
/*
* For inodes coming from pre-6.2 filesystems.
@@ -1144,7 +1164,6 @@ xfs_bmap_add_attrfork(
ASSERT(ip->i_d.di_aformat == 0);
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
}
- ASSERT(ip->i_d.di_anextents == 0);
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -1296,7 +1315,7 @@ xfs_bmap_read_extents(
/*
* Here with bp and block set to the leftmost leaf node in the tree.
*/
- room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ room = xfs_iext_count(ifp);
i = 0;
/*
* Loop over all leaf nodes. Copy information to the extent records.
@@ -1361,8 +1380,9 @@ xfs_bmap_read_extents(
return error;
block = XFS_BUF_TO_BLOCK(bp);
}
- ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
- ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+ if (i != XFS_IFORK_NEXTENTS(ip, whichfork))
+ return -EFSCORRUPTED;
+ ASSERT(i == xfs_iext_count(ifp));
XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
return 0;
error0:
@@ -1370,97 +1390,6 @@ error0:
return -EFSCORRUPTED;
}
-
-/*
- * Search the extent records for the entry containing block bno.
- * If bno lies in a hole, point to the next entry. If bno lies
- * past eof, *eofp will be set, and *prevp will contain the last
- * entry (null if none). Else, *lastxp will be set to the index
- * of the found entry; *gotp will contain the entry.
- */
-STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
-xfs_bmap_search_multi_extents(
- xfs_ifork_t *ifp, /* inode fork pointer */
- xfs_fileoff_t bno, /* block number searched for */
- int *eofp, /* out: end of file found */
- xfs_extnum_t *lastxp, /* out: last extent index */
- xfs_bmbt_irec_t *gotp, /* out: extent entry found */
- xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
-{
- xfs_bmbt_rec_host_t *ep; /* extent record pointer */
- xfs_extnum_t lastx; /* last extent index */
-
- /*
- * Initialize the extent entry structure to catch access to
- * uninitialized br_startblock field.
- */
- gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
- gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
- gotp->br_state = XFS_EXT_INVALID;
- gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
- prevp->br_startoff = NULLFILEOFF;
-
- ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
- if (lastx > 0) {
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
- }
- if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
- xfs_bmbt_get_all(ep, gotp);
- *eofp = 0;
- } else {
- if (lastx > 0) {
- *gotp = *prevp;
- }
- *eofp = 1;
- ep = NULL;
- }
- *lastxp = lastx;
- return ep;
-}
-
-/*
- * Search the extents list for the inode, for the extent containing bno.
- * If bno lies in a hole, point to the next entry. If bno lies past eof,
- * *eofp will be set, and *prevp will contain the last entry (null if none).
- * Else, *lastxp will be set to the index of the found
- * entry; *gotp will contain the entry.
- */
-xfs_bmbt_rec_host_t * /* pointer to found extent entry */
-xfs_bmap_search_extents(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_fileoff_t bno, /* block number searched for */
- int fork, /* data or attr fork */
- int *eofp, /* out: end of file found */
- xfs_extnum_t *lastxp, /* out: last extent index */
- xfs_bmbt_irec_t *gotp, /* out: extent entry found */
- xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
-{
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_bmbt_rec_host_t *ep; /* extent record pointer */
-
- XFS_STATS_INC(ip->i_mount, xs_look_exlist);
- ifp = XFS_IFORK_PTR(ip, fork);
-
- ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
-
- if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
- !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
- xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
- "Access to block zero in inode %llu "
- "start_block: %llx start_off: %llx "
- "blkcnt: %llx extent-state: %x lastx: %x",
- (unsigned long long)ip->i_ino,
- (unsigned long long)gotp->br_startblock,
- (unsigned long long)gotp->br_startoff,
- (unsigned long long)gotp->br_blockcount,
- gotp->br_state, *lastxp);
- *lastxp = NULLEXTNUM;
- *eofp = 1;
- return NULL;
- }
- return ep;
-}
-
/*
* Returns the file-relative block number of the first unused block(s)
* in the file with at least "len" logically contiguous blocks free.
@@ -1497,7 +1426,7 @@ xfs_bmap_first_unused(
(error = xfs_iread_extents(tp, ip, whichfork)))
return error;
lowest = *first_unused;
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
off = xfs_bmbt_get_startoff(ep);
@@ -1523,44 +1452,44 @@ xfs_bmap_first_unused(
*/
int /* error */
xfs_bmap_last_before(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_inode_t *ip, /* incore inode */
- xfs_fileoff_t *last_block, /* last block */
- int whichfork) /* data or attr fork */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_inode *ip, /* incore inode */
+ xfs_fileoff_t *last_block, /* last block */
+ int whichfork) /* data or attr fork */
{
- xfs_fileoff_t bno; /* input file offset */
- int eof; /* hit end of file */
- xfs_bmbt_rec_host_t *ep; /* pointer to last extent */
- int error; /* error return value */
- xfs_bmbt_irec_t got; /* current extent value */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_extnum_t lastx; /* last extent used */
- xfs_bmbt_irec_t prev; /* previous extent value */
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_bmbt_irec got;
+ xfs_extnum_t idx;
+ int error;
- if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
- return -EIO;
- if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+ switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ case XFS_DINODE_FMT_LOCAL:
*last_block = 0;
return 0;
+ case XFS_DINODE_FMT_BTREE:
+ case XFS_DINODE_FMT_EXTENTS:
+ break;
+ default:
+ return -EIO;
}
- ifp = XFS_IFORK_PTR(ip, whichfork);
- if (!(ifp->if_flags & XFS_IFEXTENTS) &&
- (error = xfs_iread_extents(tp, ip, whichfork)))
- return error;
- bno = *last_block - 1;
- ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
- &prev);
- if (eof || xfs_bmbt_get_startoff(ep) > bno) {
- if (prev.br_startoff == NULLFILEOFF)
- *last_block = 0;
- else
- *last_block = prev.br_startoff + prev.br_blockcount;
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
}
- /*
- * Otherwise *last_block is already the right answer.
- */
+
+ if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) {
+ if (got.br_startoff <= *last_block - 1)
+ return 0;
+ }
+
+ if (xfs_iext_get_extent(ifp, idx - 1, &got)) {
+ *last_block = got.br_startoff + got.br_blockcount;
+ return 0;
+ }
+
+ *last_block = 0;
return 0;
}
@@ -1582,7 +1511,7 @@ xfs_bmap_last_extent(
return error;
}
- nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
if (nextents == 0) {
*is_empty = 1;
return 0;
@@ -1735,7 +1664,7 @@ xfs_bmap_add_extent_delay_real(
&bma->ip->i_d.di_nextents);
ASSERT(bma->idx >= 0);
- ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(bma->idx <= xfs_iext_count(ifp));
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!bma->cur ||
(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
@@ -1794,7 +1723,7 @@ xfs_bmap_add_extent_delay_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+ if (bma->idx < xfs_iext_count(ifp) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
@@ -2300,7 +2229,7 @@ xfs_bmap_add_extent_unwritten_real(
ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
ASSERT(*idx >= 0);
- ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(*idx <= xfs_iext_count(ifp));
ASSERT(!isnullstartblock(new->br_startblock));
XFS_STATS_INC(mp, xs_add_exlist);
@@ -2356,7 +2285,7 @@ xfs_bmap_add_extent_unwritten_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+ if (*idx < xfs_iext_count(&ip->i_df) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
if (isnullstartblock(RIGHT.br_startblock))
@@ -2836,7 +2765,7 @@ xfs_bmap_add_extent_hole_delay(
* Check and set flags if the current (right) segment exists.
* If it doesn't exist, we're converting the hole at end-of-file.
*/
- if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+ if (*idx < xfs_iext_count(ifp)) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
@@ -2966,7 +2895,7 @@ xfs_bmap_add_extent_hole_real(
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
- ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(bma->idx <= xfs_iext_count(ifp));
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!bma->cur ||
!(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
@@ -2992,7 +2921,7 @@ xfs_bmap_add_extent_hole_real(
* Check and set flags if this segment has a current value.
* Not true if we're inserting into the "hole" at eof.
*/
- if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+ if (bma->idx < xfs_iext_count(ifp)) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
if (isnullstartblock(right.br_startblock))
@@ -4145,12 +4074,11 @@ xfs_bmapi_read(
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp;
struct xfs_bmbt_irec got;
- struct xfs_bmbt_irec prev;
xfs_fileoff_t obno;
xfs_fileoff_t end;
- xfs_extnum_t lastx;
+ xfs_extnum_t idx;
int error;
- int eof;
+ bool eof = false;
int n = 0;
int whichfork = xfs_bmapi_whichfork(flags);
@@ -4190,7 +4118,8 @@ xfs_bmapi_read(
return error;
}
- xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
+ if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
+ eof = true;
end = bno + len;
obno = bno;
@@ -4221,10 +4150,8 @@ xfs_bmapi_read(
break;
/* Else go on to the next record. */
- if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
- else
- eof = 1;
+ if (!xfs_iext_get_extent(ifp, ++idx, &got))
+ eof = true;
}
*nmap = n;
return 0;
@@ -4234,10 +4161,10 @@ int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
int whichfork,
- xfs_fileoff_t aoff,
+ xfs_fileoff_t off,
xfs_filblks_t len,
+ xfs_filblks_t prealloc,
struct xfs_bmbt_irec *got,
- struct xfs_bmbt_irec *prev,
xfs_extnum_t *lastx,
int eof)
{
@@ -4248,10 +4175,17 @@ xfs_bmapi_reserve_delalloc(
char rt = XFS_IS_REALTIME_INODE(ip);
xfs_extlen_t extsz;
int error;
+ xfs_fileoff_t aoff = off;
- alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
+ /*
+ * Cap the alloc length. Keep track of prealloc so we know whether to
+ * tag the inode before we return.
+ */
+ alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
if (!eof)
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
+ if (prealloc && alen >= len)
+ prealloc = alen - len;
/* Figure out the extent size, adjust alen */
if (whichfork == XFS_COW_FORK)
@@ -4259,7 +4193,12 @@ xfs_bmapi_reserve_delalloc(
else
extsz = xfs_get_extsz_hint(ip);
if (extsz) {
- error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
+ struct xfs_bmbt_irec prev;
+
+ if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev))
+ prev.br_startoff = NULLFILEOFF;
+
+ error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
1, 0, &aoff, &alen);
ASSERT(!error);
}
@@ -4312,6 +4251,16 @@ xfs_bmapi_reserve_delalloc(
*/
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+ /*
+ * Tag the inode if blocks were preallocated. Note that COW fork
+ * preallocation can occur at the start or end of the extent, even when
+ * prealloc == 0, so we must also check the aligned offset and length.
+ */
+ if (whichfork == XFS_DATA_FORK && prealloc)
+ xfs_inode_set_eofblocks_tag(ip);
+ if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
+ xfs_inode_set_cowblocks_tag(ip);
+
ASSERT(got->br_startoff <= aoff);
ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
ASSERT(isnullstartblock(got->br_startblock));
@@ -4349,7 +4298,7 @@ xfs_bmapi_allocate(
if (bma->wasdel) {
bma->length = (xfs_extlen_t)bma->got.br_blockcount;
bma->offset = bma->got.br_startoff;
- if (bma->idx != NULLEXTNUM && bma->idx) {
+ if (bma->idx) {
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
&bma->prev);
}
@@ -4563,7 +4512,7 @@ xfs_bmapi_write(
struct xfs_ifork *ifp;
struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */
xfs_fileoff_t end; /* end of mapped file region */
- int eof; /* after the end of extents */
+ bool eof = false; /* after the end of extents */
int error; /* error return */
int n; /* current extent index */
xfs_fileoff_t obno; /* old block number (offset) */
@@ -4641,12 +4590,14 @@ xfs_bmapi_write(
goto error0;
}
- xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
- &bma.prev);
n = 0;
end = bno + len;
obno = bno;
+ if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got))
+ eof = true;
+ if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev))
+ bma.prev.br_startoff = NULLFILEOFF;
bma.tp = tp;
bma.ip = ip;
bma.total = total;
@@ -4733,11 +4684,8 @@ xfs_bmapi_write(
/* Else go on to the next record. */
bma.prev = bma.got;
- if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
- &bma.got);
- } else
- eof = 1;
+ if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got))
+ eof = true;
}
*nmap = n;
@@ -4885,7 +4833,7 @@ xfs_bmap_del_extent_delay(
da_new = 0;
ASSERT(*idx >= 0);
- ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(*idx <= xfs_iext_count(ifp));
ASSERT(del->br_blockcount > 0);
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
@@ -4902,8 +4850,11 @@ xfs_bmap_del_extent_delay(
* sb counters as we might have to borrow some blocks for the
* indirect block accounting.
*/
- xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
+ error = xfs_trans_reserve_quota_nblks(NULL, ip,
+ -((long)del->br_blockcount), 0,
isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ return error;
ip->i_delayed_blks -= del->br_blockcount;
if (whichfork == XFS_COW_FORK)
@@ -5013,7 +4964,7 @@ xfs_bmap_del_extent_cow(
got_endoff = got->br_startoff + got->br_blockcount;
ASSERT(*idx >= 0);
- ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(*idx <= xfs_iext_count(ifp));
ASSERT(del->br_blockcount > 0);
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
@@ -5119,8 +5070,7 @@ xfs_bmap_del_extent(
state |= BMAP_COWFORK;
ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
- (uint)sizeof(xfs_bmbt_rec_t)));
+ ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
ASSERT(del->br_blockcount > 0);
ep = xfs_iext_get_ext(ifp, *idx);
xfs_bmbt_get_all(ep, &got);
@@ -5434,8 +5384,6 @@ __xfs_bunmapi(
{
xfs_btree_cur_t *cur; /* bmap btree cursor */
xfs_bmbt_irec_t del; /* extent being deleted */
- int eof; /* is deleting at eof */
- xfs_bmbt_rec_host_t *ep; /* extent record pointer */
int error; /* error return value */
xfs_extnum_t extno; /* extent number in list */
xfs_bmbt_irec_t got; /* current extent record */
@@ -5445,8 +5393,6 @@ __xfs_bunmapi(
int logflags; /* transaction logging flags */
xfs_extlen_t mod; /* rt extent offset */
xfs_mount_t *mp; /* mount structure */
- xfs_extnum_t nextents; /* number of file extents */
- xfs_bmbt_irec_t prev; /* previous extent record */
xfs_fileoff_t start; /* first file offset deleted */
int tmp_logflags; /* partial logging flags */
int wasdel; /* was a delayed alloc extent */
@@ -5477,8 +5423,7 @@ __xfs_bunmapi(
if (!(ifp->if_flags & XFS_IFEXTENTS) &&
(error = xfs_iread_extents(tp, ip, whichfork)))
return error;
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- if (nextents == 0) {
+ if (xfs_iext_count(ifp) == 0) {
*rlen = 0;
return 0;
}
@@ -5486,18 +5431,17 @@ __xfs_bunmapi(
isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
start = bno;
bno = start + len - 1;
- ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
- &prev);
/*
* Check to see if the given block number is past the end of the
* file, back up to the last block if so...
*/
- if (eof) {
- ep = xfs_iext_get_ext(ifp, --lastx);
- xfs_bmbt_get_all(ep, &got);
+ if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) {
+ ASSERT(lastx > 0);
+ xfs_iext_get_extent(ifp, --lastx, &got);
bno = got.br_startoff + got.br_blockcount - 1;
}
+
logflags = 0;
if (ifp->if_flags & XFS_IFBROOT) {
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
@@ -5528,8 +5472,7 @@ __xfs_bunmapi(
if (got.br_startoff > bno) {
if (--lastx < 0)
break;
- ep = xfs_iext_get_ext(ifp, lastx);
- xfs_bmbt_get_all(ep, &got);
+ xfs_iext_get_extent(ifp, lastx, &got);
}
/*
* Is the last block of this extent before the range
@@ -5543,7 +5486,6 @@ __xfs_bunmapi(
* Then deal with the (possibly delayed) allocated space
* we found.
*/
- ASSERT(ep != NULL);
del = got;
wasdel = isnullstartblock(del.br_startblock);
if (got.br_startoff < start) {
@@ -5624,15 +5566,12 @@ __xfs_bunmapi(
*/
ASSERT(bno >= del.br_blockcount);
bno -= del.br_blockcount;
- if (got.br_startoff > bno) {
- if (--lastx >= 0) {
- ep = xfs_iext_get_ext(ifp,
- lastx);
- xfs_bmbt_get_all(ep, &got);
- }
- }
+ if (got.br_startoff > bno && --lastx >= 0)
+ xfs_iext_get_extent(ifp, lastx, &got);
continue;
} else if (del.br_state == XFS_EXT_UNWRITTEN) {
+ struct xfs_bmbt_irec prev;
+
/*
* This one is already unwritten.
* It must have a written left neighbor.
@@ -5640,8 +5579,7 @@ __xfs_bunmapi(
* try again.
*/
ASSERT(lastx > 0);
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
- lastx - 1), &prev);
+ xfs_iext_get_extent(ifp, lastx - 1, &prev);
ASSERT(prev.br_state == XFS_EXT_NORM);
ASSERT(!isnullstartblock(prev.br_startblock));
ASSERT(del.br_startblock ==
@@ -5739,13 +5677,9 @@ nodelete:
*/
if (bno != (xfs_fileoff_t)-1 && bno >= start) {
if (lastx >= 0) {
- ep = xfs_iext_get_ext(ifp, lastx);
- if (xfs_bmbt_get_startoff(ep) > bno) {
- if (--lastx >= 0)
- ep = xfs_iext_get_ext(ifp,
- lastx);
- }
- xfs_bmbt_get_all(ep, &got);
+ xfs_iext_get_extent(ifp, lastx, &got);
+ if (got.br_startoff > bno && --lastx >= 0)
+ xfs_iext_get_extent(ifp, lastx, &got);
}
extno++;
}
@@ -5963,7 +5897,7 @@ xfs_bmse_shift_one(
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
- total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+ total_extents = xfs_iext_count(ifp);
xfs_bmbt_get_all(gotp, &got);
@@ -6140,7 +6074,7 @@ xfs_bmap_shift_extents(
* are collapsing out, so we cannot use the count of real extents here.
* Instead we have to calculate it from the incore fork.
*/
- total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+ total_extents = xfs_iext_count(ifp);
if (total_extents == 0) {
*done = 1;
goto del_cursor;
@@ -6200,7 +6134,7 @@ xfs_bmap_shift_extents(
* count can change. Update the total and grade the next record.
*/
if (direction == SHIFT_LEFT) {
- total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+ total_extents = xfs_iext_count(ifp);
stop_extent = total_extents;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 7cae6ec27fa6..cecd094404cc 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -237,14 +237,9 @@ int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
struct xfs_defer_ops *dfops, enum shift_direction direction,
int num_exts);
int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
-struct xfs_bmbt_rec_host *
- xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno,
- int fork, int *eofp, xfs_extnum_t *lastxp,
- struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
- xfs_fileoff_t aoff, xfs_filblks_t len,
- struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev,
- xfs_extnum_t *lastx, int eof);
+ xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
+ struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
enum xfs_bmap_intent_type {
XFS_BMAP_MAP = 1,
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 8007d2ba9aef..d6330c297ca0 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -796,13 +796,14 @@ xfs_bmbt_init_cursor(
struct xfs_btree_cur *cur;
ASSERT(whichfork != XFS_COW_FORK);
- cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
cur->bc_tp = tp;
cur->bc_mp = mp;
cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
cur->bc_btnum = XFS_BTNUM_BMAP;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2);
cur->bc_ops = &xfs_bmbt_ops;
cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 0e80993c8a59..21e6a6ab6b9a 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1769,8 +1769,28 @@ xfs_btree_lookup_get_block(
if (error)
return error;
+ /* Check the inode owner since the verifiers don't. */
+ if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
+ (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
+ be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
+ cur->bc_private.b.ip->i_ino)
+ goto out_bad;
+
+ /* Did we get the level we were looking for? */
+ if (be16_to_cpu((*blkp)->bb_level) != level)
+ goto out_bad;
+
+ /* Check that internal nodes have at least one record. */
+ if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
+ goto out_bad;
+
xfs_btree_setbuf(cur, level, bp);
return 0;
+
+out_bad:
+ *blkp = NULL;
+ xfs_trans_brelse(cur->bc_tp, bp);
+ return -EFSCORRUPTED;
}
/*
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index c2b01d1c79ee..b69b947c4c1b 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -96,46 +96,10 @@ union xfs_btree_rec {
/*
* Generic stats interface
*/
-#define __XFS_BTREE_STATS_INC(mp, type, stat) \
- XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat)
#define XFS_BTREE_STATS_INC(cur, stat) \
-do { \
- struct xfs_mount *__mp = cur->bc_mp; \
- switch (cur->bc_btnum) { \
- case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \
- case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \
- case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
- case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
- case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
- case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \
- case XFS_BTNUM_REFC: __XFS_BTREE_STATS_INC(__mp, refcbt, stat); break; \
- case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
- } \
-} while (0)
-
-#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \
- XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val)
-#define XFS_BTREE_STATS_ADD(cur, stat, val) \
-do { \
- struct xfs_mount *__mp = cur->bc_mp; \
- switch (cur->bc_btnum) { \
- case XFS_BTNUM_BNO: \
- __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \
- case XFS_BTNUM_CNT: \
- __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \
- case XFS_BTNUM_BMAP: \
- __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \
- case XFS_BTNUM_INO: \
- __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
- case XFS_BTNUM_FINO: \
- __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
- case XFS_BTNUM_RMAP: \
- __XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \
- case XFS_BTNUM_REFC: \
- __XFS_BTREE_STATS_ADD(__mp, refcbt, stat, val); break; \
- case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
- } \
-} while (0)
+ XFS_STATS_INC_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat)
+#define XFS_BTREE_STATS_ADD(cur, stat, val) \
+ XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val)
#define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */
@@ -253,6 +217,7 @@ typedef struct xfs_btree_cur
__uint8_t bc_nlevels; /* number of levels in the tree */
__uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
xfs_btnum_t bc_btnum; /* identifies which btree type */
+ int bc_statoff; /* offset of btre stats array */
union {
struct { /* needed for BNO, CNT, INO */
struct xfs_buf *agbp; /* agf/agi buffer pointer */
diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h
index fad1676ad8cd..a416c7cb23ea 100644
--- a/fs/xfs/libxfs/xfs_cksum.h
+++ b/fs/xfs/libxfs/xfs_cksum.h
@@ -6,10 +6,11 @@
/*
* Calculate the intermediate checksum for a buffer that has the CRC field
* inside it. The offset of the 32bit crc fields is passed as the
- * cksum_offset parameter.
+ * cksum_offset parameter. We do not modify the buffer during verification,
+ * hence we have to split the CRC calculation across the cksum_offset.
*/
static inline __uint32_t
-xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset)
+xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t zero = 0;
__uint32_t crc;
@@ -26,6 +27,20 @@ xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset)
}
/*
+ * Fast CRC method where the buffer is modified. Callers must have exclusive
+ * access to the buffer while the calculation takes place.
+ */
+static inline __uint32_t
+xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
+{
+ /* zero the CRC field */
+ *(__le32 *)(buffer + cksum_offset) = 0;
+
+ /* single pass CRC calculation for the entire buffer */
+ return crc32c(XFS_CRC_SEED, buffer, length);
+}
+
+/*
* Convert the intermediate checksum to the final ondisk format.
*
* The CRC32c calculation uses LE format even on BE machines, but returns the
@@ -40,11 +55,14 @@ xfs_end_cksum(__uint32_t crc)
/*
* Helper to generate the checksum for a buffer.
+ *
+ * This modifies the buffer temporarily - callers must have exclusive
+ * access to the buffer while the calculation takes place.
*/
static inline void
xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
- __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset);
+ __uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
*(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc);
}
@@ -55,7 +73,7 @@ xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
static inline int
xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
- __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset);
+ __uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc);
}
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 20a96dd5af7e..c58d72c220f5 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -93,7 +93,7 @@ xfs_ascii_ci_compname(
return result;
}
-static struct xfs_nameops xfs_ascii_ci_nameops = {
+static const struct xfs_nameops xfs_ascii_ci_nameops = {
.hashname = xfs_ascii_ci_hashname,
.compname = xfs_ascii_ci_compname,
};
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index becc926c3e3d..0197590fa7d7 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -157,6 +157,9 @@ extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
+extern void xfs_dir2_data_freescan_int(struct xfs_da_geometry *geo,
+ const struct xfs_dir_ops *ops,
+ struct xfs_dir2_data_hdr *hdr, int *loghead);
extern void xfs_dir2_data_freescan(struct xfs_inode *dp,
struct xfs_dir2_data_hdr *hdr, int *loghead);
extern void xfs_dir2_data_log_entry(struct xfs_da_args *args,
@@ -177,6 +180,8 @@ extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
struct xfs_dir2_data_unused *dup);
+extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+
extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 725fc7841fde..d478065b9544 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -329,7 +329,7 @@ xfs_dir3_data_read(
err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
- if (!err && tp)
+ if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
return err;
}
@@ -505,8 +505,9 @@ xfs_dir2_data_freeremove(
* Given a data block, reconstruct its bestfree map.
*/
void
-xfs_dir2_data_freescan(
- struct xfs_inode *dp,
+xfs_dir2_data_freescan_int(
+ struct xfs_da_geometry *geo,
+ const struct xfs_dir_ops *ops,
struct xfs_dir2_data_hdr *hdr,
int *loghead)
{
@@ -516,7 +517,6 @@ xfs_dir2_data_freescan(
struct xfs_dir2_data_free *bf;
char *endp; /* end of block's data */
char *p; /* current entry pointer */
- struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
@@ -526,13 +526,13 @@ xfs_dir2_data_freescan(
/*
* Start by clearing the table.
*/
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = ops->data_bestfree_p(hdr);
memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
*loghead = 1;
/*
* Set up pointers.
*/
- p = (char *)dp->d_ops->data_entry_p(hdr);
+ p = (char *)ops->data_entry_p(hdr);
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
btp = xfs_dir2_block_tail_p(geo, hdr);
@@ -559,12 +559,22 @@ xfs_dir2_data_freescan(
else {
dep = (xfs_dir2_data_entry_t *)p;
ASSERT((char *)dep - (char *)hdr ==
- be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep)));
- p += dp->d_ops->data_entsize(dep->namelen);
+ be16_to_cpu(*ops->data_entry_tag_p(dep)));
+ p += ops->data_entsize(dep->namelen);
}
}
}
+void
+xfs_dir2_data_freescan(
+ struct xfs_inode *dp,
+ struct xfs_dir2_data_hdr *hdr,
+ int *loghead)
+{
+ return xfs_dir2_data_freescan_int(dp->i_mount->m_dir_geo, dp->d_ops,
+ hdr, loghead);
+}
+
/*
* Initialize a data block at the given block number in the directory.
* Give back the buffer for the created block.
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index ef9f6ead96a4..d04547fcf274 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -21,7 +21,6 @@
struct dir_context;
/* xfs_dir2.c */
-extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
xfs_dir2_db_t *dbp);
extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 51b4e0de1fdc..f272abff11e1 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2344,7 +2344,8 @@ xfs_imap(
imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
imap->im_len = XFS_FSB_TO_BB(mp, 1);
- imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
+ imap->im_boffset = (unsigned short)(offset <<
+ mp->m_sb.sb_inodelog);
return 0;
}
@@ -2372,7 +2373,7 @@ out_map:
imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
- imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
+ imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
/*
* If the inode number maps to a block outside the bounds
@@ -2450,8 +2451,6 @@ xfs_ialloc_log_agi(
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
#endif
- xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
-
/*
* Compute byte offsets for the first and last fields in the first
* region and log the agi buffer. This only logs up through
@@ -2512,8 +2511,15 @@ xfs_agi_verify(
if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
return false;
- if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
+ if (be32_to_cpu(agi->agi_level) < 1 ||
+ be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
+ return false;
+
+ if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
+ (be32_to_cpu(agi->agi_free_level) < 1 ||
+ be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS))
return false;
+
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
@@ -2592,6 +2598,8 @@ xfs_read_agi(
XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
if (error)
return error;
+ if (tp)
+ xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF);
xfs_buf_set_ref(*bpp, XFS_AGI_REF);
return 0;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index eab68ae2e011..0fd086d03d41 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -357,7 +357,7 @@ xfs_inobt_init_cursor(
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
struct xfs_btree_cur *cur;
- cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
cur->bc_tp = tp;
cur->bc_mp = mp;
@@ -365,9 +365,11 @@ xfs_inobt_init_cursor(
if (btnum == XFS_BTNUM_INO) {
cur->bc_nlevels = be32_to_cpu(agi->agi_level);
cur->bc_ops = &xfs_inobt_ops;
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2);
} else {
cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
cur->bc_ops = &xfs_finobt_ops;
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2);
}
cur->bc_blocklog = mp->m_sb.sb_blocklog;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 134424fac434..dd483e2767f7 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -383,7 +383,7 @@ xfs_log_dinode_to_disk(
static bool
xfs_dinode_verify(
struct xfs_mount *mp,
- struct xfs_inode *ip,
+ xfs_ino_t ino,
struct xfs_dinode *dip)
{
uint16_t flags;
@@ -392,6 +392,14 @@ xfs_dinode_verify(
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
return false;
+ /* don't allow invalid i_size */
+ if (be64_to_cpu(dip->di_size) & (1ULL << 63))
+ return false;
+
+ /* No zero-length symlinks. */
+ if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
+ return false;
+
/* only version 3 or greater inodes are extensively verified here */
if (dip->di_version < 3)
return true;
@@ -401,7 +409,7 @@ xfs_dinode_verify(
if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
XFS_DINODE_CRC_OFF))
return false;
- if (be64_to_cpu(dip->di_ino) != ip->i_ino)
+ if (be64_to_cpu(dip->di_ino) != ino)
return false;
if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
return false;
@@ -436,7 +444,7 @@ xfs_dinode_calc_crc(
return;
ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
- crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
+ crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
XFS_DINODE_CRC_OFF);
dip->di_crc = xfs_end_cksum(crc);
}
@@ -493,7 +501,7 @@ xfs_iread(
return error;
/* even unallocated inodes are verified */
- if (!xfs_dinode_verify(mp, ip, dip)) {
+ if (!xfs_dinode_verify(mp, ip->i_ino, dip)) {
xfs_alert(mp, "%s: validation failed for inode %lld failed",
__func__, ip->i_ino);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 3cfe12a4f58a..6848a0afbce7 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -58,8 +58,8 @@ struct xfs_icdinode {
*/
struct xfs_imap {
xfs_daddr_t im_blkno; /* starting BB of inode chunk */
- ushort im_len; /* length in BBs of inode chunk */
- ushort im_boffset; /* inode offset in block in bytes */
+ unsigned short im_len; /* length in BBs of inode chunk */
+ unsigned short im_boffset; /* inode offset in block in bytes */
};
int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 5dd56d3dbb3a..222e103356c6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -775,6 +775,13 @@ xfs_idestroy_fork(
}
}
+/* Count number of incore extents based on if_bytes */
+xfs_extnum_t
+xfs_iext_count(struct xfs_ifork *ifp)
+{
+ return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+}
+
/*
* Convert in-core extents to on-disk form
*
@@ -803,7 +810,7 @@ xfs_iextents_copy(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(ifp->if_bytes > 0);
- nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nrecs = xfs_iext_count(ifp);
XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
ASSERT(nrecs > 0);
@@ -941,7 +948,7 @@ xfs_iext_get_ext(
xfs_extnum_t idx) /* index of target extent */
{
ASSERT(idx >= 0);
- ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+ ASSERT(idx < xfs_iext_count(ifp));
if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
return ifp->if_u1.if_ext_irec->er_extbuf;
@@ -1017,7 +1024,7 @@ xfs_iext_add(
int new_size; /* size of extents after adding */
xfs_extnum_t nextents; /* number of extents in file */
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
ASSERT((idx >= 0) && (idx <= nextents));
byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
new_size = ifp->if_bytes + byte_diff;
@@ -1241,7 +1248,7 @@ xfs_iext_remove(
trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
ASSERT(ext_diff > 0);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
if (new_size == 0) {
@@ -1270,7 +1277,7 @@ xfs_iext_remove_inline(
ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
ASSERT(idx < XFS_INLINE_EXTS);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
ASSERT(((nextents - ext_diff) > 0) &&
(nextents - ext_diff) < XFS_INLINE_EXTS);
@@ -1309,7 +1316,7 @@ xfs_iext_remove_direct(
ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
new_size = ifp->if_bytes -
(ext_diff * sizeof(xfs_bmbt_rec_t));
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
if (new_size == 0) {
xfs_iext_destroy(ifp);
@@ -1546,7 +1553,7 @@ xfs_iext_indirect_to_direct(
int size; /* size of file extents */
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
ASSERT(nextents <= XFS_LINEAR_EXTS);
size = nextents * sizeof(xfs_bmbt_rec_t);
@@ -1620,7 +1627,7 @@ xfs_iext_bno_to_ext(
xfs_extnum_t nextents; /* number of file extents */
xfs_fileoff_t startoff = 0; /* start offset of extent */
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
if (nextents == 0) {
*idxp = 0;
return NULL;
@@ -1733,8 +1740,8 @@ xfs_iext_idx_to_irec(
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
ASSERT(page_idx >= 0);
- ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
- ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
+ ASSERT(page_idx <= xfs_iext_count(ifp));
+ ASSERT(page_idx < xfs_iext_count(ifp) || realloc);
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
erp_idx = 0;
@@ -1782,7 +1789,7 @@ xfs_iext_irec_init(
xfs_extnum_t nextents; /* number of extents in file */
ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
ASSERT(nextents <= XFS_LINEAR_EXTS);
erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
@@ -1906,7 +1913,7 @@ xfs_iext_irec_compact(
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
if (nextents == 0) {
xfs_iext_destroy(ifp);
@@ -1996,3 +2003,49 @@ xfs_ifork_init_cow(
ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
ip->i_cnextents = 0;
}
+
+/*
+ * Lookup the extent covering bno.
+ *
+ * If there is an extent covering bno return the extent index, and store the
+ * expanded extent structure in *gotp, and the extent index in *idx.
+ * If there is no extent covering bno, but there is an extent after it (e.g.
+ * it lies in a hole) return that extent in *gotp and its index in *idx
+ * instead.
+ * If bno is beyond the last extent return false, and return the index after
+ * the last valid index in *idxp.
+ */
+bool
+xfs_iext_lookup_extent(
+ struct xfs_inode *ip,
+ struct xfs_ifork *ifp,
+ xfs_fileoff_t bno,
+ xfs_extnum_t *idxp,
+ struct xfs_bmbt_irec *gotp)
+{
+ struct xfs_bmbt_rec_host *ep;
+
+ XFS_STATS_INC(ip->i_mount, xs_look_exlist);
+
+ ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
+ if (!ep)
+ return false;
+ xfs_bmbt_get_all(ep, gotp);
+ return true;
+}
+
+/*
+ * Return true if there is an extent at index idx, and return the expanded
+ * extent structure at idx in that case. Else return false.
+ */
+bool
+xfs_iext_get_extent(
+ struct xfs_ifork *ifp,
+ xfs_extnum_t idx,
+ struct xfs_bmbt_irec *gotp)
+{
+ if (idx < 0 || idx >= xfs_iext_count(ifp))
+ return false;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
+ return true;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index c9476f50e32d..7fb8365326d1 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -152,6 +152,7 @@ void xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
struct xfs_bmbt_rec_host *
xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
+xfs_extnum_t xfs_iext_count(struct xfs_ifork *);
void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
struct xfs_bmbt_irec *, int);
void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
@@ -181,6 +182,12 @@ void xfs_iext_irec_compact_pages(struct xfs_ifork *);
void xfs_iext_irec_compact_full(struct xfs_ifork *);
void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
+bool xfs_iext_lookup_extent(struct xfs_inode *ip,
+ struct xfs_ifork *ifp, xfs_fileoff_t bno,
+ xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
+bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
+ struct xfs_bmbt_irec *gotp);
+
extern struct kmem_zone *xfs_ifork_zone;
extern void xfs_ifork_init_cow(struct xfs_inode *ip);
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 083cdd6d6c28..7ae571f8e34a 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -481,8 +481,8 @@ static inline uint xfs_log_dinode_size(int version)
typedef struct xfs_buf_log_format {
unsigned short blf_type; /* buf log item type indicator */
unsigned short blf_size; /* size of this item */
- ushort blf_flags; /* misc state */
- ushort blf_len; /* number of blocks in this buf */
+ unsigned short blf_flags; /* misc state */
+ unsigned short blf_len; /* number of blocks in this buf */
__int64_t blf_blkno; /* starting blkno of this buf */
unsigned int blf_map_size; /* used size of data bitmap in words */
unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 8e385f91d660..d9f65e2d5cc8 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -52,7 +52,7 @@ typedef struct xlog_recover {
struct list_head r_itemq; /* q for items */
} xlog_recover_t;
-#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr)
+#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr)
/*
* This is the number of entries in the l_buf_cancel_table used during
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 453bb2757ec2..6fb2215f8ff7 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -354,6 +354,7 @@ xfs_refcountbt_init_cursor(
cur->bc_btnum = XFS_BTNUM_REFC;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_refcountbt_ops;
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2);
cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level);
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 83e672ff7577..de25771764ba 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -484,6 +484,7 @@ xfs_rmapbt_init_cursor(
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_rmapbt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index e2e1106c9fad..ea45584a9913 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1016,4 +1016,3 @@ xfs_rtfree_extent(
}
return 0;
}
-
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a70aec910626..2580262e4ea0 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -262,6 +262,12 @@ xfs_mount_validate_sb(
return -EFSCORRUPTED;
}
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) {
+ xfs_notice(mp, "v5 SB sanity check failed");
+ return -EFSCORRUPTED;
+ }
+
/*
* Until this is fixed only page-sized or smaller data blocks work.
*/
@@ -338,13 +344,16 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp)
XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD;
sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
- if (sbp->sb_qflags & XFS_PQUOTA_ACCT) {
+ if (sbp->sb_qflags & XFS_PQUOTA_ACCT &&
+ sbp->sb_gquotino != NULLFSINO) {
/*
* In older version of superblock, on-disk superblock only
* has sb_gquotino, and in-core superblock has both sb_gquotino
* and sb_pquotino. But, only one of them is supported at any
* point of time. So, if PQUOTA is set in disk superblock,
- * copy over sb_gquotino to sb_pquotino.
+ * copy over sb_gquotino to sb_pquotino. The NULLFSINO test
+ * above is to make sure we don't do this twice and wipe them
+ * both out!
*/
sbp->sb_pquotino = sbp->sb_gquotino;
sbp->sb_gquotino = NULLFSINO;
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 8d74870468c2..717909f2f7b7 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -57,7 +57,6 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
#define NULLAGBLOCK ((xfs_agblock_t)-1)
#define NULLAGNUMBER ((xfs_agnumber_t)-1)
-#define NULLEXTNUM ((xfs_extnum_t)-1)
#define NULLCOMMITLSN ((xfs_lsn_t)-1)
@@ -75,11 +74,14 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
* Minimum and maximum blocksize and sectorsize.
* The blocksize upper limit is pretty much arbitrary.
* The sectorsize upper limit is due to sizeof(sb_sectsize).
+ * CRC enable filesystems use 512 byte inodes, meaning 512 byte block sizes
+ * cannot be used.
*/
#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */
#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */
#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG)
#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG)
+#define XFS_MIN_CRC_BLOCKSIZE (1 << (XFS_MIN_BLOCKSIZE_LOG + 1))
#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */
#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */
#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3e57a56cf829..0f56fcd3a5d5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -37,11 +37,6 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
-/* flags for direct write completions */
-#define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
-#define XFS_DIO_FLAG_APPEND (1 << 1)
-#define XFS_DIO_FLAG_COW (1 << 2)
-
/*
* structure owned by writepages passed to individual writepage calls
*/
@@ -495,8 +490,8 @@ xfs_submit_ioend(
ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = xfs_end_bio;
- bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
- (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+
/*
* If we are failing the IO now, just mark the ioend with an
* error and finish it. This will run IO completion immediately
@@ -567,8 +562,7 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
- bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
- (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
@@ -777,7 +771,7 @@ xfs_map_cow(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_bmbt_irec imap;
- bool is_cow = false, need_alloc = false;
+ bool is_cow = false;
int error;
/*
@@ -795,7 +789,7 @@ xfs_map_cow(
* Else we need to check if there is a COW mapping at this offset.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
- is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
+ is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (!is_cow)
@@ -805,7 +799,7 @@ xfs_map_cow(
* And if the COW mapping has a delayed extent here we need to
* allocate real space for it now.
*/
- if (need_alloc) {
+ if (isnullstartblock(imap.br_startblock)) {
error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
&imap);
if (error)
@@ -1176,45 +1170,6 @@ xfs_vm_releasepage(
}
/*
- * When we map a DIO buffer, we may need to pass flags to
- * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
- *
- * Note that for DIO, an IO to the highest supported file block offset (i.e.
- * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
- * bit variable. Hence if we see this overflow, we have to assume that the IO is
- * extending the file size. We won't know for sure until IO completion is run
- * and the actual max write offset is communicated to the IO completion
- * routine.
- */
-static void
-xfs_map_direct(
- struct inode *inode,
- struct buffer_head *bh_result,
- struct xfs_bmbt_irec *imap,
- xfs_off_t offset,
- bool is_cow)
-{
- uintptr_t *flags = (uintptr_t *)&bh_result->b_private;
- xfs_off_t size = bh_result->b_size;
-
- trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
- ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
- XFS_IO_OVERWRITE, imap);
-
- if (ISUNWRITTEN(imap)) {
- *flags |= XFS_DIO_FLAG_UNWRITTEN;
- set_buffer_defer_completion(bh_result);
- } else if (is_cow) {
- *flags |= XFS_DIO_FLAG_COW;
- set_buffer_defer_completion(bh_result);
- }
- if (offset + size > i_size_read(inode) || offset + size < 0) {
- *flags |= XFS_DIO_FLAG_APPEND;
- set_buffer_defer_completion(bh_result);
- }
-}
-
-/*
* If this is O_DIRECT or the mpage code calling tell them how large the mapping
* is, so that we can avoid repeated get_blocks calls.
*
@@ -1254,52 +1209,12 @@ xfs_map_trim_size(
bh_result->b_size = mapping_size;
}
-/* Bounce unaligned directio writes to the page cache. */
static int
-xfs_bounce_unaligned_dio_write(
- struct xfs_inode *ip,
- xfs_fileoff_t offset_fsb,
- struct xfs_bmbt_irec *imap)
-{
- struct xfs_bmbt_irec irec;
- xfs_fileoff_t delta;
- bool shared;
- bool x;
- int error;
-
- irec = *imap;
- if (offset_fsb > irec.br_startoff) {
- delta = offset_fsb - irec.br_startoff;
- irec.br_blockcount -= delta;
- irec.br_startblock += delta;
- irec.br_startoff = offset_fsb;
- }
- error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
- if (error)
- return error;
-
- /*
- * We're here because we're trying to do a directio write to a
- * region that isn't aligned to a filesystem block. If any part
- * of the extent is shared, fall back to buffered mode to handle
- * the RMW. This is done by returning -EREMCHG ("remote addr
- * changed"), which is caught further up the call stack.
- */
- if (shared) {
- trace_xfs_reflink_bounce_dio_write(ip, imap);
- return -EREMCHG;
- }
- return 0;
-}
-
-STATIC int
-__xfs_get_blocks(
+xfs_get_blocks(
struct inode *inode,
sector_t iblock,
struct buffer_head *bh_result,
- int create,
- bool direct,
- bool dax_fault)
+ int create)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -1310,11 +1225,8 @@ __xfs_get_blocks(
int nimaps = 1;
xfs_off_t offset;
ssize_t size;
- int new = 0;
- bool is_cow = false;
- bool need_alloc = false;
- BUG_ON(create && !direct);
+ BUG_ON(create);
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -1323,7 +1235,7 @@ __xfs_get_blocks(
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
size = bh_result->b_size;
- if (!create && offset >= i_size_read(inode))
+ if (offset >= i_size_read(inode))
return 0;
/*
@@ -1338,52 +1250,12 @@ __xfs_get_blocks(
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
- if (create && direct && xfs_is_reflink_inode(ip))
- is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
- &need_alloc);
- if (!is_cow) {
- error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
- &imap, &nimaps, XFS_BMAPI_ENTIRE);
- /*
- * Truncate an overwrite extent if there's a pending CoW
- * reservation before the end of this extent. This
- * forces us to come back to get_blocks to take care of
- * the CoW.
- */
- if (create && direct && nimaps &&
- imap.br_startblock != HOLESTARTBLOCK &&
- imap.br_startblock != DELAYSTARTBLOCK &&
- !ISUNWRITTEN(&imap))
- xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
- &imap);
- }
- ASSERT(!need_alloc);
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+ &imap, &nimaps, XFS_BMAPI_ENTIRE);
if (error)
goto out_unlock;
- /* for DAX, we convert unwritten extents directly */
- if (create &&
- (!nimaps ||
- (imap.br_startblock == HOLESTARTBLOCK ||
- imap.br_startblock == DELAYSTARTBLOCK) ||
- (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
- /*
- * xfs_iomap_write_direct() expects the shared lock. It
- * is unlocked on return.
- */
- if (lockmode == XFS_ILOCK_EXCL)
- xfs_ilock_demote(ip, lockmode);
-
- error = xfs_iomap_write_direct(ip, offset, size,
- &imap, nimaps);
- if (error)
- return error;
- new = 1;
-
- trace_xfs_get_blocks_alloc(ip, offset, size,
- ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
- : XFS_IO_DELALLOC, &imap);
- } else if (nimaps) {
+ if (nimaps) {
trace_xfs_get_blocks_found(ip, offset, size,
ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
: XFS_IO_OVERWRITE, &imap);
@@ -1393,12 +1265,6 @@ __xfs_get_blocks(
goto out_unlock;
}
- if (IS_DAX(inode) && create) {
- ASSERT(!ISUNWRITTEN(&imap));
- /* zeroing is not needed at a higher layer */
- new = 0;
- }
-
/* trim mapping down to size requested */
xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
@@ -1408,50 +1274,14 @@ __xfs_get_blocks(
*/
if (imap.br_startblock != HOLESTARTBLOCK &&
imap.br_startblock != DELAYSTARTBLOCK &&
- (create || !ISUNWRITTEN(&imap))) {
- if (create && direct && !is_cow) {
- error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
- &imap);
- if (error)
- return error;
- }
-
+ !ISUNWRITTEN(&imap))
xfs_map_buffer(inode, bh_result, &imap, offset);
- if (ISUNWRITTEN(&imap))
- set_buffer_unwritten(bh_result);
- /* direct IO needs special help */
- if (create) {
- if (dax_fault)
- ASSERT(!ISUNWRITTEN(&imap));
- else
- xfs_map_direct(inode, bh_result, &imap, offset,
- is_cow);
- }
- }
/*
* If this is a realtime file, data may be on a different device.
* to that pointed to from the buffer_head b_bdev currently.
*/
bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
- /*
- * If we previously allocated a block out beyond eof and we are now
- * coming back to use it then we will need to flag it as new even if it
- * has a disk address.
- *
- * With sub-block writes into unwritten extents we also need to mark
- * the buffer as new so that the unwritten parts of the buffer gets
- * correctly zeroed.
- */
- if (create &&
- ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
- (offset >= i_size_read(inode)) ||
- (new || ISUNWRITTEN(&imap))))
- set_buffer_new(bh_result);
-
- BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
-
return 0;
out_unlock:
@@ -1459,110 +1289,6 @@ out_unlock:
return error;
}
-int
-xfs_get_blocks(
- struct inode *inode,
- sector_t iblock,
- struct buffer_head *bh_result,
- int create)
-{
- return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
-}
-
-int
-xfs_get_blocks_direct(
- struct inode *inode,
- sector_t iblock,
- struct buffer_head *bh_result,
- int create)
-{
- return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
-}
-
-int
-xfs_get_blocks_dax_fault(
- struct inode *inode,
- sector_t iblock,
- struct buffer_head *bh_result,
- int create)
-{
- return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * xfs_map_direct passes us some flags in the private data to tell us what to
- * do. If no flags are set, then the write IO is an overwrite wholly within
- * the existing allocated file size and so there is nothing for us to do.
- *
- * Note that in this case the completion can be called in interrupt context,
- * whereas if we have flags set we will always be called in task context
- * (i.e. from a workqueue).
- */
-int
-xfs_end_io_direct_write(
- struct kiocb *iocb,
- loff_t offset,
- ssize_t size,
- void *private)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
- struct xfs_inode *ip = XFS_I(inode);
- uintptr_t flags = (uintptr_t)private;
- int error = 0;
-
- trace_xfs_end_io_direct_write(ip, offset, size);
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- if (size <= 0)
- return size;
-
- /*
- * The flags tell us whether we are doing unwritten extent conversions
- * or an append transaction that updates the on-disk file size. These
- * cases are the only cases where we should *potentially* be needing
- * to update the VFS inode size.
- */
- if (flags == 0) {
- ASSERT(offset + size <= i_size_read(inode));
- return 0;
- }
-
- /*
- * We need to update the in-core inode size here so that we don't end up
- * with the on-disk inode size being outside the in-core inode size. We
- * have no other method of updating EOF for AIO, so always do it here
- * if necessary.
- *
- * We need to lock the test/set EOF update as we can be racing with
- * other IO completions here to update the EOF. Failing to serialise
- * here can result in EOF moving backwards and Bad Things Happen when
- * that occurs.
- */
- spin_lock(&ip->i_flags_lock);
- if (offset + size > i_size_read(inode))
- i_size_write(inode, offset + size);
- spin_unlock(&ip->i_flags_lock);
-
- if (flags & XFS_DIO_FLAG_COW)
- error = xfs_reflink_end_cow(ip, offset, size);
- if (flags & XFS_DIO_FLAG_UNWRITTEN) {
- trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
-
- error = xfs_iomap_write_unwritten(ip, offset, size);
- }
- if (flags & XFS_DIO_FLAG_APPEND) {
- trace_xfs_end_io_direct_write_append(ip, offset, size);
-
- error = xfs_setfilesize(ip, offset, size);
- }
-
- return error;
-}
-
STATIC ssize_t
xfs_vm_direct_IO(
struct kiocb *iocb,
@@ -1583,7 +1309,6 @@ xfs_vm_bmap(
struct xfs_inode *ip = XFS_I(inode);
trace_xfs_vm_bmap(XFS_I(inode));
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
/*
* The swap code (ab-)uses ->bmap to get a block mapping and then
@@ -1591,12 +1316,10 @@ xfs_vm_bmap(
* that on reflinks inodes, so we have to skip out here. And yes,
* 0 is the magic code for a bmap error..
*/
- if (xfs_is_reflink_inode(ip)) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ if (xfs_is_reflink_inode(ip))
return 0;
- }
+
filemap_write_and_wait(mapping);
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return generic_block_bmap(mapping, block, xfs_get_blocks);
}
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index b3c6634f9518..cc174ec6c2fd 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -55,15 +55,6 @@ struct xfs_ioend {
extern const struct address_space_operations xfs_address_space_operations;
-int xfs_get_blocks(struct inode *inode, sector_t offset,
- struct buffer_head *map_bh, int create);
-int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
- struct buffer_head *map_bh, int create);
-int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
- struct buffer_head *map_bh, int create);
-
-int xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
- ssize_t size, void *private);
int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
extern void xfs_count_page_state(struct page *, int *, int *);
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index e3da5d448bcf..d14691aa02b4 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -112,8 +112,8 @@ typedef struct attrlist_cursor_kern {
*========================================================================*/
-/* Return 0 on success, or -errno; other state communicated via *context */
-typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
+/* void; state communicated via *context */
+typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int);
typedef struct xfs_attr_list_context {
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 25e76cd6c053..97c45b6eb91e 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -74,7 +74,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
xfs_attr_sf_entry_t *sfe;
xfs_inode_t *dp;
int sbsize, nsbuf, count, i;
- int error;
ASSERT(context != NULL);
dp = context->dp;
@@ -102,13 +101,11 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
(XFS_ISRESET_CURSOR(cursor) &&
(dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
- error = context->put_listent(context,
- sfe->flags,
- sfe->nameval,
- (int)sfe->namelen,
- (int)sfe->valuelen);
- if (error)
- return error;
+ context->put_listent(context,
+ sfe->flags,
+ sfe->nameval,
+ (int)sfe->namelen,
+ (int)sfe->valuelen);
/*
* Either search callback finished early or
* didn't fit it all in the buffer after all.
@@ -193,15 +190,11 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
cursor->hashval = sbp->hash;
cursor->offset = 0;
}
- error = context->put_listent(context,
- sbp->flags,
- sbp->name,
- sbp->namelen,
- sbp->valuelen);
- if (error) {
- kmem_free(sbuf);
- return error;
- }
+ context->put_listent(context,
+ sbp->flags,
+ sbp->name,
+ sbp->namelen,
+ sbp->valuelen);
if (context->seen_enough)
break;
cursor->offset++;
@@ -335,11 +328,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
for (;;) {
leaf = bp->b_addr;
- error = xfs_attr3_leaf_list_int(bp, context);
- if (error) {
- xfs_trans_brelse(NULL, bp);
- return error;
- }
+ xfs_attr3_leaf_list_int(bp, context);
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
if (context->seen_enough || leafhdr.forw == 0)
break;
@@ -356,7 +345,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
/*
* Copy out attribute list entries for attr_list(), for leaf attribute lists.
*/
-int
+void
xfs_attr3_leaf_list_int(
struct xfs_buf *bp,
struct xfs_attr_list_context *context)
@@ -366,7 +355,6 @@ xfs_attr3_leaf_list_int(
struct xfs_attr3_icleaf_hdr ichdr;
struct xfs_attr_leaf_entry *entries;
struct xfs_attr_leaf_entry *entry;
- int retval;
int i;
struct xfs_mount *mp = context->dp->i_mount;
@@ -399,7 +387,7 @@ xfs_attr3_leaf_list_int(
}
if (i == ichdr.count) {
trace_xfs_attr_list_notfound(context);
- return 0;
+ return;
}
} else {
entry = &entries[0];
@@ -410,7 +398,6 @@ xfs_attr3_leaf_list_int(
/*
* We have found our place, start copying out the new attributes.
*/
- retval = 0;
for (; i < ichdr.count; entry++, i++) {
char *name;
int namelen, valuelen;
@@ -439,16 +426,14 @@ xfs_attr3_leaf_list_int(
valuelen = be32_to_cpu(name_rmt->valuelen);
}
- retval = context->put_listent(context, entry->flags,
+ context->put_listent(context, entry->flags,
name, namelen, valuelen);
- if (retval)
- break;
if (context->seen_enough)
break;
cursor->offset++;
}
trace_xfs_attr_list_leaf_end(context);
- return retval;
+ return;
}
/*
@@ -467,9 +452,9 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
if (error)
return error;
- error = xfs_attr3_leaf_list_int(bp, context);
+ xfs_attr3_leaf_list_int(bp, context);
xfs_trans_brelse(NULL, bp);
- return error;
+ return 0;
}
int
@@ -513,7 +498,7 @@ xfs_attr_list_int(
* Take care to check values and protect against them changing later,
* we may be reading them directly out of a user buffer.
*/
-STATIC int
+STATIC void
xfs_attr_put_listent(
xfs_attr_list_context_t *context,
int flags,
@@ -536,10 +521,10 @@ xfs_attr_put_listent(
*/
if (((context->flags & ATTR_SECURE) == 0) !=
((flags & XFS_ATTR_SECURE) == 0))
- return 0;
+ return;
if (((context->flags & ATTR_ROOT) == 0) !=
((flags & XFS_ATTR_ROOT) == 0))
- return 0;
+ return;
arraytop = sizeof(*alist) +
context->count * sizeof(alist->al_offset[0]);
@@ -548,7 +533,7 @@ xfs_attr_put_listent(
trace_xfs_attr_list_full(context);
alist->al_more = 1;
context->seen_enough = 1;
- return 0;
+ return;
}
aep = (attrlist_ent_t *)&context->alist[context->firstu];
@@ -558,7 +543,7 @@ xfs_attr_put_listent(
alist->al_offset[context->count++] = context->firstu;
alist->al_count = context->count;
trace_xfs_attr_list_add(context);
- return 0;
+ return;
}
/*
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 552465e011ec..b9abce524c33 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -359,9 +359,7 @@ xfs_bmap_count_blocks(
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
- xfs_bmap_count_leaves(ifp, 0,
- ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
- count);
+ xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
return 0;
}
@@ -426,7 +424,7 @@ xfs_getbmapx_fix_eof_hole(
ifp = XFS_IFORK_PTR(ip, whichfork);
if (!moretocome &&
xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
- (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
+ (lastx == xfs_iext_count(ifp) - 1))
out->bmv_oflags |= BMV_OF_LAST;
}
@@ -1792,6 +1790,7 @@ xfs_swap_extent_forks(
struct xfs_ifork tempifp, *ifp, *tifp;
int aforkblks = 0;
int taforkblks = 0;
+ xfs_extnum_t nextents;
__uint64_t tmp;
int error;
@@ -1877,14 +1876,13 @@ xfs_swap_extent_forks(
switch (ip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
- /* If the extents fit in the inode, fix the
- * pointer. Otherwise it's already NULL or
- * pointing to the extent.
+ /*
+ * If the extents fit in the inode, fix the pointer. Otherwise
+ * it's already NULL or pointing to the extent.
*/
- if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
- ifp->if_u1.if_extents =
- ifp->if_u2.if_inline_ext;
- }
+ nextents = xfs_iext_count(&ip->i_df);
+ if (nextents <= XFS_INLINE_EXTS)
+ ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
(*src_log_flags) |= XFS_ILOG_DEXT;
break;
case XFS_DINODE_FMT_BTREE:
@@ -1896,14 +1894,13 @@ xfs_swap_extent_forks(
switch (tip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
- /* If the extents fit in the inode, fix the
- * pointer. Otherwise it's already NULL or
- * pointing to the extent.
+ /*
+ * If the extents fit in the inode, fix the pointer. Otherwise
+ * it's already NULL or pointing to the extent.
*/
- if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
- tifp->if_u1.if_extents =
- tifp->if_u2.if_inline_ext;
- }
+ nextents = xfs_iext_count(&tip->i_df);
+ if (nextents <= XFS_INLINE_EXTS)
+ tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext;
(*target_log_flags) |= XFS_ILOG_DEXT;
break;
case XFS_DINODE_FMT_BTREE:
@@ -1938,8 +1935,8 @@ xfs_swap_extents(
* page cache safely. Once we have done this we can take the ilocks and
* do the rest of the checks.
*/
- lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
- xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+ lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
+ lock_flags = XFS_MMAPLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
/* Verify that both files have the same format */
@@ -2079,15 +2076,13 @@ xfs_swap_extents(
trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1);
+out_unlock:
xfs_iunlock(ip, lock_flags);
xfs_iunlock(tip, lock_flags);
+ unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
return error;
out_trans_cancel:
xfs_trans_cancel(tp);
-
-out_unlock:
- xfs_iunlock(ip, lock_flags);
- xfs_iunlock(tip, lock_flags);
- return error;
+ goto out_unlock;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b5b9bffe3520..7f0a01f7b592 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -219,7 +219,6 @@ _xfs_buf_alloc(
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_lru);
INIT_LIST_HEAD(&bp->b_list);
- RB_CLEAR_NODE(&bp->b_rbnode);
sema_init(&bp->b_sema, 0); /* held, no waiters */
spin_lock_init(&bp->b_lock);
XB_SET_OWNER(bp);
@@ -473,6 +472,62 @@ _xfs_buf_map_pages(
/*
* Finding and Reading Buffers
*/
+static int
+_xfs_buf_obj_cmp(
+ struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct xfs_buf_map *map = arg->key;
+ const struct xfs_buf *bp = obj;
+
+ /*
+ * The key hashing in the lookup path depends on the key being the
+ * first element of the compare_arg, make sure to assert this.
+ */
+ BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
+
+ if (bp->b_bn != map->bm_bn)
+ return 1;
+
+ if (unlikely(bp->b_length != map->bm_len)) {
+ /*
+ * found a block number match. If the range doesn't
+ * match, the only way this is allowed is if the buffer
+ * in the cache is stale and the transaction that made
+ * it stale has not yet committed. i.e. we are
+ * reallocating a busy extent. Skip this buffer and
+ * continue searching for an exact match.
+ */
+ ASSERT(bp->b_flags & XBF_STALE);
+ return 1;
+ }
+ return 0;
+}
+
+static const struct rhashtable_params xfs_buf_hash_params = {
+ .min_size = 32, /* empty AGs have minimal footprint */
+ .nelem_hint = 16,
+ .key_len = sizeof(xfs_daddr_t),
+ .key_offset = offsetof(struct xfs_buf, b_bn),
+ .head_offset = offsetof(struct xfs_buf, b_rhash_head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = _xfs_buf_obj_cmp,
+};
+
+int
+xfs_buf_hash_init(
+ struct xfs_perag *pag)
+{
+ spin_lock_init(&pag->pag_buf_lock);
+ return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
+}
+
+void
+xfs_buf_hash_destroy(
+ struct xfs_perag *pag)
+{
+ rhashtable_destroy(&pag->pag_buf_hash);
+}
/*
* Look up, and creates if absent, a lockable buffer for
@@ -488,27 +543,24 @@ _xfs_buf_find(
xfs_buf_t *new_bp)
{
struct xfs_perag *pag;
- struct rb_node **rbp;
- struct rb_node *parent;
xfs_buf_t *bp;
- xfs_daddr_t blkno = map[0].bm_bn;
+ struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
xfs_daddr_t eofs;
- int numblks = 0;
int i;
for (i = 0; i < nmaps; i++)
- numblks += map[i].bm_len;
+ cmap.bm_len += map[i].bm_len;
/* Check for IOs smaller than the sector size / not sector aligned */
- ASSERT(!(BBTOB(numblks) < btp->bt_meta_sectorsize));
- ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask));
+ ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
+ ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
/*
* Corrupted block numbers can get through to here, unfortunately, so we
* have to check that the buffer falls within the filesystem bounds.
*/
eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
- if (blkno < 0 || blkno >= eofs) {
+ if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
/*
* XXX (dgc): we should really be returning -EFSCORRUPTED here,
* but none of the higher level infrastructure supports
@@ -516,53 +568,29 @@ _xfs_buf_find(
*/
xfs_alert(btp->bt_mount,
"%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
- __func__, blkno, eofs);
+ __func__, cmap.bm_bn, eofs);
WARN_ON(1);
return NULL;
}
- /* get tree root */
pag = xfs_perag_get(btp->bt_mount,
- xfs_daddr_to_agno(btp->bt_mount, blkno));
+ xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
- /* walk tree */
spin_lock(&pag->pag_buf_lock);
- rbp = &pag->pag_buf_tree.rb_node;
- parent = NULL;
- bp = NULL;
- while (*rbp) {
- parent = *rbp;
- bp = rb_entry(parent, struct xfs_buf, b_rbnode);
-
- if (blkno < bp->b_bn)
- rbp = &(*rbp)->rb_left;
- else if (blkno > bp->b_bn)
- rbp = &(*rbp)->rb_right;
- else {
- /*
- * found a block number match. If the range doesn't
- * match, the only way this is allowed is if the buffer
- * in the cache is stale and the transaction that made
- * it stale has not yet committed. i.e. we are
- * reallocating a busy extent. Skip this buffer and
- * continue searching to the right for an exact match.
- */
- if (bp->b_length != numblks) {
- ASSERT(bp->b_flags & XBF_STALE);
- rbp = &(*rbp)->rb_right;
- continue;
- }
- atomic_inc(&bp->b_hold);
- goto found;
- }
+ bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
+ xfs_buf_hash_params);
+ if (bp) {
+ atomic_inc(&bp->b_hold);
+ goto found;
}
/* No match found */
if (new_bp) {
- rb_link_node(&new_bp->b_rbnode, parent, rbp);
- rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
/* the buffer keeps the perag reference until it is freed */
new_bp->b_pag = pag;
+ rhashtable_insert_fast(&pag->pag_buf_hash,
+ &new_bp->b_rhash_head,
+ xfs_buf_hash_params);
spin_unlock(&pag->pag_buf_lock);
} else {
XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
@@ -930,7 +958,6 @@ xfs_buf_rele(
if (!pag) {
ASSERT(list_empty(&bp->b_lru));
- ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold)) {
xfs_buf_ioacct_dec(bp);
xfs_buf_free(bp);
@@ -938,8 +965,6 @@ xfs_buf_rele(
return;
}
- ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
-
ASSERT(atomic_read(&bp->b_hold) > 0);
release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
@@ -983,7 +1008,8 @@ xfs_buf_rele(
}
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
- rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+ rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
+ xfs_buf_hash_params);
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
freebuf = true;
@@ -1304,7 +1330,7 @@ _xfs_buf_ioapply(
if (bp->b_flags & XBF_WRITE) {
op = REQ_OP_WRITE;
if (bp->b_flags & XBF_SYNCIO)
- op_flags = WRITE_SYNC;
+ op_flags = REQ_SYNC;
if (bp->b_flags & XBF_FUA)
op_flags |= REQ_FUA;
if (bp->b_flags & XBF_FLUSH)
@@ -1711,8 +1737,7 @@ xfs_free_buftarg(
percpu_counter_destroy(&btp->bt_io_count);
list_lru_destroy(&btp->bt_lru);
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_blkdev_issue_flush(btp);
+ xfs_blkdev_issue_flush(btp);
kmem_free(btp);
}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 1c2e52b2d926..8a9d3a9599f0 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -71,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_READ, "READ" }, \
{ XBF_WRITE, "WRITE" }, \
{ XBF_READ_AHEAD, "READ_AHEAD" }, \
+ { XBF_NO_IOACCT, "NO_IOACCT" }, \
{ XBF_ASYNC, "ASYNC" }, \
{ XBF_DONE, "DONE" }, \
{ XBF_STALE, "STALE" }, \
@@ -150,7 +151,7 @@ typedef struct xfs_buf {
* which is the only bit that is touched if we hit the semaphore
* fast-path on locking.
*/
- struct rb_node b_rbnode; /* rbtree node */
+ struct rhash_head b_rhash_head; /* pag buffer hash node */
xfs_daddr_t b_bn; /* block number of buffer */
int b_length; /* size of buffer in BBs */
atomic_t b_hold; /* reference count */
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 29816981b50a..003a99b83bd8 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -677,7 +677,6 @@ xfs_readdir(
args.dp = dp;
args.geo = dp->i_mount->m_dir_geo;
- xfs_ilock(dp, XFS_IOLOCK_SHARED);
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_getdents(&args, ctx);
else if ((rval = xfs_dir2_isblock(&args, &v)))
@@ -686,7 +685,6 @@ xfs_readdir(
rval = xfs_dir2_block_getdents(&args, ctx);
else
rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
- xfs_iunlock(dp, XFS_IOLOCK_SHARED);
return rval;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 6e4f7f900fea..bbb9eb6811b2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -48,40 +48,6 @@
static const struct vm_operations_struct xfs_file_vm_ops;
/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
- struct xfs_inode *ip,
- int type)
-{
- if (type & XFS_IOLOCK_EXCL)
- inode_lock(VFS_I(ip));
- xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
- struct xfs_inode *ip,
- int type)
-{
- xfs_iunlock(ip, type);
- if (type & XFS_IOLOCK_EXCL)
- inode_unlock(VFS_I(ip));
-}
-
-static inline void
-xfs_rw_ilock_demote(
- struct xfs_inode *ip,
- int type)
-{
- xfs_ilock_demote(ip, type);
- if (type & XFS_IOLOCK_EXCL)
- inode_unlock(VFS_I(ip));
-}
-
-/*
* Clear the specified ranges to zero through either the pagecache or DAX.
* Holes and unwritten extents will be left as-is as they already are zeroed.
*/
@@ -183,19 +149,16 @@ xfs_file_fsync(
xfs_iflags_clear(ip, XFS_ITRUNCATED);
- if (mp->m_flags & XFS_MOUNT_BARRIER) {
- /*
- * If we have an RT and/or log subvolume we need to make sure
- * to flush the write cache the device used for file data
- * first. This is to ensure newly written file data make
- * it to disk before logging the new inode size in case of
- * an extending write.
- */
- if (XFS_IS_REALTIME_INODE(ip))
- xfs_blkdev_issue_flush(mp->m_rtdev_targp);
- else if (mp->m_logdev_targp != mp->m_ddev_targp)
- xfs_blkdev_issue_flush(mp->m_ddev_targp);
- }
+ /*
+ * If we have an RT and/or log subvolume we need to make sure to flush
+ * the write cache the device used for file data first. This is to
+ * ensure newly written file data make it to disk before logging the new
+ * inode size in case of an extending write.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+ else if (mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
/*
* All metadata updates are logged, which means that we just have to
@@ -230,10 +193,8 @@ xfs_file_fsync(
* an already allocated file and thus do not have any metadata to
* commit.
*/
- if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
- mp->m_logdev_targp == mp->m_ddev_targp &&
- !XFS_IS_REALTIME_INODE(ip) &&
- !log_flushed)
+ if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
+ mp->m_logdev_targp == mp->m_ddev_targp)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
return error;
@@ -244,62 +205,21 @@ xfs_file_dio_aio_read(
struct kiocb *iocb,
struct iov_iter *to)
{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
- struct inode *inode = mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- loff_t isize = i_size_read(inode);
+ struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
size_t count = iov_iter_count(to);
- loff_t end = iocb->ki_pos + count - 1;
- struct iov_iter data;
- struct xfs_buftarg *target;
- ssize_t ret = 0;
+ ssize_t ret;
trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
if (!count)
return 0; /* skip atime */
- if (XFS_IS_REALTIME_INODE(ip))
- target = ip->i_mount->m_rtdev_targp;
- else
- target = ip->i_mount->m_ddev_targp;
-
- /* DIO must be aligned to device logical sector size */
- if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
- if (iocb->ki_pos == isize)
- return 0;
- return -EINVAL;
- }
-
file_accessed(iocb->ki_filp);
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- if (mapping->nrpages) {
- ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
- if (ret)
- goto out_unlock;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- /*
- * Invalidate whole pages. This can return an error if we fail
- * to invalidate a page, but this should never happen on XFS.
- * Warn if it does fail.
- */
- ret = invalidate_inode_pages2_range(mapping,
- iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
- ret = 0;
- }
-
- data = *to;
- ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
- xfs_get_blocks_direct, NULL, NULL, 0);
- if (ret >= 0) {
- iocb->ki_pos += ret;
- iov_iter_advance(to, ret);
- }
-
-out_unlock:
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
@@ -317,9 +237,9 @@ xfs_file_dax_read(
if (!count)
return 0; /* skip atime */
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
file_accessed(iocb->ki_filp);
return ret;
@@ -335,9 +255,9 @@ xfs_file_buffered_aio_read(
trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
ret = generic_file_read_iter(iocb, to);
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
@@ -418,15 +338,18 @@ restart:
if (error <= 0)
return error;
- error = xfs_break_layouts(inode, iolock, true);
+ error = xfs_break_layouts(inode, iolock);
if (error)
return error;
- /* For changing security info in file_remove_privs() we need i_mutex */
+ /*
+ * For changing security info in file_remove_privs() we need i_rwsem
+ * exclusively.
+ */
if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
- xfs_rw_iunlock(ip, *iolock);
+ xfs_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
+ xfs_ilock(ip, *iolock);
goto restart;
}
/*
@@ -451,9 +374,9 @@ restart:
spin_unlock(&ip->i_flags_lock);
if (!drained_dio) {
if (*iolock == XFS_IOLOCK_SHARED) {
- xfs_rw_iunlock(ip, *iolock);
+ xfs_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
+ xfs_ilock(ip, *iolock);
iov_iter_reexpand(from, count);
}
/*
@@ -496,6 +419,58 @@ restart:
return 0;
}
+static int
+xfs_dio_write_end_io(
+ struct kiocb *iocb,
+ ssize_t size,
+ unsigned flags)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_inode *ip = XFS_I(inode);
+ loff_t offset = iocb->ki_pos;
+ bool update_size = false;
+ int error = 0;
+
+ trace_xfs_end_io_direct_write(ip, offset, size);
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
+
+ if (size <= 0)
+ return size;
+
+ /*
+ * We need to update the in-core inode size here so that we don't end up
+ * with the on-disk inode size being outside the in-core inode size. We
+ * have no other method of updating EOF for AIO, so always do it here
+ * if necessary.
+ *
+ * We need to lock the test/set EOF update as we can be racing with
+ * other IO completions here to update the EOF. Failing to serialise
+ * here can result in EOF moving backwards and Bad Things Happen when
+ * that occurs.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (offset + size > i_size_read(inode)) {
+ i_size_write(inode, offset + size);
+ update_size = true;
+ }
+ spin_unlock(&ip->i_flags_lock);
+
+ if (flags & IOMAP_DIO_COW) {
+ error = xfs_reflink_end_cow(ip, offset, size);
+ if (error)
+ return error;
+ }
+
+ if (flags & IOMAP_DIO_UNWRITTEN)
+ error = xfs_iomap_write_unwritten(ip, offset, size);
+ else if (update_size)
+ error = xfs_setfilesize(ip, offset, size);
+
+ return error;
+}
+
/*
* xfs_file_dio_aio_write - handle direct IO writes
*
@@ -535,9 +510,7 @@ xfs_file_dio_aio_write(
int unaligned_io = 0;
int iolock;
size_t count = iov_iter_count(from);
- loff_t end;
- struct iov_iter data;
- struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
+ struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
@@ -559,29 +532,12 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
- xfs_rw_ilock(ip, iolock);
+ xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
count = iov_iter_count(from);
- end = iocb->ki_pos + count - 1;
-
- if (mapping->nrpages) {
- ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
- if (ret)
- goto out;
-
- /*
- * Invalidate whole pages. This can return an error if we fail
- * to invalidate a page, but this should never happen on XFS.
- * Warn if it does fail.
- */
- ret = invalidate_inode_pages2_range(mapping,
- iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
- ret = 0;
- }
/*
* If we are doing unaligned IO, wait for all other IO to drain,
@@ -591,7 +547,7 @@ xfs_file_dio_aio_write(
if (unaligned_io)
inode_dio_wait(inode);
else if (iolock == XFS_IOLOCK_EXCL) {
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
@@ -604,24 +560,9 @@ xfs_file_dio_aio_write(
goto out;
}
- data = *from;
- ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
- xfs_get_blocks_direct, xfs_end_io_direct_write,
- NULL, DIO_ASYNC_EXTEND);
-
- /* see generic_file_direct_write() for why this is necessary */
- if (mapping->nrpages) {
- invalidate_inode_pages2_range(mapping,
- iocb->ki_pos >> PAGE_SHIFT,
- end >> PAGE_SHIFT);
- }
-
- if (ret > 0) {
- iocb->ki_pos += ret;
- iov_iter_advance(from, ret);
- }
+ ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
- xfs_rw_iunlock(ip, iolock);
+ xfs_iunlock(ip, iolock);
/*
* No fallback to buffered IO on errors for XFS, direct IO will either
@@ -643,7 +584,7 @@ xfs_file_dax_write(
size_t count;
loff_t pos;
- xfs_rw_ilock(ip, iolock);
+ xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -652,15 +593,13 @@ xfs_file_dax_write(
count = iov_iter_count(from);
trace_xfs_file_dax_write(ip, count, pos);
-
- ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+ ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
i_size_write(inode, iocb->ki_pos);
error = xfs_setfilesize(ip, pos, ret);
}
-
out:
- xfs_rw_iunlock(ip, iolock);
+ xfs_iunlock(ip, iolock);
return error ? error : ret;
}
@@ -677,7 +616,7 @@ xfs_file_buffered_aio_write(
int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, iolock);
+ xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
@@ -721,7 +660,7 @@ write_retry:
current->backing_dev_info = NULL;
out:
- xfs_rw_iunlock(ip, iolock);
+ xfs_iunlock(ip, iolock);
return ret;
}
@@ -797,7 +736,7 @@ xfs_file_fallocate(
return -EOPNOTSUPP;
xfs_ilock(ip, iolock);
- error = xfs_break_layouts(inode, &iolock, false);
+ error = xfs_break_layouts(inode, &iolock);
if (error)
goto out_unlock;
@@ -909,24 +848,6 @@ out_unlock:
return error;
}
-STATIC ssize_t
-xfs_file_copy_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- size_t len,
- unsigned int flags)
-{
- int error;
-
- error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, false);
- if (error)
- return error;
- return len;
-}
-
STATIC int
xfs_file_clone_range(
struct file *file_in,
@@ -939,7 +860,6 @@ xfs_file_clone_range(
len, false);
}
-#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
STATIC ssize_t
xfs_file_dedupe_range(
struct file *src_file,
@@ -950,14 +870,6 @@ xfs_file_dedupe_range(
{
int error;
- /*
- * Limit the total length we will dedupe for each operation.
- * This is intended to bound the total time spent in this
- * ioctl to something sane.
- */
- if (len > XFS_MAX_DEDUPE_LEN)
- len = XFS_MAX_DEDUPE_LEN;
-
error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
@@ -1474,7 +1386,7 @@ xfs_filemap_page_mkwrite(
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
} else {
ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
ret = block_page_mkwrite_return(ret);
@@ -1501,15 +1413,9 @@ xfs_filemap_fault(
return xfs_filemap_page_mkwrite(vma, vmf);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- if (IS_DAX(inode)) {
- /*
- * we do not want to trigger unwritten extent conversion on read
- * faults - that is unnecessary overhead and would also require
- * changes to xfs_get_blocks_direct() to map unwritten extent
- * ioend for conversion on read-only mappings.
- */
- ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
- } else
+ if (IS_DAX(inode))
+ ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ else
ret = filemap_fault(vma, vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@ -1545,7 +1451,7 @@ xfs_filemap_pmd_fault(
}
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);
+ ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (flags & FAULT_FLAG_WRITE)
@@ -1625,7 +1531,6 @@ const struct file_operations xfs_file_operations = {
.fsync = xfs_file_fsync,
.get_unmapped_area = thp_get_unmapped_area,
.fallocate = xfs_file_fallocate,
- .copy_file_range = xfs_file_copy_range,
.clone_file_range = xfs_file_clone_range,
.dedupe_file_range = xfs_file_dedupe_range,
};
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index f295049db681..ff4d6311c7f4 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -70,8 +70,6 @@ xfs_inode_alloc(
ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
- mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-
/* initialise the xfs inode */
ip->i_ino = ino;
ip->i_mount = mp;
@@ -123,7 +121,6 @@ __xfs_inode_free(
{
/* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!xfs_isiflocked(ip));
XFS_STATS_DEC(ip->i_mount, vn_active);
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
@@ -133,6 +130,8 @@ void
xfs_inode_free(
struct xfs_inode *ip)
{
+ ASSERT(!xfs_isiflocked(ip));
+
/*
* Because we use RCU freeing we need to ensure the inode always
* appears to be reclaimed with an invalid inode number when in the
@@ -393,8 +392,8 @@ xfs_iget_cache_hit(
xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
inode->i_state = I_NEW;
- ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
- mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ ASSERT(!rwsem_is_locked(&inode->i_rwsem));
+ init_rwsem(&inode->i_rwsem);
spin_unlock(&ip->i_flags_lock);
spin_unlock(&pag->pag_ici_lock);
@@ -981,6 +980,7 @@ restart:
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_iunpin_wait(ip);
+ /* xfs_iflush_abort() drops the flush lock */
xfs_iflush_abort(ip, false);
goto reclaim;
}
@@ -989,10 +989,10 @@ restart:
goto out_ifunlock;
xfs_iunpin_wait(ip);
}
- if (xfs_iflags_test(ip, XFS_ISTALE))
- goto reclaim;
- if (xfs_inode_clean(ip))
+ if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
+ xfs_ifunlock(ip);
goto reclaim;
+ }
/*
* Never flush out dirty data during non-blocking reclaim, as it would
@@ -1030,25 +1030,24 @@ restart:
xfs_buf_relse(bp);
}
- xfs_iflock(ip);
reclaim:
+ ASSERT(!xfs_isiflocked(ip));
+
/*
* Because we use RCU freeing we need to ensure the inode always appears
* to be reclaimed with an invalid inode number when in the free state.
- * We do this as early as possible under the ILOCK and flush lock so
- * that xfs_iflush_cluster() can be guaranteed to detect races with us
- * here. By doing this, we guarantee that once xfs_iflush_cluster has
- * locked both the XFS_ILOCK and the flush lock that it will see either
- * a valid, flushable inode that will serialise correctly against the
- * locks below, or it will see a clean (and invalid) inode that it can
- * skip.
+ * We do this as early as possible under the ILOCK so that
+ * xfs_iflush_cluster() can be guaranteed to detect races with us here.
+ * By doing this, we guarantee that once xfs_iflush_cluster has locked
+ * XFS_ILOCK that it will see either a valid, flushable inode that will
+ * serialise correctly, or it will see a clean (and invalid) inode that
+ * it can skip.
*/
spin_lock(&ip->i_flags_lock);
ip->i_flags = XFS_IRECLAIM;
ip->i_ino = 0;
spin_unlock(&ip->i_flags_lock);
- xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
@@ -1580,10 +1579,15 @@ xfs_inode_free_cowblocks(
struct xfs_eofblocks *eofb = args;
bool need_iolock = true;
int match;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
- if (!xfs_reflink_has_real_cow_blocks(ip)) {
+ /*
+ * Just clear the tag if we have an empty cow fork or none at all. It's
+ * possible the inode was fully unshared since it was originally tagged.
+ */
+ if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
trace_xfs_inode_free_cowblocks_invalid(ip);
xfs_inode_clear_cowblocks_tag(ip);
return 0;
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index d45ca72af6fb..865ad1373e5e 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -133,7 +133,7 @@ xfs_icreate_item_committing(
/*
* This is the ops vector shared by all buf log items.
*/
-static struct xfs_item_ops xfs_icreate_item_ops = {
+static const struct xfs_item_ops xfs_icreate_item_ops = {
.iop_size = xfs_icreate_item_size,
.iop_format = xfs_icreate_item_format,
.iop_pin = xfs_icreate_item_pin,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4e560e6a12c1..b9557795eb74 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -142,31 +142,31 @@ xfs_ilock_attr_map_shared(
}
/*
- * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and
- * the i_lock. This routine allows various combinations of the locks to be
- * obtained.
+ * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
+ * multi-reader locks: i_mmap_lock and the i_lock. This routine allows
+ * various combinations of the locks to be obtained.
*
* The 3 locks should always be ordered so that the IO lock is obtained first,
* the mmap lock second and the ilock last in order to prevent deadlock.
*
* Basic locking order:
*
- * i_iolock -> i_mmap_lock -> page_lock -> i_ilock
+ * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
*
* mmap_sem locking order:
*
- * i_iolock -> page lock -> mmap_sem
+ * i_rwsem -> page lock -> mmap_sem
* mmap_sem -> i_mmap_lock -> page_lock
*
* The difference in mmap_sem locking order mean that we cannot hold the
* i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
* fault in pages during copy in/out (for buffered IO) or require the mmap_sem
* in get_user_pages() to map the user pages into the kernel address space for
- * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
+ * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
* page faults already hold the mmap_sem.
*
* Hence to serialise fully against both syscall and mmap based IO, we need to
- * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
+ * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
* taken in places where we need to invalidate the page cache in a race
* free manner (e.g. truncate, hole punch and other extent manipulation
* functions).
@@ -191,10 +191,13 @@ xfs_ilock(
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
- if (lock_flags & XFS_IOLOCK_EXCL)
- mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
- else if (lock_flags & XFS_IOLOCK_SHARED)
- mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
+ if (lock_flags & XFS_IOLOCK_EXCL) {
+ down_write_nested(&VFS_I(ip)->i_rwsem,
+ XFS_IOLOCK_DEP(lock_flags));
+ } else if (lock_flags & XFS_IOLOCK_SHARED) {
+ down_read_nested(&VFS_I(ip)->i_rwsem,
+ XFS_IOLOCK_DEP(lock_flags));
+ }
if (lock_flags & XFS_MMAPLOCK_EXCL)
mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
@@ -240,10 +243,10 @@ xfs_ilock_nowait(
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL) {
- if (!mrtryupdate(&ip->i_iolock))
+ if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
goto out;
} else if (lock_flags & XFS_IOLOCK_SHARED) {
- if (!mrtryaccess(&ip->i_iolock))
+ if (!down_read_trylock(&VFS_I(ip)->i_rwsem))
goto out;
}
@@ -271,9 +274,9 @@ out_undo_mmaplock:
mrunlock_shared(&ip->i_mmaplock);
out_undo_iolock:
if (lock_flags & XFS_IOLOCK_EXCL)
- mrunlock_excl(&ip->i_iolock);
+ up_write(&VFS_I(ip)->i_rwsem);
else if (lock_flags & XFS_IOLOCK_SHARED)
- mrunlock_shared(&ip->i_iolock);
+ up_read(&VFS_I(ip)->i_rwsem);
out:
return 0;
}
@@ -310,9 +313,9 @@ xfs_iunlock(
ASSERT(lock_flags != 0);
if (lock_flags & XFS_IOLOCK_EXCL)
- mrunlock_excl(&ip->i_iolock);
+ up_write(&VFS_I(ip)->i_rwsem);
else if (lock_flags & XFS_IOLOCK_SHARED)
- mrunlock_shared(&ip->i_iolock);
+ up_read(&VFS_I(ip)->i_rwsem);
if (lock_flags & XFS_MMAPLOCK_EXCL)
mrunlock_excl(&ip->i_mmaplock);
@@ -345,7 +348,7 @@ xfs_ilock_demote(
if (lock_flags & XFS_MMAPLOCK_EXCL)
mrdemote(&ip->i_mmaplock);
if (lock_flags & XFS_IOLOCK_EXCL)
- mrdemote(&ip->i_iolock);
+ downgrade_write(&VFS_I(ip)->i_rwsem);
trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
}
@@ -370,8 +373,9 @@ xfs_isilocked(
if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
if (!(lock_flags & XFS_IOLOCK_SHARED))
- return !!ip->i_iolock.mr_writer;
- return rwsem_is_locked(&ip->i_iolock.mr_lock);
+ return !debug_locks ||
+ lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
+ return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
}
ASSERT(0);
@@ -421,11 +425,7 @@ xfs_lock_inumorder(int lock_mode, int subclass)
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
- ASSERT(xfs_lockdep_subclass_ok(subclass +
- XFS_IOLOCK_PARENT_VAL));
class += subclass << XFS_IOLOCK_SHIFT;
- if (lock_mode & XFS_IOLOCK_PARENT)
- class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
}
if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
@@ -477,8 +477,6 @@ xfs_lock_inodes(
XFS_ILOCK_EXCL));
ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
XFS_ILOCK_SHARED)));
- ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
- inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
@@ -581,10 +579,8 @@ xfs_lock_two_inodes(
int attempts = 0;
xfs_log_item_t *lp;
- if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
- ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
- ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
+ ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
+ if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
ASSERT(ip0->i_ino != ip1->i_ino);
@@ -715,7 +711,6 @@ xfs_lookup(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
- xfs_ilock(dp, XFS_IOLOCK_SHARED);
error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
if (error)
goto out_unlock;
@@ -724,14 +719,12 @@ xfs_lookup(
if (error)
goto out_free_name;
- xfs_iunlock(dp, XFS_IOLOCK_SHARED);
return 0;
out_free_name:
if (ci_name)
kmem_free(ci_name->name);
out_unlock:
- xfs_iunlock(dp, XFS_IOLOCK_SHARED);
*ipp = NULL;
return error;
}
@@ -1215,8 +1208,7 @@ xfs_create(
if (error)
goto out_release_inode;
- xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
- XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
+ xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
xfs_defer_init(&dfops, &first_block);
@@ -1252,7 +1244,7 @@ xfs_create(
* the transaction cancel unlocking dp so don't do it explicitly in the
* error path.
*/
- xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
unlock_dp_on_error = false;
error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1325,7 +1317,7 @@ xfs_create(
xfs_qm_dqrele(pdqp);
if (unlock_dp_on_error)
- xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+ xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
@@ -1466,11 +1458,10 @@ xfs_link(
if (error)
goto std_return;
- xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
/*
* If we are using project inheritance, we only allow hard link
@@ -2041,7 +2032,6 @@ xfs_iunlink(
agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
offset = offsetof(xfs_agi_t, agi_unlinked) +
(sizeof(xfs_agino_t) * bucket_index);
- xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
return 0;
@@ -2133,7 +2123,6 @@ xfs_iunlink_remove(
agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
offset = offsetof(xfs_agi_t, agi_unlinked) +
(sizeof(xfs_agino_t) * bucket_index);
- xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
} else {
@@ -2579,10 +2568,9 @@ xfs_remove(
goto std_return;
}
- xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
@@ -2963,12 +2951,6 @@ xfs_rename(
* whether the target directory is the same as the source
* directory, we can lock from 2 to 4 inodes.
*/
- if (!new_parent)
- xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
- else
- xfs_lock_two_inodes(src_dp, target_dp,
- XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
-
xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
/*
@@ -2976,9 +2958,9 @@ xfs_rename(
* we can rely on either trans_commit or trans_cancel to unlock
* them.
*/
- xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
if (new_parent)
- xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
if (target_ip)
xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f14c1de2549d..10dcf27b4c85 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -56,7 +56,6 @@ typedef struct xfs_inode {
/* Transaction and locking information. */
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
- mrlock_t i_iolock; /* inode IO lock */
mrlock_t i_mmaplock; /* inode mmap IO lock */
atomic_t i_pincount; /* inode pin count */
spinlock_t i_flags_lock; /* inode i_flags lock */
@@ -246,6 +245,11 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
* Synchronize processes attempting to flush the in-core inode back to disk.
*/
+static inline int xfs_isiflocked(struct xfs_inode *ip)
+{
+ return xfs_iflags_test(ip, XFS_IFLOCK);
+}
+
extern void __xfs_iflock(struct xfs_inode *ip);
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
@@ -261,16 +265,12 @@ static inline void xfs_iflock(struct xfs_inode *ip)
static inline void xfs_ifunlock(struct xfs_inode *ip)
{
+ ASSERT(xfs_isiflocked(ip));
xfs_iflags_clear(ip, XFS_IFLOCK);
smp_mb();
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
}
-static inline int xfs_isiflocked(struct xfs_inode *ip)
-{
- return xfs_iflags_test(ip, XFS_IFLOCK);
-}
-
/*
* Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
@@ -332,7 +332,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
* IOLOCK values
*
* 0-3 subclass value
- * 4-7 PARENT subclass values
+ * 4-7 unused
*
* MMAPLOCK values
*
@@ -347,10 +347,8 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
*
*/
#define XFS_IOLOCK_SHIFT 16
-#define XFS_IOLOCK_PARENT_VAL 4
-#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1)
+#define XFS_IOLOCK_MAX_SUBCLASS 3
#define XFS_IOLOCK_DEP_MASK 0x000f0000
-#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
#define XFS_MMAPLOCK_SHIFT 20
#define XFS_MMAPLOCK_NUMORDER 0
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 9610e9c00952..d90e7811ccdd 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -164,7 +164,7 @@ xfs_inode_item_format_data_fork(
struct xfs_bmbt_rec *p;
ASSERT(ip->i_df.if_u1.if_extents != NULL);
- ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
+ ASSERT(xfs_iext_count(&ip->i_df) > 0);
p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
@@ -261,7 +261,7 @@ xfs_inode_item_format_attr_fork(
ip->i_afp->if_bytes > 0) {
struct xfs_bmbt_rec *p;
- ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
+ ASSERT(xfs_iext_count(ip->i_afp) ==
ip->i_d.di_anextents);
ASSERT(ip->i_afp->if_u1.if_extents != NULL);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c245bed3249b..c67cfb451fd3 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -287,7 +287,7 @@ xfs_readlink_by_handle(
return PTR_ERR(dentry);
/* Restrict this handle operation to symlinks only. */
- if (!d_inode(dentry)->i_op->readlink) {
+ if (!d_is_symlink(dentry)) {
error = -EINVAL;
goto out_dput;
}
@@ -297,7 +297,7 @@ xfs_readlink_by_handle(
goto out_dput;
}
- error = d_inode(dentry)->i_op->readlink(dentry, hreq->ohandle, olen);
+ error = vfs_readlink(dentry, hreq->ohandle, olen);
out_dput:
dput(dentry);
@@ -639,7 +639,7 @@ xfs_ioc_space(
return error;
xfs_ilock(ip, iolock);
- error = xfs_break_layouts(inode, &iolock, false);
+ error = xfs_break_layouts(inode, &iolock);
if (error)
goto out_unlock;
@@ -910,16 +910,14 @@ xfs_ioc_fsgetxattr(
if (attr) {
if (ip->i_afp) {
if (ip->i_afp->if_flags & XFS_IFEXTENTS)
- fa.fsx_nextents = ip->i_afp->if_bytes /
- sizeof(xfs_bmbt_rec_t);
+ fa.fsx_nextents = xfs_iext_count(ip->i_afp);
else
fa.fsx_nextents = ip->i_d.di_anextents;
} else
fa.fsx_nextents = 0;
} else {
if (ip->i_df.if_flags & XFS_IFEXTENTS)
- fa.fsx_nextents = ip->i_df.if_bytes /
- sizeof(xfs_bmbt_rec_t);
+ fa.fsx_nextents = xfs_iext_count(&ip->i_df);
else
fa.fsx_nextents = ip->i_d.di_nextents;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 436e109bb01e..0d147428971e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -395,11 +395,12 @@ xfs_iomap_prealloc_size(
struct xfs_inode *ip,
loff_t offset,
loff_t count,
- xfs_extnum_t idx,
- struct xfs_bmbt_irec *prev)
+ xfs_extnum_t idx)
{
struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ struct xfs_bmbt_irec prev;
int shift = 0;
int64_t freesp;
xfs_fsblock_t qblocks;
@@ -419,8 +420,8 @@ xfs_iomap_prealloc_size(
*/
if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
- idx == 0 ||
- prev->br_startoff + prev->br_blockcount < offset_fsb)
+ !xfs_iext_get_extent(ifp, idx - 1, &prev) ||
+ prev.br_startoff + prev.br_blockcount < offset_fsb)
return mp->m_writeio_blocks;
/*
@@ -439,8 +440,8 @@ xfs_iomap_prealloc_size(
* always extends to MAXEXTLEN rather than falling short due to things
* like stripe unit/width alignment of real extents.
*/
- if (prev->br_blockcount <= (MAXEXTLEN >> 1))
- alloc_blocks = prev->br_blockcount << 1;
+ if (prev.br_blockcount <= (MAXEXTLEN >> 1))
+ alloc_blocks = prev.br_blockcount << 1;
else
alloc_blocks = XFS_B_TO_FSB(mp, offset);
if (!alloc_blocks)
@@ -535,11 +536,11 @@ xfs_file_iomap_begin_delay(
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t maxbytes_fsb =
XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
- xfs_fileoff_t end_fsb, orig_end_fsb;
+ xfs_fileoff_t end_fsb;
int error = 0, eof = 0;
struct xfs_bmbt_irec got;
- struct xfs_bmbt_irec prev;
xfs_extnum_t idx;
+ xfs_fsblock_t prealloc_blocks = 0;
ASSERT(!XFS_IS_REALTIME_INODE(ip));
ASSERT(!xfs_get_extsz_hint(ip));
@@ -563,8 +564,7 @@ xfs_file_iomap_begin_delay(
goto out_unlock;
}
- xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
- &got, &prev);
+ eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
if (!eof && got.br_startoff <= offset_fsb) {
if (xfs_is_reflink_inode(ip)) {
bool shared;
@@ -595,35 +595,32 @@ xfs_file_iomap_begin_delay(
* the lower level functions are updated.
*/
count = min_t(loff_t, count, 1024 * PAGE_SIZE);
- end_fsb = orig_end_fsb =
- min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
+ end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
if (eof) {
- xfs_fsblock_t prealloc_blocks;
-
- prealloc_blocks =
- xfs_iomap_prealloc_size(ip, offset, count, idx, &prev);
+ prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx);
if (prealloc_blocks) {
xfs_extlen_t align;
xfs_off_t end_offset;
+ xfs_fileoff_t p_end_fsb;
end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
- end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
- prealloc_blocks;
+ p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
+ prealloc_blocks;
align = xfs_eof_alignment(ip, 0);
if (align)
- end_fsb = roundup_64(end_fsb, align);
+ p_end_fsb = roundup_64(p_end_fsb, align);
- end_fsb = min(end_fsb, maxbytes_fsb);
- ASSERT(end_fsb > offset_fsb);
+ p_end_fsb = min(p_end_fsb, maxbytes_fsb);
+ ASSERT(p_end_fsb > offset_fsb);
+ prealloc_blocks = p_end_fsb - end_fsb;
}
}
retry:
error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
- end_fsb - offset_fsb, &got,
- &prev, &idx, eof);
+ end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof);
switch (error) {
case 0:
break;
@@ -631,8 +628,8 @@ retry:
case -EDQUOT:
/* retry without any preallocation */
trace_xfs_delalloc_enospc(ip, offset, count);
- if (end_fsb != orig_end_fsb) {
- end_fsb = orig_end_fsb;
+ if (prealloc_blocks) {
+ prealloc_blocks = 0;
goto retry;
}
/*FALLTHRU*/
@@ -640,13 +637,6 @@ retry:
goto out_unlock;
}
- /*
- * Tag the inode as speculatively preallocated so we can reclaim this
- * space on demand, if necessary.
- */
- if (end_fsb != orig_end_fsb)
- xfs_inode_set_eofblocks_tag(ip);
-
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -960,6 +950,19 @@ static inline bool imap_needs_alloc(struct inode *inode,
(IS_DAX(inode) && ISUNWRITTEN(imap));
}
+static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
+{
+ /*
+ * COW writes will allocate delalloc space, so we need to make sure
+ * to take the lock exclusively here.
+ */
+ if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
+ return true;
+ if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE))
+ return true;
+ return false;
+}
+
static int
xfs_file_iomap_begin(
struct inode *inode,
@@ -979,18 +982,14 @@ xfs_file_iomap_begin(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
- !xfs_get_extsz_hint(ip)) {
+ if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
+ !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */
return xfs_file_iomap_begin_delay(inode, offset, length, flags,
iomap);
}
- /*
- * COW writes will allocate delalloc space, so we need to make sure
- * to take the lock exclusively here.
- */
- if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+ if (need_excl_ilock(ip, flags)) {
lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, XFS_ILOCK_EXCL);
} else {
@@ -1003,17 +1002,41 @@ xfs_file_iomap_begin(
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
+ if (xfs_is_reflink_inode(ip) &&
+ (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
+ shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
+ if (shared) {
+ xfs_iunlock(ip, lockmode);
+ goto alloc_done;
+ }
+ ASSERT(!isnullstartblock(imap.br_startblock));
+ }
+
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
if (error)
goto out_unlock;
- if (flags & IOMAP_REPORT) {
+ if ((flags & IOMAP_REPORT) ||
+ (xfs_is_reflink_inode(ip) &&
+ (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
/* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
&trimmed);
if (error)
goto out_unlock;
+
+ /*
+ * We're here because we're trying to do a directio write to a
+ * region that isn't aligned to a filesystem block. If the
+ * extent is shared, fall back to buffered mode to handle the
+ * RMW.
+ */
+ if (!(flags & IOMAP_REPORT) && shared) {
+ trace_xfs_reflink_bounce_dio_write(ip, &imap);
+ error = -EREMCHG;
+ goto out_unlock;
+ }
}
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
@@ -1048,6 +1071,7 @@ xfs_file_iomap_begin(
if (error)
return error;
+alloc_done:
iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
} else {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 405a65cd9d6b..308bebb6dfd2 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -983,15 +983,13 @@ xfs_vn_setattr(
struct xfs_inode *ip = XFS_I(d_inode(dentry));
uint iolock = XFS_IOLOCK_EXCL;
- xfs_ilock(ip, iolock);
- error = xfs_break_layouts(d_inode(dentry), &iolock, true);
- if (!error) {
- xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- iolock |= XFS_MMAPLOCK_EXCL;
+ error = xfs_break_layouts(d_inode(dentry), &iolock);
+ if (error)
+ return error;
- error = xfs_vn_setattr_size(dentry, iattr);
- }
- xfs_iunlock(ip, iolock);
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ error = xfs_vn_setattr_size(dentry, iattr);
+ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
} else {
error = xfs_vn_setattr_nonsize(dentry, iattr);
}
@@ -1122,7 +1120,6 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
};
static const struct inode_operations xfs_symlink_inode_operations = {
- .readlink = generic_readlink,
.get_link = xfs_vn_get_link,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
@@ -1131,7 +1128,6 @@ static const struct inode_operations xfs_symlink_inode_operations = {
};
static const struct inode_operations xfs_inline_symlink_inode_operations = {
- .readlink = generic_readlink,
.get_link = xfs_vn_get_link_inline,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 68640fb63a54..a415f822f2c1 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -78,6 +78,7 @@ typedef __u32 xfs_nlink_t;
#include <linux/freezer.h>
#include <linux/list_sort.h>
#include <linux/ratelimit.h>
+#include <linux/rhashtable.h>
#include <asm/page.h>
#include <asm/div64.h>
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3b74fa011bb1..c39ac14ff540 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1668,7 +1668,7 @@ xlog_cksum(
__uint32_t crc;
/* first generate the crc for the record header ... */
- crc = xfs_start_cksum((char *)rhead,
+ crc = xfs_start_cksum_update((char *)rhead,
sizeof(struct xlog_rec_header),
offsetof(struct xlog_rec_header, h_crc));
@@ -1862,26 +1862,21 @@ xlog_sync(
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
- bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
- bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
+ bp->b_flags &= ~XBF_FLUSH;
+ bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
- if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
- bp->b_flags |= XBF_FUA;
-
- /*
- * Flush the data device before flushing the log to make
- * sure all meta data written back from the AIL actually made
- * it to disk before stamping the new log tail LSN into the
- * log buffer. For an external log we need to issue the
- * flush explicitly, and unfortunately synchronously here;
- * for an internal log we can simply use the block layer
- * state machine for preflushes.
- */
- if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
- xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
- else
- bp->b_flags |= XBF_FLUSH;
- }
+ /*
+ * Flush the data device before flushing the log to make sure all meta
+ * data written back from the AIL actually made it to disk before
+ * stamping the new log tail LSN into the log buffer. For an external
+ * log we need to issue the flush explicitly, and unfortunately
+ * synchronously here; for an internal log we can simply use the block
+ * layer state machine for preflushes.
+ */
+ if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
+ else
+ bp->b_flags |= XBF_FLUSH;
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
@@ -1906,10 +1901,8 @@ xlog_sync(
xfs_buf_associate_memory(bp,
(char *)&iclog->ic_header + count, split);
bp->b_fspriv = iclog;
- bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
- bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
- if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
- bp->b_flags |= XBF_FUA;
+ bp->b_flags &= ~XBF_FLUSH;
+ bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9b3d7c76915d..4a98762ec8b4 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2025,7 +2025,7 @@ xlog_peek_buffer_cancelled(
struct xlog *log,
xfs_daddr_t blkno,
uint len,
- ushort flags)
+ unsigned short flags)
{
struct list_head *bucket;
struct xfs_buf_cancel *bcp;
@@ -2065,7 +2065,7 @@ xlog_check_buffer_cancelled(
struct xlog *log,
xfs_daddr_t blkno,
uint len,
- ushort flags)
+ unsigned short flags)
{
struct xfs_buf_cancel *bcp;
@@ -5113,19 +5113,21 @@ xlog_recover_process(
struct list_head *buffer_list)
{
int error;
+ __le32 old_crc = rhead->h_crc;
__le32 crc;
+
crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
/*
* Nothing else to do if this is a CRC verification pass. Just return
* if this a record with a non-zero crc. Unfortunately, mkfs always
- * sets h_crc to 0 so we must consider this valid even on v5 supers.
+ * sets old_crc to 0 so we must consider this valid even on v5 supers.
* Otherwise, return EFSBADCRC on failure so the callers up the stack
* know precisely what failed.
*/
if (pass == XLOG_RECOVER_CRCPASS) {
- if (rhead->h_crc && crc != rhead->h_crc)
+ if (old_crc && crc != old_crc)
return -EFSBADCRC;
return 0;
}
@@ -5136,11 +5138,11 @@ xlog_recover_process(
* zero CRC check prevents warnings from being emitted when upgrading
* the kernel from one that does not add CRCs by default.
*/
- if (crc != rhead->h_crc) {
- if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
+ if (crc != old_crc) {
+ if (old_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
xfs_alert(log->l_mp,
"log record CRC mismatch: found 0x%x, expected 0x%x.",
- le32_to_cpu(rhead->h_crc),
+ le32_to_cpu(old_crc),
le32_to_cpu(crc));
xfs_hex_dump(dp, 32);
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b341f10cf481..9b9540db17a6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -157,6 +157,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
+ xfs_buf_hash_destroy(pag);
call_rcu(&pag->rcu_head, __xfs_free_perag);
}
}
@@ -212,8 +213,8 @@ xfs_initialize_perag(
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- spin_lock_init(&pag->pag_buf_lock);
- pag->pag_buf_tree = RB_ROOT;
+ if (xfs_buf_hash_init(pag))
+ goto out_unwind;
if (radix_tree_preload(GFP_NOFS))
goto out_unwind;
@@ -239,9 +240,11 @@ xfs_initialize_perag(
return 0;
out_unwind:
+ xfs_buf_hash_destroy(pag);
kmem_free(pag);
for (; index > first_initialised; index--) {
pag = radix_tree_delete(&mp->m_perag_tree, index);
+ xfs_buf_hash_destroy(pag);
kmem_free(pag);
}
return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 819b80b15bfb..84f785218907 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -393,8 +393,8 @@ typedef struct xfs_perag {
unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
/* buffer cache index */
- spinlock_t pag_buf_lock; /* lock for pag_buf_tree */
- struct rb_root pag_buf_tree; /* ordered tree of active buffers */
+ spinlock_t pag_buf_lock; /* lock for pag_buf_hash */
+ struct rhashtable pag_buf_hash;
/* for rcu-safe freeing */
struct rcu_head rcu_head;
@@ -424,6 +424,9 @@ xfs_perag_resv(
}
}
+int xfs_buf_hash_init(xfs_perag_t *pag);
+void xfs_buf_hash_destroy(xfs_perag_t *pag);
+
extern void xfs_uuid_table_free(void);
extern int xfs_log_sbcount(xfs_mount_t *);
extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 93a7aafa56d6..2f2dc3c09ad0 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -32,8 +32,7 @@
int
xfs_break_layouts(
struct inode *inode,
- uint *iolock,
- bool with_imutex)
+ uint *iolock)
{
struct xfs_inode *ip = XFS_I(inode);
int error;
@@ -42,12 +41,8 @@ xfs_break_layouts(
while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
xfs_iunlock(ip, *iolock);
- if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
- inode_unlock(inode);
error = break_layout(inode, true);
*iolock = XFS_IOLOCK_EXCL;
- if (with_imutex)
- inode_lock(inode);
xfs_ilock(ip, *iolock);
}
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h
index e8339f74966b..b587cb99b2b7 100644
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h
@@ -8,10 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
struct iattr *iattr);
-int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex);
+int xfs_break_layouts(struct inode *inode, uint *iolock);
#else
static inline int
-xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
+xfs_break_layouts(struct inode *inode, uint *iolock)
{
return 0;
}
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index a60d9e2739d1..45e50ea90769 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1135,7 +1135,7 @@ xfs_qm_get_rtblks(
return error;
}
rtblks = 0;
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ nextents = xfs_iext_count(ifp);
for (idx = 0; idx < nextents; idx++)
rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
*O_rtblks = (xfs_qcnt_t)rtblks;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index a279b4e7f5fe..07593a362cd0 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -243,12 +243,11 @@ xfs_reflink_reserve_cow(
struct xfs_bmbt_irec *imap,
bool *shared)
{
- struct xfs_bmbt_irec got, prev;
- xfs_fileoff_t end_fsb, orig_end_fsb;
- int eof = 0, error = 0;
- bool trimmed;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_bmbt_irec got;
+ int error = 0;
+ bool eof = false, trimmed;
xfs_extnum_t idx;
- xfs_extlen_t align;
/*
* Search the COW fork extent list first. This serves two purposes:
@@ -258,8 +257,9 @@ xfs_reflink_reserve_cow(
* extent list is generally faster than going out to the shared extent
* tree.
*/
- xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
- &got, &prev);
+
+ if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
+ eof = true;
if (!eof && got.br_startoff <= imap->br_startoff) {
trace_xfs_reflink_cow_found(ip, imap);
xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
@@ -285,33 +285,12 @@ xfs_reflink_reserve_cow(
if (error)
return error;
- end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
-
- align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
- if (align)
- end_fsb = roundup_64(end_fsb, align);
-
-retry:
error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
- end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
- switch (error) {
- case 0:
- break;
- case -ENOSPC:
- case -EDQUOT:
- /* retry without any preallocation */
+ imap->br_blockcount, 0, &got, &idx, eof);
+ if (error == -ENOSPC || error == -EDQUOT)
trace_xfs_reflink_cow_enospc(ip, imap);
- if (end_fsb != orig_end_fsb) {
- end_fsb = orig_end_fsb;
- goto retry;
- }
- /*FALLTHRU*/
- default:
+ if (error)
return error;
- }
-
- if (end_fsb != orig_end_fsb)
- xfs_inode_set_cowblocks_tag(ip);
trace_xfs_reflink_cow_alloc(ip, &got);
return 0;
@@ -418,87 +397,65 @@ xfs_reflink_allocate_cow_range(
}
/*
- * Find the CoW reservation (and whether or not it needs block allocation)
- * for a given byte offset of a file.
+ * Find the CoW reservation for a given byte offset of a file.
*/
bool
xfs_reflink_find_cow_mapping(
struct xfs_inode *ip,
xfs_off_t offset,
- struct xfs_bmbt_irec *imap,
- bool *need_alloc)
+ struct xfs_bmbt_irec *imap)
{
- struct xfs_bmbt_irec irec;
- struct xfs_ifork *ifp;
- struct xfs_bmbt_rec_host *gotp;
- xfs_fileoff_t bno;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ xfs_fileoff_t offset_fsb;
+ struct xfs_bmbt_irec got;
xfs_extnum_t idx;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
ASSERT(xfs_is_reflink_inode(ip));
- /* Find the extent in the CoW fork. */
- ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- bno = XFS_B_TO_FSBT(ip->i_mount, offset);
- gotp = xfs_iext_bno_to_ext(ifp, bno, &idx);
- if (!gotp)
+ offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+ if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
return false;
-
- xfs_bmbt_get_all(gotp, &irec);
- if (bno >= irec.br_startoff + irec.br_blockcount ||
- bno < irec.br_startoff)
+ if (got.br_startoff > offset_fsb)
return false;
trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
- &irec);
-
- /* If it's still delalloc, we must allocate later. */
- *imap = irec;
- *need_alloc = !!(isnullstartblock(irec.br_startblock));
-
+ &got);
+ *imap = got;
return true;
}
/*
* Trim an extent to end at the next CoW reservation past offset_fsb.
*/
-int
+void
xfs_reflink_trim_irec_to_next_cow(
struct xfs_inode *ip,
xfs_fileoff_t offset_fsb,
struct xfs_bmbt_irec *imap)
{
- struct xfs_bmbt_irec irec;
- struct xfs_ifork *ifp;
- struct xfs_bmbt_rec_host *gotp;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_bmbt_irec got;
xfs_extnum_t idx;
if (!xfs_is_reflink_inode(ip))
- return 0;
+ return;
/* Find the extent in the CoW fork. */
- ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx);
- if (!gotp)
- return 0;
- xfs_bmbt_get_all(gotp, &irec);
+ if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+ return;
/* This is the extent before; try sliding up one. */
- if (irec.br_startoff < offset_fsb) {
- idx++;
- if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
- return 0;
- gotp = xfs_iext_get_ext(ifp, idx);
- xfs_bmbt_get_all(gotp, &irec);
+ if (got.br_startoff < offset_fsb) {
+ if (!xfs_iext_get_extent(ifp, idx + 1, &got))
+ return;
}
- if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount)
- return 0;
+ if (got.br_startoff >= imap->br_startoff + imap->br_blockcount)
+ return;
- imap->br_blockcount = irec.br_startoff - imap->br_startoff;
+ imap->br_blockcount = got.br_startoff - imap->br_startoff;
trace_xfs_reflink_trim_irec(ip, imap);
-
- return 0;
}
/*
@@ -512,18 +469,15 @@ xfs_reflink_cancel_cow_blocks(
xfs_fileoff_t end_fsb)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- struct xfs_bmbt_irec got, prev, del;
+ struct xfs_bmbt_irec got, del;
xfs_extnum_t idx;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
- int error = 0, eof = 0;
+ int error = 0;
if (!xfs_is_reflink_inode(ip))
return 0;
-
- xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
- &got, &prev);
- if (eof)
+ if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
return 0;
while (got.br_startoff < end_fsb) {
@@ -566,9 +520,8 @@ xfs_reflink_cancel_cow_blocks(
xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
}
- if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
+ if (!xfs_iext_get_extent(ifp, ++idx, &got))
break;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
/* clear tag if cow fork is emptied */
@@ -638,13 +591,13 @@ xfs_reflink_end_cow(
xfs_off_t count)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- struct xfs_bmbt_irec got, prev, del;
+ struct xfs_bmbt_irec got, del;
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t end_fsb;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
- int error, eof = 0;
+ int error;
unsigned int resblks;
xfs_filblks_t rlen;
xfs_extnum_t idx;
@@ -668,13 +621,11 @@ xfs_reflink_end_cow(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
- &got, &prev);
-
/* If there is a hole at end_fsb - 1 go to the previous extent */
- if (eof || got.br_startoff > end_fsb) {
+ if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
+ got.br_startoff > end_fsb) {
ASSERT(idx > 0);
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
+ xfs_iext_get_extent(ifp, --idx, &got);
}
/* Walk backwards until we're out of the I/O range... */
@@ -722,11 +673,9 @@ xfs_reflink_end_cow(
error = xfs_defer_finish(&tp, &dfops, ip);
if (error)
goto out_defer;
-
next_extent:
- if (idx < 0)
+ if (!xfs_iext_get_extent(ifp, idx, &got))
break;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
error = xfs_trans_commit(tp);
@@ -1165,111 +1114,6 @@ err:
}
/*
- * Read a page's worth of file data into the page cache. Return the page
- * locked.
- */
-static struct page *
-xfs_get_page(
- struct inode *inode,
- xfs_off_t offset)
-{
- struct address_space *mapping;
- struct page *page;
- pgoff_t n;
-
- n = offset >> PAGE_SHIFT;
- mapping = inode->i_mapping;
- page = read_mapping_page(mapping, n, NULL);
- if (IS_ERR(page))
- return page;
- if (!PageUptodate(page)) {
- put_page(page);
- return ERR_PTR(-EIO);
- }
- lock_page(page);
- return page;
-}
-
-/*
- * Compare extents of two files to see if they are the same.
- */
-static int
-xfs_compare_extents(
- struct inode *src,
- xfs_off_t srcoff,
- struct inode *dest,
- xfs_off_t destoff,
- xfs_off_t len,
- bool *is_same)
-{
- xfs_off_t src_poff;
- xfs_off_t dest_poff;
- void *src_addr;
- void *dest_addr;
- struct page *src_page;
- struct page *dest_page;
- xfs_off_t cmp_len;
- bool same;
- int error;
-
- error = -EINVAL;
- same = true;
- while (len) {
- src_poff = srcoff & (PAGE_SIZE - 1);
- dest_poff = destoff & (PAGE_SIZE - 1);
- cmp_len = min(PAGE_SIZE - src_poff,
- PAGE_SIZE - dest_poff);
- cmp_len = min(cmp_len, len);
- ASSERT(cmp_len > 0);
-
- trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
- XFS_I(dest), destoff);
-
- src_page = xfs_get_page(src, srcoff);
- if (IS_ERR(src_page)) {
- error = PTR_ERR(src_page);
- goto out_error;
- }
- dest_page = xfs_get_page(dest, destoff);
- if (IS_ERR(dest_page)) {
- error = PTR_ERR(dest_page);
- unlock_page(src_page);
- put_page(src_page);
- goto out_error;
- }
- src_addr = kmap_atomic(src_page);
- dest_addr = kmap_atomic(dest_page);
-
- flush_dcache_page(src_page);
- flush_dcache_page(dest_page);
-
- if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
- same = false;
-
- kunmap_atomic(dest_addr);
- kunmap_atomic(src_addr);
- unlock_page(dest_page);
- unlock_page(src_page);
- put_page(dest_page);
- put_page(src_page);
-
- if (!same)
- break;
-
- srcoff += cmp_len;
- destoff += cmp_len;
- len -= cmp_len;
- }
-
- *is_same = same;
- return 0;
-
-out_error:
- trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
- return error;
-}
-
-/*
* Link a range of blocks from one file to another.
*/
int
@@ -1286,14 +1130,11 @@ xfs_reflink_remap_range(
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
struct xfs_mount *mp = src->i_mount;
- loff_t bs = inode_out->i_sb->s_blocksize;
bool same_inode = (inode_in == inode_out);
xfs_fileoff_t sfsbno, dfsbno;
xfs_filblks_t fsblen;
xfs_extlen_t cowextsize;
- loff_t isize;
ssize_t ret;
- loff_t blen;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP;
@@ -1302,34 +1143,14 @@ xfs_reflink_remap_range(
return -EIO;
/* Lock both files against IO */
- if (same_inode) {
- xfs_ilock(src, XFS_IOLOCK_EXCL);
+ lock_two_nondirectories(inode_in, inode_out);
+ if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
- } else {
- xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL);
+ else
xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
- }
-
- /* Don't touch certain kinds of inodes */
- ret = -EPERM;
- if (IS_IMMUTABLE(inode_out))
- goto out_unlock;
- ret = -ETXTBSY;
- if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
- goto out_unlock;
-
-
- /* Don't reflink dirs, pipes, sockets... */
- ret = -EISDIR;
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- goto out_unlock;
+ /* Check file eligibility and prepare for block sharing. */
ret = -EINVAL;
- if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
- goto out_unlock;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- goto out_unlock;
-
/* Don't reflink realtime inodes */
if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
goto out_unlock;
@@ -1338,91 +1159,18 @@ xfs_reflink_remap_range(
if (IS_DAX(inode_in) || IS_DAX(inode_out))
goto out_unlock;
- /* Are we going all the way to the end? */
- isize = i_size_read(inode_in);
- if (isize == 0) {
- ret = 0;
- goto out_unlock;
- }
-
- if (len == 0)
- len = isize - pos_in;
-
- /* Ensure offsets don't wrap and the input is inside i_size */
- if (pos_in + len < pos_in || pos_out + len < pos_out ||
- pos_in + len > isize)
- goto out_unlock;
-
- /* Don't allow dedupe past EOF in the dest file */
- if (is_dedupe) {
- loff_t disize;
-
- disize = i_size_read(inode_out);
- if (pos_out >= disize || pos_out + len > disize)
- goto out_unlock;
- }
-
- /* If we're linking to EOF, continue to the block boundary. */
- if (pos_in + len == isize)
- blen = ALIGN(isize, bs) - pos_in;
- else
- blen = len;
-
- /* Only reflink if we're aligned to block boundaries */
- if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
- !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
- goto out_unlock;
-
- /* Don't allow overlapped reflink within the same file */
- if (same_inode) {
- if (pos_out + blen > pos_in && pos_out < pos_in + blen)
- goto out_unlock;
- }
-
- /* Wait for the completion of any pending IOs on both files */
- inode_dio_wait(inode_in);
- if (!same_inode)
- inode_dio_wait(inode_out);
-
- ret = filemap_write_and_wait_range(inode_in->i_mapping,
- pos_in, pos_in + len - 1);
- if (ret)
- goto out_unlock;
-
- ret = filemap_write_and_wait_range(inode_out->i_mapping,
- pos_out, pos_out + len - 1);
- if (ret)
+ ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+ &len, is_dedupe);
+ if (ret <= 0)
goto out_unlock;
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
- /*
- * Check that the extents are the same.
- */
- if (is_dedupe) {
- bool is_same = false;
-
- ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
- len, &is_same);
- if (ret)
- goto out_unlock;
- if (!is_same) {
- ret = -EBADE;
- goto out_unlock;
- }
- }
-
+ /* Set flags and remap blocks. */
ret = xfs_reflink_set_inode_flag(src, dest);
if (ret)
goto out_unlock;
- /*
- * Invalidate the page cache so that we can clear any CoW mappings
- * in the destination file.
- */
- truncate_inode_pages_range(&inode_out->i_data, pos_out,
- PAGE_ALIGN(pos_out + len) - 1);
-
dfsbno = XFS_B_TO_FSBT(mp, pos_out);
sfsbno = XFS_B_TO_FSBT(mp, pos_in);
fsblen = XFS_B_TO_FSB(mp, len);
@@ -1431,6 +1179,10 @@ xfs_reflink_remap_range(
if (ret)
goto out_unlock;
+ /* Zap any page cache for the destination file's range. */
+ truncate_inode_pages_range(&inode_out->i_data, pos_out,
+ PAGE_ALIGN(pos_out + len) - 1);
+
/*
* Carry the cowextsize hint from src to dest if we're sharing the
* entire source file to the entire destination file, the source file
@@ -1447,11 +1199,9 @@ xfs_reflink_remap_range(
out_unlock:
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
- xfs_iunlock(src, XFS_IOLOCK_EXCL);
- if (src->i_ino != dest->i_ino) {
+ if (!same_inode)
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
- xfs_iunlock(dest, XFS_IOLOCK_EXCL);
- }
+ unlock_two_nondirectories(inode_in, inode_out);
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
return ret;
@@ -1697,37 +1447,3 @@ out:
trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
return error;
}
-
-/*
- * Does this inode have any real CoW reservations?
- */
-bool
-xfs_reflink_has_real_cow_blocks(
- struct xfs_inode *ip)
-{
- struct xfs_bmbt_irec irec;
- struct xfs_ifork *ifp;
- struct xfs_bmbt_rec_host *gotp;
- xfs_extnum_t idx;
-
- if (!xfs_is_reflink_inode(ip))
- return false;
-
- /* Go find the old extent in the CoW fork. */
- ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- gotp = xfs_iext_bno_to_ext(ifp, 0, &idx);
- while (gotp) {
- xfs_bmbt_get_all(gotp, &irec);
-
- if (!isnullstartblock(irec.br_startblock))
- return true;
-
- /* Roll on... */
- idx++;
- if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
- break;
- gotp = xfs_iext_get_ext(ifp, idx);
- }
-
- return false;
-}
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index fad11607c9ad..aa6a4d64bd35 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -31,8 +31,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
- struct xfs_bmbt_irec *imap, bool *need_alloc);
-extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap);
+extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap);
extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
@@ -50,6 +50,4 @@ extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
-extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip);
-
#endif /* __XFS_REFLINK_H */
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 12d48cd8f8a4..f11282c96887 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -80,9 +80,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
}
/* extra precision counters */
for_each_possible_cpu(i) {
- xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes;
- xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes;
- xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes;
+ xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes;
+ xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes;
+ xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
}
len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
@@ -106,9 +106,9 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
for_each_possible_cpu(c) {
preempt_disable();
/* save vn_active, it's a universal truth! */
- vn_active = per_cpu_ptr(stats, c)->vn_active;
+ vn_active = per_cpu_ptr(stats, c)->s.vn_active;
memset(per_cpu_ptr(stats, c), 0, sizeof(*stats));
- per_cpu_ptr(stats, c)->vn_active = vn_active;
+ per_cpu_ptr(stats, c)->s.vn_active = vn_active;
preempt_enable();
}
}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 79ad2e69fc33..375840f5a99a 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -22,9 +22,37 @@
#include <linux/percpu.h>
/*
+ * The btree stats arrays have fixed offsets for the different stats. We
+ * store the base index in the btree cursor via XFS_STATS_CALC_INDEX() and
+ * that allows us to use fixed offsets into the stats array for each btree
+ * stat. These index offsets are defined in the order they will be emitted
+ * in the stats files, so it is possible to add new btree stat types by
+ * appending to the enum list below.
+ */
+enum {
+ __XBTS_lookup = 0,
+ __XBTS_compare = 1,
+ __XBTS_insrec = 2,
+ __XBTS_delrec = 3,
+ __XBTS_newroot = 4,
+ __XBTS_killroot = 5,
+ __XBTS_increment = 6,
+ __XBTS_decrement = 7,
+ __XBTS_lshift = 8,
+ __XBTS_rshift = 9,
+ __XBTS_split = 10,
+ __XBTS_join = 11,
+ __XBTS_alloc = 12,
+ __XBTS_free = 13,
+ __XBTS_moves = 14,
+
+ __XBTS_MAX = 15,
+};
+
+/*
* XFS global statistics
*/
-struct xfsstats {
+struct __xfsstats {
# define XFSSTAT_END_EXTENT_ALLOC 4
__uint32_t xs_allocx;
__uint32_t xs_allocb;
@@ -117,118 +145,20 @@ struct xfsstats {
__uint32_t xb_page_found;
__uint32_t xb_get_read;
/* Version 2 btree counters */
-#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15)
- __uint32_t xs_abtb_2_lookup;
- __uint32_t xs_abtb_2_compare;
- __uint32_t xs_abtb_2_insrec;
- __uint32_t xs_abtb_2_delrec;
- __uint32_t xs_abtb_2_newroot;
- __uint32_t xs_abtb_2_killroot;
- __uint32_t xs_abtb_2_increment;
- __uint32_t xs_abtb_2_decrement;
- __uint32_t xs_abtb_2_lshift;
- __uint32_t xs_abtb_2_rshift;
- __uint32_t xs_abtb_2_split;
- __uint32_t xs_abtb_2_join;
- __uint32_t xs_abtb_2_alloc;
- __uint32_t xs_abtb_2_free;
- __uint32_t xs_abtb_2_moves;
-#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15)
- __uint32_t xs_abtc_2_lookup;
- __uint32_t xs_abtc_2_compare;
- __uint32_t xs_abtc_2_insrec;
- __uint32_t xs_abtc_2_delrec;
- __uint32_t xs_abtc_2_newroot;
- __uint32_t xs_abtc_2_killroot;
- __uint32_t xs_abtc_2_increment;
- __uint32_t xs_abtc_2_decrement;
- __uint32_t xs_abtc_2_lshift;
- __uint32_t xs_abtc_2_rshift;
- __uint32_t xs_abtc_2_split;
- __uint32_t xs_abtc_2_join;
- __uint32_t xs_abtc_2_alloc;
- __uint32_t xs_abtc_2_free;
- __uint32_t xs_abtc_2_moves;
-#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15)
- __uint32_t xs_bmbt_2_lookup;
- __uint32_t xs_bmbt_2_compare;
- __uint32_t xs_bmbt_2_insrec;
- __uint32_t xs_bmbt_2_delrec;
- __uint32_t xs_bmbt_2_newroot;
- __uint32_t xs_bmbt_2_killroot;
- __uint32_t xs_bmbt_2_increment;
- __uint32_t xs_bmbt_2_decrement;
- __uint32_t xs_bmbt_2_lshift;
- __uint32_t xs_bmbt_2_rshift;
- __uint32_t xs_bmbt_2_split;
- __uint32_t xs_bmbt_2_join;
- __uint32_t xs_bmbt_2_alloc;
- __uint32_t xs_bmbt_2_free;
- __uint32_t xs_bmbt_2_moves;
-#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15)
- __uint32_t xs_ibt_2_lookup;
- __uint32_t xs_ibt_2_compare;
- __uint32_t xs_ibt_2_insrec;
- __uint32_t xs_ibt_2_delrec;
- __uint32_t xs_ibt_2_newroot;
- __uint32_t xs_ibt_2_killroot;
- __uint32_t xs_ibt_2_increment;
- __uint32_t xs_ibt_2_decrement;
- __uint32_t xs_ibt_2_lshift;
- __uint32_t xs_ibt_2_rshift;
- __uint32_t xs_ibt_2_split;
- __uint32_t xs_ibt_2_join;
- __uint32_t xs_ibt_2_alloc;
- __uint32_t xs_ibt_2_free;
- __uint32_t xs_ibt_2_moves;
-#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15)
- __uint32_t xs_fibt_2_lookup;
- __uint32_t xs_fibt_2_compare;
- __uint32_t xs_fibt_2_insrec;
- __uint32_t xs_fibt_2_delrec;
- __uint32_t xs_fibt_2_newroot;
- __uint32_t xs_fibt_2_killroot;
- __uint32_t xs_fibt_2_increment;
- __uint32_t xs_fibt_2_decrement;
- __uint32_t xs_fibt_2_lshift;
- __uint32_t xs_fibt_2_rshift;
- __uint32_t xs_fibt_2_split;
- __uint32_t xs_fibt_2_join;
- __uint32_t xs_fibt_2_alloc;
- __uint32_t xs_fibt_2_free;
- __uint32_t xs_fibt_2_moves;
-#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2+15)
- __uint32_t xs_rmap_2_lookup;
- __uint32_t xs_rmap_2_compare;
- __uint32_t xs_rmap_2_insrec;
- __uint32_t xs_rmap_2_delrec;
- __uint32_t xs_rmap_2_newroot;
- __uint32_t xs_rmap_2_killroot;
- __uint32_t xs_rmap_2_increment;
- __uint32_t xs_rmap_2_decrement;
- __uint32_t xs_rmap_2_lshift;
- __uint32_t xs_rmap_2_rshift;
- __uint32_t xs_rmap_2_split;
- __uint32_t xs_rmap_2_join;
- __uint32_t xs_rmap_2_alloc;
- __uint32_t xs_rmap_2_free;
- __uint32_t xs_rmap_2_moves;
-#define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + 15)
- __uint32_t xs_refcbt_2_lookup;
- __uint32_t xs_refcbt_2_compare;
- __uint32_t xs_refcbt_2_insrec;
- __uint32_t xs_refcbt_2_delrec;
- __uint32_t xs_refcbt_2_newroot;
- __uint32_t xs_refcbt_2_killroot;
- __uint32_t xs_refcbt_2_increment;
- __uint32_t xs_refcbt_2_decrement;
- __uint32_t xs_refcbt_2_lshift;
- __uint32_t xs_refcbt_2_rshift;
- __uint32_t xs_refcbt_2_split;
- __uint32_t xs_refcbt_2_join;
- __uint32_t xs_refcbt_2_alloc;
- __uint32_t xs_refcbt_2_free;
- __uint32_t xs_refcbt_2_moves;
+#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF + __XBTS_MAX)
+ __uint32_t xs_abtb_2[__XBTS_MAX];
+#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2 + __XBTS_MAX)
+ __uint32_t xs_abtc_2[__XBTS_MAX];
+#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2 + __XBTS_MAX)
+ __uint32_t xs_bmbt_2[__XBTS_MAX];
+#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2 + __XBTS_MAX)
+ __uint32_t xs_ibt_2[__XBTS_MAX];
+#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2 + __XBTS_MAX)
+ __uint32_t xs_fibt_2[__XBTS_MAX];
+#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2 + __XBTS_MAX)
+ __uint32_t xs_rmap_2[__XBTS_MAX];
+#define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + __XBTS_MAX)
+ __uint32_t xs_refcbt_2[__XBTS_MAX];
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_REFCOUNT + 6)
__uint32_t xs_qm_dqreclaims;
__uint32_t xs_qm_dqreclaim_misses;
@@ -245,26 +175,58 @@ struct xfsstats {
__uint64_t xs_read_bytes;
};
+struct xfsstats {
+ union {
+ struct __xfsstats s;
+ uint32_t a[XFSSTAT_END_XQMSTAT];
+ };
+};
+
+/*
+ * simple wrapper for getting the array index of s struct member offset
+ */
+#define XFS_STATS_CALC_INDEX(member) \
+ (offsetof(struct __xfsstats, member) / (int)sizeof(__uint32_t))
+
+
int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
void xfs_stats_clearall(struct xfsstats __percpu *stats);
extern struct xstats xfsstats;
#define XFS_STATS_INC(mp, v) \
do { \
- per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \
- per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v++; \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v++; \
} while (0)
#define XFS_STATS_DEC(mp, v) \
do { \
- per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \
- per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v--; \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v--; \
} while (0)
#define XFS_STATS_ADD(mp, v, inc) \
do { \
- per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \
- per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->s.v += (inc); \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->s.v += (inc); \
+} while (0)
+
+#define XFS_STATS_INC_OFF(mp, off) \
+do { \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off]++; \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off]++; \
+} while (0)
+
+#define XFS_STATS_DEC_OFF(mp, off) \
+do { \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off]; \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off]; \
+} while (0)
+
+#define XFS_STATS_ADD_OFF(mp, off, inc) \
+do { \
+ per_cpu_ptr(xfsstats.xs_stats, current_cpu())->a[off] += (inc); \
+ per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->a[off] += (inc); \
} while (0)
#if defined(CONFIG_PROC_FS)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ade4691e3f74..eecbaac08eba 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -104,9 +104,6 @@ static const match_table_t tokens = {
{Opt_sysvgroups,"sysvgroups"}, /* group-ID from current process */
{Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
{Opt_norecovery,"norecovery"}, /* don't run XFS recovery */
- {Opt_barrier, "barrier"}, /* use writer barriers for log write and
- * unwritten extent conversion */
- {Opt_nobarrier, "nobarrier"}, /* .. disable */
{Opt_inode64, "inode64"}, /* inodes can be allocated anywhere */
{Opt_inode32, "inode32"}, /* inode allocation limited to
* XFS_MAXINUMBER_32 */
@@ -134,6 +131,12 @@ static const match_table_t tokens = {
{Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */
{Opt_dax, "dax"}, /* Enable direct access to bdev pages */
+
+ /* Deprecated mount options scheduled for removal */
+ {Opt_barrier, "barrier"}, /* use writer barriers for log write and
+ * unwritten extent conversion */
+ {Opt_nobarrier, "nobarrier"}, /* .. disable */
+
{Opt_err, NULL},
};
@@ -301,12 +304,6 @@ xfs_parseargs(
case Opt_nouuid:
mp->m_flags |= XFS_MOUNT_NOUUID;
break;
- case Opt_barrier:
- mp->m_flags |= XFS_MOUNT_BARRIER;
- break;
- case Opt_nobarrier:
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- break;
case Opt_ikeep:
mp->m_flags |= XFS_MOUNT_IKEEP;
break;
@@ -374,6 +371,14 @@ xfs_parseargs(
mp->m_flags |= XFS_MOUNT_DAX;
break;
#endif
+ case Opt_barrier:
+ xfs_warn(mp, "%s option is deprecated, ignoring.", p);
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+ break;
+ case Opt_nobarrier:
+ xfs_warn(mp, "%s option is deprecated, ignoring.", p);
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ break;
default:
xfs_warn(mp, "unknown mount option [%s].", p);
return -EINVAL;
@@ -943,7 +948,7 @@ xfs_fs_destroy_inode(
trace_xfs_destroy_inode(ip);
- ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+ ASSERT(!rwsem_is_locked(&inode->i_rwsem));
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
@@ -1238,9 +1243,11 @@ xfs_fs_remount(
token = match_token(p, tokens, args);
switch (token) {
case Opt_barrier:
+ xfs_warn(mp, "%s option is deprecated, ignoring.", p);
mp->m_flags |= XFS_MOUNT_BARRIER;
break;
case Opt_nobarrier:
+ xfs_warn(mp, "%s option is deprecated, ignoring.", p);
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
case Opt_inode64:
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 58142aeeeea6..f2cb45ed1d54 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -238,8 +238,7 @@ xfs_symlink(
if (error)
goto out_release_inode;
- xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
- XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
+ xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
/*
@@ -287,7 +286,7 @@ xfs_symlink(
* the transaction cancel unlocking dp so don't do it explicitly in the
* error path.
*/
- xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
unlock_dp_on_error = false;
/*
@@ -412,7 +411,7 @@ out_release_inode:
xfs_qm_dqrele(pdqp);
if (unlock_dp_on_error)
- xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+ xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error;
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 0907752be62d..69c5bcd9a51b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -355,7 +355,6 @@ DEFINE_BUF_EVENT(xfs_buf_rele);
DEFINE_BUF_EVENT(xfs_buf_iodone);
DEFINE_BUF_EVENT(xfs_buf_submit);
DEFINE_BUF_EVENT(xfs_buf_submit_wait);
-DEFINE_BUF_EVENT(xfs_buf_bawrite);
DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
DEFINE_BUF_EVENT(xfs_buf_trylock_fail);
@@ -367,19 +366,15 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
-DEFINE_BUF_EVENT(xfs_bdstrat_shut);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse);
DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
/* not really buffer traces, but the buf provides useful information */
DEFINE_BUF_EVENT(xfs_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
DEFINE_BUF_EVENT(xfs_reset_dqcounts);
-DEFINE_BUF_EVENT(xfs_inode_item_push);
/* pass flags explicitly */
DECLARE_EVENT_CLASS(xfs_buf_flags_class,
@@ -541,7 +536,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered);
DECLARE_EVENT_CLASS(xfs_filestream_class,
TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
@@ -680,7 +674,6 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
DEFINE_INODE_EVENT(xfs_dir_fsync);
DEFINE_INODE_EVENT(xfs_file_fsync);
DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_evict_inode);
DEFINE_INODE_EVENT(xfs_update_time);
DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
@@ -798,7 +791,6 @@ TRACE_EVENT(xfs_irec_merge_post,
DEFINE_EVENT(xfs_iref_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
TP_ARGS(ip, caller_ip))
-DEFINE_IREF_EVENT(xfs_ihold);
DEFINE_IREF_EVENT(xfs_irele);
DEFINE_IREF_EVENT(xfs_inode_pin);
DEFINE_IREF_EVENT(xfs_inode_unpin);
@@ -939,7 +931,6 @@ DEFINE_DQUOT_EVENT(xfs_dqget_miss);
DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
DEFINE_DQUOT_EVENT(xfs_dqget_dup);
DEFINE_DQUOT_EVENT(xfs_dqput);
-DEFINE_DQUOT_EVENT(xfs_dqput_wait);
DEFINE_DQUOT_EVENT(xfs_dqput_free);
DEFINE_DQUOT_EVENT(xfs_dqrele);
DEFINE_DQUOT_EVENT(xfs_dqflush);
@@ -1815,7 +1806,6 @@ DEFINE_ATTR_EVENT(xfs_attr_sf_addname);
DEFINE_ATTR_EVENT(xfs_attr_sf_create);
DEFINE_ATTR_EVENT(xfs_attr_sf_lookup);
DEFINE_ATTR_EVENT(xfs_attr_sf_remove);
-DEFINE_ATTR_EVENT(xfs_attr_sf_removename);
DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf);
DEFINE_ATTR_EVENT(xfs_attr_leaf_add);
@@ -1844,7 +1834,6 @@ DEFINE_ATTR_EVENT(xfs_attr_leaf_toosmall);
DEFINE_ATTR_EVENT(xfs_attr_node_addname);
DEFINE_ATTR_EVENT(xfs_attr_node_get);
-DEFINE_ATTR_EVENT(xfs_attr_node_lookup);
DEFINE_ATTR_EVENT(xfs_attr_node_replace);
DEFINE_ATTR_EVENT(xfs_attr_node_removename);
@@ -2440,11 +2429,9 @@ DEFINE_DEFER_EVENT(xfs_defer_finish_done);
DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error);
DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error);
-DEFINE_DEFER_ERROR_EVENT(xfs_defer_op_finish_error);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel);
-DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_commit);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
@@ -3092,87 +3079,6 @@ DEFINE_EVENT(xfs_double_io_class, name, \
struct xfs_inode *dest, xfs_off_t doffset), \
TP_ARGS(src, soffset, len, dest, doffset))
-/* two-file vfs io tracepoint class */
-DECLARE_EVENT_CLASS(xfs_double_vfs_io_class,
- TP_PROTO(struct inode *src, u64 soffset, u64 len,
- struct inode *dest, u64 doffset),
- TP_ARGS(src, soffset, len, dest, doffset),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(unsigned long, src_ino)
- __field(loff_t, src_isize)
- __field(loff_t, src_offset)
- __field(size_t, len)
- __field(unsigned long, dest_ino)
- __field(loff_t, dest_isize)
- __field(loff_t, dest_offset)
- ),
- TP_fast_assign(
- __entry->dev = src->i_sb->s_dev;
- __entry->src_ino = src->i_ino;
- __entry->src_isize = i_size_read(src);
- __entry->src_offset = soffset;
- __entry->len = len;
- __entry->dest_ino = dest->i_ino;
- __entry->dest_isize = i_size_read(dest);
- __entry->dest_offset = doffset;
- ),
- TP_printk("dev %d:%d count %zd "
- "ino 0x%lx isize 0x%llx offset 0x%llx -> "
- "ino 0x%lx isize 0x%llx offset 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->len,
- __entry->src_ino,
- __entry->src_isize,
- __entry->src_offset,
- __entry->dest_ino,
- __entry->dest_isize,
- __entry->dest_offset)
-)
-
-#define DEFINE_DOUBLE_VFS_IO_EVENT(name) \
-DEFINE_EVENT(xfs_double_vfs_io_class, name, \
- TP_PROTO(struct inode *src, u64 soffset, u64 len, \
- struct inode *dest, u64 doffset), \
- TP_ARGS(src, soffset, len, dest, doffset))
-
-/* CoW write tracepoint */
-DECLARE_EVENT_CLASS(xfs_copy_on_write_class,
- TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk,
- xfs_extlen_t len, xfs_fsblock_t new_pblk),
- TP_ARGS(ip, lblk, pblk, len, new_pblk),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fileoff_t, lblk)
- __field(xfs_fsblock_t, pblk)
- __field(xfs_extlen_t, len)
- __field(xfs_fsblock_t, new_pblk)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->lblk = lblk;
- __entry->pblk = pblk;
- __entry->len = len;
- __entry->new_pblk = new_pblk;
- ),
- TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx pblk 0x%llx "
- "len 0x%x new_pblk %llu",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->lblk,
- __entry->pblk,
- __entry->len,
- __entry->new_pblk)
-)
-
-#define DEFINE_COW_EVENT(name) \
-DEFINE_EVENT(xfs_copy_on_write_class, name, \
- TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk, \
- xfs_extlen_t len, xfs_fsblock_t new_pblk), \
- TP_ARGS(ip, lblk, pblk, len, new_pblk))
-
/* inode/irec events */
DECLARE_EVENT_CLASS(xfs_inode_irec_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec),
@@ -3292,8 +3198,6 @@ DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_reflink_main_loop_error);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_read_iomap_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error);
@@ -3302,9 +3206,6 @@ DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error);
/* ioctl tracepoints */
-DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_reflink);
-DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_clone_range);
-DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_file_extent_same);
TRACE_EVENT(xfs_ioctl_clone,
TP_PROTO(struct inode *src, struct inode *dest),
TP_ARGS(src, dest),
@@ -3334,11 +3235,7 @@ TRACE_EVENT(xfs_ioctl_clone,
/* unshare tracepoints */
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare);
-DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cow_eof_block);
-DEFINE_PAGE_EVENT(xfs_reflink_unshare_page);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_cow_eof_block_error);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_dirty_page_error);
/* copy on write */
DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
@@ -3361,14 +3258,8 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
-DEFINE_COW_EVENT(xfs_reflink_fork_buf);
-DEFINE_COW_EVENT(xfs_reflink_finish_fork_buf);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_fork_buf_error);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_finish_fork_buf_error);
-DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error);
/* rmap swapext tracepoints */
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 62900938f26d..0594db435972 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -130,7 +130,7 @@ const struct xattr_handler *xfs_xattr_handlers[] = {
NULL
};
-static int
+static void
__xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
char *prefix,
@@ -148,7 +148,7 @@ __xfs_xattr_put_listent(
if (arraytop > context->firstu) {
context->count = -1; /* insufficient space */
context->seen_enough = 1;
- return 0;
+ return;
}
offset = (char *)context->alist + context->count;
strncpy(offset, prefix, prefix_len);
@@ -159,10 +159,10 @@ __xfs_xattr_put_listent(
compute_size:
context->count += prefix_len + namelen + 1;
- return 0;
+ return;
}
-static int
+static void
xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
int flags,
@@ -180,23 +180,19 @@ xfs_xattr_put_listent(
if (namelen == SGI_ACL_FILE_SIZE &&
strncmp(name, SGI_ACL_FILE,
SGI_ACL_FILE_SIZE) == 0) {
- int ret = __xfs_xattr_put_listent(
+ __xfs_xattr_put_listent(
context, XATTR_SYSTEM_PREFIX,
XATTR_SYSTEM_PREFIX_LEN,
XATTR_POSIX_ACL_ACCESS,
strlen(XATTR_POSIX_ACL_ACCESS));
- if (ret)
- return ret;
} else if (namelen == SGI_ACL_DEFAULT_SIZE &&
strncmp(name, SGI_ACL_DEFAULT,
SGI_ACL_DEFAULT_SIZE) == 0) {
- int ret = __xfs_xattr_put_listent(
+ __xfs_xattr_put_listent(
context, XATTR_SYSTEM_PREFIX,
XATTR_SYSTEM_PREFIX_LEN,
XATTR_POSIX_ACL_DEFAULT,
strlen(XATTR_POSIX_ACL_DEFAULT));
- if (ret)
- return ret;
}
#endif
@@ -205,7 +201,7 @@ xfs_xattr_put_listent(
* see them.
*/
if (!capable(CAP_SYS_ADMIN))
- return 0;
+ return;
prefix = XATTR_TRUSTED_PREFIX;
prefix_len = XATTR_TRUSTED_PREFIX_LEN;
@@ -217,8 +213,9 @@ xfs_xattr_put_listent(
prefix_len = XATTR_USER_PREFIX_LEN;
}
- return __xfs_xattr_put_listent(context, prefix, prefix_len, name,
- namelen);
+ __xfs_xattr_put_listent(context, prefix, prefix_len, name,
+ namelen);
+ return;
}
ssize_t