aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/scrub/dir.c15
-rw-r--r--fs/xfs/scrub/inode.c14
-rw-r--r--fs/xfs/scrub/quota.c4
-rw-r--r--fs/xfs/scrub/repair.c3
-rw-r--r--fs/xfs/scrub/scrub.c4
-rw-r--r--fs/xfs/scrub/scrub.h1
-rw-r--r--fs/xfs/xfs_buf_item_recover.c2
-rw-r--r--fs/xfs/xfs_dquot.c79
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_icache.c3
-rw-r--r--fs/xfs/xfs_inode.c8
-rw-r--r--fs/xfs/xfs_ioctl.c5
-rw-r--r--fs/xfs/xfs_ioctl.h5
-rw-r--r--fs/xfs/xfs_iops.c40
-rw-r--r--fs/xfs/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_log_cil.c52
-rw-r--r--fs/xfs/xfs_log_recover.c26
-rw-r--r--fs/xfs/xfs_mount.c10
-rw-r--r--fs/xfs/xfs_qm_syscalls.c11
-rw-r--r--fs/xfs/xfs_reflink.c5
-rw-r--r--fs/xfs/xfs_super.c9
-rw-r--r--fs/xfs/xfs_symlink.c33
-rw-r--r--fs/xfs/xfs_sysfs.c16
-rw-r--r--fs/xfs/xfs_trans.c11
24 files changed, 185 insertions, 175 deletions
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 200a63f58fe7..38897adde7b5 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -497,6 +497,7 @@ STATIC int
xchk_directory_leaf1_bestfree(
struct xfs_scrub *sc,
struct xfs_da_args *args,
+ xfs_dir2_db_t last_data_db,
xfs_dablk_t lblk)
{
struct xfs_dir3_icleaf_hdr leafhdr;
@@ -534,10 +535,14 @@ xchk_directory_leaf1_bestfree(
}
/*
- * There should be as many bestfree slots as there are dir data
- * blocks that can fit under i_size.
+ * There must be enough bestfree slots to cover all the directory data
+ * blocks that we scanned. It is possible for there to be a hole
+ * between the last data block and i_disk_size. This seems like an
+ * oversight to the scrub author, but as we have been writing out
+ * directories like this (and xfs_repair doesn't mind them) for years,
+ * that's what we have to check.
*/
- if (bestcount != xfs_dir2_byte_to_db(geo, sc->ip->i_disk_size)) {
+ if (bestcount != last_data_db + 1) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out;
}
@@ -669,6 +674,7 @@ xchk_directory_blocks(
xfs_fileoff_t lblk;
struct xfs_iext_cursor icur;
xfs_dablk_t dabno;
+ xfs_dir2_db_t last_data_db = 0;
bool found;
int is_block = 0;
int error;
@@ -712,6 +718,7 @@ xchk_directory_blocks(
args.geo->fsbcount);
lblk < got.br_startoff + got.br_blockcount;
lblk += args.geo->fsbcount) {
+ last_data_db = xfs_dir2_da_to_db(args.geo, lblk);
error = xchk_directory_data_bestfree(sc, lblk,
is_block);
if (error)
@@ -734,7 +741,7 @@ xchk_directory_blocks(
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out;
}
- error = xchk_directory_leaf1_bestfree(sc, &args,
+ error = xchk_directory_leaf1_bestfree(sc, &args, last_data_db,
leaf_lblk);
if (error)
goto out;
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 2405b09d03d0..eac15af7b08c 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -233,6 +233,7 @@ xchk_dinode(
unsigned long long isize;
uint64_t flags2;
uint32_t nextents;
+ prid_t prid;
uint16_t flags;
uint16_t mode;
@@ -267,6 +268,7 @@ xchk_dinode(
* so just mark this inode for preening.
*/
xchk_ino_set_preen(sc, ino);
+ prid = 0;
break;
case 2:
case 3:
@@ -279,12 +281,17 @@ xchk_dinode(
if (dip->di_projid_hi != 0 &&
!xfs_has_projid32(mp))
xchk_ino_set_corrupt(sc, ino);
+
+ prid = be16_to_cpu(dip->di_projid_lo);
break;
default:
xchk_ino_set_corrupt(sc, ino);
return;
}
+ if (xfs_has_projid32(mp))
+ prid |= (prid_t)be16_to_cpu(dip->di_projid_hi) << 16;
+
/*
* di_uid/di_gid -- -1 isn't invalid, but there's no way that
* userspace could have created that.
@@ -293,6 +300,13 @@ xchk_dinode(
dip->di_gid == cpu_to_be32(-1U))
xchk_ino_set_warning(sc, ino);
+ /*
+ * project id of -1 isn't supposed to be valid, but the kernel didn't
+ * always validate that.
+ */
+ if (prid == -1U)
+ xchk_ino_set_warning(sc, ino);
+
/* di_format */
switch (dip->di_format) {
case XFS_DINODE_FMT_DEV:
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index d6c1b00a4fc8..3c7506c7553c 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -48,10 +48,10 @@ xchk_setup_quota(
dqtype = xchk_quota_to_dqtype(sc);
if (dqtype == 0)
return -EINVAL;
- sc->flags |= XCHK_HAS_QUOTAOFFLOCK;
- mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
+
if (!xfs_this_quota_on(sc->mp, dqtype))
return -ENOENT;
+
error = xchk_setup_fs(sc);
if (error)
return error;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 8f3cba14ada3..1e7b6b209ee8 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -25,6 +25,7 @@
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_quota.h"
+#include "xfs_qm.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -912,11 +913,13 @@ xrep_force_quotacheck(
if (!(flag & sc->mp->m_qflags))
return;
+ mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
sc->mp->m_qflags &= ~flag;
spin_lock(&sc->mp->m_sb_lock);
sc->mp->m_sb.sb_qflags &= ~flag;
spin_unlock(&sc->mp->m_sb_lock);
xfs_log_sb(sc->tp);
+ mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
}
/*
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 8d528d35b725..b11870d07c56 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -173,10 +173,6 @@ xchk_teardown(
mnt_drop_write_file(sc->file);
if (sc->flags & XCHK_REAPING_DISABLED)
xchk_start_reaping(sc);
- if (sc->flags & XCHK_HAS_QUOTAOFFLOCK) {
- mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
- sc->flags &= ~XCHK_HAS_QUOTAOFFLOCK;
- }
if (sc->buf) {
kmem_free(sc->buf);
sc->buf = NULL;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 80e5026bba44..3de5287e98d8 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -88,7 +88,6 @@ struct xfs_scrub {
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
-#define XCHK_HAS_QUOTAOFFLOCK (1 << 1) /* we hold the quotaoff lock */
#define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 70ca5751b13e..e484251dc9c8 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -816,7 +816,7 @@ xlog_recover_get_buf_lsn(
}
if (lsn != (xfs_lsn_t)-1) {
- if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+ if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
goto recover_immediately;
return lsn;
}
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index e48ae227bb11..5afedcbc78c7 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -289,13 +289,12 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
*/
STATIC int
xfs_dquot_disk_alloc(
- struct xfs_trans **tpp,
struct xfs_dquot *dqp,
struct xfs_buf **bpp)
{
struct xfs_bmbt_irec map;
- struct xfs_trans *tp = *tpp;
- struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_trans *tp;
+ struct xfs_mount *mp = dqp->q_mount;
struct xfs_buf *bp;
xfs_dqtype_t qtype = xfs_dquot_type(dqp);
struct xfs_inode *quotip = xfs_quota_inode(mp, qtype);
@@ -304,29 +303,35 @@ xfs_dquot_disk_alloc(
trace_xfs_dqalloc(dqp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
+ XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
+ if (error)
+ return error;
+
xfs_ilock(quotip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, quotip, 0);
+
if (!xfs_this_quota_on(dqp->q_mount, qtype)) {
/*
* Return if this type of quotas is turned off while we didn't
* have an inode lock
*/
- xfs_iunlock(quotip, XFS_ILOCK_EXCL);
- return -ESRCH;
+ error = -ESRCH;
+ goto err_cancel;
}
- xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
-
error = xfs_iext_count_may_overflow(quotip, XFS_DATA_FORK,
XFS_IEXT_ADD_NOSPLIT_CNT);
if (error)
- return error;
+ goto err_cancel;
/* Create the block mapping. */
error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map,
&nmaps);
if (error)
- return error;
+ goto err_cancel;
+
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
ASSERT(nmaps == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -341,7 +346,7 @@ xfs_dquot_disk_alloc(
error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen, 0, &bp);
if (error)
- return error;
+ goto err_cancel;
bp->b_ops = &xfs_dquot_buf_ops;
/*
@@ -371,16 +376,25 @@ xfs_dquot_disk_alloc(
* is responsible for unlocking any buffer passed back, either
* manually or by committing the transaction. On error, the buffer is
* released and not passed back.
+ *
+ * Keep the quota inode ILOCKed until after the transaction commit to
+ * maintain the atomicity of bmap/rmap updates.
*/
xfs_trans_bhold(tp, bp);
- error = xfs_defer_finish(tpp);
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(quotip, XFS_ILOCK_EXCL);
if (error) {
- xfs_trans_bhold_release(*tpp, bp);
- xfs_trans_brelse(*tpp, bp);
+ xfs_buf_relse(bp);
return error;
}
+
*bpp = bp;
return 0;
+
+err_cancel:
+ xfs_trans_cancel(tp);
+ xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+ return error;
}
/*
@@ -629,43 +643,6 @@ xfs_dquot_to_disk(
ddqp->d_rtbtimer = xfs_dquot_to_disk_ts(dqp, dqp->q_rtb.timer);
}
-/* Allocate and initialize the dquot buffer for this in-core dquot. */
-static int
-xfs_qm_dqread_alloc(
- struct xfs_mount *mp,
- struct xfs_dquot *dqp,
- struct xfs_buf **bpp)
-{
- struct xfs_trans *tp;
- int error;
-
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
- XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
- if (error)
- goto err;
-
- error = xfs_dquot_disk_alloc(&tp, dqp, bpp);
- if (error)
- goto err_cancel;
-
- error = xfs_trans_commit(tp);
- if (error) {
- /*
- * Buffer was held to the transaction, so we have to unlock it
- * manually here because we're not passing it back.
- */
- xfs_buf_relse(*bpp);
- *bpp = NULL;
- goto err;
- }
- return 0;
-
-err_cancel:
- xfs_trans_cancel(tp);
-err:
- return error;
-}
-
/*
* Read in the ondisk dquot using dqtobp() then copy it to an incore version,
* and release the buffer immediately. If @can_alloc is true, fill any
@@ -689,7 +666,7 @@ xfs_qm_dqread(
/* Try to read the buffer, allocating if necessary. */
error = xfs_dquot_disk_read(mp, dqp, &bp);
if (error == -ENOENT && can_alloc)
- error = xfs_qm_dqread_alloc(mp, dqp, &bp);
+ error = xfs_dquot_disk_alloc(dqp, &bp);
if (error)
goto err;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 81c445e9489b..749fd18c4f32 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -213,11 +213,12 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(ag_resv_fail),
NULL,
};
+ATTRIBUTE_GROUPS(xfs_errortag);
static struct kobj_type xfs_errortag_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_errortag_sysfs_ops,
- .default_attrs = xfs_errortag_attrs,
+ .default_groups = xfs_errortag_groups,
};
int
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index da4af2142a2b..d019c98eb839 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -749,7 +749,8 @@ again:
/*
* If we have a real type for an on-disk inode, we can setup the inode
- * now. If it's a new inode being created, xfs_ialloc will handle it.
+ * now. If it's a new inode being created, xfs_init_new_inode will
+ * handle it.
*/
if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
xfs_setup_existing_inode(ip);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6771f357ad2c..04bf467b1090 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -988,8 +988,8 @@ xfs_create(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
- mapped_fsgid(mnt_userns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
+ mapped_fsgid(mnt_userns, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
@@ -1142,8 +1142,8 @@ xfs_create_tmpfile(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
- mapped_fsgid(mnt_userns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
+ mapped_fsgid(mnt_userns, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 174cd8950cb6..8ea47a9d5aad 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -372,7 +372,7 @@ int
xfs_ioc_attr_list(
struct xfs_inode *dp,
void __user *ubuf,
- int bufsize,
+ size_t bufsize,
int flags,
struct xfs_attrlist_cursor __user *ucursor)
{
@@ -687,7 +687,8 @@ xfs_ioc_space(
if (bf->l_start > XFS_ISIZE(ip)) {
error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip), 0);
+ bf->l_start - XFS_ISIZE(ip),
+ XFS_BMAPI_PREALLOC);
if (error)
goto out_unlock;
}
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 28453a6d4461..845d3bcab74b 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -38,8 +38,9 @@ xfs_readlink_by_handle(
int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode,
uint32_t opcode, void __user *uname, void __user *value,
uint32_t *len, uint32_t flags);
-int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, int bufsize,
- int flags, struct xfs_attrlist_cursor __user *ucursor);
+int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf,
+ size_t bufsize, int flags,
+ struct xfs_attrlist_cursor __user *ucursor);
extern struct dentry *
xfs_handle_to_dentry(
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index a607d6aca5c4..3447c19e99da 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -511,27 +511,6 @@ xfs_vn_get_link(
return ERR_PTR(error);
}
-STATIC const char *
-xfs_vn_get_link_inline(
- struct dentry *dentry,
- struct inode *inode,
- struct delayed_call *done)
-{
- struct xfs_inode *ip = XFS_I(inode);
- char *link;
-
- ASSERT(ip->i_df.if_format == XFS_DINODE_FMT_LOCAL);
-
- /*
- * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if
- * if_data is junk.
- */
- link = ip->i_df.if_u1.if_data;
- if (XFS_IS_CORRUPT(ip->i_mount, !link))
- return ERR_PTR(-EFSCORRUPTED);
- return link;
-}
-
static uint32_t
xfs_stat_blksize(
struct xfs_inode *ip)
@@ -1250,14 +1229,6 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.update_time = xfs_vn_update_time,
};
-static const struct inode_operations xfs_inline_symlink_inode_operations = {
- .get_link = xfs_vn_get_link_inline,
- .getattr = xfs_vn_getattr,
- .setattr = xfs_vn_setattr,
- .listxattr = xfs_vn_listxattr,
- .update_time = xfs_vn_update_time,
-};
-
/* Figure out if this file actually supports DAX. */
static bool
xfs_inode_supports_dax(
@@ -1332,9 +1303,9 @@ xfs_diflags_to_iflags(
* Initialize the Linux inode.
*
* When reading existing inodes from disk this is called directly from xfs_iget,
- * when creating a new inode it is called from xfs_ialloc after setting up the
- * inode. These callers have different criteria for clearing XFS_INEW, so leave
- * it up to the caller to deal with unlocking the inode appropriately.
+ * when creating a new inode it is called from xfs_init_new_inode after setting
+ * up the inode. These callers have different criteria for clearing XFS_INEW, so
+ * leave it up to the caller to deal with unlocking the inode appropriately.
*/
void
xfs_setup_inode(
@@ -1408,10 +1379,7 @@ xfs_setup_iops(
inode->i_fop = &xfs_dir_file_operations;
break;
case S_IFLNK:
- if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL)
- inode->i_op = &xfs_inline_symlink_inode_operations;
- else
- inode->i_op = &xfs_symlink_inode_operations;
+ inode->i_op = &xfs_symlink_inode_operations;
break;
default:
inode->i_op = &xfs_inode_operations;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index c174262a074e..09a8fba84ff9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -61,6 +61,7 @@ typedef __u32 xfs_nlink_t;
#include <linux/ratelimit.h>
#include <linux/rhashtable.h>
#include <linux/xattr.h>
+#include <linux/mnt_idmapping.h>
#include <asm/page.h>
#include <asm/div64.h>
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 6c93c8ada6f3..83a039762b81 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -103,6 +103,39 @@ xlog_cil_iovec_space(
}
/*
+ * shadow buffers can be large, so we need to use kvmalloc() here to ensure
+ * success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts to fall
+ * back to vmalloc, so we can't actually do anything useful with gfp flags to
+ * control the kmalloc() behaviour within kvmalloc(). Hence kmalloc() will do
+ * direct reclaim and compaction in the slow path, both of which are
+ * horrendously expensive. We just want kmalloc to fail fast and fall back to
+ * vmalloc if it can't get something straight away from the free lists or buddy
+ * allocator. Hence we have to open code kvmalloc ourselves here.
+ *
+ * Also, we are in memalloc_nofs_save task context here, so despite the use of
+ * GFP_KERNEL here, we are actually going to be doing GFP_NOFS allocations. This
+ * is actually the only way to make vmalloc() do GFP_NOFS allocations, so let's
+ * just all pretend this is a GFP_KERNEL context operation....
+ */
+static inline void *
+xlog_cil_kvmalloc(
+ size_t buf_size)
+{
+ gfp_t flags = GFP_KERNEL;
+ void *p;
+
+ flags &= ~__GFP_DIRECT_RECLAIM;
+ flags |= __GFP_NOWARN | __GFP_NORETRY;
+ do {
+ p = kmalloc(buf_size, flags);
+ if (!p)
+ p = vmalloc(buf_size);
+ } while (!p);
+
+ return p;
+}
+
+/*
* Allocate or pin log vector buffers for CIL insertion.
*
* The CIL currently uses disposable buffers for copying a snapshot of the
@@ -203,25 +236,16 @@ xlog_cil_alloc_shadow_bufs(
*/
if (!lip->li_lv_shadow ||
buf_size > lip->li_lv_shadow->lv_size) {
-
/*
* We free and allocate here as a realloc would copy
- * unnecessary data. We don't use kmem_zalloc() for the
+ * unnecessary data. We don't use kvzalloc() for the
* same reason - we don't need to zero the data area in
* the buffer, only the log vector header and the iovec
* storage.
*/
kmem_free(lip->li_lv_shadow);
+ lv = xlog_cil_kvmalloc(buf_size);
- /*
- * We are in transaction context, which means this
- * allocation will pick up GFP_NOFS from the
- * memalloc_nofs_save/restore context the transaction
- * holds. This means we can use GFP_KERNEL here so the
- * generic kvmalloc() code will run vmalloc on
- * contiguous page allocation failure as we require.
- */
- lv = kvmalloc(buf_size, GFP_KERNEL);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
lv->lv_item = lip;
@@ -1442,9 +1466,9 @@ out_shutdown:
*/
bool
xfs_log_item_in_current_chkpt(
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip)
{
- struct xfs_cil_ctx *ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
+ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
if (list_empty(&lip->li_cil))
return false;
@@ -1454,7 +1478,7 @@ xfs_log_item_in_current_chkpt(
* first checkpoint it is written to. Hence if it is different to the
* current sequence, we're in a new checkpoint.
*/
- return lip->li_seq == ctx->sequence;
+ return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
}
/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 53366cc0bc9e..96c997ed2ec8 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -27,7 +27,7 @@
#include "xfs_buf_item.h"
#include "xfs_ag.h"
#include "xfs_quota.h"
-
+#include "xfs_reflink.h"
#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
@@ -3498,6 +3498,28 @@ xlog_recover_finish(
xlog_recover_process_iunlinks(log);
xlog_recover_check_summary(log);
+
+ /*
+ * Recover any CoW staging blocks that are still referenced by the
+ * ondisk refcount metadata. During mount there cannot be any live
+ * staging extents as we have not permitted any user modifications.
+ * Therefore, it is safe to free them all right now, even on a
+ * read-only mount.
+ */
+ error = xfs_reflink_recover_cow(log->l_mp);
+ if (error) {
+ xfs_alert(log->l_mp,
+ "Failed to recover leftover CoW staging extents, err %d.",
+ error);
+ /*
+ * If we get an error here, make sure the log is shut down
+ * but return zero so that any log items committed since the
+ * end of intents processing can be pushed through the CIL
+ * and AIL.
+ */
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+ }
+
return 0;
}
@@ -3528,8 +3550,6 @@ xlog_recover_check_summary(
uint64_t ifree;
int error;
- mp = log->l_mp;
-
freeblks = 0LL;
itotal = 0LL;
ifree = 0LL;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 359109b6f0d3..bed73e8002a5 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -936,15 +936,6 @@ xfs_mountfs(
xfs_warn(mp,
"Unable to allocate reserve blocks. Continuing without reserve pool.");
- /* Recover any CoW blocks that never got remapped. */
- error = xfs_reflink_recover_cow(mp);
- if (error) {
- xfs_err(mp,
- "Error %d recovering leftover CoW allocations.", error);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- goto out_quota;
- }
-
/* Reserve AG blocks for future btree expansion. */
error = xfs_fs_reserve_ag_blocks(mp);
if (error && error != -ENOSPC)
@@ -955,7 +946,6 @@ xfs_mountfs(
out_agresv:
xfs_fs_unreserve_ag_blocks(mp);
- out_quota:
xfs_qm_unmount_quotas(mp);
out_rtunmount:
xfs_rtunmount_inodes(mp);
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 47fe60e1a887..7d5a31827681 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -303,13 +303,6 @@ xfs_qm_scall_setqlim(
return 0;
/*
- * We don't want to race with a quotaoff so take the quotaoff lock.
- * We don't hold an inode lock, so there's nothing else to stop
- * a quotaoff from happening.
- */
- mutex_lock(&q->qi_quotaofflock);
-
- /*
* Get the dquot (locked) before we start, as we need to do a
* transaction to allocate it if it doesn't exist. Once we have the
* dquot, unlock it so we can start the next transaction safely. We hold
@@ -319,7 +312,7 @@ xfs_qm_scall_setqlim(
error = xfs_qm_dqget(mp, id, type, true, &dqp);
if (error) {
ASSERT(error != -ENOENT);
- goto out_unlock;
+ return error;
}
defq = xfs_get_defquota(q, xfs_dquot_type(dqp));
@@ -415,8 +408,6 @@ xfs_qm_scall_setqlim(
out_rele:
xfs_qm_dqrele(dqp);
-out_unlock:
- mutex_unlock(&q->qi_quotaofflock);
return error;
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cb0edb1d68ef..8b6c7163f684 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -749,7 +749,10 @@ xfs_reflink_end_cow(
}
/*
- * Free leftover CoW reservations that didn't get cleaned out.
+ * Free all CoW staging blocks that are still referenced by the ondisk refcount
+ * metadata. The ondisk metadata does not track which inode created the
+ * staging extent, so callers must ensure that there are no cached inodes with
+ * live CoW staging extents.
*/
int
xfs_reflink_recover_cow(
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 778b57b1f020..c7ac486ca5d3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1739,15 +1739,6 @@ xfs_remount_rw(
*/
xfs_restore_resvblks(mp);
xfs_log_work_queue(mp);
-
- /* Recover any CoW blocks that never got remapped. */
- error = xfs_reflink_recover_cow(mp);
- if (error) {
- xfs_err(mp,
- "Error %d recovering leftover CoW allocations.", error);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return error;
- }
xfs_blockgc_start(mp);
/* Create the per-AG metadata reservation pool .*/
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index fc2c6a404647..affbedf78160 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -22,6 +22,7 @@
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
+#include "xfs_error.h"
/* ----- Kernel only functions below ----- */
int
@@ -96,17 +97,15 @@ xfs_readlink_bmap_ilocked(
int
xfs_readlink(
- struct xfs_inode *ip,
- char *link)
+ struct xfs_inode *ip,
+ char *link)
{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fsize_t pathlen;
- int error = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fsize_t pathlen;
+ int error = -EFSCORRUPTED;
trace_xfs_readlink(ip);
- ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
-
if (xfs_is_shutdown(mp))
return -EIO;
@@ -121,12 +120,22 @@ xfs_readlink(
__func__, (unsigned long long) ip->i_ino,
(long long) pathlen);
ASSERT(0);
- error = -EFSCORRUPTED;
goto out;
}
-
- error = xfs_readlink_bmap_ilocked(ip, link);
+ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
+ /*
+ * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED
+ * if if_data is junk.
+ */
+ if (XFS_IS_CORRUPT(ip->i_mount, !ip->i_df.if_u1.if_data))
+ goto out;
+
+ memcpy(link, ip->i_df.if_u1.if_data, pathlen + 1);
+ error = 0;
+ } else {
+ error = xfs_readlink_bmap_ilocked(ip, link);
+ }
out:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -184,8 +193,8 @@ xfs_symlink(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
- mapped_fsgid(mnt_userns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
+ mapped_fsgid(mnt_userns, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 8608f804388f..574b80c29fe1 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -67,11 +67,12 @@ static const struct sysfs_ops xfs_sysfs_ops = {
static struct attribute *xfs_mp_attrs[] = {
NULL,
};
+ATTRIBUTE_GROUPS(xfs_mp);
struct kobj_type xfs_mp_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
- .default_attrs = xfs_mp_attrs,
+ .default_groups = xfs_mp_groups,
};
#ifdef DEBUG
@@ -239,11 +240,12 @@ static struct attribute *xfs_dbg_attrs[] = {
#endif
NULL,
};
+ATTRIBUTE_GROUPS(xfs_dbg);
struct kobj_type xfs_dbg_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
- .default_attrs = xfs_dbg_attrs,
+ .default_groups = xfs_dbg_groups,
};
#endif /* DEBUG */
@@ -296,11 +298,12 @@ static struct attribute *xfs_stats_attrs[] = {
ATTR_LIST(stats_clear),
NULL,
};
+ATTRIBUTE_GROUPS(xfs_stats);
struct kobj_type xfs_stats_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
- .default_attrs = xfs_stats_attrs,
+ .default_groups = xfs_stats_groups,
};
/* xlog */
@@ -381,11 +384,12 @@ static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(write_grant_head),
NULL,
};
+ATTRIBUTE_GROUPS(xfs_log);
struct kobj_type xfs_log_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
- .default_attrs = xfs_log_attrs,
+ .default_groups = xfs_log_groups,
};
/*
@@ -534,12 +538,12 @@ static struct attribute *xfs_error_attrs[] = {
ATTR_LIST(retry_timeout_seconds),
NULL,
};
-
+ATTRIBUTE_GROUPS(xfs_error);
static struct kobj_type xfs_error_cfg_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
- .default_attrs = xfs_error_attrs,
+ .default_groups = xfs_error_groups,
};
static struct kobj_type xfs_error_ktype = {
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 234a9d9c2f43..59e2f9031b9f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -942,8 +942,17 @@ xfs_trans_cancel(
trace_xfs_trans_cancel(tp, _RET_IP_);
- if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
+ /*
+ * It's never valid to cancel a transaction with deferred ops attached,
+ * because the transaction is effectively dirty. Complain about this
+ * loudly before freeing the in-memory defer items.
+ */
+ if (!list_empty(&tp->t_dfops)) {
+ ASSERT(xfs_is_shutdown(mp) || list_empty(&tp->t_dfops));
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ dirty = true;
xfs_defer_cancel(tp);
+ }
/*
* See if the caller is relying on us to shut down the