about summary refs log tree commit diff stats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/Makefile71
-rw-r--r--fs/xfs/kmem.c21
-rw-r--r--fs/xfs/libxfs/xfs_ag.h (renamed from fs/xfs/xfs_ag.h)42
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c (renamed from fs/xfs/xfs_alloc.c)78
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h (renamed from fs/xfs/xfs_alloc.h)0
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c (renamed from fs/xfs/xfs_alloc_btree.c)19
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.h (renamed from fs/xfs/xfs_alloc_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_attr.c (renamed from fs/xfs/xfs_attr.c)444
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c (renamed from fs/xfs/xfs_attr_leaf.c)298
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h (renamed from fs/xfs/xfs_attr_leaf.h)3
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c (renamed from fs/xfs/xfs_attr_remote.c)101
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h (renamed from fs/xfs/xfs_attr_remote.h)0
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h (renamed from fs/xfs/xfs_attr_sf.h)0
-rw-r--r--fs/xfs/libxfs/xfs_bit.h (renamed from fs/xfs/xfs_bit.h)7
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c (renamed from fs/xfs/xfs_bmap.c)506
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h (renamed from fs/xfs/xfs_bmap.h)23
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c (renamed from fs/xfs/xfs_bmap_btree.c)120
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h (renamed from fs/xfs/xfs_bmap_btree.h)2
-rw-r--r--fs/xfs/libxfs/xfs_btree.c (renamed from fs/xfs/xfs_btree.c)198
-rw-r--r--fs/xfs/libxfs/xfs_btree.h (renamed from fs/xfs/xfs_btree.h)7
-rw-r--r--fs/xfs/libxfs/xfs_cksum.h (renamed from fs/xfs/xfs_cksum.h)0
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c (renamed from fs/xfs/xfs_da_btree.c)241
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h (renamed from fs/xfs/xfs_da_btree.h)28
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c (renamed from fs/xfs/xfs_da_format.c)36
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h (renamed from fs/xfs/xfs_da_format.h)154
-rw-r--r--fs/xfs/libxfs/xfs_dinode.h (renamed from fs/xfs/xfs_dinode.h)2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c (renamed from fs/xfs/xfs_dir2.c)470
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h (renamed from fs/xfs/xfs_dir2.h)30
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c (renamed from fs/xfs/xfs_dir2_block.c)130
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c (renamed from fs/xfs/xfs_dir2_data.c)111
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c (renamed from fs/xfs/xfs_dir2_leaf.c)241
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c (renamed from fs/xfs/xfs_dir2_node.c)271
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h (renamed from fs/xfs/xfs_dir2_priv.h)142
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c (renamed from fs/xfs/xfs_dir2_sf.c)168
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c (renamed from fs/xfs/xfs_dquot_buf.c)20
-rw-r--r--fs/xfs/libxfs/xfs_format.h (renamed from fs/xfs/xfs_format.h)30
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c (renamed from fs/xfs/xfs_ialloc.c)819
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h (renamed from fs/xfs/xfs_ialloc.h)23
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c (renamed from fs/xfs/xfs_ialloc_btree.c)87
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h (renamed from fs/xfs/xfs_ialloc_btree.h)3
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c (renamed from fs/xfs/xfs_inode_buf.c)34
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h (renamed from fs/xfs/xfs_inode_buf.h)0
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c (renamed from fs/xfs/xfs_inode_fork.c)56
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h (renamed from fs/xfs/xfs_inode_fork.h)3
-rw-r--r--fs/xfs/libxfs/xfs_inum.h (renamed from fs/xfs/xfs_inum.h)4
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h (renamed from fs/xfs/xfs_log_format.h)4
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h (renamed from fs/xfs/xfs_log_recover.h)0
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c (renamed from fs/xfs/xfs_log_rlimit.c)2
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h (renamed from fs/xfs/xfs_quota_defs.h)4
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c (renamed from fs/xfs/xfs_rtbitmap.c)1
-rw-r--r--fs/xfs/libxfs/xfs_sb.c (renamed from fs/xfs/xfs_sb.c)116
-rw-r--r--fs/xfs/libxfs/xfs_sb.h (renamed from fs/xfs/xfs_sb.h)245
-rw-r--r--fs/xfs/libxfs/xfs_shared.h (renamed from fs/xfs/xfs_shared.h)6
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c (renamed from fs/xfs/xfs_symlink_remote.c)21
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c (renamed from fs/xfs/xfs_trans_resv.c)146
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.h (renamed from fs/xfs/xfs_trans_resv.h)3
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h (renamed from fs/xfs/xfs_trans_space.h)12
-rw-r--r--fs/xfs/xfs_acl.c161
-rw-r--r--fs/xfs/xfs_acl.h9
-rw-r--r--fs/xfs/xfs_aops.c290
-rw-r--r--fs/xfs/xfs_attr_inactive.c22
-rw-r--r--fs/xfs/xfs_attr_list.c48
-rw-r--r--fs/xfs/xfs_bmap_util.c389
-rw-r--r--fs/xfs/xfs_bmap_util.h15
-rw-r--r--fs/xfs/xfs_buf.c161
-rw-r--r--fs/xfs/xfs_buf.h54
-rw-r--r--fs/xfs/xfs_buf_item.c152
-rw-r--r--fs/xfs/xfs_dir2_readdir.c161
-rw-r--r--fs/xfs/xfs_discard.c18
-rw-r--r--fs/xfs/xfs_dquot.c107
-rw-r--r--fs/xfs/xfs_dquot.h17
-rw-r--r--fs/xfs/xfs_dquot_item.c67
-rw-r--r--fs/xfs/xfs_dquot_item.h3
-rw-r--r--fs/xfs/xfs_error.c50
-rw-r--r--fs/xfs/xfs_error.h14
-rw-r--r--fs/xfs/xfs_export.c8
-rw-r--r--fs/xfs/xfs_extfree_item.c23
-rw-r--r--fs/xfs/xfs_file.c276
-rw-r--r--fs/xfs/xfs_filestream.c684
-rw-r--r--fs/xfs/xfs_filestream.h34
-rw-r--r--fs/xfs/xfs_fs.h8
-rw-r--r--fs/xfs/xfs_fsops.c89
-rw-r--r--fs/xfs/xfs_icache.c160
-rw-r--r--fs/xfs/xfs_icache.h19
-rw-r--r--fs/xfs/xfs_icreate_item.c10
-rw-r--r--fs/xfs/xfs_inode.c447
-rw-r--r--fs/xfs/xfs_inode.h31
-rw-r--r--fs/xfs/xfs_inode_item.c378
-rw-r--r--fs/xfs/xfs_inode_item.h5
-rw-r--r--fs/xfs/xfs_ioctl.c306
-rw-r--r--fs/xfs/xfs_ioctl32.c110
-rw-r--r--fs/xfs/xfs_iomap.c69
-rw-r--r--fs/xfs/xfs_iops.c291
-rw-r--r--fs/xfs/xfs_iops.h2
-rw-r--r--fs/xfs/xfs_itable.c589
-rw-r--r--fs/xfs/xfs_itable.h23
-rw-r--r--fs/xfs/xfs_linux.h29
-rw-r--r--fs/xfs/xfs_log.c143
-rw-r--r--fs/xfs/xfs_log.h57
-rw-r--r--fs/xfs/xfs_log_cil.c195
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c335
-rw-r--r--fs/xfs/xfs_mount.c159
-rw-r--r--fs/xfs/xfs_mount.h13
-rw-r--r--fs/xfs/xfs_mru_cache.c163
-rw-r--r--fs/xfs/xfs_mru_cache.h31
-rw-r--r--fs/xfs/xfs_qm.c452
-rw-r--r--fs/xfs/xfs_qm.h19
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c66
-rw-r--r--fs/xfs/xfs_quota_priv.h42
-rw-r--r--fs/xfs/xfs_quotaops.c41
-rw-r--r--fs/xfs/xfs_rtalloc.c26
-rw-r--r--fs/xfs/xfs_rtalloc.h2
-rw-r--r--fs/xfs/xfs_stats.c1
-rw-r--r--fs/xfs/xfs_stats.h18
-rw-r--r--fs/xfs/xfs_super.c155
-rw-r--r--fs/xfs/xfs_super.h15
-rw-r--r--fs/xfs/xfs_symlink.c38
-rw-r--r--fs/xfs/xfs_sysfs.c165
-rw-r--r--fs/xfs/xfs_sysfs.h59
-rw-r--r--fs/xfs/xfs_trace.c1
-rw-r--r--fs/xfs/xfs_trace.h61
-rw-r--r--fs/xfs/xfs_trans.c24
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c9
-rw-r--r--fs/xfs/xfs_trans_buf.c49
-rw-r--r--fs/xfs/xfs_trans_dquot.c8
-rw-r--r--fs/xfs/xfs_trans_priv.h3
-rw-r--r--fs/xfs/xfs_types.h31
-rw-r--r--fs/xfs/xfs_vnode.h55
-rw-r--r--fs/xfs/xfs_xattr.c10
133 files changed, 7101 insertions, 6284 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 399e8cec6e60..5d47b4df61ea 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,6 +1,7 @@
config XFS_FS
tristate "XFS filesystem support"
depends on BLOCK
+ depends on (64BIT || LBDAF)
select EXPORTFS
select LIBCRC32C
help
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c21f43506661..d61799949580 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -17,6 +17,7 @@
#
ccflags-y += -I$(src) # needed for trace events
+ccflags-y += -I$(src)/libxfs
ccflags-$(CONFIG_XFS_DEBUG) += -g
@@ -25,6 +26,39 @@ obj-$(CONFIG_XFS_FS) += xfs.o
# this one should be compiled first, as the tracing macros can easily blow up
xfs-y += xfs_trace.o
+# build the libxfs code first
+xfs-y += $(addprefix libxfs/, \
+ xfs_alloc.o \
+ xfs_alloc_btree.o \
+ xfs_attr.o \
+ xfs_attr_leaf.o \
+ xfs_attr_remote.o \
+ xfs_bmap.o \
+ xfs_bmap_btree.o \
+ xfs_btree.o \
+ xfs_da_btree.o \
+ xfs_da_format.o \
+ xfs_dir2.o \
+ xfs_dir2_block.o \
+ xfs_dir2_data.o \
+ xfs_dir2_leaf.o \
+ xfs_dir2_node.o \
+ xfs_dir2_sf.o \
+ xfs_dquot_buf.o \
+ xfs_ialloc.o \
+ xfs_ialloc_btree.o \
+ xfs_inode_fork.o \
+ xfs_inode_buf.o \
+ xfs_log_rlimit.o \
+ xfs_sb.o \
+ xfs_symlink_remote.o \
+ xfs_trans_resv.o \
+ )
+# xfs_rtbitmap is shared with libxfs
+xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
+ xfs_rtbitmap.o \
+ )
+
# highlevel code
xfs-y += xfs_aops.o \
xfs_attr_inactive.o \
@@ -45,53 +79,27 @@ xfs-y += xfs_aops.o \
xfs_ioctl.o \
xfs_iomap.o \
xfs_iops.o \
+ xfs_inode.o \
xfs_itable.o \
xfs_message.o \
xfs_mount.o \
xfs_mru_cache.o \
xfs_super.o \
xfs_symlink.o \
+ xfs_sysfs.o \
xfs_trans.o \
xfs_xattr.o \
kmem.o \
uuid.o
-# code shared with libxfs
-xfs-y += xfs_alloc.o \
- xfs_alloc_btree.o \
- xfs_attr.o \
- xfs_attr_leaf.o \
- xfs_attr_remote.o \
- xfs_bmap.o \
- xfs_bmap_btree.o \
- xfs_btree.o \
- xfs_da_btree.o \
- xfs_da_format.o \
- xfs_dir2.o \
- xfs_dir2_block.o \
- xfs_dir2_data.o \
- xfs_dir2_leaf.o \
- xfs_dir2_node.o \
- xfs_dir2_sf.o \
- xfs_dquot_buf.o \
- xfs_ialloc.o \
- xfs_ialloc_btree.o \
- xfs_icreate_item.o \
- xfs_inode.o \
- xfs_inode_fork.o \
- xfs_inode_buf.o \
- xfs_log_recover.o \
- xfs_log_rlimit.o \
- xfs_sb.o \
- xfs_symlink_remote.o \
- xfs_trans_resv.o
-
# low-level transaction/log code
xfs-y += xfs_log.o \
xfs_log_cil.o \
xfs_buf_item.o \
xfs_extfree_item.o \
+ xfs_icreate_item.o \
xfs_inode_item.o \
+ xfs_log_recover.o \
xfs_trans_ail.o \
xfs_trans_buf.o \
xfs_trans_extfree.o \
@@ -107,8 +115,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
xfs_quotaops.o
# xfs_rtbitmap is shared with libxfs
-xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \
- xfs_rtbitmap.o
+xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_PROC_FS) += xfs_stats.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 66a36befc5c0..844e288b9576 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
void *
kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
{
+ unsigned noio_flag = 0;
void *ptr;
+ gfp_t lflags;
ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
if (ptr)
return ptr;
- return vzalloc(size);
+
+ /*
+ * __vmalloc() will allocate data pages and auxiliary structures (e.g.
+ * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
+ * here. Hence we need to tell memory reclaim that we are in such a
+ * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
+ * the filesystem here and potentially deadlocking.
+ */
+ if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+ noio_flag = memalloc_noio_save();
+
+ lflags = kmem_flags_convert(flags);
+ ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+
+ if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+ memalloc_noio_restore(noio_flag);
+
+ return ptr;
}
void
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 3fc109819c34..6e247a99f5db 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -89,6 +89,8 @@ typedef struct xfs_agf {
/* structure must be padded to 64 bit alignment */
} xfs_agf_t;
+#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc)
+
#define XFS_AGF_MAGICNUM 0x00000001
#define XFS_AGF_VERSIONNUM 0x00000002
#define XFS_AGF_SEQNO 0x00000004
@@ -158,28 +160,38 @@ typedef struct xfs_agi {
* still being referenced.
*/
__be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
-
+ /*
+ * This marks the end of logging region 1 and start of logging region 2.
+ */
uuid_t agi_uuid; /* uuid of filesystem */
__be32 agi_crc; /* crc of agi sector */
__be32 agi_pad32;
__be64 agi_lsn; /* last write sequence */
+ __be32 agi_free_root; /* root of the free inode btree */
+ __be32 agi_free_level;/* levels in free inode btree */
+
/* structure must be padded to 64 bit alignment */
} xfs_agi_t;
-#define XFS_AGI_MAGICNUM 0x00000001
-#define XFS_AGI_VERSIONNUM 0x00000002
-#define XFS_AGI_SEQNO 0x00000004
-#define XFS_AGI_LENGTH 0x00000008
-#define XFS_AGI_COUNT 0x00000010
-#define XFS_AGI_ROOT 0x00000020
-#define XFS_AGI_LEVEL 0x00000040
-#define XFS_AGI_FREECOUNT 0x00000080
-#define XFS_AGI_NEWINO 0x00000100
-#define XFS_AGI_DIRINO 0x00000200
-#define XFS_AGI_UNLINKED 0x00000400
-#define XFS_AGI_NUM_BITS 11
-#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1)
+#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
+
+#define XFS_AGI_MAGICNUM (1 << 0)
+#define XFS_AGI_VERSIONNUM (1 << 1)
+#define XFS_AGI_SEQNO (1 << 2)
+#define XFS_AGI_LENGTH (1 << 3)
+#define XFS_AGI_COUNT (1 << 4)
+#define XFS_AGI_ROOT (1 << 5)
+#define XFS_AGI_LEVEL (1 << 6)
+#define XFS_AGI_FREECOUNT (1 << 7)
+#define XFS_AGI_NEWINO (1 << 8)
+#define XFS_AGI_DIRINO (1 << 9)
+#define XFS_AGI_UNLINKED (1 << 10)
+#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */
+#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1)
+#define XFS_AGI_FREE_ROOT (1 << 11)
+#define XFS_AGI_FREE_LEVEL (1 << 12)
+#define XFS_AGI_NUM_BITS_R2 13
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
@@ -222,6 +234,8 @@ typedef struct xfs_agfl {
__be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
} xfs_agfl_t;
+#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
+
/*
* tags for inode radix tree
*/
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9eab2dfdcbb5..4bffffe038a1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -257,16 +257,14 @@ xfs_alloc_fix_len(
k = rlen % args->prod;
if (k == args->mod)
return;
- if (k > args->mod) {
- if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
- return;
- } else {
- if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
- (int)args->minlen)
- return;
- }
- ASSERT(rlen >= args->minlen);
- ASSERT(rlen <= args->maxlen);
+ if (k > args->mod)
+ rlen = rlen - (k - args->mod);
+ else
+ rlen = rlen - args->prod + (args->mod - k);
+ if ((int)rlen < (int)args->minlen)
+ return;
+ ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
+ ASSERT(rlen % args->prod == args->mod);
args->len = rlen;
}
@@ -474,7 +472,6 @@ xfs_agfl_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- int agfl_ok = 1;
/*
* There is no verification of non-crc AGFLs because mkfs does not
@@ -485,15 +482,13 @@ xfs_agfl_read_verify(
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
- agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agfl, agfl_crc));
+ if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_agfl_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
- agfl_ok = agfl_ok && xfs_agfl_verify(bp);
-
- if (!agfl_ok) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -508,16 +503,15 @@ xfs_agfl_write_verify(
return;
if (!xfs_agfl_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
if (bip)
XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agfl, agfl_crc));
+ xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}
const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -545,7 +539,6 @@ xfs_alloc_read_agfl(
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
if (error)
return error;
- ASSERT(!xfs_buf_geterror(bp));
xfs_buf_set_ref(bp, XFS_AGFL_REF);
*bpp = bp;
return 0;
@@ -566,7 +559,7 @@ xfs_alloc_update_counters(
xfs_trans_agblocks_delta(tp, len);
if (unlikely(be32_to_cpu(agf->agf_freeblks) >
be32_to_cpu(agf->agf_length)))
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
return 0;
@@ -2238,19 +2231,17 @@ xfs_agf_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- int agf_ok = 1;
-
- if (xfs_sb_version_hascrc(&mp->m_sb))
- agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agf, agf_crc));
- agf_ok = agf_ok && xfs_agf_verify(mp, bp);
-
- if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
- XFS_RANDOM_ALLOC_READ_AGF))) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
+ XFS_ERRTAG_ALLOC_READ_AGF,
+ XFS_RANDOM_ALLOC_READ_AGF))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -2261,8 +2252,8 @@ xfs_agf_write_verify(
struct xfs_buf_log_item *bip = bp->b_fspriv;
if (!xfs_agf_verify(mp, bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -2272,8 +2263,7 @@ xfs_agf_write_verify(
if (bip)
XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agf, agf_crc));
+ xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}
const struct xfs_buf_ops xfs_agf_buf_ops = {
@@ -2611,11 +2601,11 @@ xfs_free_extent(
*/
args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
if (args.agno >= args.mp->m_sb.sb_agcount)
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
if (args.agbno >= args.mp->m_sb.sb_agblocks)
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
args.pag = xfs_perag_get(args.mp, args.agno);
ASSERT(args.pag);
@@ -2627,7 +2617,7 @@ xfs_free_extent(
/* validate the extent size is legal now we have the agf locked */
if (args.agbno + len >
be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto error0;
}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index feacb061bab7..feacb061bab7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 13085429e523..e0e83e24d3ef 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -70,7 +70,6 @@ xfs_allocbt_alloc_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
- int length,
int *stat)
{
int error;
@@ -355,12 +354,14 @@ static void
xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
- if (!(xfs_btree_sblock_verify_crc(bp) &&
- xfs_allocbt_verify(bp))) {
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_allocbt_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
}
}
@@ -370,9 +371,9 @@ xfs_allocbt_write_verify(
{
if (!xfs_allocbt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
}
xfs_btree_sblock_calc_crc(bp);
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 45e189e7e81c..45e189e7e81c 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index b86127072ac3..353fb425faef 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -77,17 +77,27 @@ STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
STATIC int
-xfs_attr_name_to_xname(
- struct xfs_name *xname,
- const unsigned char *aname)
+xfs_attr_args_init(
+ struct xfs_da_args *args,
+ struct xfs_inode *dp,
+ const unsigned char *name,
+ int flags)
{
- if (!aname)
- return EINVAL;
- xname->name = aname;
- xname->len = strlen((char *)aname);
- if (xname->len >= MAXNAMELEN)
- return EFAULT; /* match IRIX behaviour */
+ if (!name)
+ return -EINVAL;
+
+ memset(args, 0, sizeof(*args));
+ args->geo = dp->i_mount->m_attr_geo;
+ args->whichfork = XFS_ATTR_FORK;
+ args->dp = dp;
+ args->flags = flags;
+ args->name = name;
+ args->namelen = strlen((const char *)name);
+ if (args->namelen >= MAXNAMELEN)
+ return -EFAULT; /* match IRIX behaviour */
+
+ args->hashval = xfs_da_hashname(args->name, args->namelen);
return 0;
}
@@ -106,78 +116,46 @@ xfs_inode_hasattr(
* Overall external interface routines.
*========================================================================*/
-STATIC int
-xfs_attr_get_int(
+int
+xfs_attr_get(
struct xfs_inode *ip,
- struct xfs_name *name,
+ const unsigned char *name,
unsigned char *value,
int *valuelenp,
int flags)
{
- xfs_da_args_t args;
- int error;
+ struct xfs_da_args args;
+ uint lock_mode;
+ int error;
+
+ XFS_STATS_INC(xs_attr_get);
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
if (!xfs_inode_hasattr(ip))
- return ENOATTR;
+ return -ENOATTR;
+
+ error = xfs_attr_args_init(&args, ip, name, flags);
+ if (error)
+ return error;
- /*
- * Fill in the arg structure for this request.
- */
- memset((char *)&args, 0, sizeof(args));
- args.name = name->name;
- args.namelen = name->len;
args.value = value;
args.valuelen = *valuelenp;
- args.flags = flags;
- args.hashval = xfs_da_hashname(args.name, args.namelen);
- args.dp = ip;
- args.whichfork = XFS_ATTR_FORK;
- /*
- * Decide on what work routines to call based on the inode size.
- */
- if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+ lock_mode = xfs_ilock_attr_map_shared(ip);
+ if (!xfs_inode_hasattr(ip))
+ error = -ENOATTR;
+ else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
error = xfs_attr_shortform_getvalue(&args);
- } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
+ else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
error = xfs_attr_leaf_get(&args);
- } else {
+ else
error = xfs_attr_node_get(&args);
- }
+ xfs_iunlock(ip, lock_mode);
- /*
- * Return the number of bytes in the value to the caller.
- */
*valuelenp = args.valuelen;
-
- if (error == EEXIST)
- error = 0;
- return(error);
-}
-
-int
-xfs_attr_get(
- xfs_inode_t *ip,
- const unsigned char *name,
- unsigned char *value,
- int *valuelenp,
- int flags)
-{
- int error;
- struct xfs_name xname;
-
- XFS_STATS_INC(xs_attr_get);
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return(EIO);
-
- error = xfs_attr_name_to_xname(&xname, name);
- if (error)
- return error;
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return(error);
+ return error == -EEXIST ? 0 : error;
}
/*
@@ -185,12 +163,10 @@ xfs_attr_get(
*/
STATIC int
xfs_attr_calc_size(
- struct xfs_inode *ip,
- int namelen,
- int valuelen,
+ struct xfs_da_args *args,
int *local)
{
- struct xfs_mount *mp = ip->i_mount;
+ struct xfs_mount *mp = args->dp->i_mount;
int size;
int nblks;
@@ -198,12 +174,10 @@ xfs_attr_calc_size(
* Determine space new attribute will use, and if it would be
* "local" or "remote" (note: local != inline).
*/
- size = xfs_attr_leaf_newentsize(namelen, valuelen,
- mp->m_sb.sb_blocksize, local);
-
+ size = xfs_attr_leaf_newentsize(args, local);
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
if (*local) {
- if (size > (mp->m_sb.sb_blocksize >> 1)) {
+ if (size > (args->geo->blksize / 2)) {
/* Double split possible */
nblks *= 2;
}
@@ -212,7 +186,7 @@ xfs_attr_calc_size(
* Out of line attribute, cannot double split, but
* make room for the attribute value itself.
*/
- uint dblocks = XFS_B_TO_FSB(mp, valuelen);
+ uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
nblks += dblocks;
nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
}
@@ -220,26 +194,38 @@ xfs_attr_calc_size(
return nblks;
}
-STATIC int
-xfs_attr_set_int(
- struct xfs_inode *dp,
- struct xfs_name *name,
- unsigned char *value,
- int valuelen,
- int flags)
+int
+xfs_attr_set(
+ struct xfs_inode *dp,
+ const unsigned char *name,
+ unsigned char *value,
+ int valuelen,
+ int flags)
{
- xfs_da_args_t args;
- xfs_fsblock_t firstblock;
- xfs_bmap_free_t flist;
- int error, err2, committed;
struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_args args;
+ struct xfs_bmap_free flist;
struct xfs_trans_res tres;
+ xfs_fsblock_t firstblock;
int rsvd = (flags & ATTR_ROOT) != 0;
- int local;
+ int error, err2, committed, local;
+
+ XFS_STATS_INC(xs_attr_set);
+
+ if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+ return -EIO;
+
+ error = xfs_attr_args_init(&args, dp, name, flags);
+ if (error)
+ return error;
+
+ args.value = value;
+ args.valuelen = valuelen;
+ args.firstblock = &firstblock;
+ args.flist = &flist;
+ args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+ args.total = xfs_attr_calc_size(&args, &local);
- /*
- * Attach the dquots to the inode.
- */
error = xfs_qm_dqattach(dp, 0);
if (error)
return error;
@@ -250,32 +236,14 @@ xfs_attr_set_int(
*/
if (XFS_IFORK_Q(dp) == 0) {
int sf_size = sizeof(xfs_attr_sf_hdr_t) +
- XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
+ XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
- if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
- return(error);
+ error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
+ if (error)
+ return error;
}
/*
- * Fill in the arg structure for this request.
- */
- memset((char *)&args, 0, sizeof(args));
- args.name = name->name;
- args.namelen = name->len;
- args.value = value;
- args.valuelen = valuelen;
- args.flags = flags;
- args.hashval = xfs_da_hashname(args.name, args.namelen);
- args.dp = dp;
- args.firstblock = &firstblock;
- args.flist = &flist;
- args.whichfork = XFS_ATTR_FORK;
- args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-
- /* Size is now blocks for attribute data */
- args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local);
-
- /*
* Start our first transaction of the day.
*
* All future transactions during this code must be "chained" off
@@ -302,7 +270,7 @@ xfs_attr_set_int(
error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
if (error) {
xfs_trans_cancel(args.trans, 0);
- return(error);
+ return error;
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -312,7 +280,7 @@ xfs_attr_set_int(
if (error) {
xfs_iunlock(dp, XFS_ILOCK_EXCL);
xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
- return (error);
+ return error;
}
xfs_trans_ijoin(args.trans, dp, 0);
@@ -321,9 +289,9 @@ xfs_attr_set_int(
* If the attribute list is non-existent or a shortform list,
* upgrade it to a single-leaf-block attribute list.
*/
- if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
- ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) &&
- (dp->i_d.di_anextents == 0))) {
+ if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
+ (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+ dp->i_d.di_anextents == 0)) {
/*
* Build initial attribute list (if required).
@@ -336,7 +304,7 @@ xfs_attr_set_int(
* the inode.
*/
error = xfs_attr_shortform_addname(&args);
- if (error != ENOSPC) {
+ if (error != -ENOSPC) {
/*
* Commit the shortform mods, and we're done.
* NOTE: this is also the error path (EEXIST, etc).
@@ -348,9 +316,8 @@ xfs_attr_set_int(
* the transaction goes to disk before returning
* to the user.
*/
- if (mp->m_flags & XFS_MOUNT_WSYNC) {
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(args.trans);
- }
if (!error && (flags & ATTR_KERNOTIME) == 0) {
xfs_trans_ichgtime(args.trans, dp,
@@ -360,7 +327,7 @@ xfs_attr_set_int(
XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error == 0 ? err2 : error);
+ return error ? error : err2;
}
/*
@@ -398,22 +365,19 @@ xfs_attr_set_int(
}
- if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+ if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
error = xfs_attr_leaf_addname(&args);
- } else {
+ else
error = xfs_attr_node_addname(&args);
- }
- if (error) {
+ if (error)
goto out;
- }
/*
* If this is a synchronous mount, make sure that the
* transaction goes to disk before returning to the user.
*/
- if (mp->m_flags & XFS_MOUNT_WSYNC) {
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(args.trans);
- }
if ((flags & ATTR_KERNOTIME) == 0)
xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
@@ -425,65 +389,47 @@ xfs_attr_set_int(
error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error);
+ return error;
out:
- if (args.trans)
+ if (args.trans) {
xfs_trans_cancel(args.trans,
XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+ }
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error);
+ return error;
}
+/*
+ * Generic handler routine to remove a name from an attribute list.
+ * Transitions attribute list from Btree to shortform as necessary.
+ */
int
-xfs_attr_set(
- xfs_inode_t *dp,
- const unsigned char *name,
- unsigned char *value,
- int valuelen,
- int flags)
+xfs_attr_remove(
+ struct xfs_inode *dp,
+ const unsigned char *name,
+ int flags)
{
- int error;
- struct xfs_name xname;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_args args;
+ struct xfs_bmap_free flist;
+ xfs_fsblock_t firstblock;
+ int error;
- XFS_STATS_INC(xs_attr_set);
+ XFS_STATS_INC(xs_attr_remove);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return (EIO);
+ return -EIO;
+
+ if (!xfs_inode_hasattr(dp))
+ return -ENOATTR;
- error = xfs_attr_name_to_xname(&xname, name);
+ error = xfs_attr_args_init(&args, dp, name, flags);
if (error)
return error;
- return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
-}
-
-/*
- * Generic handler routine to remove a name from an attribute list.
- * Transitions attribute list from Btree to shortform as necessary.
- */
-STATIC int
-xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
-{
- xfs_da_args_t args;
- xfs_fsblock_t firstblock;
- xfs_bmap_free_t flist;
- int error;
- xfs_mount_t *mp = dp->i_mount;
-
- /*
- * Fill in the arg structure for this request.
- */
- memset((char *)&args, 0, sizeof(args));
- args.name = name->name;
- args.namelen = name->len;
- args.flags = flags;
- args.hashval = xfs_da_hashname(args.name, args.namelen);
- args.dp = dp;
args.firstblock = &firstblock;
args.flist = &flist;
- args.total = 0;
- args.whichfork = XFS_ATTR_FORK;
/*
* we have no control over the attribute names that userspace passes us
@@ -492,9 +438,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
*/
args.op_flags = XFS_DA_OP_OKNOENT;
- /*
- * Attach the dquots to the inode.
- */
error = xfs_qm_dqattach(dp, 0);
if (error)
return error;
@@ -523,7 +466,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
XFS_ATTRRM_SPACE_RES(mp), 0);
if (error) {
xfs_trans_cancel(args.trans, 0);
- return(error);
+ return error;
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -533,35 +476,26 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
*/
xfs_trans_ijoin(args.trans, dp, 0);
- /*
- * Decide on what work routines to call based on the inode size.
- */
if (!xfs_inode_hasattr(dp)) {
- error = XFS_ERROR(ENOATTR);
- goto out;
- }
- if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+ error = -ENOATTR;
+ } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
error = xfs_attr_shortform_remove(&args);
- if (error) {
- goto out;
- }
} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
error = xfs_attr_leaf_removename(&args);
} else {
error = xfs_attr_node_removename(&args);
}
- if (error) {
+
+ if (error)
goto out;
- }
/*
* If this is a synchronous mount, make sure that the
* transaction goes to disk before returning to the user.
*/
- if (mp->m_flags & XFS_MOUNT_WSYNC) {
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(args.trans);
- }
if ((flags & ATTR_KERNOTIME) == 0)
xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
@@ -573,45 +507,17 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error);
+ return error;
out:
- if (args.trans)
+ if (args.trans) {
xfs_trans_cancel(args.trans,
XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error);
-}
-
-int
-xfs_attr_remove(
- xfs_inode_t *dp,
- const unsigned char *name,
- int flags)
-{
- int error;
- struct xfs_name xname;
-
- XFS_STATS_INC(xs_attr_remove);
-
- if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return (EIO);
-
- error = xfs_attr_name_to_xname(&xname, name);
- if (error)
- return error;
-
- xfs_ilock(dp, XFS_ILOCK_SHARED);
- if (!xfs_inode_hasattr(dp)) {
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
- return XFS_ERROR(ENOATTR);
}
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
-
- return xfs_attr_remove_int(dp, &xname, flags);
+ xfs_iunlock(dp, XFS_ILOCK_EXCL);
+ return error;
}
-
/*========================================================================
* External routines when attribute list is inside the inode
*========================================================================*/
@@ -628,28 +534,28 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
trace_xfs_attr_sf_addname(args);
retval = xfs_attr_shortform_lookup(args);
- if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
- return(retval);
- } else if (retval == EEXIST) {
+ if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
+ return retval;
+ } else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE)
- return(retval);
+ return retval;
retval = xfs_attr_shortform_remove(args);
ASSERT(retval == 0);
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
- return(XFS_ERROR(ENOSPC));
+ return -ENOSPC;
newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
if (!forkoff)
- return(XFS_ERROR(ENOSPC));
+ return -ENOSPC;
xfs_attr_shortform_add(args, forkoff);
- return(0);
+ return 0;
}
@@ -686,10 +592,10 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* the given flags produce an error or call for an atomic rename.
*/
retval = xfs_attr3_leaf_lookup_int(bp, args);
- if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
+ if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
xfs_trans_brelse(args->trans, bp);
return retval;
- } else if (retval == EEXIST) {
+ } else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE) { /* pure create op */
xfs_trans_brelse(args->trans, bp);
return retval;
@@ -697,11 +603,22 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
trace_xfs_attr_leaf_replace(args);
+ /* save the attribute state for later removal */
args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
args->rmtblkcnt2 = args->rmtblkcnt;
+ args->rmtvaluelen2 = args->rmtvaluelen;
+
+ /*
+ * clear the remote attr state now that it is saved so that the
+ * values reflect the state of the attribute we are about to
+ * add, not the attribute we just found and will remove later.
+ */
+ args->rmtblkno = 0;
+ args->rmtblkcnt = 0;
+ args->rmtvaluelen = 0;
}
/*
@@ -709,7 +626,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* if required.
*/
retval = xfs_attr3_leaf_add(bp, args);
- if (retval == ENOSPC) {
+ if (retval == -ENOSPC) {
/*
* Promote the attribute list to the Btree format, then
* Commit that transaction so that the node_addname() call
@@ -725,7 +642,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
- return(error);
+ return error;
}
/*
@@ -741,13 +658,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
error = xfs_trans_roll(&args->trans, dp);
if (error)
- return (error);
+ return error;
/*
* Fob the whole rest of the problem off on the Btree code.
*/
error = xfs_attr_node_addname(args);
- return(error);
+ return error;
}
/*
@@ -756,7 +673,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
error = xfs_trans_roll(&args->trans, dp);
if (error)
- return (error);
+ return error;
/*
* If there was an out-of-line value, allocate the blocks we
@@ -767,7 +684,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
if (args->rmtblkno > 0) {
error = xfs_attr_rmtval_set(args);
if (error)
- return(error);
+ return error;
}
/*
@@ -783,7 +700,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
error = xfs_attr3_leaf_flipflags(args);
if (error)
- return(error);
+ return error;
/*
* Dismantle the "old" attribute/value pair by removing
@@ -793,10 +710,11 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
args->blkno = args->blkno2;
args->rmtblkno = args->rmtblkno2;
args->rmtblkcnt = args->rmtblkcnt2;
+ args->rmtvaluelen = args->rmtvaluelen2;
if (args->rmtblkno) {
error = xfs_attr_rmtval_remove(args);
if (error)
- return(error);
+ return error;
}
/*
@@ -826,7 +744,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
- return(error);
+ return error;
}
/*
@@ -877,7 +795,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
return error;
error = xfs_attr3_leaf_lookup_int(bp, args);
- if (error == ENOATTR) {
+ if (error == -ENOATTR) {
xfs_trans_brelse(args->trans, bp);
return error;
}
@@ -932,7 +850,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
return error;
error = xfs_attr3_leaf_lookup_int(bp, args);
- if (error != EEXIST) {
+ if (error != -EEXIST) {
xfs_trans_brelse(args->trans, bp);
return error;
}
@@ -945,7 +863,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
}
/*========================================================================
- * External routines when attribute list size > XFS_LBSIZE(mp).
+ * External routines when attribute list size > geo->blksize
*========================================================================*/
/*
@@ -978,8 +896,6 @@ restart:
state = xfs_da_state_alloc();
state->args = args;
state->mp = mp;
- state->blocksize = state->mp->m_sb.sb_blocksize;
- state->node_ents = state->mp->m_attr_node_ents;
/*
* Search to see if name already exists, and get back a pointer
@@ -990,25 +906,34 @@ restart:
goto out;
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
- if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
+ if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
goto out;
- } else if (retval == EEXIST) {
+ } else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE)
goto out;
trace_xfs_attr_node_replace(args);
+ /* save the attribute state for later removal */
args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
args->rmtblkcnt2 = args->rmtblkcnt;
+ args->rmtvaluelen2 = args->rmtvaluelen;
+
+ /*
+ * clear the remote attr state now that it is saved so that the
+ * values reflect the state of the attribute we are about to
+ * add, not the attribute we just found and will remove later.
+ */
args->rmtblkno = 0;
args->rmtblkcnt = 0;
+ args->rmtvaluelen = 0;
}
retval = xfs_attr3_leaf_add(blk->bp, state->args);
- if (retval == ENOSPC) {
+ if (retval == -ENOSPC) {
if (state->path.active == 1) {
/*
* Its really a single leaf node, but it had
@@ -1106,7 +1031,7 @@ restart:
if (args->rmtblkno > 0) {
error = xfs_attr_rmtval_set(args);
if (error)
- return(error);
+ return error;
}
/*
@@ -1132,10 +1057,11 @@ restart:
args->blkno = args->blkno2;
args->rmtblkno = args->rmtblkno2;
args->rmtblkcnt = args->rmtblkcnt2;
+ args->rmtvaluelen = args->rmtvaluelen2;
if (args->rmtblkno) {
error = xfs_attr_rmtval_remove(args);
if (error)
- return(error);
+ return error;
}
/*
@@ -1147,8 +1073,6 @@ restart:
state = xfs_da_state_alloc();
state->args = args;
state->mp = mp;
- state->blocksize = state->mp->m_sb.sb_blocksize;
- state->node_ents = state->mp->m_attr_node_ents;
state->inleaf = 0;
error = xfs_da3_node_lookup_int(state, &retval);
if (error)
@@ -1210,8 +1134,8 @@ out:
if (state)
xfs_da_state_free(state);
if (error)
- return(error);
- return(retval);
+ return error;
+ return retval;
}
/*
@@ -1239,14 +1163,12 @@ xfs_attr_node_removename(xfs_da_args_t *args)
state = xfs_da_state_alloc();
state->args = args;
state->mp = dp->i_mount;
- state->blocksize = state->mp->m_sb.sb_blocksize;
- state->node_ents = state->mp->m_attr_node_ents;
/*
* Search to see if name exists, and get back a pointer to it.
*/
error = xfs_da3_node_lookup_int(state, &retval);
- if (error || (retval != EEXIST)) {
+ if (error || (retval != -EEXIST)) {
if (error == 0)
error = retval;
goto out;
@@ -1375,7 +1297,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
out:
xfs_da_state_free(state);
- return(error);
+ return error;
}
/*
@@ -1423,7 +1345,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
}
}
- return(0);
+ return 0;
}
/*
@@ -1454,7 +1376,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
blk->blkno, blk->disk_blkno,
&blk->bp, XFS_ATTR_FORK);
if (error)
- return(error);
+ return error;
} else {
blk->bp = NULL;
}
@@ -1473,13 +1395,13 @@ xfs_attr_refillstate(xfs_da_state_t *state)
blk->blkno, blk->disk_blkno,
&blk->bp, XFS_ATTR_FORK);
if (error)
- return(error);
+ return error;
} else {
blk->bp = NULL;
}
}
- return(0);
+ return 0;
}
/*
@@ -1502,8 +1424,6 @@ xfs_attr_node_get(xfs_da_args_t *args)
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
- state->blocksize = state->mp->m_sb.sb_blocksize;
- state->node_ents = state->mp->m_attr_node_ents;
/*
* Search to see if name exists, and get back a pointer to it.
@@ -1511,7 +1431,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
error = xfs_da3_node_lookup_int(state, &retval);
if (error) {
retval = error;
- } else if (retval == EEXIST) {
+ } else if (retval == -EEXIST) {
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->bp != NULL);
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1535,5 +1455,5 @@ xfs_attr_node_get(xfs_da_args_t *args)
}
xfs_da_state_free(state);
- return(retval);
+ return retval;
}
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 7b126f46a2f9..b1f73dbbf3d8 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -80,11 +80,12 @@ STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state,
/*
* Utility routines.
*/
-STATIC void xfs_attr3_leaf_moveents(struct xfs_attr_leafblock *src_leaf,
+STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
+ struct xfs_attr_leafblock *src_leaf,
struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start,
struct xfs_attr_leafblock *dst_leaf,
struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start,
- int move_count, struct xfs_mount *mp);
+ int move_count);
STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
void
@@ -213,8 +214,8 @@ xfs_attr3_leaf_write_verify(
struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
if (!xfs_attr3_leaf_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -224,7 +225,7 @@ xfs_attr3_leaf_write_verify(
if (bip)
hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
}
/*
@@ -239,13 +240,14 @@ xfs_attr3_leaf_read_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_ATTR3_LEAF_CRC_OFF)) ||
- !xfs_attr3_leaf_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_attr3_leaf_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
@@ -545,7 +547,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
break;
}
if (i == end)
- return(XFS_ERROR(ENOATTR));
+ return -ENOATTR;
/*
* Fix up the attribute fork data, covering the hole
@@ -580,7 +582,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
xfs_sbversion_add_attr2(mp, args->trans);
- return(0);
+ return 0;
}
/*
@@ -609,9 +611,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
continue;
if (!xfs_attr_namesp_match(args->flags, sfe->flags))
continue;
- return(XFS_ERROR(EEXIST));
+ return -EEXIST;
}
- return(XFS_ERROR(ENOATTR));
+ return -ENOATTR;
}
/*
@@ -638,18 +640,18 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
continue;
if (args->flags & ATTR_KERNOVAL) {
args->valuelen = sfe->valuelen;
- return(XFS_ERROR(EEXIST));
+ return -EEXIST;
}
if (args->valuelen < sfe->valuelen) {
args->valuelen = sfe->valuelen;
- return(XFS_ERROR(ERANGE));
+ return -ERANGE;
}
args->valuelen = sfe->valuelen;
memcpy(args->value, &sfe->nameval[args->namelen],
args->valuelen);
- return(XFS_ERROR(EEXIST));
+ return -EEXIST;
}
- return(XFS_ERROR(ENOATTR));
+ return -ENOATTR;
}
/*
@@ -689,7 +691,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
* If we hit an IO error middle of the transaction inside
* grow_inode(), we may have inconsistent data. Bail out.
*/
- if (error == EIO)
+ if (error == -EIO)
goto out;
xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
@@ -710,6 +712,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
memset((char *)&nargs, 0, sizeof(nargs));
nargs.dp = dp;
+ nargs.geo = args->geo;
nargs.firstblock = args->firstblock;
nargs.flist = args->flist;
nargs.total = args->total;
@@ -727,9 +730,9 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
sfe->namelen);
nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
- ASSERT(error == ENOATTR);
+ ASSERT(error == -ENOATTR);
error = xfs_attr3_leaf_add(bp, &nargs);
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
if (error)
goto out;
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
@@ -738,7 +741,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
out:
kmem_free(tmpbuffer);
- return(error);
+ return error;
}
/*
@@ -766,12 +769,12 @@ xfs_attr_shortform_allfit(
if (entry->flags & XFS_ATTR_INCOMPLETE)
continue; /* don't copy partial entries */
if (!(entry->flags & XFS_ATTR_LOCAL))
- return(0);
+ return 0;
name_loc = xfs_attr3_leaf_name_local(leaf, i);
if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX)
- return(0);
+ return 0;
if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
- return(0);
+ return 0;
bytes += sizeof(struct xfs_attr_sf_entry) - 1
+ name_loc->namelen
+ be16_to_cpu(name_loc->valuelen);
@@ -804,18 +807,18 @@ xfs_attr3_leaf_to_shortform(
trace_xfs_attr_leaf_to_sf(args);
- tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
+ tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
if (!tmpbuffer)
- return ENOMEM;
+ return -ENOMEM;
- memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount));
+ memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
leaf = (xfs_attr_leafblock_t *)tmpbuffer;
xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
entry = xfs_attr3_leaf_entryp(leaf);
/* XXX (dgc): buffer is about to be marked stale - why zero it? */
- memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount));
+ memset(bp->b_addr, 0, args->geo->blksize);
/*
* Clean out the prior contents of the attribute list.
@@ -837,6 +840,7 @@ xfs_attr3_leaf_to_shortform(
* Copy the attributes
*/
memset((char *)&nargs, 0, sizeof(nargs));
+ nargs.geo = args->geo;
nargs.dp = dp;
nargs.firstblock = args->firstblock;
nargs.flist = args->flist;
@@ -903,12 +907,12 @@ xfs_attr3_leaf_to_node(
/* copy leaf to new buffer, update identifiers */
xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
bp2->b_ops = bp1->b_ops;
- memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(mp));
+ memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
hdr3->blkno = cpu_to_be64(bp2->b_bn);
}
- xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(mp) - 1);
+ xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);
/*
* Set up the new root node.
@@ -929,7 +933,7 @@ xfs_attr3_leaf_to_node(
btree[0].before = cpu_to_be32(blkno);
icnodehdr.count = 1;
dp->d_ops->node_hdr_to_disk(node, &icnodehdr);
- xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1);
+ xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1);
error = 0;
out:
return error;
@@ -965,10 +969,10 @@ xfs_attr3_leaf_create(
bp->b_ops = &xfs_attr3_leaf_buf_ops;
xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF);
leaf = bp->b_addr;
- memset(leaf, 0, XFS_LBSIZE(mp));
+ memset(leaf, 0, args->geo->blksize);
memset(&ichdr, 0, sizeof(ichdr));
- ichdr.firstused = XFS_LBSIZE(mp);
+ ichdr.firstused = args->geo->blksize;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
@@ -987,7 +991,7 @@ xfs_attr3_leaf_create(
ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
- xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(mp) - 1);
+ xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);
*bpp = bp;
return 0;
@@ -1013,10 +1017,10 @@ xfs_attr3_leaf_split(
ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
error = xfs_da_grow_inode(state->args, &blkno);
if (error)
- return(error);
+ return error;
error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp);
if (error)
- return(error);
+ return error;
newblk->blkno = blkno;
newblk->magic = XFS_ATTR_LEAF_MAGIC;
@@ -1027,7 +1031,7 @@ xfs_attr3_leaf_split(
xfs_attr3_leaf_rebalance(state, oldblk, newblk);
error = xfs_da3_blk_link(state, oldblk, newblk);
if (error)
- return(error);
+ return error;
/*
* Save info on "old" attribute for "atomic rename" ops, leaf_add()
@@ -1049,7 +1053,7 @@ xfs_attr3_leaf_split(
*/
oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
- return(error);
+ return error;
}
/*
@@ -1073,8 +1077,7 @@ xfs_attr3_leaf_add(
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
ASSERT(args->index >= 0 && args->index <= ichdr.count);
- entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
- args->trans->t_mountp->m_sb.sb_blocksize, NULL);
+ entsize = xfs_attr_leaf_newentsize(args, NULL);
/*
* Search through freemap for first-fit on new name length.
@@ -1105,7 +1108,7 @@ xfs_attr3_leaf_add(
* no good and we should just give up.
*/
if (!ichdr.holes && sum < entsize)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
/*
* Compact the entries to coalesce free space.
@@ -1118,7 +1121,7 @@ xfs_attr3_leaf_add(
* free region, in freemap[0]. If it is not big enough, give up.
*/
if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
- tmp = ENOSPC;
+ tmp = -ENOSPC;
goto out_log_hdr;
}
@@ -1173,17 +1176,14 @@ xfs_attr3_leaf_add_work(
* Allocate space for the new string (at the end of the run).
*/
mp = args->trans->t_mountp;
- ASSERT(ichdr->freemap[mapindex].base < XFS_LBSIZE(mp));
+ ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize);
ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0);
ASSERT(ichdr->freemap[mapindex].size >=
- xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
- mp->m_sb.sb_blocksize, NULL));
- ASSERT(ichdr->freemap[mapindex].size < XFS_LBSIZE(mp));
+ xfs_attr_leaf_newentsize(args, NULL));
+ ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize);
ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0);
- ichdr->freemap[mapindex].size -=
- xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
- mp->m_sb.sb_blocksize, &tmp);
+ ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp);
entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base +
ichdr->freemap[mapindex].size);
@@ -1228,6 +1228,7 @@ xfs_attr3_leaf_add_work(
name_rmt->valueblk = 0;
args->rmtblkno = 1;
args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
+ args->rmtvaluelen = args->valuelen;
}
xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -1267,14 +1268,13 @@ xfs_attr3_leaf_compact(
struct xfs_attr_leafblock *leaf_dst;
struct xfs_attr3_icleaf_hdr ichdr_src;
struct xfs_trans *trans = args->trans;
- struct xfs_mount *mp = trans->t_mountp;
char *tmpbuffer;
trace_xfs_attr_leaf_compact(args);
- tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
- memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
- memset(bp->b_addr, 0, XFS_LBSIZE(mp));
+ tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
+ memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
+ memset(bp->b_addr, 0, args->geo->blksize);
leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
leaf_dst = bp->b_addr;
@@ -1287,7 +1287,7 @@ xfs_attr3_leaf_compact(
/* Initialise the incore headers */
ichdr_src = *ichdr_dst; /* struct copy */
- ichdr_dst->firstused = XFS_LBSIZE(mp);
+ ichdr_dst->firstused = args->geo->blksize;
ichdr_dst->usedbytes = 0;
ichdr_dst->count = 0;
ichdr_dst->holes = 0;
@@ -1302,13 +1302,13 @@ xfs_attr3_leaf_compact(
* Copy all entry's in the same (sorted) order,
* but allocate name/value pairs packed and in sequence.
*/
- xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0,
- ichdr_src.count, mp);
+ xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0,
+ leaf_dst, ichdr_dst, 0, ichdr_src.count);
/*
* this logs the entire buffer, but the caller must write the header
* back to the buffer when it is finished modifying it.
*/
- xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
+ xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
kmem_free(tmpbuffer);
}
@@ -1459,8 +1459,8 @@ xfs_attr3_leaf_rebalance(
/*
* Move high entries from leaf1 to low end of leaf2.
*/
- xfs_attr3_leaf_moveents(leaf1, &ichdr1, ichdr1.count - count,
- leaf2, &ichdr2, 0, count, state->mp);
+ xfs_attr3_leaf_moveents(args, leaf1, &ichdr1,
+ ichdr1.count - count, leaf2, &ichdr2, 0, count);
} else if (count > ichdr1.count) {
/*
@@ -1488,14 +1488,14 @@ xfs_attr3_leaf_rebalance(
/*
* Move low entries from leaf2 to high end of leaf1.
*/
- xfs_attr3_leaf_moveents(leaf2, &ichdr2, 0, leaf1, &ichdr1,
- ichdr1.count, count, state->mp);
+ xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1,
+ ichdr1.count, count);
}
xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
- xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
- xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+ xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1);
+ xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);
/*
* Copy out last hashval in each block for B-tree code.
@@ -1590,11 +1590,9 @@ xfs_attr3_leaf_figure_balance(
max = ichdr1->count + ichdr2->count;
half = (max + 1) * sizeof(*entry);
half += ichdr1->usedbytes + ichdr2->usedbytes +
- xfs_attr_leaf_newentsize(state->args->namelen,
- state->args->valuelen,
- state->blocksize, NULL);
+ xfs_attr_leaf_newentsize(state->args, NULL);
half /= 2;
- lastdelta = state->blocksize;
+ lastdelta = state->args->geo->blksize;
entry = xfs_attr3_leaf_entryp(leaf1);
for (count = index = 0; count < max; entry++, index++, count++) {
@@ -1604,10 +1602,7 @@ xfs_attr3_leaf_figure_balance(
*/
if (count == blk1->index) {
tmp = totallen + sizeof(*entry) +
- xfs_attr_leaf_newentsize(
- state->args->namelen,
- state->args->valuelen,
- state->blocksize, NULL);
+ xfs_attr_leaf_newentsize(state->args, NULL);
if (XFS_ATTR_ABS(half - tmp) > lastdelta)
break;
lastdelta = XFS_ATTR_ABS(half - tmp);
@@ -1643,10 +1638,7 @@ xfs_attr3_leaf_figure_balance(
totallen -= count * sizeof(*entry);
if (foundit) {
totallen -= sizeof(*entry) +
- xfs_attr_leaf_newentsize(
- state->args->namelen,
- state->args->valuelen,
- state->blocksize, NULL);
+ xfs_attr_leaf_newentsize(state->args, NULL);
}
*countarg = count;
@@ -1698,9 +1690,9 @@ xfs_attr3_leaf_toosmall(
bytes = xfs_attr3_leaf_hdr_size(leaf) +
ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
ichdr.usedbytes;
- if (bytes > (state->blocksize >> 1)) {
+ if (bytes > (state->args->geo->blksize >> 1)) {
*action = 0; /* blk over 50%, don't try to join */
- return(0);
+ return 0;
}
/*
@@ -1719,7 +1711,7 @@ xfs_attr3_leaf_toosmall(
error = xfs_da3_path_shift(state, &state->altpath, forward,
0, &retval);
if (error)
- return(error);
+ return error;
if (retval) {
*action = 0;
} else {
@@ -1748,11 +1740,12 @@ xfs_attr3_leaf_toosmall(
error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
blkno, -1, &bp);
if (error)
- return(error);
+ return error;
xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
- bytes = state->blocksize - (state->blocksize >> 2) -
+ bytes = state->args->geo->blksize -
+ (state->args->geo->blksize >> 2) -
ichdr.usedbytes - ichdr2.usedbytes -
((ichdr.count + ichdr2.count) *
sizeof(xfs_attr_leaf_entry_t)) -
@@ -1764,7 +1757,7 @@ xfs_attr3_leaf_toosmall(
}
if (i >= 2) {
*action = 0;
- return(0);
+ return 0;
}
/*
@@ -1780,13 +1773,13 @@ xfs_attr3_leaf_toosmall(
0, &retval);
}
if (error)
- return(error);
+ return error;
if (retval) {
*action = 0;
} else {
*action = 1;
}
- return(0);
+ return 0;
}
/*
@@ -1803,7 +1796,6 @@ xfs_attr3_leaf_remove(
struct xfs_attr_leafblock *leaf;
struct xfs_attr3_icleaf_hdr ichdr;
struct xfs_attr_leaf_entry *entry;
- struct xfs_mount *mp = args->trans->t_mountp;
int before;
int after;
int smallest;
@@ -1817,7 +1809,7 @@ xfs_attr3_leaf_remove(
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
- ASSERT(ichdr.count > 0 && ichdr.count < XFS_LBSIZE(mp) / 8);
+ ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
ASSERT(args->index >= 0 && args->index < ichdr.count);
ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +
xfs_attr3_leaf_hdr_size(leaf));
@@ -1825,7 +1817,7 @@ xfs_attr3_leaf_remove(
entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
- ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
+ ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
/*
* Scan through free region table:
@@ -1840,8 +1832,8 @@ xfs_attr3_leaf_remove(
smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
entsize = xfs_attr_leaf_entsize(leaf, args->index);
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
- ASSERT(ichdr.freemap[i].base < XFS_LBSIZE(mp));
- ASSERT(ichdr.freemap[i].size < XFS_LBSIZE(mp));
+ ASSERT(ichdr.freemap[i].base < args->geo->blksize);
+ ASSERT(ichdr.freemap[i].size < args->geo->blksize);
if (ichdr.freemap[i].base == tablesize) {
ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t);
ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t);
@@ -1918,11 +1910,11 @@ xfs_attr3_leaf_remove(
* removing the name.
*/
if (smallest) {
- tmp = XFS_LBSIZE(mp);
+ tmp = args->geo->blksize;
entry = xfs_attr3_leaf_entryp(leaf);
for (i = ichdr.count - 1; i >= 0; entry++, i--) {
ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
- ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
+ ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
if (be16_to_cpu(entry->nameidx) < tmp)
tmp = be16_to_cpu(entry->nameidx);
@@ -1945,7 +1937,7 @@ xfs_attr3_leaf_remove(
tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
ichdr.count * sizeof(xfs_attr_leaf_entry_t);
- return tmp < mp->m_attr_magicpct; /* leaf is < 37% full */
+ return tmp < args->geo->magicpct; /* leaf is < 37% full */
}
/*
@@ -1962,7 +1954,6 @@ xfs_attr3_leaf_unbalance(
struct xfs_attr3_icleaf_hdr drophdr;
struct xfs_attr3_icleaf_hdr savehdr;
struct xfs_attr_leaf_entry *entry;
- struct xfs_mount *mp = state->mp;
trace_xfs_attr_leaf_unbalance(state->args);
@@ -1989,13 +1980,15 @@ xfs_attr3_leaf_unbalance(
*/
if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
drop_blk->bp, &drophdr)) {
- xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
+ xfs_attr3_leaf_moveents(state->args,
+ drop_leaf, &drophdr, 0,
save_leaf, &savehdr, 0,
- drophdr.count, mp);
+ drophdr.count);
} else {
- xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
+ xfs_attr3_leaf_moveents(state->args,
+ drop_leaf, &drophdr, 0,
save_leaf, &savehdr,
- savehdr.count, drophdr.count, mp);
+ savehdr.count, drophdr.count);
}
} else {
/*
@@ -2005,7 +1998,7 @@ xfs_attr3_leaf_unbalance(
struct xfs_attr_leafblock *tmp_leaf;
struct xfs_attr3_icleaf_hdr tmphdr;
- tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP);
+ tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP);
/*
* Copy the header into the temp leaf so that all the stuff
@@ -2018,35 +2011,39 @@ xfs_attr3_leaf_unbalance(
tmphdr.magic = savehdr.magic;
tmphdr.forw = savehdr.forw;
tmphdr.back = savehdr.back;
- tmphdr.firstused = state->blocksize;
+ tmphdr.firstused = state->args->geo->blksize;
/* write the header to the temp buffer to initialise it */
xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
drop_blk->bp, &drophdr)) {
- xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
+ xfs_attr3_leaf_moveents(state->args,
+ drop_leaf, &drophdr, 0,
tmp_leaf, &tmphdr, 0,
- drophdr.count, mp);
- xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0,
+ drophdr.count);
+ xfs_attr3_leaf_moveents(state->args,
+ save_leaf, &savehdr, 0,
tmp_leaf, &tmphdr, tmphdr.count,
- savehdr.count, mp);
+ savehdr.count);
} else {
- xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0,
+ xfs_attr3_leaf_moveents(state->args,
+ save_leaf, &savehdr, 0,
tmp_leaf, &tmphdr, 0,
- savehdr.count, mp);
- xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
+ savehdr.count);
+ xfs_attr3_leaf_moveents(state->args,
+ drop_leaf, &drophdr, 0,
tmp_leaf, &tmphdr, tmphdr.count,
- drophdr.count, mp);
+ drophdr.count);
}
- memcpy(save_leaf, tmp_leaf, state->blocksize);
+ memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
savehdr = tmphdr; /* struct copy */
kmem_free(tmp_leaf);
}
xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
- state->blocksize - 1);
+ state->args->geo->blksize - 1);
/*
* Copy out last hashval in each block for B-tree code.
@@ -2092,7 +2089,7 @@ xfs_attr3_leaf_lookup_int(
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
entries = xfs_attr3_leaf_entryp(leaf);
- ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8);
+ ASSERT(ichdr.count < args->geo->blksize / 8);
/*
* Binary search. (note: small blocks will skip this loop)
@@ -2126,7 +2123,7 @@ xfs_attr3_leaf_lookup_int(
}
if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) {
args->index = probe;
- return XFS_ERROR(ENOATTR);
+ return -ENOATTR;
}
/*
@@ -2155,7 +2152,7 @@ xfs_attr3_leaf_lookup_int(
if (!xfs_attr_namesp_match(args->flags, entry->flags))
continue;
args->index = probe;
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
} else {
name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
if (name_rmt->namelen != args->namelen)
@@ -2166,16 +2163,16 @@ xfs_attr3_leaf_lookup_int(
if (!xfs_attr_namesp_match(args->flags, entry->flags))
continue;
args->index = probe;
- args->valuelen = be32_to_cpu(name_rmt->valuelen);
+ args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
args->rmtblkcnt = xfs_attr3_rmt_blocks(
args->dp->i_mount,
- args->valuelen);
- return XFS_ERROR(EEXIST);
+ args->rmtvaluelen);
+ return -EEXIST;
}
}
args->index = probe;
- return XFS_ERROR(ENOATTR);
+ return -ENOATTR;
}
/*
@@ -2196,7 +2193,7 @@ xfs_attr3_leaf_getvalue(
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
- ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8);
+ ASSERT(ichdr.count < args->geo->blksize / 8);
ASSERT(args->index < ichdr.count);
entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
@@ -2211,7 +2208,7 @@ xfs_attr3_leaf_getvalue(
}
if (args->valuelen < valuelen) {
args->valuelen = valuelen;
- return XFS_ERROR(ERANGE);
+ return -ERANGE;
}
args->valuelen = valuelen;
memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
@@ -2219,19 +2216,19 @@ xfs_attr3_leaf_getvalue(
name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
ASSERT(name_rmt->namelen == args->namelen);
ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
- valuelen = be32_to_cpu(name_rmt->valuelen);
+ args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
- valuelen);
+ args->rmtvaluelen);
if (args->flags & ATTR_KERNOVAL) {
- args->valuelen = valuelen;
+ args->valuelen = args->rmtvaluelen;
return 0;
}
- if (args->valuelen < valuelen) {
- args->valuelen = valuelen;
- return XFS_ERROR(ERANGE);
+ if (args->valuelen < args->rmtvaluelen) {
+ args->valuelen = args->rmtvaluelen;
+ return -ERANGE;
}
- args->valuelen = valuelen;
+ args->valuelen = args->rmtvaluelen;
}
return 0;
}
@@ -2247,14 +2244,14 @@ xfs_attr3_leaf_getvalue(
/*ARGSUSED*/
STATIC void
xfs_attr3_leaf_moveents(
+ struct xfs_da_args *args,
struct xfs_attr_leafblock *leaf_s,
struct xfs_attr3_icleaf_hdr *ichdr_s,
int start_s,
struct xfs_attr_leafblock *leaf_d,
struct xfs_attr3_icleaf_hdr *ichdr_d,
int start_d,
- int count,
- struct xfs_mount *mp)
+ int count)
{
struct xfs_attr_leaf_entry *entry_s;
struct xfs_attr_leaf_entry *entry_d;
@@ -2274,10 +2271,10 @@ xfs_attr3_leaf_moveents(
ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC ||
ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);
ASSERT(ichdr_s->magic == ichdr_d->magic);
- ASSERT(ichdr_s->count > 0 && ichdr_s->count < XFS_LBSIZE(mp) / 8);
+ ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8);
ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s))
+ xfs_attr3_leaf_hdr_size(leaf_s));
- ASSERT(ichdr_d->count < XFS_LBSIZE(mp) / 8);
+ ASSERT(ichdr_d->count < args->geo->blksize / 8);
ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))
+ xfs_attr3_leaf_hdr_size(leaf_d));
@@ -2329,11 +2326,11 @@ xfs_attr3_leaf_moveents(
entry_d->nameidx = cpu_to_be16(ichdr_d->firstused);
entry_d->flags = entry_s->flags;
ASSERT(be16_to_cpu(entry_d->nameidx) + tmp
- <= XFS_LBSIZE(mp));
+ <= args->geo->blksize);
memmove(xfs_attr3_leaf_name(leaf_d, desti),
xfs_attr3_leaf_name(leaf_s, start_s + i), tmp);
ASSERT(be16_to_cpu(entry_s->nameidx) + tmp
- <= XFS_LBSIZE(mp));
+ <= args->geo->blksize);
memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
ichdr_s->usedbytes -= tmp;
ichdr_d->usedbytes += tmp;
@@ -2354,7 +2351,7 @@ xfs_attr3_leaf_moveents(
tmp = count * sizeof(xfs_attr_leaf_entry_t);
entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
ASSERT(((char *)entry_s + tmp) <=
- ((char *)leaf_s + XFS_LBSIZE(mp)));
+ ((char *)leaf_s + args->geo->blksize));
memset(entry_s, 0, tmp);
} else {
/*
@@ -2369,7 +2366,7 @@ xfs_attr3_leaf_moveents(
tmp = count * sizeof(xfs_attr_leaf_entry_t);
entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count];
ASSERT(((char *)entry_s + tmp) <=
- ((char *)leaf_s + XFS_LBSIZE(mp)));
+ ((char *)leaf_s + args->geo->blksize));
memset(entry_s, 0, tmp);
}
@@ -2437,22 +2434,21 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
* a "local" or a "remote" attribute.
*/
int
-xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local)
+xfs_attr_leaf_newentsize(
+ struct xfs_da_args *args,
+ int *local)
{
- int size;
+ int size;
- size = xfs_attr_leaf_entsize_local(namelen, valuelen);
- if (size < xfs_attr_leaf_entsize_local_max(blocksize)) {
- if (local) {
+ size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen);
+ if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) {
+ if (local)
*local = 1;
- }
- } else {
- size = xfs_attr_leaf_entsize_remote(namelen);
- if (local) {
- *local = 0;
- }
+ return size;
}
- return size;
+ if (local)
+ *local = 0;
+ return xfs_attr_leaf_entsize_remote(args->namelen);
}
@@ -2485,7 +2481,7 @@ xfs_attr3_leaf_clearflag(
*/
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
- return(error);
+ return error;
leaf = bp->b_addr;
entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
@@ -2518,7 +2514,7 @@ xfs_attr3_leaf_clearflag(
ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
- name_rmt->valuelen = cpu_to_be32(args->valuelen);
+ name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
}
@@ -2552,7 +2548,7 @@ xfs_attr3_leaf_setflag(
*/
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
- return(error);
+ return error;
leaf = bp->b_addr;
#ifdef DEBUG
@@ -2676,7 +2672,7 @@ xfs_attr3_leaf_flipflags(
ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
- name_rmt->valuelen = cpu_to_be32(args->valuelen);
+ name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
xfs_trans_log_buf(args->trans, bp1,
XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
}
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 3ec5ec0b8678..e2929da7c3ba 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -96,8 +96,7 @@ int xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count);
int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
struct xfs_buf *leaf2_bp);
-int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
- int *local);
+int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp);
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 739e0a52deda..7510ab8058a4 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -68,7 +68,6 @@ xfs_attr3_rmt_blocks(
*/
static bool
xfs_attr3_rmt_hdr_ok(
- struct xfs_mount *mp,
void *ptr,
xfs_ino_t ino,
uint32_t offset,
@@ -110,7 +109,7 @@ xfs_attr3_rmt_verify(
if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
return false;
if (be32_to_cpu(rmt->rm_offset) +
- be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
+ be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
return false;
if (rmt->rm_owner == 0)
return false;
@@ -125,8 +124,8 @@ xfs_attr3_rmt_read_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
char *ptr;
int len;
- bool corrupt = false;
xfs_daddr_t bno;
+ int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -135,27 +134,25 @@ xfs_attr3_rmt_read_verify(
ptr = bp->b_addr;
bno = bp->b_bn;
len = BBTOB(bp->b_length);
- ASSERT(len >= XFS_LBSIZE(mp));
+ ASSERT(len >= blksize);
while (len > 0) {
- if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
- XFS_ATTR3_RMT_CRC_OFF)) {
- corrupt = true;
+ if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
+ xfs_buf_ioerror(bp, -EFSBADCRC);
break;
}
- if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
- corrupt = true;
+ if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
break;
}
- len -= XFS_LBSIZE(mp);
- ptr += XFS_LBSIZE(mp);
- bno += mp->m_bsize;
+ len -= blksize;
+ ptr += blksize;
+ bno += BTOBB(blksize);
}
- if (corrupt) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- } else
+ if (bp->b_error)
+ xfs_verifier_error(bp);
+ else
ASSERT(len == 0);
}
@@ -168,6 +165,7 @@ xfs_attr3_rmt_write_verify(
char *ptr;
int len;
xfs_daddr_t bno;
+ int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -176,13 +174,12 @@ xfs_attr3_rmt_write_verify(
ptr = bp->b_addr;
bno = bp->b_bn;
len = BBTOB(bp->b_length);
- ASSERT(len >= XFS_LBSIZE(mp));
+ ASSERT(len >= blksize);
while (len > 0) {
- if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
- XFS_CORRUPTION_ERROR(__func__,
- XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
if (bip) {
@@ -191,11 +188,11 @@ xfs_attr3_rmt_write_verify(
rmt = (struct xfs_attr3_rmt_hdr *)ptr;
rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
}
- xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);
+ xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
- len -= XFS_LBSIZE(mp);
- ptr += XFS_LBSIZE(mp);
- bno += mp->m_bsize;
+ len -= blksize;
+ ptr += blksize;
+ bno += BTOBB(blksize);
}
ASSERT(len == 0);
}
@@ -244,22 +241,23 @@ xfs_attr_rmtval_copyout(
char *src = bp->b_addr;
xfs_daddr_t bno = bp->b_bn;
int len = BBTOB(bp->b_length);
+ int blksize = mp->m_attr_geo->blksize;
- ASSERT(len >= XFS_LBSIZE(mp));
+ ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
int hdr_size = 0;
- int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+ int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
byte_cnt = min(*valuelen, byte_cnt);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
- if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
+ if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
byte_cnt, bno)) {
xfs_alert(mp,
"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
bno, *offset, byte_cnt, ino);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
}
@@ -267,9 +265,9 @@ xfs_attr_rmtval_copyout(
memcpy(*dst, src + hdr_size, byte_cnt);
/* roll buffer forwards */
- len -= XFS_LBSIZE(mp);
- src += XFS_LBSIZE(mp);
- bno += mp->m_bsize;
+ len -= blksize;
+ src += blksize;
+ bno += BTOBB(blksize);
/* roll attribute data forwards */
*valuelen -= byte_cnt;
@@ -291,12 +289,13 @@ xfs_attr_rmtval_copyin(
char *dst = bp->b_addr;
xfs_daddr_t bno = bp->b_bn;
int len = BBTOB(bp->b_length);
+ int blksize = mp->m_attr_geo->blksize;
- ASSERT(len >= XFS_LBSIZE(mp));
+ ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
int hdr_size;
- int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+ int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
byte_cnt = min(*valuelen, byte_cnt);
hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
@@ -308,17 +307,17 @@ xfs_attr_rmtval_copyin(
* If this is the last block, zero the remainder of it.
* Check that we are actually the last block, too.
*/
- if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) {
+ if (byte_cnt + hdr_size < blksize) {
ASSERT(*valuelen - byte_cnt == 0);
- ASSERT(len == XFS_LBSIZE(mp));
+ ASSERT(len == blksize);
memset(dst + hdr_size + byte_cnt, 0,
- XFS_LBSIZE(mp) - hdr_size - byte_cnt);
+ blksize - hdr_size - byte_cnt);
}
/* roll buffer forwards */
- len -= XFS_LBSIZE(mp);
- dst += XFS_LBSIZE(mp);
- bno += mp->m_bsize;
+ len -= blksize;
+ dst += blksize;
+ bno += BTOBB(blksize);
/* roll attribute data forwards */
*valuelen -= byte_cnt;
@@ -340,7 +339,7 @@ xfs_attr_rmtval_get(
struct xfs_buf *bp;
xfs_dablk_t lblkno = args->rmtblkno;
__uint8_t *dst = args->value;
- int valuelen = args->valuelen;
+ int valuelen;
int nmap;
int error;
int blkcnt = args->rmtblkcnt;
@@ -350,7 +349,9 @@ xfs_attr_rmtval_get(
trace_xfs_attr_rmtval_get(args);
ASSERT(!(args->flags & ATTR_KERNOVAL));
+ ASSERT(args->rmtvaluelen == args->valuelen);
+ valuelen = args->rmtvaluelen;
while (valuelen > 0) {
nmap = ATTR_RMTVALUE_MAPSIZE;
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
@@ -418,7 +419,7 @@ xfs_attr_rmtval_set(
* attributes have headers, we can't just do a straight byte to FSB
* conversion and have to take the header space into account.
*/
- blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
+ blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
XFS_ATTR_FORK);
if (error)
@@ -451,7 +452,7 @@ xfs_attr_rmtval_set(
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
- return(error);
+ return error;
}
/*
@@ -472,7 +473,7 @@ xfs_attr_rmtval_set(
*/
error = xfs_trans_roll(&args->trans, dp);
if (error)
- return (error);
+ return error;
}
/*
@@ -483,7 +484,7 @@ xfs_attr_rmtval_set(
*/
lblkno = args->rmtblkno;
blkcnt = args->rmtblkcnt;
- valuelen = args->valuelen;
+ valuelen = args->rmtvaluelen;
while (valuelen > 0) {
struct xfs_buf *bp;
xfs_daddr_t dblkno;
@@ -497,7 +498,7 @@ xfs_attr_rmtval_set(
blkcnt, &map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
- return(error);
+ return error;
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -507,7 +508,7 @@ xfs_attr_rmtval_set(
bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
@@ -562,7 +563,7 @@ xfs_attr_rmtval_remove(
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
if (error)
- return(error);
+ return error;
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -621,7 +622,7 @@ xfs_attr_rmtval_remove(
*/
error = xfs_trans_roll(&args->trans, args->dp);
if (error)
- return (error);
+ return error;
}
- return(0);
+ return 0;
}
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index 5a9acfa156d7..5a9acfa156d7 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 919756e3ba53..919756e3ba53 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h
index f1e3c907044d..e1649c0d3e02 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/libxfs/xfs_bit.h
@@ -66,8 +66,11 @@ static inline int xfs_lowbit64(__uint64_t v)
n = ffs(w);
} else { /* upper bits */
w = (__uint32_t)(v >> 32);
- if (w && (n = ffs(w)))
- n += 32;
+ if (w) {
+ n = ffs(w);
+ if (n)
+ n += 32;
+ }
}
return n - 1;
}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 3ef11b22e750..86df952d3e24 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -94,7 +94,7 @@ xfs_bmap_compute_maxlevels(
maxleafents = MAXAEXTNUM;
sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
}
- maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
+ maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
minleafrecs = mp->m_bmap_dmnr[0];
minnoderecs = mp->m_bmap_dmnr[1];
maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -233,7 +233,6 @@ xfs_default_attroffset(
*/
STATIC void
xfs_bmap_forkoff_reset(
- xfs_mount_t *mp,
xfs_inode_t *ip,
int whichfork)
{
@@ -393,7 +392,7 @@ xfs_bmap_check_leaf_extents(
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLDFSBNO);
+ ASSERT(bno != NULLFSBLOCK);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
@@ -905,7 +904,7 @@ xfs_bmap_local_to_extents_empty(
ASSERT(ifp->if_bytes == 0);
ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
- xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
+ xfs_bmap_forkoff_reset(ip, whichfork);
ifp->if_flags &= ~XFS_IFINLINE;
ifp->if_flags |= XFS_IFEXTENTS;
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
@@ -1034,7 +1033,7 @@ xfs_bmap_add_attrfork_btree(
goto error0;
if (stat == 0) {
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
*firstblock = cur->bc_private.b.firstblock;
cur->bc_private.b.allocated = 0;
@@ -1099,10 +1098,11 @@ xfs_bmap_add_attrfork_local(
if (S_ISDIR(ip->i_d.di_mode)) {
memset(&dargs, 0, sizeof(dargs));
+ dargs.geo = ip->i_mount->m_dir_geo;
dargs.dp = ip;
dargs.firstblock = firstblock;
dargs.flist = flist;
- dargs.total = ip->i_mount->m_dirblkfsbs;
+ dargs.total = dargs.geo->fsbcount;
dargs.whichfork = XFS_DATA_FORK;
dargs.trans = tp;
return xfs_dir2_sf_to_block(&dargs);
@@ -1115,7 +1115,7 @@ xfs_bmap_add_attrfork_local(
/* should only be called for types that support local format data */
ASSERT(0);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
/*
@@ -1192,7 +1192,7 @@ xfs_bmap_add_attrfork(
break;
default:
ASSERT(0);
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto trans_cancel;
}
@@ -1299,7 +1299,7 @@ xfs_bmap_read_extents(
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLDFSBNO);
+ ASSERT(bno != NULLFSBLOCK);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
/*
@@ -1399,7 +1399,7 @@ xfs_bmap_read_extents(
return 0;
error0:
xfs_trans_brelse(tp, bp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
@@ -1429,11 +1429,7 @@ xfs_bmap_search_multi_extents(
gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
gotp->br_state = XFS_EXT_INVALID;
-#if XFS_BIG_BLKNOS
gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
-#else
- gotp->br_startblock = 0xffffa5a5;
-#endif
prevp->br_startoff = NULLFILEOFF;
ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
@@ -1576,7 +1572,7 @@ xfs_bmap_last_before(
if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
- return XFS_ERROR(EIO);
+ return -EIO;
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
*last_block = 0;
return 0;
@@ -1635,7 +1631,7 @@ xfs_bmap_last_extent(
* blocks at the end of the file which do not start at the previous data block,
* we will try to align the new blocks at stripe unit boundaries.
*
- * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
+ * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
* at, or past the EOF.
*/
STATIC int
@@ -1650,9 +1646,14 @@ xfs_bmap_isaeof(
bma->aeof = 0;
error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
&is_empty);
- if (error || is_empty)
+ if (error)
return error;
+ if (is_empty) {
+ bma->aeof = 1;
+ return 0;
+ }
+
/*
* Check if we are allocation or past the last extent, or at least into
* the last delayed allocated extent.
@@ -1670,7 +1671,6 @@ xfs_bmap_isaeof(
*/
int
xfs_bmap_last_offset(
- struct xfs_trans *tp,
struct xfs_inode *ip,
xfs_fileoff_t *last_block,
int whichfork)
@@ -1686,7 +1686,7 @@ xfs_bmap_last_offset(
if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
if (error || is_empty)
@@ -3319,7 +3319,7 @@ xfs_bmap_extsize_align(
if (orig_off < align_off ||
orig_end > align_off + align_alen ||
align_alen - temp < orig_alen)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Try to fix it by moving the start up.
*/
@@ -3344,7 +3344,7 @@ xfs_bmap_extsize_align(
* Result doesn't cover the request, fail it.
*/
if (orig_off < align_off || orig_end > align_off + align_alen)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else {
ASSERT(orig_off >= align_off);
ASSERT(orig_end <= align_off + align_alen);
@@ -3512,6 +3512,67 @@ xfs_bmap_adjacent(
#undef ISVALID
}
+static int
+xfs_bmap_longest_free_extent(
+ struct xfs_trans *tp,
+ xfs_agnumber_t ag,
+ xfs_extlen_t *blen,
+ int *notinit)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_perag *pag;
+ xfs_extlen_t longest;
+ int error = 0;
+
+ pag = xfs_perag_get(mp, ag);
+ if (!pag->pagf_init) {
+ error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
+ if (error)
+ goto out;
+
+ if (!pag->pagf_init) {
+ *notinit = 1;
+ goto out;
+ }
+ }
+
+ longest = xfs_alloc_longest_free_extent(mp, pag);
+ if (*blen < longest)
+ *blen = longest;
+
+out:
+ xfs_perag_put(pag);
+ return error;
+}
+
+static void
+xfs_bmap_select_minlen(
+ struct xfs_bmalloca *ap,
+ struct xfs_alloc_arg *args,
+ xfs_extlen_t *blen,
+ int notinit)
+{
+ if (notinit || *blen < ap->minlen) {
+ /*
+ * Since we did a BUF_TRYLOCK above, it is possible that
+ * there is space for this request.
+ */
+ args->minlen = ap->minlen;
+ } else if (*blen < args->maxlen) {
+ /*
+ * If the best seen length is less than the request length,
+ * use the best as the minimum.
+ */
+ args->minlen = *blen;
+ } else {
+ /*
+ * Otherwise we've seen an extent as big as maxlen, use that
+ * as the minimum.
+ */
+ args->minlen = args->maxlen;
+ }
+}
+
STATIC int
xfs_bmap_btalloc_nullfb(
struct xfs_bmalloca *ap,
@@ -3519,111 +3580,74 @@ xfs_bmap_btalloc_nullfb(
xfs_extlen_t *blen)
{
struct xfs_mount *mp = ap->ip->i_mount;
- struct xfs_perag *pag;
xfs_agnumber_t ag, startag;
int notinit = 0;
int error;
- if (ap->userdata && xfs_inode_is_filestream(ap->ip))
- args->type = XFS_ALLOCTYPE_NEAR_BNO;
- else
- args->type = XFS_ALLOCTYPE_START_BNO;
+ args->type = XFS_ALLOCTYPE_START_BNO;
args->total = ap->total;
- /*
- * Search for an allocation group with a single extent large enough
- * for the request. If one isn't found, then adjust the minimum
- * allocation size to the largest space found.
- */
startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
if (startag == NULLAGNUMBER)
startag = ag = 0;
- pag = xfs_perag_get(mp, ag);
while (*blen < args->maxlen) {
- if (!pag->pagf_init) {
- error = xfs_alloc_pagf_init(mp, args->tp, ag,
- XFS_ALLOC_FLAG_TRYLOCK);
- if (error) {
- xfs_perag_put(pag);
- return error;
- }
- }
-
- /*
- * See xfs_alloc_fix_freelist...
- */
- if (pag->pagf_init) {
- xfs_extlen_t longest;
- longest = xfs_alloc_longest_free_extent(mp, pag);
- if (*blen < longest)
- *blen = longest;
- } else
- notinit = 1;
-
- if (xfs_inode_is_filestream(ap->ip)) {
- if (*blen >= args->maxlen)
- break;
-
- if (ap->userdata) {
- /*
- * If startag is an invalid AG, we've
- * come here once before and
- * xfs_filestream_new_ag picked the
- * best currently available.
- *
- * Don't continue looping, since we
- * could loop forever.
- */
- if (startag == NULLAGNUMBER)
- break;
-
- error = xfs_filestream_new_ag(ap, &ag);
- xfs_perag_put(pag);
- if (error)
- return error;
+ error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
+ &notinit);
+ if (error)
+ return error;
- /* loop again to set 'blen'*/
- startag = NULLAGNUMBER;
- pag = xfs_perag_get(mp, ag);
- continue;
- }
- }
if (++ag == mp->m_sb.sb_agcount)
ag = 0;
if (ag == startag)
break;
- xfs_perag_put(pag);
- pag = xfs_perag_get(mp, ag);
}
- xfs_perag_put(pag);
- /*
- * Since the above loop did a BUF_TRYLOCK, it is
- * possible that there is space for this request.
- */
- if (notinit || *blen < ap->minlen)
- args->minlen = ap->minlen;
- /*
- * If the best seen length is less than the request
- * length, use the best as the minimum.
- */
- else if (*blen < args->maxlen)
- args->minlen = *blen;
- /*
- * Otherwise we've seen an extent as big as maxlen,
- * use that as the minimum.
- */
- else
- args->minlen = args->maxlen;
+ xfs_bmap_select_minlen(ap, args, blen, notinit);
+ return 0;
+}
+
+STATIC int
+xfs_bmap_btalloc_filestreams(
+ struct xfs_bmalloca *ap,
+ struct xfs_alloc_arg *args,
+ xfs_extlen_t *blen)
+{
+ struct xfs_mount *mp = ap->ip->i_mount;
+ xfs_agnumber_t ag;
+ int notinit = 0;
+ int error;
+
+ args->type = XFS_ALLOCTYPE_NEAR_BNO;
+ args->total = ap->total;
+
+ ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
+ if (ag == NULLAGNUMBER)
+ ag = 0;
+
+ error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
+ if (error)
+ return error;
+
+ if (*blen < args->maxlen) {
+ error = xfs_filestream_new_ag(ap, &ag);
+ if (error)
+ return error;
+
+ error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
+ &notinit);
+ if (error)
+ return error;
+
+ }
+
+ xfs_bmap_select_minlen(ap, args, blen, notinit);
/*
- * set the failure fallback case to look in the selected
- * AG as the stream may have moved.
+ * Set the failure fallback case to look in the selected AG as the stream
+ * may have moved.
*/
- if (xfs_inode_is_filestream(ap->ip))
- ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
-
+ ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
return 0;
}
@@ -3643,10 +3667,19 @@ xfs_bmap_btalloc(
int isaligned;
int tryagain;
int error;
+ int stripe_align;
ASSERT(ap->length);
mp = ap->ip->i_mount;
+
+ /* stripe alignment for allocation is determined by mount parameters */
+ stripe_align = 0;
+ if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+ stripe_align = mp->m_swidth;
+ else if (mp->m_dalign)
+ stripe_align = mp->m_dalign;
+
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) {
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3655,6 +3688,8 @@ xfs_bmap_btalloc(
ASSERT(!error);
ASSERT(ap->length);
}
+
+
nullfb = *ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
if (nullfb) {
@@ -3692,7 +3727,15 @@ xfs_bmap_btalloc(
args.firstblock = *ap->firstblock;
blen = 0;
if (nullfb) {
- error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
+ /*
+ * Search for an allocation group with a single extent large
+ * enough for the request. If one isn't found, then adjust
+ * the minimum allocation size to the largest space found.
+ */
+ if (ap->userdata && xfs_inode_is_filestream(ap->ip))
+ error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
+ else
+ error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
if (error)
return error;
} else if (ap->flist->xbf_low) {
@@ -3730,7 +3773,7 @@ xfs_bmap_btalloc(
*/
if (!ap->flist->xbf_low && ap->aeof) {
if (!ap->offset) {
- args.alignment = mp->m_dalign;
+ args.alignment = stripe_align;
atype = args.type;
isaligned = 1;
/*
@@ -3755,13 +3798,13 @@ xfs_bmap_btalloc(
* of minlen+alignment+slop doesn't go up
* between the calls.
*/
- if (blen > mp->m_dalign && blen <= args.maxlen)
- nextminlen = blen - mp->m_dalign;
+ if (blen > stripe_align && blen <= args.maxlen)
+ nextminlen = blen - stripe_align;
else
nextminlen = args.minlen;
- if (nextminlen + mp->m_dalign > args.minlen + 1)
+ if (nextminlen + stripe_align > args.minlen + 1)
args.minalignslop =
- nextminlen + mp->m_dalign -
+ nextminlen + stripe_align -
args.minlen - 1;
else
args.minalignslop = 0;
@@ -3783,7 +3826,7 @@ xfs_bmap_btalloc(
*/
args.type = atype;
args.fsbno = ap->blkno;
- args.alignment = mp->m_dalign;
+ args.alignment = stripe_align;
args.minlen = nextminlen;
args.minalignslop = 0;
isaligned = 1;
@@ -3997,17 +4040,18 @@ xfs_bmapi_read(
ASSERT(*nmap >= 1);
ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
XFS_BMAPI_IGSTATE)));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
XFS_STATS_INC(xs_blk_mapr);
@@ -4191,17 +4235,18 @@ xfs_bmapi_delay(
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
XFS_STATS_INC(xs_blk_mapw);
@@ -4249,8 +4294,8 @@ xfs_bmapi_delay(
}
-int
-__xfs_bmapi_allocate(
+static int
+xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
@@ -4420,7 +4465,7 @@ xfs_bmapi_convert_unwritten(
* so generate another request.
*/
if (mval->br_blockcount < len)
- return EAGAIN;
+ return -EAGAIN;
return 0;
}
@@ -4484,17 +4529,18 @@ xfs_bmapi_write(
ASSERT(tp != NULL);
ASSERT(len > 0);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -4528,9 +4574,6 @@ xfs_bmapi_write(
bma.flist = flist;
bma.firstblock = firstblock;
- if (flags & XFS_BMAPI_STACK_SWITCH)
- bma.stack_switch = 1;
-
while (bno < end && n < *nmap) {
inhole = eof || bma.got.br_startoff > bno;
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
@@ -4573,7 +4616,7 @@ xfs_bmapi_write(
/* Execute unwritten extent conversion if necessary */
error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
- if (error == EAGAIN)
+ if (error == -EAGAIN)
continue;
if (error)
goto error0;
@@ -4875,7 +4918,7 @@ xfs_bmap_del_extent(
goto done;
cur->bc_rec.b = new;
error = xfs_btree_insert(cur, &i);
- if (error && error != ENOSPC)
+ if (error && error != -ENOSPC)
goto done;
/*
* If get no-space back from btree insert,
@@ -4883,7 +4926,7 @@ xfs_bmap_del_extent(
* block reservation.
* Fix up our state and return the error.
*/
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
/*
* Reset the cursor, don't trust
* it after any insert operation.
@@ -4911,7 +4954,7 @@ xfs_bmap_del_extent(
xfs_bmbt_set_blockcount(ep,
got.br_blockcount);
flags = 0;
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto done;
}
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
@@ -5029,12 +5072,13 @@ xfs_bunmapi(
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
mp = ip->i_mount;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(len > 0);
ASSERT(nexts >= 0);
@@ -5277,7 +5321,7 @@ xfs_bunmapi(
del.br_startoff > got.br_startoff &&
del.br_startoff + del.br_blockcount <
got.br_startoff + got.br_blockcount) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto error0;
}
error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
@@ -5358,3 +5402,203 @@ error0:
}
return error;
}
+
+/*
+ * Shift extent records to the left to cover a hole.
+ *
+ * At most @num_exts extents are shifted per call. @current_ext is the index
+ * of the next extent to shift and is advanced as extents are processed;
+ * @offset_shift_fsb is the distance each extent's start offset moves left.
+ * *@done is set to 1 once the last in-core extent has been handled. If the
+ * hole is too small for a shift, that extent is left alone and we return -EINVAL.
+ */
+int
+xfs_bmap_shift_extents(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int *done,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t offset_shift_fsb,
+ xfs_extnum_t *current_ext,
+ xfs_fsblock_t *firstblock,
+ struct xfs_bmap_free *flist,
+ int num_exts)
+{
+ struct xfs_btree_cur *cur = NULL;
+ struct xfs_bmbt_rec_host *gotp;
+ struct xfs_bmbt_irec got;
+ struct xfs_bmbt_irec left;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ xfs_extnum_t nexts = 0;
+ xfs_fileoff_t startoff;
+ int error = 0;
+ int i;
+ int whichfork = XFS_DATA_FORK;
+ int logflags = 0;
+ xfs_filblks_t blockcount = 0;
+ int total_extents;
+
+ if (unlikely(XFS_TEST_ERROR(
+ (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+ mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+ XFS_ERRLEVEL_LOW, mp);
+ return -EFSCORRUPTED;
+ }
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ ASSERT(current_ext != NULL);
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ /* Read in all the extents */
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ /*
+ * If *current_ext is 0, look up the extent from which shifting
+ * starts and store it in gotp.
+ */
+ if (!*current_ext) {
+ gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
+ /*
+ * gotp can be null in 2 cases: 1) if there are no extents
+ * or 2) start_fsb lies in a hole beyond which there are
+ * no extents. Either way, we are done.
+ */
+ if (!gotp) {
+ *done = 1;
+ return 0;
+ }
+ }
+
+ if (ifp->if_flags & XFS_IFBROOT) {
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+ cur->bc_private.b.firstblock = *firstblock;
+ cur->bc_private.b.flist = flist;
+ cur->bc_private.b.flags = 0;
+ }
+
+ /*
+ * There may be delalloc extents in the data fork before the range we
+ * are collapsing out, so we cannot use the count of real extents
+ * here. Instead we have to calculate it from the incore fork
+ * (if_bytes).
+ */
+ total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+ while (nexts++ < num_exts && *current_ext < total_extents) {
+
+ gotp = xfs_iext_get_ext(ifp, *current_ext);
+ xfs_bmbt_get_all(gotp, &got);
+ startoff = got.br_startoff - offset_shift_fsb;
+
+ /*
+ * Before shifting extent into hole, make sure that the hole
+ * is large enough to accommodate the shift.
+ */
+ if (*current_ext) {
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+ *current_ext - 1), &left);
+
+ if (startoff < left.br_startoff + left.br_blockcount)
+ error = -EINVAL;
+ } else if (offset_shift_fsb > got.br_startoff) {
+ /*
+ * When the first extent is shifted, offset_shift_fsb
+ * must not exceed the starting offset of
+ * the first extent.
+ */
+ error = -EINVAL;
+ }
+
+ if (error)
+ goto del_cursor;
+
+ if (cur) {
+ error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+
+ /* Check if we can merge 2 adjacent extents */
+ if (*current_ext &&
+ left.br_startoff + left.br_blockcount == startoff &&
+ left.br_startblock + left.br_blockcount ==
+ got.br_startblock &&
+ left.br_state == got.br_state &&
+ left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
+ blockcount = left.br_blockcount +
+ got.br_blockcount;
+ xfs_iext_remove(ip, *current_ext, 1, 0);
+ logflags |= XFS_ILOG_CORE;
+ if (cur) {
+ error = xfs_btree_delete(cur, &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ } else {
+ logflags |= XFS_ILOG_DEXT;
+ }
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ gotp = xfs_iext_get_ext(ifp, --*current_ext);
+ xfs_bmbt_get_all(gotp, &got);
+
+ /* Make cursor point to the extent we will update */
+ if (cur) {
+ error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+
+ xfs_bmbt_set_blockcount(gotp, blockcount);
+ got.br_blockcount = blockcount;
+ } else {
+ /* We have to update the startoff */
+ xfs_bmbt_set_startoff(gotp, startoff);
+ got.br_startoff = startoff;
+ }
+
+ logflags |= XFS_ILOG_CORE;
+ if (cur) {
+ error = xfs_bmbt_update(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ got.br_state);
+ if (error)
+ goto del_cursor;
+ } else {
+ logflags |= XFS_ILOG_DEXT;
+ }
+
+ (*current_ext)++;
+ total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); /* recount; the merge path removes a record */
+ }
+
+ /* Check if we are done */
+ if (*current_ext == total_extents)
+ *done = 1;
+
+del_cursor:
+ if (cur)
+ xfs_btree_del_cursor(cur,
+ error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+ if (logflags)
+ xfs_trans_log_inode(tp, ip, logflags);
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 33b41f351225..b879ca56a64c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -77,7 +77,6 @@ typedef struct xfs_bmap_free
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x040
-#define XFS_BMAPI_STACK_SWITCH 0x080
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -86,8 +85,7 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
- { XFS_BMAPI_CONVERT, "CONVERT" }, \
- { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
+ { XFS_BMAPI_CONVERT, "CONVERT" }
static inline int xfs_bmapi_aflag(int w)
@@ -127,6 +125,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
{ BMAP_RIGHT_FILLING, "RF" }, \
{ BMAP_ATTRFORK, "ATTR" }
+
+/*
+ * This macro determines how many extents are shifted in one write
+ * transaction. A single shift could require two splits: an extent
+ * move on the first and an extent merge on the second. It is
+ * therefore proper that only one extent is shifted per write
+ * transaction.
+ */
+#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
+
#ifdef DEBUG
void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int whichfork, unsigned long caller_ip);
@@ -146,8 +154,8 @@ int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *last_block, int whichfork);
-int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip,
- xfs_fileoff_t *unused, int whichfork);
+int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused,
+ int whichfork);
int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork);
@@ -169,5 +177,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
+int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+ int *done, xfs_fileoff_t start_fsb,
+ xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
+ xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
+ int num_exts);
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 706bc3f777cb..fba753308f31 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -84,7 +84,7 @@ xfs_bmdr_to_bmbt(
rblock->bb_level = dblock->bb_level;
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
- dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+ dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
@@ -111,23 +111,8 @@ __xfs_bmbt_get_all(
ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
s->br_startoff = ((xfs_fileoff_t)l0 &
xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-#if XFS_BIG_BLKNOS
s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
(((xfs_fsblock_t)l1) >> 21);
-#else
-#ifdef DEBUG
- {
- xfs_dfsbno_t b;
-
- b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) |
- (((xfs_dfsbno_t)l1) >> 21);
- ASSERT((b >> 32) == 0 || isnulldstartblock(b));
- s->br_startblock = (xfs_fsblock_t)b;
- }
-#else /* !DEBUG */
- s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21);
-#endif /* DEBUG */
-#endif /* XFS_BIG_BLKNOS */
s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
/* This is xfs_extent_state() in-line */
if (ext_flag) {
@@ -163,21 +148,8 @@ xfs_fsblock_t
xfs_bmbt_get_startblock(
xfs_bmbt_rec_host_t *r)
{
-#if XFS_BIG_BLKNOS
return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
(((xfs_fsblock_t)r->l1) >> 21);
-#else
-#ifdef DEBUG
- xfs_dfsbno_t b;
-
- b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) |
- (((xfs_dfsbno_t)r->l1) >> 21);
- ASSERT((b >> 32) == 0 || isnulldstartblock(b));
- return (xfs_fsblock_t)b;
-#else /* !DEBUG */
- return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21);
-#endif /* DEBUG */
-#endif /* XFS_BIG_BLKNOS */
}
/*
@@ -241,7 +213,6 @@ xfs_bmbt_set_allf(
ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-#if XFS_BIG_BLKNOS
ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
@@ -250,23 +221,6 @@ xfs_bmbt_set_allf(
r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-#else /* !XFS_BIG_BLKNOS */
- if (isnullstartblock(startblock)) {
- r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
- ((xfs_bmbt_rec_base_t)startoff << 9) |
- (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
- r->l1 = xfs_mask64hi(11) |
- ((xfs_bmbt_rec_base_t)startblock << 21) |
- ((xfs_bmbt_rec_base_t)blockcount &
- (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
- } else {
- r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
- ((xfs_bmbt_rec_base_t)startoff << 9);
- r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
- ((xfs_bmbt_rec_base_t)blockcount &
- (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
- }
-#endif /* XFS_BIG_BLKNOS */
}
/*
@@ -298,8 +252,6 @@ xfs_bmbt_disk_set_allf(
ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-
-#if XFS_BIG_BLKNOS
ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
r->l0 = cpu_to_be64(
@@ -310,26 +262,6 @@ xfs_bmbt_disk_set_allf(
((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
-#else /* !XFS_BIG_BLKNOS */
- if (isnullstartblock(startblock)) {
- r->l0 = cpu_to_be64(
- ((xfs_bmbt_rec_base_t)extent_flag << 63) |
- ((xfs_bmbt_rec_base_t)startoff << 9) |
- (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
- r->l1 = cpu_to_be64(xfs_mask64hi(11) |
- ((xfs_bmbt_rec_base_t)startblock << 21) |
- ((xfs_bmbt_rec_base_t)blockcount &
- (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
- } else {
- r->l0 = cpu_to_be64(
- ((xfs_bmbt_rec_base_t)extent_flag << 63) |
- ((xfs_bmbt_rec_base_t)startoff << 9));
- r->l1 = cpu_to_be64(
- ((xfs_bmbt_rec_base_t)startblock << 21) |
- ((xfs_bmbt_rec_base_t)blockcount &
- (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
- }
-#endif /* XFS_BIG_BLKNOS */
}
/*
@@ -365,24 +297,11 @@ xfs_bmbt_set_startblock(
xfs_bmbt_rec_host_t *r,
xfs_fsblock_t v)
{
-#if XFS_BIG_BLKNOS
ASSERT((v & xfs_mask64hi(12)) == 0);
r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
(xfs_bmbt_rec_base_t)(v >> 43);
r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
(xfs_bmbt_rec_base_t)(v << 21);
-#else /* !XFS_BIG_BLKNOS */
- if (isnullstartblock(v)) {
- r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
- r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) |
- ((xfs_bmbt_rec_base_t)v << 21) |
- (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
- } else {
- r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9);
- r->l1 = ((xfs_bmbt_rec_base_t)v << 21) |
- (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
- }
-#endif /* XFS_BIG_BLKNOS */
}
/*
@@ -438,12 +357,12 @@ xfs_bmbt_to_bmdr(
cpu_to_be64(XFS_BUF_DADDR_NULL));
} else
ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
- ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
- ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
+ ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
+ ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));
ASSERT(rblock->bb_level != 0);
dblock->bb_level = rblock->bb_level;
dblock->bb_numrecs = rblock->bb_numrecs;
- dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+ dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
@@ -519,7 +438,6 @@ xfs_bmbt_alloc_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
- int length,
int *stat)
{
xfs_alloc_arg_t args; /* block allocation args */
@@ -555,7 +473,7 @@ xfs_bmbt_alloc_block(
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto error0;
}
error = xfs_alloc_vextent(&args);
@@ -672,8 +590,7 @@ xfs_bmbt_get_dmaxrecs(
{
if (level != cur->bc_nlevels - 1)
return cur->bc_mp->m_bmap_dmxr[level != 0];
- return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize,
- level == 0);
+ return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
}
STATIC void
@@ -765,11 +682,11 @@ xfs_bmbt_verify(
/* sibling pointer verification */
if (!block->bb_u.l.bb_leftsib ||
- (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
+ (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
!XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
return false;
if (!block->bb_u.l.bb_rightsib ||
- (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
+ (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
!XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
return false;
@@ -780,12 +697,14 @@ static void
xfs_bmbt_read_verify(
struct xfs_buf *bp)
{
- if (!(xfs_btree_lblock_verify_crc(bp) &&
- xfs_bmbt_verify(bp))) {
+ if (!xfs_btree_lblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_bmbt_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
}
}
@@ -794,11 +713,9 @@ xfs_bmbt_write_verify(
struct xfs_buf *bp)
{
if (!xfs_bmbt_verify(bp)) {
- xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
xfs_btree_lblock_calc_crc(bp);
@@ -914,7 +831,6 @@ xfs_bmbt_maxrecs(
*/
int
xfs_bmdr_maxrecs(
- struct xfs_mount *mp,
int blocklen,
int leaf)
{
@@ -962,7 +878,7 @@ xfs_bmbt_change_owner(
cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
if (!cur)
- return ENOMEM;
+ return -ENOMEM;
error = xfs_btree_change_owner(cur, new_owner, buffer_list);
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 6e42e1e50b89..819a8a4dee95 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -130,7 +130,7 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
xfs_bmdr_block_t *, int);
extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
-extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
+extern int xfs_bmdr_maxrecs(int blocklen, int leaf);
extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 9adaae4f3e2f..8fe6a93ff473 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -33,6 +33,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
+#include "xfs_alloc.h"
/*
* Cursor allocation zone.
@@ -43,9 +44,10 @@ kmem_zone_t *xfs_btree_cur_zone;
* Btree magic numbers.
*/
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
- { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
+ { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
+ XFS_FIBT_MAGIC },
{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
- XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
+ XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
};
#define xfs_btree_magic(cur) \
xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
@@ -76,11 +78,11 @@ xfs_btree_check_lblock(
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
block->bb_u.l.bb_leftsib &&
- (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
+ (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
block->bb_u.l.bb_rightsib &&
- (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
+ (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_rightsib)));
@@ -90,7 +92,7 @@ xfs_btree_check_lblock(
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -138,7 +140,7 @@ xfs_btree_check_sblock(
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -165,12 +167,12 @@ xfs_btree_check_block(
int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
struct xfs_btree_cur *cur, /* btree cursor */
- xfs_dfsbno_t bno, /* btree block disk address */
+ xfs_fsblock_t bno, /* btree block disk address */
int level) /* btree block level */
{
XFS_WANT_CORRUPTED_RETURN(
level > 0 &&
- bno != NULLDFSBNO &&
+ bno != NULLFSBLOCK &&
XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
return 0;
}
@@ -234,8 +236,7 @@ xfs_btree_lblock_calc_crc(
return;
if (bip)
block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_BTREE_LBLOCK_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
}
bool
@@ -243,8 +244,8 @@ xfs_btree_lblock_verify_crc(
struct xfs_buf *bp)
{
if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
- return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_BTREE_LBLOCK_CRC_OFF);
+ return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+
return true;
}
@@ -267,8 +268,7 @@ xfs_btree_sblock_calc_crc(
return;
if (bip)
block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_BTREE_SBLOCK_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}
bool
@@ -276,8 +276,8 @@ xfs_btree_sblock_verify_crc(
struct xfs_buf *bp)
{
if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
- return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_BTREE_SBLOCK_CRC_OFF);
+ return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+
return true;
}
@@ -554,14 +554,11 @@ xfs_btree_get_bufl(
xfs_fsblock_t fsbno, /* file system block number */
uint lock) /* lock flags for get_buf */
{
- xfs_buf_t *bp; /* buffer pointer (return value) */
xfs_daddr_t d; /* real disk block address */
ASSERT(fsbno != NULLFSBLOCK);
d = XFS_FSB_TO_DADDR(mp, fsbno);
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
- ASSERT(!xfs_buf_geterror(bp));
- return bp;
+ return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}
/*
@@ -576,15 +573,12 @@ xfs_btree_get_bufs(
xfs_agblock_t agbno, /* allocation group block number */
uint lock) /* lock flags for get_buf */
{
- xfs_buf_t *bp; /* buffer pointer (return value) */
xfs_daddr_t d; /* real disk block address */
ASSERT(agno != NULLAGNUMBER);
ASSERT(agbno != NULLAGBLOCK);
d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
- ASSERT(!xfs_buf_geterror(bp));
- return bp;
+ return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}
/*
@@ -601,7 +595,7 @@ xfs_btree_islastblock(
block = xfs_btree_get_block(cur, level, &bp);
xfs_btree_check_block(cur, block, level, bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
+ return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
else
return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}
@@ -724,7 +718,6 @@ xfs_btree_read_bufl(
mp->m_bsize, lock, &bp, ops);
if (error)
return error;
- ASSERT(!xfs_buf_geterror(bp));
if (bp)
xfs_buf_set_ref(bp, refval);
*bpp = bp;
@@ -778,16 +771,16 @@ xfs_btree_readahead_lblock(
struct xfs_btree_block *block)
{
int rval = 0;
- xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
- xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+ xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
- if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
xfs_btree_reada_bufl(cur->bc_mp, left, 1,
cur->bc_ops->buf_ops);
rval++;
}
- if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
xfs_btree_reada_bufl(cur->bc_mp, right, 1,
cur->bc_ops->buf_ops);
rval++;
@@ -859,7 +852,7 @@ xfs_btree_ptr_to_daddr(
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+ ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
} else {
@@ -907,9 +900,9 @@ xfs_btree_setbuf(
b = XFS_BUF_TO_BLOCK(bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
+ if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
- if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
+ if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
} else {
if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
@@ -925,7 +918,7 @@ xfs_btree_ptr_is_null(
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return ptr->l == cpu_to_be64(NULLDFSBNO);
+ return ptr->l == cpu_to_be64(NULLFSBLOCK);
else
return ptr->s == cpu_to_be32(NULLAGBLOCK);
}
@@ -936,7 +929,7 @@ xfs_btree_set_ptr_null(
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- ptr->l = cpu_to_be64(NULLDFSBNO);
+ ptr->l = cpu_to_be64(NULLFSBLOCK);
else
ptr->s = cpu_to_be32(NULLAGBLOCK);
}
@@ -1004,8 +997,8 @@ xfs_btree_init_block_int(
buf->bb_numrecs = cpu_to_be16(numrecs);
if (flags & XFS_BTREE_LONG_PTRS) {
- buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
- buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
+ buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
if (flags & XFS_BTREE_CRC_BLOCKS) {
buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
buf->bb_u.l.bb_owner = cpu_to_be64(owner);
@@ -1117,6 +1110,7 @@ xfs_btree_set_refs(
xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
break;
case XFS_BTNUM_INO:
+ case XFS_BTNUM_FINO:
xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
break;
case XFS_BTNUM_BMAP:
@@ -1146,7 +1140,7 @@ xfs_btree_get_buf_block(
mp->m_bsize, flags);
if (!*bpp)
- return ENOMEM;
+ return -ENOMEM;
(*bpp)->b_ops = cur->bc_ops->buf_ops;
*block = XFS_BUF_TO_BLOCK(*bpp);
@@ -1161,7 +1155,6 @@ STATIC int
xfs_btree_read_buf_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr,
- int level,
int flags,
struct xfs_btree_block **block,
struct xfs_buf **bpp)
@@ -1180,7 +1173,6 @@ xfs_btree_read_buf_block(
if (error)
return error;
- ASSERT(!xfs_buf_geterror(*bpp));
xfs_btree_set_refs(cur, *bpp);
*block = XFS_BUF_TO_BLOCK(*bpp);
return 0;
@@ -1506,7 +1498,7 @@ xfs_btree_increment(
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
goto out0;
ASSERT(0);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto error0;
}
ASSERT(lev < cur->bc_nlevels);
@@ -1519,8 +1511,8 @@ xfs_btree_increment(
union xfs_btree_ptr *ptrp;
ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
- error = xfs_btree_read_buf_block(cur, ptrp, --lev,
- 0, &block, &bp);
+ --lev;
+ error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
if (error)
goto error0;
@@ -1605,7 +1597,7 @@ xfs_btree_decrement(
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
goto out0;
ASSERT(0);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto error0;
}
ASSERT(lev < cur->bc_nlevels);
@@ -1618,8 +1610,8 @@ xfs_btree_decrement(
union xfs_btree_ptr *ptrp;
ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
- error = xfs_btree_read_buf_block(cur, ptrp, --lev,
- 0, &block, &bp);
+ --lev;
+ error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
if (error)
goto error0;
xfs_btree_setbuf(cur, lev, bp);
@@ -1669,7 +1661,7 @@ xfs_btree_lookup_get_block(
return 0;
}
- error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
+ error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
if (error)
return error;
@@ -2020,7 +2012,7 @@ xfs_btree_lshift(
goto out0;
/* Set up the left neighbor as "left". */
- error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
+ error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
if (error)
goto error0;
@@ -2204,7 +2196,7 @@ xfs_btree_rshift(
goto out0;
/* Set up the right neighbor as "right". */
- error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
+ error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
if (error)
goto error0;
@@ -2332,7 +2324,7 @@ error1:
* record (to be inserted into parent).
*/
STATIC int /* error */
-xfs_btree_split(
+__xfs_btree_split(
struct xfs_btree_cur *cur,
int level,
union xfs_btree_ptr *ptrp,
@@ -2374,7 +2366,7 @@ xfs_btree_split(
xfs_btree_buf_to_ptr(cur, lbp, &lptr);
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
+ error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
if (error)
goto error0;
if (*stat == 0)
@@ -2472,7 +2464,7 @@ xfs_btree_split(
* point back to right instead of to left.
*/
if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
- error = xfs_btree_read_buf_block(cur, &rrptr, level,
+ error = xfs_btree_read_buf_block(cur, &rrptr,
0, &rrblock, &rrbp);
if (error)
goto error0;
@@ -2512,6 +2504,85 @@ error0:
return error;
}
+struct xfs_btree_split_args { /* handed from xfs_btree_split() to xfs_btree_split_worker() */
+ struct xfs_btree_cur *cur;
+ int level;
+ union xfs_btree_ptr *ptrp;
+ union xfs_btree_key *key;
+ struct xfs_btree_cur **curp;
+ int *stat; /* success/failure */
+ int result; /* return value of __xfs_btree_split() */
+ bool kswapd; /* allocation in kswapd context */
+ struct completion *done; /* completed by the worker after the split */
+ struct work_struct work; /* work item queued on xfs_alloc_wq */
+};
+
+/*
+ * Work item handler: run __xfs_btree_split() on behalf of xfs_btree_split().
+ */
+static void
+xfs_btree_split_worker(
+ struct work_struct *work)
+{
+ struct xfs_btree_split_args *args = container_of(work,
+ struct xfs_btree_split_args, work);
+ unsigned long pflags;
+ unsigned long new_pflags = PF_FSTRANS;
+
+ /*
+ * We are in a transaction context here, but may also be doing work
+ * in kswapd context, and hence we may need to inherit that state
+ * temporarily to ensure that we don't block waiting for memory reclaim
+ * in any way.
+ */
+ if (args->kswapd)
+ new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+
+ current_set_flags_nested(&pflags, new_pflags);
+
+ args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
+ args->key, args->curp, args->stat);
+ complete(args->done); /* wake the waiter in xfs_btree_split() */
+
+ current_restore_flags_nested(&pflags, new_pflags);
+}
+
+/*
+ * BMBT split requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. For the other
+ * btree types, just call directly to avoid the context switch overhead here.
+ */
+STATIC int /* error */
+xfs_btree_split(
+ struct xfs_btree_cur *cur,
+ int level,
+ union xfs_btree_ptr *ptrp,
+ union xfs_btree_key *key,
+ struct xfs_btree_cur **curp,
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_split_args args;
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ if (cur->bc_btnum != XFS_BTNUM_BMAP)
+ return __xfs_btree_split(cur, level, ptrp, key, curp, stat); /* not a BMBT: split on the current stack */
+
+ args.cur = cur;
+ args.level = level;
+ args.ptrp = ptrp;
+ args.key = key;
+ args.curp = curp;
+ args.stat = stat;
+ args.done = &done;
+ args.kswapd = current_is_kswapd(); /* worker adds the kswapd task flags when this is set */
+ INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
+ queue_work(xfs_alloc_wq, &args.work);
+ wait_for_completion(&done); /* args and done live on this stack, so wait for the worker */
+ destroy_work_on_stack(&args.work);
+ return args.result;
+}
+
+
/*
* Copy the old inode root contents into a real block and make the
* broot point to it.
@@ -2547,7 +2618,7 @@ xfs_btree_new_iroot(
pp = xfs_btree_ptr_addr(cur, 1, block);
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
+ error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
if (error)
goto error0;
if (*stat == 0) {
@@ -2651,7 +2722,7 @@ xfs_btree_new_root(
cur->bc_ops->init_ptr_from_cur(cur, &rptr);
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
+ error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
if (error)
goto error0;
if (*stat == 0)
@@ -2686,8 +2757,7 @@ xfs_btree_new_root(
lbp = bp;
xfs_btree_buf_to_ptr(cur, lbp, &lptr);
left = block;
- error = xfs_btree_read_buf_block(cur, &rptr,
- cur->bc_nlevels - 1, 0, &right, &rbp);
+ error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
if (error)
goto error0;
bp = rbp;
@@ -2698,8 +2768,7 @@ xfs_btree_new_root(
xfs_btree_buf_to_ptr(cur, rbp, &rptr);
right = block;
xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
- error = xfs_btree_read_buf_block(cur, &lptr,
- cur->bc_nlevels - 1, 0, &left, &lbp);
+ error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
if (error)
goto error0;
bp = lbp;
@@ -3651,8 +3720,7 @@ xfs_btree_delrec(
rptr = cptr;
right = block;
rbp = bp;
- error = xfs_btree_read_buf_block(cur, &lptr, level,
- 0, &left, &lbp);
+ error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
if (error)
goto error0;
@@ -3669,8 +3737,7 @@ xfs_btree_delrec(
lptr = cptr;
left = block;
lbp = bp;
- error = xfs_btree_read_buf_block(cur, &rptr, level,
- 0, &right, &rbp);
+ error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
if (error)
goto error0;
@@ -3742,8 +3809,7 @@ xfs_btree_delrec(
/* If there is a right sibling, point it to the remaining block. */
xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
if (!xfs_btree_ptr_is_null(cur, &cptr)) {
- error = xfs_btree_read_buf_block(cur, &cptr, level,
- 0, &rrblock, &rrbp);
+ error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
if (error)
goto error0;
xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
@@ -3952,7 +4018,7 @@ xfs_btree_block_change_owner(
/* now read rh sibling block for next iteration */
xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
if (xfs_btree_ptr_is_null(cur, &rptr))
- return ENOENT;
+ return -ENOENT;
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}
@@ -3995,7 +4061,7 @@ xfs_btree_change_owner(
buffer_list);
} while (!error);
- if (error != ENOENT)
+ if (error != -ENOENT)
return error;
}
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 91e34f21bace..8f18bab73ea5 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -62,6 +62,7 @@ union xfs_btree_rec {
#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
+#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
/*
* For logging record fields.
@@ -92,6 +93,7 @@ do { \
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
+ case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -105,6 +107,7 @@ do { \
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
+ case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -129,7 +132,7 @@ struct xfs_btree_ops {
int (*alloc_block)(struct xfs_btree_cur *cur,
union xfs_btree_ptr *start_bno,
union xfs_btree_ptr *new_bno,
- int length, int *stat);
+ int *stat);
int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
/* update last record information */
@@ -255,7 +258,7 @@ xfs_btree_check_block(
int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
struct xfs_btree_cur *cur, /* btree cursor */
- xfs_dfsbno_t ptr, /* btree block disk address */
+ xfs_fsblock_t ptr, /* btree block disk address */
int level); /* btree block level */
/*
diff --git a/fs/xfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h
index fad1676ad8cd..fad1676ad8cd 100644
--- a/fs/xfs/xfs_cksum.h
+++ b/fs/xfs/libxfs/xfs_cksum.h
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 796272a2e129..2c42ae28d027 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -167,8 +167,8 @@ xfs_da3_node_verify(
* we don't know if the node is for and attribute or directory tree,
* so only fail if the count is outside both bounds
*/
- if (ichdr.count > mp->m_dir_node_ents &&
- ichdr.count > mp->m_attr_node_ents)
+ if (ichdr.count > mp->m_dir_geo->node_ents &&
+ ichdr.count > mp->m_attr_geo->node_ents)
return false;
/* XXX: hash order check? */
@@ -185,8 +185,8 @@ xfs_da3_node_write_verify(
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
if (!xfs_da3_node_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -196,7 +196,7 @@ xfs_da3_node_write_verify(
if (bip)
hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
}
/*
@@ -209,18 +209,20 @@ static void
xfs_da3_node_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_da_blkinfo *info = bp->b_addr;
switch (be16_to_cpu(info->magic)) {
case XFS_DA3_NODE_MAGIC:
- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DA3_NODE_CRC_OFF))
+ if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
+ xfs_buf_ioerror(bp, -EFSBADCRC);
break;
+ }
/* fall through */
case XFS_DA_NODE_MAGIC:
- if (!xfs_da3_node_verify(bp))
+ if (!xfs_da3_node_verify(bp)) {
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
break;
+ }
return;
case XFS_ATTR_LEAF_MAGIC:
case XFS_ATTR3_LEAF_MAGIC:
@@ -237,8 +239,7 @@ xfs_da3_node_read_verify(
}
/* corrupt block */
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
}
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
@@ -314,7 +315,7 @@ xfs_da3_node_create(
error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
if (error)
- return(error);
+ return error;
bp->b_ops = &xfs_da3_node_buf_ops;
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
node = bp->b_addr;
@@ -336,7 +337,7 @@ xfs_da3_node_create(
XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
*bpp = bp;
- return(0);
+ return 0;
}
/*
@@ -384,8 +385,8 @@ xfs_da3_split(
switch (oldblk->magic) {
case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_split(state, oldblk, newblk);
- if ((error != 0) && (error != ENOSPC)) {
- return(error); /* GROT: attr is inconsistent */
+ if ((error != 0) && (error != -ENOSPC)) {
+ return error; /* GROT: attr is inconsistent */
}
if (!error) {
addblk = newblk;
@@ -407,7 +408,7 @@ xfs_da3_split(
&state->extrablk);
}
if (error)
- return(error); /* GROT: attr inconsistent */
+ return error; /* GROT: attr inconsistent */
addblk = newblk;
break;
case XFS_DIR2_LEAFN_MAGIC:
@@ -421,7 +422,7 @@ xfs_da3_split(
max - i, &action);
addblk->bp = NULL;
if (error)
- return(error); /* GROT: dir is inconsistent */
+ return error; /* GROT: dir is inconsistent */
/*
* Record the newly split block for the next time thru?
*/
@@ -438,7 +439,7 @@ xfs_da3_split(
xfs_da3_fixhashpath(state, &state->path);
}
if (!addblk)
- return(0);
+ return 0;
/*
* Split the root node.
@@ -448,7 +449,7 @@ xfs_da3_split(
error = xfs_da3_root_split(state, oldblk, addblk);
if (error) {
addblk->bp = NULL;
- return(error); /* GROT: dir is inconsistent */
+ return error; /* GROT: dir is inconsistent */
}
/*
@@ -491,7 +492,7 @@ xfs_da3_split(
sizeof(node->hdr.info)));
}
addblk->bp = NULL;
- return(0);
+ return 0;
}
/*
@@ -597,7 +598,7 @@ xfs_da3_root_split(
* Set up the new root node.
*/
error = xfs_da3_node_create(args,
- (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
+ (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
level + 1, &bp, args->whichfork);
if (error)
return error;
@@ -615,10 +616,10 @@ xfs_da3_root_split(
#ifdef DEBUG
if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
- ASSERT(blk1->blkno >= mp->m_dirleafblk &&
- blk1->blkno < mp->m_dirfreeblk);
- ASSERT(blk2->blkno >= mp->m_dirleafblk &&
- blk2->blkno < mp->m_dirfreeblk);
+ ASSERT(blk1->blkno >= args->geo->leafblk &&
+ blk1->blkno < args->geo->freeblk);
+ ASSERT(blk2->blkno >= args->geo->leafblk &&
+ blk2->blkno < args->geo->freeblk);
}
#endif
@@ -662,25 +663,25 @@ xfs_da3_node_split(
/*
* Do we have to split the node?
*/
- if (nodehdr.count + newcount > state->node_ents) {
+ if (nodehdr.count + newcount > state->args->geo->node_ents) {
/*
* Allocate a new node, add to the doubly linked chain of
* nodes, then move some of our excess entries into it.
*/
error = xfs_da_grow_inode(state->args, &blkno);
if (error)
- return(error); /* GROT: dir is inconsistent */
+ return error; /* GROT: dir is inconsistent */
error = xfs_da3_node_create(state->args, blkno, treelevel,
&newblk->bp, state->args->whichfork);
if (error)
- return(error); /* GROT: dir is inconsistent */
+ return error; /* GROT: dir is inconsistent */
newblk->blkno = blkno;
newblk->magic = XFS_DA_NODE_MAGIC;
xfs_da3_node_rebalance(state, oldblk, newblk);
error = xfs_da3_blk_link(state, oldblk, newblk);
if (error)
- return(error);
+ return error;
*result = 1;
} else {
*result = 0;
@@ -720,7 +721,7 @@ xfs_da3_node_split(
}
}
- return(0);
+ return 0;
}
/*
@@ -893,8 +894,8 @@ xfs_da3_node_add(
ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
ASSERT(newblk->blkno != 0);
if (state->args->whichfork == XFS_DATA_FORK)
- ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
- newblk->blkno < state->mp->m_dirfreeblk);
+ ASSERT(newblk->blkno >= state->args->geo->leafblk &&
+ newblk->blkno < state->args->geo->freeblk);
/*
* We may need to make some room before we insert the new node.
@@ -962,9 +963,9 @@ xfs_da3_join(
case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_toosmall(state, &action);
if (error)
- return(error);
+ return error;
if (action == 0)
- return(0);
+ return 0;
xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
break;
case XFS_DIR2_LEAFN_MAGIC:
@@ -984,7 +985,7 @@ xfs_da3_join(
xfs_da3_fixhashpath(state, &state->path);
error = xfs_da3_node_toosmall(state, &action);
if (error)
- return(error);
+ return error;
if (action == 0)
return 0;
xfs_da3_node_unbalance(state, drop_blk, save_blk);
@@ -994,12 +995,12 @@ xfs_da3_join(
error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
xfs_da_state_kill_altpath(state);
if (error)
- return(error);
+ return error;
error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
drop_blk->bp);
drop_blk->bp = NULL;
if (error)
- return(error);
+ return error;
}
/*
* We joined all the way to the top. If it turns out that
@@ -1009,7 +1010,7 @@ xfs_da3_join(
xfs_da3_node_remove(state, drop_blk);
xfs_da3_fixhashpath(state, &state->path);
error = xfs_da3_root_join(state, &state->path.blk[0]);
- return(error);
+ return error;
}
#ifdef DEBUG
@@ -1088,16 +1089,17 @@ xfs_da3_root_join(
* that could occur. For dir3 blocks we also need to update the block
* number in the buffer header.
*/
- memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
+ memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
root_blk->bp->b_ops = bp->b_ops;
xfs_trans_buf_copy_type(root_blk->bp, bp);
if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
}
- xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
+ xfs_trans_log_buf(args->trans, root_blk->bp, 0,
+ args->geo->blksize - 1);
error = xfs_da_shrink_inode(args, child, bp);
- return(error);
+ return error;
}
/*
@@ -1138,9 +1140,9 @@ xfs_da3_node_toosmall(
info = blk->bp->b_addr;
node = (xfs_da_intnode_t *)info;
dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- if (nodehdr.count > (state->node_ents >> 1)) {
+ if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
*action = 0; /* blk over 50%, don't try to join */
- return(0); /* blk over 50%, don't try to join */
+ return 0; /* blk over 50%, don't try to join */
}
/*
@@ -1159,13 +1161,13 @@ xfs_da3_node_toosmall(
error = xfs_da3_path_shift(state, &state->altpath, forward,
0, &retval);
if (error)
- return(error);
+ return error;
if (retval) {
*action = 0;
} else {
*action = 2;
}
- return(0);
+ return 0;
}
/*
@@ -1175,8 +1177,8 @@ xfs_da3_node_toosmall(
* We prefer coalescing with the lower numbered sibling so as
* to shrink a directory over time.
*/
- count = state->node_ents;
- count -= state->node_ents >> 2;
+ count = state->args->geo->node_ents;
+ count -= state->args->geo->node_ents >> 2;
count -= nodehdr.count;
/* start with smaller blk num */
@@ -1192,7 +1194,7 @@ xfs_da3_node_toosmall(
error = xfs_da3_node_read(state->args->trans, dp,
blkno, -1, &bp, state->args->whichfork);
if (error)
- return(error);
+ return error;
node = bp->b_addr;
dp->d_ops->node_hdr_from_disk(&thdr, node);
@@ -1295,7 +1297,7 @@ xfs_da3_fixhashpath(
node = blk->bp->b_addr;
dp->d_ops->node_hdr_from_disk(&nodehdr, node);
btree = dp->d_ops->node_tree_p(node);
- if (be32_to_cpu(btree->hashval) == lasthash)
+ if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
break;
blk->hashval = lasthash;
btree[blk->index].hashval = cpu_to_be32(lasthash);
@@ -1471,7 +1473,7 @@ xfs_da3_node_lookup_int(
* Descend thru the B-tree searching each level for the right
* node to use, until the right hashval is found.
*/
- blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
+ blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0;
for (blk = &state->path.blk[0], state->path.active = 1;
state->path.active <= XFS_DA_NODE_MAXDEPTH;
blk++, state->path.active++) {
@@ -1484,7 +1486,7 @@ xfs_da3_node_lookup_int(
if (error) {
blk->blkno = 0;
state->path.active--;
- return(error);
+ return error;
}
curr = blk->bp->b_addr;
blk->magic = be16_to_cpu(curr->magic);
@@ -1577,25 +1579,25 @@ xfs_da3_node_lookup_int(
args->blkno = blk->blkno;
} else {
ASSERT(0);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
- if (((retval == ENOENT) || (retval == ENOATTR)) &&
+ if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
(blk->hashval == args->hashval)) {
error = xfs_da3_path_shift(state, &state->path, 1, 1,
&retval);
if (error)
- return(error);
+ return error;
if (retval == 0) {
continue;
} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
/* path_shift() gives ENOENT */
- retval = XFS_ERROR(ENOATTR);
+ retval = -ENOATTR;
}
}
break;
}
*result = retval;
- return(0);
+ return 0;
}
/*========================================================================
@@ -1690,7 +1692,7 @@ xfs_da3_blk_link(
be32_to_cpu(old_info->back),
-1, &bp, args->whichfork);
if (error)
- return(error);
+ return error;
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
@@ -1711,7 +1713,7 @@ xfs_da3_blk_link(
be32_to_cpu(old_info->forw),
-1, &bp, args->whichfork);
if (error)
- return(error);
+ return error;
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
@@ -1724,7 +1726,7 @@ xfs_da3_blk_link(
xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
- return(0);
+ return 0;
}
/*
@@ -1770,7 +1772,7 @@ xfs_da3_blk_unlink(
be32_to_cpu(drop_info->back),
-1, &bp, args->whichfork);
if (error)
- return(error);
+ return error;
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
@@ -1787,7 +1789,7 @@ xfs_da3_blk_unlink(
be32_to_cpu(drop_info->forw),
-1, &bp, args->whichfork);
if (error)
- return(error);
+ return error;
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
@@ -1799,7 +1801,7 @@ xfs_da3_blk_unlink(
}
xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
- return(0);
+ return 0;
}
/*
@@ -1857,9 +1859,9 @@ xfs_da3_path_shift(
}
}
if (level < 0) {
- *result = XFS_ERROR(ENOENT); /* we're out of our tree */
+ *result = -ENOENT; /* we're out of our tree */
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
- return(0);
+ return 0;
}
/*
@@ -1881,7 +1883,7 @@ xfs_da3_path_shift(
error = xfs_da3_node_read(args->trans, dp, blkno, -1,
&blk->bp, args->whichfork);
if (error)
- return(error);
+ return error;
info = blk->bp->b_addr;
ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
@@ -2002,7 +2004,7 @@ xfs_da_grow_inode_int(
struct xfs_trans *tp = args->trans;
struct xfs_inode *dp = args->dp;
int w = args->whichfork;
- xfs_drfsbno_t nblks = dp->i_d.di_nblocks;
+ xfs_rfsblock_t nblks = dp->i_d.di_nblocks;
struct xfs_bmbt_irec map, *mapp;
int nmap, error, got, i, mapi;
@@ -2066,7 +2068,7 @@ xfs_da_grow_inode_int(
if (got != count || mapp[0].br_startoff != *bno ||
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
*bno + count) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto out_free_map;
}
@@ -2089,20 +2091,12 @@ xfs_da_grow_inode(
xfs_dablk_t *new_blkno)
{
xfs_fileoff_t bno;
- int count;
int error;
trace_xfs_da_grow_inode(args);
- if (args->whichfork == XFS_DATA_FORK) {
- bno = args->dp->i_mount->m_dirleafblk;
- count = args->dp->i_mount->m_dirblkfsbs;
- } else {
- bno = 0;
- count = 1;
- }
-
- error = xfs_da_grow_inode_int(args, &bno, count);
+ bno = args->geo->leafblk;
+ error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
if (!error)
*new_blkno = (xfs_dablk_t)bno;
return error;
@@ -2157,27 +2151,27 @@ xfs_da3_swap_lastblock(
w = args->whichfork;
ASSERT(w == XFS_DATA_FORK);
mp = dp->i_mount;
- lastoff = mp->m_dirfreeblk;
+ lastoff = args->geo->freeblk;
error = xfs_bmap_last_before(tp, dp, &lastoff, w);
if (error)
return error;
if (unlikely(lastoff == 0)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
/*
* Read the last block in the btree space.
*/
- last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
+ last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
if (error)
return error;
/*
* Copy the last block into the dead buffer and log it.
*/
- memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize);
- xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
+ memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
+ xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
dead_info = dead_buf->b_addr;
/*
* Get values from the moved block.
@@ -2215,7 +2209,7 @@ xfs_da3_swap_lastblock(
sib_info->magic != dead_info->magic)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
sib_info->forw = cpu_to_be32(dead_blkno);
@@ -2237,7 +2231,7 @@ xfs_da3_swap_lastblock(
sib_info->magic != dead_info->magic)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
sib_info->back = cpu_to_be32(dead_blkno);
@@ -2246,7 +2240,7 @@ xfs_da3_swap_lastblock(
sizeof(sib_info->back)));
sib_buf = NULL;
}
- par_blkno = mp->m_dirleafblk;
+ par_blkno = args->geo->leafblk;
level = -1;
/*
* Walk down the tree looking for the parent of the moved block.
@@ -2260,7 +2254,7 @@ xfs_da3_swap_lastblock(
if (level >= 0 && level != par_hdr.level + 1) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
level = par_hdr.level;
@@ -2273,7 +2267,7 @@ xfs_da3_swap_lastblock(
if (entno == par_hdr.count) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
par_blkno = be32_to_cpu(btree[entno].before);
@@ -2300,7 +2294,7 @@ xfs_da3_swap_lastblock(
if (unlikely(par_blkno == 0)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
@@ -2311,7 +2305,7 @@ xfs_da3_swap_lastblock(
if (par_hdr.level != level) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
btree = dp->d_ops->node_tree_p(par_node);
@@ -2356,10 +2350,7 @@ xfs_da_shrink_inode(
w = args->whichfork;
tp = args->trans;
mp = dp->i_mount;
- if (w == XFS_DATA_FORK)
- count = mp->m_dirblkfsbs;
- else
- count = 1;
+ count = args->geo->fsbcount;
for (;;) {
/*
* Remove extents. If we get ENOSPC for a dir we have to move
@@ -2368,7 +2359,7 @@ xfs_da_shrink_inode(
error = xfs_bunmapi(tp, dp, dead_blkno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
0, args->firstblock, args->flist, &done);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
if (w != XFS_DATA_FORK)
break;
error = xfs_da3_swap_lastblock(args, &dead_blkno,
@@ -2436,7 +2427,7 @@ xfs_buf_map_from_irec(
map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
KM_SLEEP | KM_NOFS);
if (!map)
- return ENOMEM;
+ return -ENOMEM;
*mapp = map;
}
@@ -2461,7 +2452,6 @@ xfs_buf_map_from_irec(
*/
static int
xfs_dabuf_map(
- struct xfs_trans *trans,
struct xfs_inode *dp,
xfs_dablk_t bno,
xfs_daddr_t mappedbno,
@@ -2479,7 +2469,10 @@ xfs_dabuf_map(
ASSERT(map && *map);
ASSERT(*nmaps == 1);
- nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
+ if (whichfork == XFS_DATA_FORK)
+ nfsb = mp->m_dir_geo->fsbcount;
+ else
+ nfsb = mp->m_attr_geo->fsbcount;
/*
* Caller doesn't have a mapping. -2 means don't complain
@@ -2507,8 +2500,8 @@ xfs_dabuf_map(
}
if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
- error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED);
- if (unlikely(error == EFSCORRUPTED)) {
+ error = mappedbno == -2 ? -1 : -EFSCORRUPTED;
+ if (unlikely(error == -EFSCORRUPTED)) {
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
int i;
xfs_alert(mp, "%s: bno %lld dir: inode %lld",
@@ -2557,7 +2550,7 @@ xfs_da_get_buf(
*bpp = NULL;
mapp = &map;
nmap = 1;
- error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+ error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
&mapp, &nmap);
if (error) {
/* mapping a hole is not an error, but we don't continue */
@@ -2568,7 +2561,7 @@ xfs_da_get_buf(
bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
mapp, nmap, 0);
- error = bp ? bp->b_error : XFS_ERROR(EIO);
+ error = bp ? bp->b_error : -EIO;
if (error) {
xfs_trans_brelse(trans, bp);
goto out_free;
@@ -2605,7 +2598,7 @@ xfs_da_read_buf(
*bpp = NULL;
mapp = &map;
nmap = 1;
- error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+ error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
&mapp, &nmap);
if (error) {
/* mapping a hole is not an error, but we don't continue */
@@ -2624,47 +2617,6 @@ xfs_da_read_buf(
xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
else
xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
-
- /*
- * This verification code will be moved to a CRC verification callback
- * function so just leave it here unchanged until then.
- */
- {
- xfs_dir2_data_hdr_t *hdr = bp->b_addr;
- xfs_dir2_free_t *free = bp->b_addr;
- xfs_da_blkinfo_t *info = bp->b_addr;
- uint magic, magic1;
- struct xfs_mount *mp = dp->i_mount;
-
- magic = be16_to_cpu(info->magic);
- magic1 = be32_to_cpu(hdr->magic);
- if (unlikely(
- XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
- (magic != XFS_DA3_NODE_MAGIC) &&
- (magic != XFS_ATTR_LEAF_MAGIC) &&
- (magic != XFS_ATTR3_LEAF_MAGIC) &&
- (magic != XFS_DIR2_LEAF1_MAGIC) &&
- (magic != XFS_DIR3_LEAF1_MAGIC) &&
- (magic != XFS_DIR2_LEAFN_MAGIC) &&
- (magic != XFS_DIR3_LEAFN_MAGIC) &&
- (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
- (magic1 != XFS_DIR3_BLOCK_MAGIC) &&
- (magic1 != XFS_DIR2_DATA_MAGIC) &&
- (magic1 != XFS_DIR3_DATA_MAGIC) &&
- (free->hdr.magic !=
- cpu_to_be32(XFS_DIR2_FREE_MAGIC)) &&
- (free->hdr.magic !=
- cpu_to_be32(XFS_DIR3_FREE_MAGIC)),
- mp, XFS_ERRTAG_DA_READ_BUF,
- XFS_RANDOM_DA_READ_BUF))) {
- trace_xfs_da_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
- XFS_ERRLEVEL_LOW, mp, info);
- error = XFS_ERROR(EFSCORRUPTED);
- xfs_trans_brelse(trans, bp);
- goto out_free;
- }
- }
*bpp = bp;
out_free:
if (mapp != &map)
@@ -2678,7 +2630,6 @@ out_free:
*/
xfs_daddr_t
xfs_da_reada_buf(
- struct xfs_trans *trans,
struct xfs_inode *dp,
xfs_dablk_t bno,
xfs_daddr_t mappedbno,
@@ -2692,7 +2643,7 @@ xfs_da_reada_buf(
mapp = &map;
nmap = 1;
- error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+ error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
&mapp, &nmap);
if (error) {
/* mapping a hole is not an error, but we don't continue */
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 6e95ea79f5d7..6e153e399a77 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -25,6 +25,23 @@ struct xfs_trans;
struct zone;
struct xfs_dir_ops;
+/*
+ * Directory/attribute geometry information. There will be one of these for each
+ * data fork type, and it will be passed around via the xfs_da_args. Global
+ * structures will be attached to the xfs_mount.
+ */
+struct xfs_da_geometry {
+ int blksize; /* da block size in bytes */
+ int fsbcount; /* da block size in filesystem blocks */
+ uint8_t fsblog; /* log2 of _filesystem_ block size */
+ uint8_t blklog; /* log2 of da block size */
+ uint node_ents; /* # of entries in a danode */
+ int magicpct; /* 37% of block size in bytes */
+ xfs_dablk_t datablk; /* blockno of dir data v2 */
+ xfs_dablk_t leafblk; /* blockno of leaf data v2 */
+ xfs_dablk_t freeblk; /* blockno of free data v2 */
+};
+
/*========================================================================
* Btree searching and modification structure definitions.
*========================================================================*/
@@ -42,6 +59,7 @@ enum xfs_dacmp {
* Structure to ease passing around component names.
*/
typedef struct xfs_da_args {
+ struct xfs_da_geometry *geo; /* da block geometry */
const __uint8_t *name; /* string (maybe not NULL terminated) */
int namelen; /* length of string (maybe no NULL) */
__uint8_t filetype; /* filetype of inode for directories */
@@ -60,10 +78,12 @@ typedef struct xfs_da_args {
int index; /* index of attr of interest in blk */
xfs_dablk_t rmtblkno; /* remote attr value starting blkno */
int rmtblkcnt; /* remote attr value block count */
+ int rmtvaluelen; /* remote attr value length in bytes */
xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */
int index2; /* index of 2nd attr in blk */
xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
int rmtblkcnt2; /* remote attr value block count */
+ int rmtvaluelen2; /* remote attr value length in bytes */
int op_flags; /* operation flags */
enum xfs_dacmp cmpresult; /* name compare result for lookups */
} xfs_da_args_t;
@@ -108,8 +128,6 @@ typedef struct xfs_da_state_path {
typedef struct xfs_da_state {
xfs_da_args_t *args; /* filename arguments */
struct xfs_mount *mp; /* filesystem mount point */
- unsigned int blocksize; /* logical block size */
- unsigned int node_ents; /* how many entries in danode */
xfs_da_state_path_t path; /* search/split paths */
xfs_da_state_path_t altpath; /* alternate path for join */
unsigned char inleaf; /* insert into 1->lf, 0->splf */
@@ -183,9 +201,9 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int whichfork,
const struct xfs_buf_ops *ops);
-xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
- xfs_dablk_t bno, xfs_daddr_t mapped_bno,
- int whichfork, const struct xfs_buf_ops *ops);
+xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
+ xfs_daddr_t mapped_bno, int whichfork,
+ const struct xfs_buf_ops *ops);
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
struct xfs_buf *dead_buf);
diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index e6c83e1fbc8a..c9aee52a37e2 100644
--- a/fs/xfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -26,8 +26,10 @@
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
/*
* Shortform directory ops
@@ -425,9 +427,9 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
* Directory Leaf block operations
*/
static int
-xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
+xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo)
{
- return (mp->m_dirblksize - sizeof(struct xfs_dir2_leaf_hdr)) /
+ return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) /
(uint)sizeof(struct xfs_dir2_leaf_entry);
}
@@ -438,9 +440,9 @@ xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp)
}
static int
-xfs_dir3_max_leaf_ents(struct xfs_mount *mp)
+xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo)
{
- return (mp->m_dirblksize - sizeof(struct xfs_dir3_leaf_hdr)) /
+ return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) /
(uint)sizeof(struct xfs_dir2_leaf_entry);
}
@@ -591,9 +593,9 @@ xfs_da3_node_hdr_to_disk(
* Directory free space block operations
*/
static int
-xfs_dir2_free_max_bests(struct xfs_mount *mp)
+xfs_dir2_free_max_bests(struct xfs_da_geometry *geo)
{
- return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
+ return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) /
sizeof(xfs_dir2_data_off_t);
}
@@ -607,24 +609,25 @@ xfs_dir2_free_bests_p(struct xfs_dir2_free *free)
* Convert data space db to the corresponding free db.
*/
static xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
- return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
+ return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
+ (db / xfs_dir2_free_max_bests(geo));
}
/*
* Convert data space db to the corresponding index in a free db.
*/
static int
-xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
- return db % xfs_dir2_free_max_bests(mp);
+ return db % xfs_dir2_free_max_bests(geo);
}
static int
-xfs_dir3_free_max_bests(struct xfs_mount *mp)
+xfs_dir3_free_max_bests(struct xfs_da_geometry *geo)
{
- return (mp->m_dirblksize - sizeof(struct xfs_dir3_free_hdr)) /
+ return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) /
sizeof(xfs_dir2_data_off_t);
}
@@ -638,18 +641,19 @@ xfs_dir3_free_bests_p(struct xfs_dir2_free *free)
* Convert data space db to the corresponding free db.
*/
static xfs_dir2_db_t
-xfs_dir3_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
+xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
- return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp);
+ return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
+ (db / xfs_dir3_free_max_bests(geo));
}
/*
* Convert data space db to the corresponding index in a free db.
*/
static int
-xfs_dir3_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
+xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
{
- return db % xfs_dir3_free_max_bests(mp);
+ return db % xfs_dir3_free_max_bests(geo);
}
static void
diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index a19d3f8f639c..0a49b0286372 100644
--- a/fs/xfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -19,10 +19,6 @@
#ifndef __XFS_DA_FORMAT_H__
#define __XFS_DA_FORMAT_H__
-/*========================================================================
- * Directory Structure when greater than XFS_LBSIZE(mp) bytes.
- *========================================================================*/
-
/*
* This structure is common to both leaf nodes and non-leaf nodes in the Btree.
*
@@ -122,8 +118,6 @@ struct xfs_da3_icnode_hdr {
__uint16_t level;
};
-#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
-
/*
* Directory version 2.
*
@@ -330,8 +324,6 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
#define XFS_DIR2_DATA_SPACE 0
#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_DATA_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
/*
* Describe a free area in the data block.
@@ -456,8 +448,6 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
*/
#define XFS_DIR2_LEAF_SPACE 1
#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_LEAF_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
/*
* Leaf block header.
@@ -514,17 +504,6 @@ struct xfs_dir3_leaf {
#define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc)
/*
- * Get address of the bestcount field in the single-leaf block.
- */
-static inline struct xfs_dir2_leaf_tail *
-xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
-{
- return (struct xfs_dir2_leaf_tail *)
- ((char *)lp + mp->m_dirblksize -
- sizeof(struct xfs_dir2_leaf_tail));
-}
-
-/*
* Get address of the bests array in the single-leaf block.
*/
static inline __be16 *
@@ -534,123 +513,6 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
}
/*
- * DB blocks here are logical directory block numbers, not filesystem blocks.
- */
-
-/*
- * Convert dataptr to byte in file space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
-}
-
-/*
- * Convert byte in file space to dataptr. It had better be aligned.
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
-}
-
-/*
- * Convert byte in space to (DB) block
- */
-static inline xfs_dir2_db_t
-xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_db_t)
- (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog));
-}
-
-/*
- * Convert dataptr to a block number
- */
-static inline xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert byte in space to offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_data_aoff_t)(by &
- ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1));
-}
-
-/*
- * Convert dataptr to a byte offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert block and offset to byte in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
- xfs_dir2_data_aoff_t o)
-{
- return ((xfs_dir2_off_t)db <<
- (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o;
-}
-
-/*
- * Convert block (DB) to block (dablk)
- */
-static inline xfs_dablk_t
-xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert byte in space to (DA) block
- */
-static inline xfs_dablk_t
-xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
-}
-
-/*
- * Convert block and offset to dataptr
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
- xfs_dir2_data_aoff_t o)
-{
- return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
-}
-
-/*
- * Convert block (dablk) to block (DB)
- */
-static inline xfs_dir2_db_t
-xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
-{
- return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert block (dablk) to byte offset in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
-{
- return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
-}
-
-/*
* Free space block defintions for the node format.
*/
@@ -659,8 +521,6 @@ xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
*/
#define XFS_DIR2_FREE_SPACE 2
#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_FREE_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
typedef struct xfs_dir2_free_hdr {
__be32 magic; /* XFS_DIR2_FREE_MAGIC */
@@ -736,16 +596,6 @@ typedef struct xfs_dir2_block_tail {
} xfs_dir2_block_tail_t;
/*
- * Pointer to the leaf header embedded in a data block (1-block format)
- */
-static inline struct xfs_dir2_block_tail *
-xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr)
-{
- return ((struct xfs_dir2_block_tail *)
- ((char *)hdr + mp->m_dirblksize)) - 1;
-}
-
-/*
* Pointer to the leaf entries embedded in a data block (1-block format)
*/
static inline struct xfs_dir2_leaf_entry *
@@ -764,10 +614,6 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
* of an attribute name may not be unique, we may have duplicate keys. The
* internal links in the Btree are logical block offsets into the file.
*
- *========================================================================
- * Attribute structure when equal to XFS_LBSIZE(mp) bytes.
- *========================================================================
- *
* Struct leaf_entry's are packed from the top. Name/values grow from the
* bottom but are not packed. The freemap contains run-length-encoded entries
* for the free bytes after the leaf_entry's, but only the N largest such,
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/libxfs/xfs_dinode.h
index e5869b50dc41..623bbe8fd921 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/libxfs/xfs_dinode.h
@@ -89,6 +89,8 @@ typedef struct xfs_dinode {
/* structure must be padded to 64 bit alignment */
} xfs_dinode_t;
+#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc)
+
#define DI_MAX_FLUSH 0xffff
/*
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index ce16ef02997a..6cef22152fd6 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -85,38 +85,74 @@ static struct xfs_nameops xfs_ascii_ci_nameops = {
.compname = xfs_ascii_ci_compname,
};
-void
-xfs_dir_mount(
- xfs_mount_t *mp)
+int
+xfs_da_mount(
+ struct xfs_mount *mp)
{
- int nodehdr_size;
+ struct xfs_da_geometry *dageo;
+ int nodehdr_size;
- ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb));
+ ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
XFS_MAX_BLOCKSIZE);
mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
- mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
- mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
- mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
- mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
- mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
-
nodehdr_size = mp->m_dir_inode_ops->node_hdr_size;
- mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) /
+ mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
+ KM_SLEEP | KM_MAYFAIL);
+ mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
+ KM_SLEEP | KM_MAYFAIL);
+ if (!mp->m_dir_geo || !mp->m_attr_geo) {
+ kmem_free(mp->m_dir_geo);
+ kmem_free(mp->m_attr_geo);
+ return -ENOMEM;
+ }
+
+ /* set up directory geometry */
+ dageo = mp->m_dir_geo;
+ dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
+ dageo->fsblog = mp->m_sb.sb_blocklog;
+ dageo->blksize = 1 << dageo->blklog;
+ dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
+
+ /*
+ * Now we've set up the block conversion variables, we can calculate the
+ * segment block constants using the geometry structure.
+ */
+ dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET);
+ dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET);
+ dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
+ dageo->node_ents = (dageo->blksize - nodehdr_size) /
(uint)sizeof(xfs_da_node_entry_t);
- mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) /
+ dageo->magicpct = (dageo->blksize * 37) / 100;
+
+ /* set up attribute geometry - single fsb only */
+ dageo = mp->m_attr_geo;
+ dageo->blklog = mp->m_sb.sb_blocklog;
+ dageo->fsblog = mp->m_sb.sb_blocklog;
+ dageo->blksize = 1 << dageo->blklog;
+ dageo->fsbcount = 1;
+ dageo->node_ents = (dageo->blksize - nodehdr_size) /
(uint)sizeof(xfs_da_node_entry_t);
+ dageo->magicpct = (dageo->blksize * 37) / 100;
- mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
if (xfs_sb_version_hasasciici(&mp->m_sb))
mp->m_dirnameops = &xfs_ascii_ci_nameops;
else
mp->m_dirnameops = &xfs_default_nameops;
+ return 0;
+}
+
+void
+xfs_da_unmount(
+ struct xfs_mount *mp)
+{
+ kmem_free(mp->m_dir_geo);
+ kmem_free(mp->m_attr_geo);
}
/*
@@ -166,7 +202,7 @@ xfs_dir_ino_validate(
xfs_warn(mp, "Invalid inode number 0x%Lx",
(unsigned long long) ino);
XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -180,16 +216,24 @@ xfs_dir_init(
xfs_inode_t *dp,
xfs_inode_t *pdp)
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int error;
- memset((char *)&args, 0, sizeof(args));
- args.dp = dp;
- args.trans = tp;
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
+ error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
+ if (error)
return error;
- return xfs_dir2_sf_create(&args, pdp->i_ino);
+
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return -ENOMEM;
+
+ args->geo = dp->i_mount->m_dir_geo;
+ args->dp = dp;
+ args->trans = tp;
+ error = xfs_dir2_sf_create(args, pdp->i_ino);
+ kmem_free(args);
+ return error;
}
/*
@@ -205,41 +249,57 @@ xfs_dir_createname(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+ rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+ if (rval)
return rval;
XFS_STATS_INC(xs_dir_create);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = inum;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_addname(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_addname(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_addname(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return -ENOMEM;
+
+ args->geo = dp->i_mount->m_dir_geo;
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = inum;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(args, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(args, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_addname(args);
else
- rval = xfs_dir2_node_addname(&args);
+ rval = xfs_dir2_node_addname(args);
+
+out_free:
+ kmem_free(args);
return rval;
}
@@ -254,18 +314,18 @@ xfs_dir_cilookup_result(
int len)
{
if (args->cmpresult == XFS_CMP_DIFFERENT)
- return ENOENT;
+ return -ENOENT;
if (args->cmpresult != XFS_CMP_CASE ||
!(args->op_flags & XFS_DA_OP_CILOOKUP))
- return EEXIST;
+ return -EEXIST;
args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
if (!args->value)
- return ENOMEM;
+ return -ENOMEM;
memcpy(args->value, name, len);
args->valuelen = len;
- return EEXIST;
+ return -EEXIST;
}
/*
@@ -282,46 +342,67 @@ xfs_dir_lookup(
xfs_ino_t *inum, /* out: inode number */
struct xfs_name *ci_name) /* out: actual name if CI match */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_lookup);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.dp = dp;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_OKNOENT;
+ /*
+ * We need to use KM_NOFS here so that lockdep will not throw false
+ * positive deadlock warnings on a non-transactional lookup path. It is
+ * safe to recurse into inode reclaim in that case, but lockdep can't
+ * easily be taught about it. Hence KM_NOFS avoids having to add a
+ * bunch of lockdep class annotations into the reclaim path for the
+ * ilock.
+ */
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args->geo = dp->i_mount->m_dir_geo;
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->dp = dp;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_OKNOENT;
if (ci_name)
- args.op_flags |= XFS_DA_OP_CILOOKUP;
+ args->op_flags |= XFS_DA_OP_CILOOKUP;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_lookup(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_lookup(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_lookup(&args);
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_lookup(args);
+ goto out_check_rval;
+ }
+
+ rval = xfs_dir2_isblock(args, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_lookup(args);
+ goto out_check_rval;
+ }
+
+ rval = xfs_dir2_isleaf(args, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_lookup(args);
else
- rval = xfs_dir2_node_lookup(&args);
- if (rval == EEXIST)
+ rval = xfs_dir2_node_lookup(args);
+
+out_check_rval:
+ if (rval == -EEXIST)
rval = 0;
if (!rval) {
- *inum = args.inumber;
+ *inum = args->inumber;
if (ci_name) {
- ci_name->name = args.value;
- ci_name->len = args.valuelen;
+ ci_name->name = args->value;
+ ci_name->len = args->valuelen;
}
}
+out_free:
+ kmem_free(args);
return rval;
}
@@ -338,38 +419,52 @@ xfs_dir_removename(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_remove);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = ino;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_removename(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_removename(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_removename(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return -ENOMEM;
+
+ args->geo = dp->i_mount->m_dir_geo;
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = ino;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_removename(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(args, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_removename(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(args, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_removename(args);
else
- rval = xfs_dir2_node_removename(&args);
+ rval = xfs_dir2_node_removename(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -386,40 +481,55 @@ xfs_dir_replace(
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+ rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+ if (rval)
return rval;
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = inum;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
-
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_replace(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_replace(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_replace(&args);
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return -ENOMEM;
+
+ args->geo = dp->i_mount->m_dir_geo;
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = inum;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_replace(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(args, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_replace(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(args, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_replace(args);
else
- rval = xfs_dir2_node_replace(&args);
+ rval = xfs_dir2_node_replace(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -434,7 +544,7 @@ xfs_dir_canenter(
struct xfs_name *name, /* name of entry to add */
uint resblks)
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
@@ -443,29 +553,43 @@ xfs_dir_canenter(
ASSERT(S_ISDIR(dp->i_d.di_mode));
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.dp = dp;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return -ENOMEM;
+
+ args->geo = dp->i_mount->m_dir_geo;
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->dp = dp;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
XFS_DA_OP_OKNOENT;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_addname(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_addname(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_addname(&args);
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(args, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(args, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_addname(args);
else
- rval = xfs_dir2_node_addname(&args);
+ rval = xfs_dir2_node_addname(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -497,13 +621,13 @@ xfs_dir2_grow_inode(
* Set lowest possible block in the space requested.
*/
bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
- count = mp->m_dirblkfsbs;
+ count = args->geo->fsbcount;
error = xfs_da_grow_inode_int(args, &bno, count);
if (error)
return error;
- *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
+ *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno);
/*
* Update file's size if this is the data space and it grew.
@@ -525,19 +649,16 @@ xfs_dir2_grow_inode(
*/
int
xfs_dir2_isblock(
- xfs_trans_t *tp,
- xfs_inode_t *dp,
- int *vp) /* out: 1 is block, 0 is not block */
+ struct xfs_da_args *args,
+ int *vp) /* out: 1 is block, 0 is not block */
{
- xfs_fileoff_t last; /* last file offset */
- xfs_mount_t *mp;
- int rval;
+ xfs_fileoff_t last; /* last file offset */
+ int rval;
- mp = dp->i_mount;
- if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
+ if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
return rval;
- rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize;
- ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
+ rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
+ ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize);
*vp = rval;
return 0;
}
@@ -547,18 +668,15 @@ xfs_dir2_isblock(
*/
int
xfs_dir2_isleaf(
- xfs_trans_t *tp,
- xfs_inode_t *dp,
- int *vp) /* out: 1 is leaf, 0 is not leaf */
+ struct xfs_da_args *args,
+ int *vp) /* out: 1 is leaf, 0 is not leaf */
{
- xfs_fileoff_t last; /* last file offset */
- xfs_mount_t *mp;
- int rval;
+ xfs_fileoff_t last; /* last file offset */
+ int rval;
- mp = dp->i_mount;
- if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)))
+ if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
return rval;
- *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog);
+ *vp = last == args->geo->leafblk + args->geo->fsbcount;
return 0;
}
@@ -586,11 +704,11 @@ xfs_dir2_shrink_inode(
dp = args->dp;
mp = dp->i_mount;
tp = args->trans;
- da = xfs_dir2_db_to_da(mp, db);
+ da = xfs_dir2_db_to_da(args->geo, db);
/*
* Unmap the fsblock(s).
*/
- if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
+ if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount,
XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
&done))) {
/*
@@ -617,12 +735,12 @@ xfs_dir2_shrink_inode(
/*
* If it's not a data block, we're done.
*/
- if (db >= XFS_DIR2_LEAF_FIRSTDB(mp))
+ if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET))
return 0;
/*
* If the block isn't the last one in the directory, we're done.
*/
- if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0))
+ if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0))
return 0;
bno = da;
if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
@@ -631,7 +749,7 @@ xfs_dir2_shrink_inode(
*/
return error;
}
- if (db == mp->m_dirdatablk)
+ if (db == args->geo->datablk)
ASSERT(bno == 0);
else
ASSERT(bno > 0);
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index cec70e0781ab..c8e86b0b5e99 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -80,7 +80,7 @@ struct xfs_dir_ops {
struct xfs_dir3_icleaf_hdr *from);
void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to,
struct xfs_dir2_leaf *from);
- int (*leaf_max_ents)(struct xfs_mount *mp);
+ int (*leaf_max_ents)(struct xfs_da_geometry *geo);
struct xfs_dir2_leaf_entry *
(*leaf_ents_p)(struct xfs_dir2_leaf *lp);
@@ -97,10 +97,12 @@ struct xfs_dir_ops {
struct xfs_dir3_icfree_hdr *from);
void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to,
struct xfs_dir2_free *from);
- int (*free_max_bests)(struct xfs_mount *mp);
+ int (*free_max_bests)(struct xfs_da_geometry *geo);
__be16 * (*free_bests_p)(struct xfs_dir2_free *free);
- xfs_dir2_db_t (*db_to_fdb)(struct xfs_mount *mp, xfs_dir2_db_t db);
- int (*db_to_fdindex)(struct xfs_mount *mp, xfs_dir2_db_t db);
+ xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo,
+ xfs_dir2_db_t db);
+ int (*db_to_fdindex)(struct xfs_da_geometry *geo,
+ xfs_dir2_db_t db);
};
extern const struct xfs_dir_ops *
@@ -112,7 +114,9 @@ extern const struct xfs_dir_ops *
* Generic directory interface routines
*/
extern void xfs_dir_startup(void);
-extern void xfs_dir_mount(struct xfs_mount *mp);
+extern int xfs_da_mount(struct xfs_mount *mp);
+extern void xfs_da_unmount(struct xfs_mount *mp);
+
extern int xfs_dir_isempty(struct xfs_inode *dp);
extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_inode *pdp);
@@ -142,23 +146,23 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
/*
* Interface routines used by userspace utilities
*/
-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_isblock(struct xfs_da_args *args, int *r);
+extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
extern void xfs_dir2_data_freescan(struct xfs_inode *dp,
struct xfs_dir2_data_hdr *hdr, int *loghead);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_log_entry(struct xfs_da_args *args,
struct xfs_buf *bp, struct xfs_dir2_data_entry *dep);
-extern void xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_log_header(struct xfs_da_args *args,
struct xfs_buf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
- struct xfs_dir2_data_unused *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_log_unused(struct xfs_da_args *args,
+ struct xfs_buf *bp, struct xfs_dir2_data_unused *dup);
+extern void xfs_dir2_data_make_free(struct xfs_da_args *args,
struct xfs_buf *bp, xfs_dir2_data_aoff_t offset,
xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir2_data_use_free(struct xfs_da_args *args,
struct xfs_buf *bp, struct xfs_dir2_data_unused *dup,
xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
int *needlogp, int *needscanp);
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 90cdbf4b5f19..9628ceccfa02 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -89,13 +89,14 @@ xfs_dir3_block_read_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DIR3_DATA_CRC_OFF)) ||
- !xfs_dir3_block_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_dir3_block_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -107,8 +108,8 @@ xfs_dir3_block_write_verify(
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_block_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -118,7 +119,7 @@ xfs_dir3_block_write_verify(
if (bip)
hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
}
const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
@@ -135,7 +136,7 @@ xfs_dir3_block_read(
struct xfs_mount *mp = dp->i_mount;
int err;
- err = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp,
+ err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
if (!err && tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
@@ -280,8 +281,7 @@ out:
*/
static void
xfs_dir2_block_compact(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_block_tail *btp,
@@ -314,18 +314,17 @@ xfs_dir2_block_compact(
*lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
*lfloghigh -= be32_to_cpu(btp->stale) - 1;
be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
- xfs_dir2_data_make_free(tp, dp, bp,
+ xfs_dir2_data_make_free(args, bp,
(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
needlog, &needscan);
- blp += be32_to_cpu(btp->stale) - 1;
btp->stale = cpu_to_be32(1);
/*
* If we now need to rebuild the bestfree map, do so.
* This needs to happen before the next call to use_free.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, needlog);
+ xfs_dir2_data_freescan(args->dp, hdr, needlog);
}
/*
@@ -377,7 +376,7 @@ xfs_dir2_block_addname(
* Set up pointers to parts of the block.
*/
hdr = bp->b_addr;
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
@@ -393,7 +392,7 @@ xfs_dir2_block_addname(
if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
xfs_trans_brelse(tp, bp);
if (!dup)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
return 0;
}
@@ -403,7 +402,7 @@ xfs_dir2_block_addname(
if (!dup) {
/* Don't have a space reservation: return no-space. */
if (args->total == 0)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
/*
* Convert to the next larger format.
* Then add the new entry in that format.
@@ -420,7 +419,7 @@ xfs_dir2_block_addname(
* If need to compact the leaf entries, do it now.
*/
if (compact) {
- xfs_dir2_block_compact(tp, dp, bp, hdr, btp, blp, &needlog,
+ xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog,
&lfloghigh, &lfloglow);
/* recalculate blp post-compaction */
blp = xfs_dir2_block_leaf_p(btp);
@@ -455,7 +454,7 @@ xfs_dir2_block_addname(
/*
* Mark the space needed for the new leaf entry, now in use.
*/
- xfs_dir2_data_use_free(tp, dp, bp, enddup,
+ xfs_dir2_data_use_free(args, bp, enddup,
(xfs_dir2_data_aoff_t)
((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
sizeof(*blp)),
@@ -536,13 +535,13 @@ xfs_dir2_block_addname(
* Fill in the leaf entry.
*/
blp[mid].hashval = cpu_to_be32(args->hashval);
- blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+ blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
(char *)dep - (char *)hdr));
xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
/*
* Mark space for the data entry used.
*/
- xfs_dir2_data_use_free(tp, dp, bp, dup,
+ xfs_dir2_data_use_free(args, bp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
/*
@@ -560,9 +559,9 @@ xfs_dir2_block_addname(
if (needscan)
xfs_dir2_data_freescan(dp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, bp);
+ xfs_dir2_data_log_header(args, bp);
xfs_dir2_block_log_tail(tp, bp);
- xfs_dir2_data_log_entry(tp, dp, bp, dep);
+ xfs_dir2_data_log_entry(args, bp, dep);
xfs_dir3_data_check(dp, bp);
return 0;
}
@@ -581,7 +580,7 @@ xfs_dir2_block_log_leaf(
xfs_dir2_leaf_entry_t *blp;
xfs_dir2_block_tail_t *btp;
- btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
+ btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
(uint)((char *)&blp[last + 1] - (char *)hdr - 1));
@@ -598,7 +597,7 @@ xfs_dir2_block_log_tail(
xfs_dir2_data_hdr_t *hdr = bp->b_addr;
xfs_dir2_block_tail_t *btp;
- btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
+ btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);
xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
(uint)((char *)(btp + 1) - (char *)hdr - 1));
}
@@ -633,13 +632,14 @@ xfs_dir2_block_lookup(
mp = dp->i_mount;
hdr = bp->b_addr;
xfs_dir3_data_check(dp, bp);
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Get the offset from the leaf entry, to point to the data.
*/
dep = (xfs_dir2_data_entry_t *)((char *)hdr +
- xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ xfs_dir2_dataptr_to_off(args->geo,
+ be32_to_cpu(blp[ent].address)));
/*
* Fill in inode number, CI name if appropriate, release the block.
*/
@@ -647,7 +647,7 @@ xfs_dir2_block_lookup(
args->filetype = dp->d_ops->data_get_ftype(dep);
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_trans_brelse(args->trans, bp);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -685,7 +685,7 @@ xfs_dir2_block_lookup_int(
hdr = bp->b_addr;
xfs_dir3_data_check(dp, bp);
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Loop doing a binary search for our hash value.
@@ -703,7 +703,7 @@ xfs_dir2_block_lookup_int(
if (low > high) {
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
xfs_trans_brelse(tp, bp);
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
}
/*
@@ -723,7 +723,7 @@ xfs_dir2_block_lookup_int(
* Get pointer to the entry from the leaf.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr));
/*
* Compare name and if it's an exact match, return the index
* and buffer. If it's the first case-insensitive match, store
@@ -751,7 +751,7 @@ xfs_dir2_block_lookup_int(
* No match, release the buffer and return ENOENT.
*/
xfs_trans_brelse(tp, bp);
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
/*
@@ -790,18 +790,19 @@ xfs_dir2_block_removename(
tp = args->trans;
mp = dp->i_mount;
hdr = bp->b_addr;
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Point to the data entry using the leaf entry.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+ xfs_dir2_dataptr_to_off(args->geo,
+ be32_to_cpu(blp[ent].address)));
/*
* Mark the data entry's space free.
*/
needlog = needscan = 0;
- xfs_dir2_data_make_free(tp, dp, bp,
+ xfs_dir2_data_make_free(args, bp,
(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
/*
@@ -820,7 +821,7 @@ xfs_dir2_block_removename(
if (needscan)
xfs_dir2_data_freescan(dp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, bp);
+ xfs_dir2_data_log_header(args, bp);
xfs_dir3_data_check(dp, bp);
/*
* See if the size as a shortform is good enough.
@@ -865,20 +866,21 @@ xfs_dir2_block_replace(
dp = args->dp;
mp = dp->i_mount;
hdr = bp->b_addr;
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Point to the data entry we need to change.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+ xfs_dir2_dataptr_to_off(args->geo,
+ be32_to_cpu(blp[ent].address)));
ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
/*
* Change the inode number to the new value.
*/
dep->inumber = cpu_to_be64(args->inumber);
dp->d_ops->data_put_ftype(dep, args->filetype);
- xfs_dir2_data_log_entry(args->trans, dp, bp, dep);
+ xfs_dir2_data_log_entry(args, bp, dep);
xfs_dir3_data_check(dp, bp);
return 0;
}
@@ -938,7 +940,7 @@ xfs_dir2_leaf_to_block(
leaf = lbp->b_addr;
dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
ents = dp->d_ops->leaf_ents_p(leaf);
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
@@ -948,13 +950,13 @@ xfs_dir2_leaf_to_block(
* been left behind during no-space-reservation operations.
* These will show up in the leaf bests table.
*/
- while (dp->i_d.di_size > mp->m_dirblksize) {
+ while (dp->i_d.di_size > args->geo->blksize) {
int hdrsz;
hdrsz = dp->d_ops->data_entry_offset;
bestsp = xfs_dir2_leaf_bests_p(ltp);
if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
- mp->m_dirblksize - hdrsz) {
+ args->geo->blksize - hdrsz) {
if ((error =
xfs_dir2_leaf_trim_data(args, lbp,
(xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -966,7 +968,7 @@ xfs_dir2_leaf_to_block(
* Read the data block if we don't already have it, give up if it fails.
*/
if (!dbp) {
- error = xfs_dir3_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp);
if (error)
return error;
}
@@ -982,7 +984,7 @@ xfs_dir2_leaf_to_block(
/*
* Look at the last data entry.
*/
- tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1;
+ tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1;
dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free or is too short we can't do it.
@@ -1001,12 +1003,12 @@ xfs_dir2_leaf_to_block(
/*
* Use up the space at the end of the block (blp/btp).
*/
- xfs_dir2_data_use_free(tp, dp, dbp, dup, mp->m_dirblksize - size, size,
+ xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size,
&needlog, &needscan);
/*
* Initialize the block tail.
*/
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale);
btp->stale = 0;
xfs_dir2_block_log_tail(tp, dbp);
@@ -1027,11 +1029,11 @@ xfs_dir2_leaf_to_block(
if (needscan)
xfs_dir2_data_freescan(dp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(args, dbp);
/*
* Pitch the old leaf block.
*/
- error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp);
+ error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp);
if (error)
return error;
@@ -1089,7 +1091,7 @@ xfs_dir2_sf_to_block(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- return XFS_ERROR(EIO);
+ return -EIO;
}
oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data;
@@ -1140,13 +1142,13 @@ xfs_dir2_sf_to_block(
*/
dup = dp->d_ops->data_unused_p(hdr);
needlog = needscan = 0;
- xfs_dir2_data_use_free(tp, dp, bp, dup, mp->m_dirblksize - i, i, &needlog,
- &needscan);
+ xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
+ i, &needlog, &needscan);
ASSERT(needscan == 0);
/*
* Fill in the tail.
*/
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */
btp->stale = 0;
blp = xfs_dir2_block_leaf_p(btp);
@@ -1154,7 +1156,7 @@ xfs_dir2_sf_to_block(
/*
* Remove the freespace, we'll manage it.
*/
- xfs_dir2_data_use_free(tp, dp, bp, dup,
+ xfs_dir2_data_use_free(args, bp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
be16_to_cpu(dup->length), &needlog, &needscan);
/*
@@ -1167,9 +1169,9 @@ xfs_dir2_sf_to_block(
dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
tagp = dp->d_ops->data_entry_tag_p(dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
- xfs_dir2_data_log_entry(tp, dp, bp, dep);
+ xfs_dir2_data_log_entry(args, bp, dep);
blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
- blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+ blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
(char *)dep - (char *)hdr));
/*
* Create entry for ..
@@ -1181,9 +1183,9 @@ xfs_dir2_sf_to_block(
dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
tagp = dp->d_ops->data_entry_tag_p(dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
- xfs_dir2_data_log_entry(tp, dp, bp, dep);
+ xfs_dir2_data_log_entry(args, bp, dep);
blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
- blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+ blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
(char *)dep - (char *)hdr));
offset = dp->d_ops->data_first_offset;
/*
@@ -1215,7 +1217,7 @@ xfs_dir2_sf_to_block(
dup->length = cpu_to_be16(newoffset - offset);
*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
((char *)dup - (char *)hdr));
- xfs_dir2_data_log_unused(tp, bp, dup);
+ xfs_dir2_data_log_unused(args, bp, dup);
xfs_dir2_data_freeinsert(hdr,
dp->d_ops->data_bestfree_p(hdr),
dup, &dummy);
@@ -1232,12 +1234,12 @@ xfs_dir2_sf_to_block(
memcpy(dep->name, sfep->name, dep->namelen);
tagp = dp->d_ops->data_entry_tag_p(dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
- xfs_dir2_data_log_entry(tp, dp, bp, dep);
+ xfs_dir2_data_log_entry(args, bp, dep);
name.name = sfep->name;
name.len = sfep->namelen;
blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
hashname(&name));
- blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
+ blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
(char *)dep - (char *)hdr));
offset = (int)((char *)(tagp + 1) - (char *)hdr);
if (++i == sfp->count)
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 70acff4ee173..fdd803fecb8e 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -63,8 +63,10 @@ __xfs_dir3_data_check(
int stale; /* count of stale leaves */
struct xfs_name name;
const struct xfs_dir_ops *ops;
+ struct xfs_da_geometry *geo;
mp = bp->b_target->bt_mount;
+ geo = mp->m_dir_geo;
/*
* We can be passed a null dp here from a verifier, so we need to go the
@@ -78,7 +80,7 @@ __xfs_dir3_data_check(
switch (hdr->magic) {
case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(geo, hdr);
lep = xfs_dir2_block_leaf_p(btp);
endp = (char *)lep;
@@ -94,11 +96,11 @@ __xfs_dir3_data_check(
break;
case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
- endp = (char *)hdr + mp->m_dirblksize;
+ endp = (char *)hdr + geo->blksize;
break;
default:
XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
/*
@@ -172,9 +174,9 @@ __xfs_dir3_data_check(
lastfree = 0;
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
- addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
- (xfs_dir2_data_aoff_t)
- ((char *)dep - (char *)hdr));
+ addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
+ (xfs_dir2_data_aoff_t)
+ ((char *)dep - (char *)hdr));
name.name = dep->name;
name.len = dep->namelen;
hash = mp->m_dirnameops->hashname(&name);
@@ -241,7 +243,6 @@ static void
xfs_dir3_data_reada_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_data_hdr *hdr = bp->b_addr;
switch (hdr->magic) {
@@ -255,8 +256,8 @@ xfs_dir3_data_reada_verify(
xfs_dir3_data_verify(bp);
return;
default:
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
break;
}
}
@@ -267,13 +268,14 @@ xfs_dir3_data_read_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DIR3_DATA_CRC_OFF)) ||
- !xfs_dir3_data_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_dir3_data_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -285,8 +287,8 @@ xfs_dir3_data_write_verify(
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_data_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -296,7 +298,7 @@ xfs_dir3_data_write_verify(
if (bip)
hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
}
const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
@@ -329,12 +331,11 @@ xfs_dir3_data_read(
int
xfs_dir3_data_readahead(
- struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
xfs_daddr_t mapped_bno)
{
- return xfs_da_reada_buf(tp, dp, bno, mapped_bno,
+ return xfs_da_reada_buf(dp, bno, mapped_bno,
XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
}
@@ -510,6 +511,7 @@ xfs_dir2_data_freescan(
struct xfs_dir2_data_free *bf;
char *endp; /* end of block's data */
char *p; /* current entry pointer */
+ struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
@@ -528,10 +530,10 @@ xfs_dir2_data_freescan(
p = (char *)dp->d_ops->data_entry_p(hdr);
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
- btp = xfs_dir2_block_tail_p(dp->i_mount, hdr);
+ btp = xfs_dir2_block_tail_p(geo, hdr);
endp = (char *)xfs_dir2_block_leaf_p(btp);
} else
- endp = (char *)hdr + dp->i_mount->m_dirblksize;
+ endp = (char *)hdr + geo->blksize;
/*
* Loop over the block's entries.
*/
@@ -585,8 +587,8 @@ xfs_dir3_data_init(
/*
* Get the buffer set up for the block.
*/
- error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
- XFS_DATA_FORK);
+ error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno),
+ -1, &bp, XFS_DATA_FORK);
if (error)
return error;
bp->b_ops = &xfs_dir3_data_buf_ops;
@@ -621,15 +623,15 @@ xfs_dir3_data_init(
dup = dp->d_ops->data_unused_p(hdr);
dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
- t = mp->m_dirblksize - (uint)dp->d_ops->data_entry_offset;
+ t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset;
bf[0].length = cpu_to_be16(t);
dup->length = cpu_to_be16(t);
*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
/*
* Log it and return it.
*/
- xfs_dir2_data_log_header(tp, dp, bp);
- xfs_dir2_data_log_unused(tp, bp, dup);
+ xfs_dir2_data_log_header(args, bp);
+ xfs_dir2_data_log_unused(args, bp, dup);
*bpp = bp;
return 0;
}
@@ -639,8 +641,7 @@ xfs_dir3_data_init(
*/
void
xfs_dir2_data_log_entry(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
xfs_dir2_data_entry_t *dep) /* data entry pointer */
{
@@ -651,8 +652,8 @@ xfs_dir2_data_log_entry(
hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
- xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
- (uint)((char *)(dp->d_ops->data_entry_tag_p(dep) + 1) -
+ xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr),
+ (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) -
(char *)hdr - 1));
}
@@ -661,8 +662,7 @@ xfs_dir2_data_log_entry(
*/
void
xfs_dir2_data_log_header(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp)
{
#ifdef DEBUG
@@ -674,7 +674,8 @@ xfs_dir2_data_log_header(
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
#endif
- xfs_trans_log_buf(tp, bp, 0, dp->d_ops->data_entry_offset - 1);
+ xfs_trans_log_buf(args->trans, bp, 0,
+ args->dp->d_ops->data_entry_offset - 1);
}
/*
@@ -682,7 +683,7 @@ xfs_dir2_data_log_header(
*/
void
xfs_dir2_data_log_unused(
- struct xfs_trans *tp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
xfs_dir2_data_unused_t *dup) /* data unused pointer */
{
@@ -696,13 +697,13 @@ xfs_dir2_data_log_unused(
/*
* Log the first part of the unused entry.
*/
- xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
+ xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr),
(uint)((char *)&dup->length + sizeof(dup->length) -
1 - (char *)hdr));
/*
* Log the end (tag) of the unused entry.
*/
- xfs_trans_log_buf(tp, bp,
+ xfs_trans_log_buf(args->trans, bp,
(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
sizeof(xfs_dir2_data_off_t) - 1));
@@ -714,8 +715,7 @@ xfs_dir2_data_log_unused(
*/
void
xfs_dir2_data_make_free(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
xfs_dir2_data_aoff_t offset, /* starting byte offset */
xfs_dir2_data_aoff_t len, /* length in bytes */
@@ -725,14 +725,12 @@ xfs_dir2_data_make_free(
xfs_dir2_data_hdr_t *hdr; /* data block pointer */
xfs_dir2_data_free_t *dfp; /* bestfree pointer */
char *endptr; /* end of data area */
- xfs_mount_t *mp; /* filesystem mount point */
int needscan; /* need to regen bestfree */
xfs_dir2_data_unused_t *newdup; /* new unused entry */
xfs_dir2_data_unused_t *postdup; /* unused entry after us */
xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
struct xfs_dir2_data_free *bf;
- mp = tp->t_mountp;
hdr = bp->b_addr;
/*
@@ -740,20 +738,20 @@ xfs_dir2_data_make_free(
*/
if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
- endptr = (char *)hdr + mp->m_dirblksize;
+ endptr = (char *)hdr + args->geo->blksize;
else {
xfs_dir2_block_tail_t *btp; /* block tail */
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
}
/*
* If this isn't the start of the block, then back up to
* the previous entry and see if it's free.
*/
- if (offset > dp->d_ops->data_entry_offset) {
+ if (offset > args->dp->d_ops->data_entry_offset) {
__be16 *tagp; /* tag just before us */
tagp = (__be16 *)((char *)hdr + offset) - 1;
@@ -779,7 +777,7 @@ xfs_dir2_data_make_free(
* Previous and following entries are both free,
* merge everything into a single free entry.
*/
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = args->dp->d_ops->data_bestfree_p(hdr);
if (prevdup && postdup) {
xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */
@@ -801,7 +799,7 @@ xfs_dir2_data_make_free(
be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
*xfs_dir2_data_unused_tag_p(prevdup) =
cpu_to_be16((char *)prevdup - (char *)hdr);
- xfs_dir2_data_log_unused(tp, bp, prevdup);
+ xfs_dir2_data_log_unused(args, bp, prevdup);
if (!needscan) {
/*
* Has to be the case that entries 0 and 1 are
@@ -836,7 +834,7 @@ xfs_dir2_data_make_free(
be16_add_cpu(&prevdup->length, len);
*xfs_dir2_data_unused_tag_p(prevdup) =
cpu_to_be16((char *)prevdup - (char *)hdr);
- xfs_dir2_data_log_unused(tp, bp, prevdup);
+ xfs_dir2_data_log_unused(args, bp, prevdup);
/*
* If the previous entry was in the table, the new entry
* is longer, so it will be in the table too. Remove
@@ -864,7 +862,7 @@ xfs_dir2_data_make_free(
newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
*xfs_dir2_data_unused_tag_p(newdup) =
cpu_to_be16((char *)newdup - (char *)hdr);
- xfs_dir2_data_log_unused(tp, bp, newdup);
+ xfs_dir2_data_log_unused(args, bp, newdup);
/*
* If the following entry was in the table, the new entry
* is longer, so it will be in the table too. Remove
@@ -891,7 +889,7 @@ xfs_dir2_data_make_free(
newdup->length = cpu_to_be16(len);
*xfs_dir2_data_unused_tag_p(newdup) =
cpu_to_be16((char *)newdup - (char *)hdr);
- xfs_dir2_data_log_unused(tp, bp, newdup);
+ xfs_dir2_data_log_unused(args, bp, newdup);
xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
}
*needscanp = needscan;
@@ -902,8 +900,7 @@ xfs_dir2_data_make_free(
*/
void
xfs_dir2_data_use_free(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
xfs_dir2_data_unused_t *dup, /* unused entry */
xfs_dir2_data_aoff_t offset, /* starting offset to use */
@@ -934,7 +931,7 @@ xfs_dir2_data_use_free(
* Look up the entry in the bestfree table.
*/
oldlen = be16_to_cpu(dup->length);
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = args->dp->d_ops->data_bestfree_p(hdr);
dfp = xfs_dir2_data_freefind(hdr, bf, dup);
ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
/*
@@ -966,7 +963,7 @@ xfs_dir2_data_use_free(
newdup->length = cpu_to_be16(oldlen - len);
*xfs_dir2_data_unused_tag_p(newdup) =
cpu_to_be16((char *)newdup - (char *)hdr);
- xfs_dir2_data_log_unused(tp, bp, newdup);
+ xfs_dir2_data_log_unused(args, bp, newdup);
/*
* If it was in the table, remove it and add the new one.
*/
@@ -994,7 +991,7 @@ xfs_dir2_data_use_free(
newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
*xfs_dir2_data_unused_tag_p(newdup) =
cpu_to_be16((char *)newdup - (char *)hdr);
- xfs_dir2_data_log_unused(tp, bp, newdup);
+ xfs_dir2_data_log_unused(args, bp, newdup);
/*
* If it was in the table, remove it and add the new one.
*/
@@ -1022,13 +1019,13 @@ xfs_dir2_data_use_free(
newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
*xfs_dir2_data_unused_tag_p(newdup) =
cpu_to_be16((char *)newdup - (char *)hdr);
- xfs_dir2_data_log_unused(tp, bp, newdup);
+ xfs_dir2_data_log_unused(args, bp, newdup);
newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
*xfs_dir2_data_unused_tag_p(newdup2) =
cpu_to_be16((char *)newdup2 - (char *)hdr);
- xfs_dir2_data_log_unused(tp, bp, newdup2);
+ xfs_dir2_data_log_unused(args, bp, newdup2);
/*
* If the old entry was in the table, we need to scan
* if the 3rd entry was valid, since these entries
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index ae47ec6e16c4..a19174eb3cb2 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -41,9 +41,10 @@
*/
static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
int *indexp, struct xfs_buf **dbpp);
-static void xfs_dir3_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
- int first, int last);
-static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
+static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args,
+ struct xfs_buf *bp, int first, int last);
+static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
+ struct xfs_buf *bp);
/*
* Check the internal consistency of a leaf1 block.
@@ -92,6 +93,7 @@ xfs_dir3_leaf_check_int(
int i;
const struct xfs_dir_ops *ops;
struct xfs_dir3_icleaf_hdr leafhdr;
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
/*
* we can be passed a null dp here from a verifier, so we need to go the
@@ -105,14 +107,14 @@ xfs_dir3_leaf_check_int(
}
ents = ops->leaf_ents_p(leaf);
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(geo, leaf);
/*
* XXX (dgc): This value is not restrictive enough.
* Should factor in the size of the bests table as well.
* We can deduce a value for that from di_size.
*/
- if (hdr->count > ops->leaf_max_ents(mp))
+ if (hdr->count > ops->leaf_max_ents(geo))
return false;
/* Leaves and bests don't overlap in leaf format. */
@@ -179,13 +181,14 @@ __read_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DIR3_LEAF_CRC_OFF)) ||
- !xfs_dir3_leaf_verify(bp, magic)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_dir3_leaf_verify(bp, magic))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -198,8 +201,8 @@ __write_verify(
struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_leaf_verify(bp, magic)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -209,7 +212,7 @@ __write_verify(
if (bip)
hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
}
static void
@@ -322,7 +325,7 @@ xfs_dir3_leaf_init(
if (type == XFS_DIR2_LEAF1_MAGIC) {
struct xfs_dir2_leaf_tail *ltp;
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
ltp->bestcount = 0;
bp->b_ops = &xfs_dir3_leaf1_buf_ops;
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF);
@@ -346,18 +349,18 @@ xfs_dir3_leaf_get_buf(
int error;
ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
- ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
- bno < XFS_DIR2_FREE_FIRSTDB(mp));
+ ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) &&
+ bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
- error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
- XFS_DATA_FORK);
+ error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno),
+ -1, &bp, XFS_DATA_FORK);
if (error)
return error;
xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
- xfs_dir3_leaf_log_header(tp, dp, bp);
+ xfs_dir3_leaf_log_header(args, bp);
if (magic == XFS_DIR2_LEAF1_MAGIC)
- xfs_dir3_leaf_log_tail(tp, bp);
+ xfs_dir3_leaf_log_tail(args, bp);
*bpp = bp;
return 0;
}
@@ -402,8 +405,8 @@ xfs_dir2_block_to_leaf(
if ((error = xfs_da_grow_inode(args, &blkno))) {
return error;
}
- ldb = xfs_dir2_da_to_db(mp, blkno);
- ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
+ ldb = xfs_dir2_da_to_db(args->geo, blkno);
+ ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET));
/*
* Initialize the leaf block, get a buffer for it.
*/
@@ -414,7 +417,7 @@ xfs_dir2_block_to_leaf(
leaf = lbp->b_addr;
hdr = dbp->b_addr;
xfs_dir3_data_check(dp, dbp);
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
bf = dp->d_ops->data_bestfree_p(hdr);
ents = dp->d_ops->leaf_ents_p(leaf);
@@ -426,23 +429,23 @@ xfs_dir2_block_to_leaf(
leafhdr.count = be32_to_cpu(btp->count);
leafhdr.stale = be32_to_cpu(btp->stale);
dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
+ xfs_dir3_leaf_log_header(args, lbp);
/*
* Could compact these but I think we always do the conversion
* after squeezing out stale entries.
*/
memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(tp, dp, lbp, 0, leafhdr.count - 1);
+ xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1);
needscan = 0;
needlog = 1;
/*
* Make the space formerly occupied by the leaf entries and block
* tail be free.
*/
- xfs_dir2_data_make_free(tp, dp, dbp,
+ xfs_dir2_data_make_free(args, dbp,
(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
- (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
+ (xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize -
(char *)blp),
&needlog, &needscan);
/*
@@ -460,7 +463,7 @@ xfs_dir2_block_to_leaf(
/*
* Set up leaf tail and bests table.
*/
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
ltp->bestcount = cpu_to_be32(1);
bestsp = xfs_dir2_leaf_bests_p(ltp);
bestsp[0] = bf[0].length;
@@ -468,10 +471,10 @@ xfs_dir2_block_to_leaf(
* Log the data header and leaf bests table.
*/
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(args, dbp);
xfs_dir3_leaf_check(dp, lbp);
xfs_dir3_data_check(dp, dbp);
- xfs_dir3_leaf_log_bests(tp, lbp, 0, 0);
+ xfs_dir3_leaf_log_bests(args, lbp, 0, 0);
return 0;
}
@@ -640,7 +643,7 @@ xfs_dir2_leaf_addname(
tp = args->trans;
mp = dp->i_mount;
- error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
+ error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
if (error)
return error;
@@ -652,7 +655,7 @@ xfs_dir2_leaf_addname(
*/
index = xfs_dir2_leaf_search_hash(args, lbp);
leaf = lbp->b_addr;
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
ents = dp->d_ops->leaf_ents_p(leaf);
dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
@@ -669,7 +672,7 @@ xfs_dir2_leaf_addname(
index++, lep++) {
if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
continue;
- i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+ i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
ASSERT(i < be32_to_cpu(ltp->bestcount));
ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
if (be16_to_cpu(bestsp[i]) >= length) {
@@ -728,7 +731,7 @@ xfs_dir2_leaf_addname(
if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
args->total == 0) {
xfs_trans_brelse(tp, lbp);
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
/*
* Convert to node form.
@@ -752,7 +755,7 @@ xfs_dir2_leaf_addname(
*/
if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
xfs_trans_brelse(tp, lbp);
- return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
+ return use_block == -1 ? -ENOSPC : 0;
}
/*
* If no allocations are allowed, return now before we've
@@ -760,7 +763,7 @@ xfs_dir2_leaf_addname(
*/
if (args->total == 0 && use_block == -1) {
xfs_trans_brelse(tp, lbp);
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
/*
* Need to compact the leaf entries, removing stale ones.
@@ -809,14 +812,15 @@ xfs_dir2_leaf_addname(
memmove(&bestsp[0], &bestsp[1],
be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
be32_add_cpu(&ltp->bestcount, 1);
- xfs_dir3_leaf_log_tail(tp, lbp);
- xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+ xfs_dir3_leaf_log_tail(args, lbp);
+ xfs_dir3_leaf_log_bests(args, lbp, 0,
+ be32_to_cpu(ltp->bestcount) - 1);
}
/*
* If we're filling in a previously empty block just log it.
*/
else
- xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
+ xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
hdr = dbp->b_addr;
bf = dp->d_ops->data_bestfree_p(hdr);
bestsp[use_block] = bf[0].length;
@@ -827,8 +831,8 @@ xfs_dir2_leaf_addname(
* Just read that one in.
*/
error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(mp, use_block),
- -1, &dbp);
+ xfs_dir2_db_to_da(args->geo, use_block),
+ -1, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -847,7 +851,7 @@ xfs_dir2_leaf_addname(
/*
* Mark the initial part of our freespace in use for the new entry.
*/
- xfs_dir2_data_use_free(tp, dp, dbp, dup,
+ xfs_dir2_data_use_free(args, dbp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
/*
@@ -869,8 +873,8 @@ xfs_dir2_leaf_addname(
* Need to log the data block's header.
*/
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
- xfs_dir2_data_log_entry(tp, dp, dbp, dep);
+ xfs_dir2_data_log_header(args, dbp);
+ xfs_dir2_data_log_entry(args, dbp, dep);
/*
* If the bests table needs to be changed, do it.
* Log the change unless we've already done that.
@@ -878,7 +882,7 @@ xfs_dir2_leaf_addname(
if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
bestsp[use_block] = bf[0].length;
if (!grown)
- xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
+ xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
}
lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
@@ -888,14 +892,15 @@ xfs_dir2_leaf_addname(
* Fill in the new leaf entry.
*/
lep->hashval = cpu_to_be32(args->hashval);
- lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
+ lep->address = cpu_to_be32(
+ xfs_dir2_db_off_to_dataptr(args->geo, use_block,
be16_to_cpu(*tagp)));
/*
* Log the leaf fields and give up the buffers.
*/
dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
- xfs_dir3_leaf_log_ents(tp, dp, lbp, lfloglow, lfloghigh);
+ xfs_dir3_leaf_log_header(args, lbp);
+ xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh);
xfs_dir3_leaf_check(dp, lbp);
xfs_dir3_data_check(dp, dbp);
return 0;
@@ -947,9 +952,9 @@ xfs_dir3_leaf_compact(
leafhdr->stale = 0;
dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr);
- xfs_dir3_leaf_log_header(args->trans, dp, bp);
+ xfs_dir3_leaf_log_header(args, bp);
if (loglow != -1)
- xfs_dir3_leaf_log_ents(args->trans, dp, bp, loglow, to - 1);
+ xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1);
}
/*
@@ -1051,7 +1056,7 @@ xfs_dir3_leaf_compact_x1(
*/
static void
xfs_dir3_leaf_log_bests(
- xfs_trans_t *tp, /* transaction pointer */
+ struct xfs_da_args *args,
struct xfs_buf *bp, /* leaf buffer */
int first, /* first entry to log */
int last) /* last entry to log */
@@ -1064,10 +1069,11 @@ xfs_dir3_leaf_log_bests(
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC));
- ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
firstb = xfs_dir2_leaf_bests_p(ltp) + first;
lastb = xfs_dir2_leaf_bests_p(ltp) + last;
- xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
+ xfs_trans_log_buf(args->trans, bp,
+ (uint)((char *)firstb - (char *)leaf),
(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
}
@@ -1076,8 +1082,7 @@ xfs_dir3_leaf_log_bests(
*/
void
xfs_dir3_leaf_log_ents(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
int first,
int last)
@@ -1092,10 +1097,11 @@ xfs_dir3_leaf_log_ents(
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
- ents = dp->d_ops->leaf_ents_p(leaf);
+ ents = args->dp->d_ops->leaf_ents_p(leaf);
firstlep = &ents[first];
lastlep = &ents[last];
- xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
+ xfs_trans_log_buf(args->trans, bp,
+ (uint)((char *)firstlep - (char *)leaf),
(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
}
@@ -1104,8 +1110,7 @@ xfs_dir3_leaf_log_ents(
*/
void
xfs_dir3_leaf_log_header(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp)
{
struct xfs_dir2_leaf *leaf = bp->b_addr;
@@ -1115,8 +1120,9 @@ xfs_dir3_leaf_log_header(
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
- xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
- dp->d_ops->leaf_hdr_size - 1);
+ xfs_trans_log_buf(args->trans, bp,
+ (uint)((char *)&leaf->hdr - (char *)leaf),
+ args->dp->d_ops->leaf_hdr_size - 1);
}
/*
@@ -1124,21 +1130,20 @@ xfs_dir3_leaf_log_header(
*/
STATIC void
xfs_dir3_leaf_log_tail(
- struct xfs_trans *tp,
+ struct xfs_da_args *args,
struct xfs_buf *bp)
{
struct xfs_dir2_leaf *leaf = bp->b_addr;
xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
- struct xfs_mount *mp = tp->t_mountp;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
- xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
- (uint)(mp->m_dirblksize - 1));
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+ xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf),
+ (uint)(args->geo->blksize - 1));
}
/*
@@ -1184,7 +1189,7 @@ xfs_dir2_leaf_lookup(
*/
dep = (xfs_dir2_data_entry_t *)
((char *)dbp->b_addr +
- xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
+ xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
/*
* Return the found inode number & CI name if appropriate
*/
@@ -1193,7 +1198,7 @@ xfs_dir2_leaf_lookup(
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_trans_brelse(tp, dbp);
xfs_trans_brelse(tp, lbp);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -1230,7 +1235,7 @@ xfs_dir2_leaf_lookup_int(
tp = args->trans;
mp = dp->i_mount;
- error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
+ error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
if (error)
return error;
@@ -1259,7 +1264,8 @@ xfs_dir2_leaf_lookup_int(
/*
* Get the new data block number.
*/
- newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+ newdb = xfs_dir2_dataptr_to_db(args->geo,
+ be32_to_cpu(lep->address));
/*
* If it's not the same as the old data block number,
* need to pitch the old one and read the new one.
@@ -1268,8 +1274,8 @@ xfs_dir2_leaf_lookup_int(
if (dbp)
xfs_trans_brelse(tp, dbp);
error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(mp, newdb),
- -1, &dbp);
+ xfs_dir2_db_to_da(args->geo, newdb),
+ -1, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1280,7 +1286,8 @@ xfs_dir2_leaf_lookup_int(
* Point to the data entry.
*/
dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
- xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ xfs_dir2_dataptr_to_off(args->geo,
+ be32_to_cpu(lep->address)));
/*
* Compare name and if it's an exact match, return the index
* and buffer. If it's the first case-insensitive match, store
@@ -1309,8 +1316,8 @@ xfs_dir2_leaf_lookup_int(
if (cidb != curdb) {
xfs_trans_brelse(tp, dbp);
error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(mp, cidb),
- -1, &dbp);
+ xfs_dir2_db_to_da(args->geo, cidb),
+ -1, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1320,13 +1327,13 @@ xfs_dir2_leaf_lookup_int(
return 0;
}
/*
- * No match found, return ENOENT.
+ * No match found, return -ENOENT.
*/
ASSERT(cidb == -1);
if (dbp)
xfs_trans_brelse(tp, dbp);
xfs_trans_brelse(tp, lbp);
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
/*
@@ -1379,18 +1386,18 @@ xfs_dir2_leaf_removename(
* Point to the leaf entry, use that to point to the data entry.
*/
lep = &ents[index];
- db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
- dep = (xfs_dir2_data_entry_t *)
- ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+ xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
needscan = needlog = 0;
oldbest = be16_to_cpu(bf[0].length);
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
/*
* Mark the former data entry unused.
*/
- xfs_dir2_data_make_free(tp, dp, dbp,
+ xfs_dir2_data_make_free(args, dbp,
(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
/*
@@ -1398,10 +1405,10 @@ xfs_dir2_leaf_removename(
*/
leafhdr.stale++;
dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
+ xfs_dir3_leaf_log_header(args, lbp);
lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
- xfs_dir3_leaf_log_ents(tp, dp, lbp, index, index);
+ xfs_dir3_leaf_log_ents(args, lbp, index, index);
/*
* Scan the freespace in the data block again if necessary,
@@ -1410,22 +1417,22 @@ xfs_dir2_leaf_removename(
if (needscan)
xfs_dir2_data_freescan(dp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(args, dbp);
/*
* If the longest freespace in the data block has changed,
* put the new value in the bests table and log that.
*/
if (be16_to_cpu(bf[0].length) != oldbest) {
bestsp[db] = bf[0].length;
- xfs_dir3_leaf_log_bests(tp, lbp, db, db);
+ xfs_dir3_leaf_log_bests(args, lbp, db, db);
}
xfs_dir3_data_check(dp, dbp);
/*
* If the data block is now empty then get rid of the data block.
*/
if (be16_to_cpu(bf[0].length) ==
- mp->m_dirblksize - dp->d_ops->data_entry_offset) {
- ASSERT(db != mp->m_dirdatablk);
+ args->geo->blksize - dp->d_ops->data_entry_offset) {
+ ASSERT(db != args->geo->datablk);
if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
/*
* Nope, can't get rid of it because it caused
@@ -1433,7 +1440,7 @@ xfs_dir2_leaf_removename(
* Just go on, returning success, leaving the
* empty block in place.
*/
- if (error == ENOSPC && args->total == 0)
+ if (error == -ENOSPC && args->total == 0)
error = 0;
xfs_dir3_leaf_check(dp, lbp);
return error;
@@ -1458,15 +1465,16 @@ xfs_dir2_leaf_removename(
memmove(&bestsp[db - i], bestsp,
(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
be32_add_cpu(&ltp->bestcount, -(db - i));
- xfs_dir3_leaf_log_tail(tp, lbp);
- xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+ xfs_dir3_leaf_log_tail(args, lbp);
+ xfs_dir3_leaf_log_bests(args, lbp, 0,
+ be32_to_cpu(ltp->bestcount) - 1);
} else
bestsp[db] = cpu_to_be16(NULLDATAOFF);
}
/*
* If the data block was not the first one, drop it.
*/
- else if (db != mp->m_dirdatablk)
+ else if (db != args->geo->datablk)
dbp = NULL;
xfs_dir3_leaf_check(dp, lbp);
@@ -1514,7 +1522,7 @@ xfs_dir2_leaf_replace(
*/
dep = (xfs_dir2_data_entry_t *)
((char *)dbp->b_addr +
- xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
+ xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
ASSERT(args->inumber != be64_to_cpu(dep->inumber));
/*
* Put the new inode number in, log it.
@@ -1522,7 +1530,7 @@ xfs_dir2_leaf_replace(
dep->inumber = cpu_to_be64(args->inumber);
dp->d_ops->data_put_ftype(dep, args->filetype);
tp = args->trans;
- xfs_dir2_data_log_entry(tp, dp, dbp, dep);
+ xfs_dir2_data_log_entry(args, dbp, dep);
xfs_dir3_leaf_check(dp, lbp);
xfs_trans_brelse(tp, lbp);
return 0;
@@ -1608,12 +1616,13 @@ xfs_dir2_leaf_trim_data(
/*
* Read the offending data block. We need its buffer.
*/
- error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
+ error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db),
+ -1, &dbp);
if (error)
return error;
leaf = lbp->b_addr;
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
#ifdef DEBUG
{
@@ -1623,7 +1632,7 @@ xfs_dir2_leaf_trim_data(
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
ASSERT(be16_to_cpu(bf[0].length) ==
- mp->m_dirblksize - dp->d_ops->data_entry_offset);
+ args->geo->blksize - dp->d_ops->data_entry_offset);
ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
}
#endif
@@ -1632,7 +1641,7 @@ xfs_dir2_leaf_trim_data(
* Get rid of the data block.
*/
if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
xfs_trans_brelse(tp, dbp);
return error;
}
@@ -1642,8 +1651,8 @@ xfs_dir2_leaf_trim_data(
bestsp = xfs_dir2_leaf_bests_p(ltp);
be32_add_cpu(&ltp->bestcount, -1);
memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
- xfs_dir3_leaf_log_tail(tp, lbp);
- xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+ xfs_dir3_leaf_log_tail(args, lbp);
+ xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
return 0;
}
@@ -1707,22 +1716,22 @@ xfs_dir2_node_to_leaf(
/*
* Get the last offset in the file.
*/
- if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
+ if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {
return error;
}
- fo -= mp->m_dirblkfsbs;
+ fo -= args->geo->fsbcount;
/*
* If there are freespace blocks other than the first one,
* take this opportunity to remove trailing empty freespace blocks
* that may have been left behind during no-space-reservation
* operations.
*/
- while (fo > mp->m_dirfreeblk) {
+ while (fo > args->geo->freeblk) {
if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
return error;
}
if (rval)
- fo -= mp->m_dirblkfsbs;
+ fo -= args->geo->fsbcount;
else
return 0;
}
@@ -1735,7 +1744,7 @@ xfs_dir2_node_to_leaf(
/*
* If it's not the single leaf block, give up.
*/
- if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
+ if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize)
return 0;
lbp = state->path.blk[0].bp;
leaf = lbp->b_addr;
@@ -1747,7 +1756,7 @@ xfs_dir2_node_to_leaf(
/*
* Read the freespace block.
*/
- error = xfs_dir2_free_read(tp, dp, mp->m_dirfreeblk, &fbp);
+ error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp);
if (error)
return error;
free = fbp->b_addr;
@@ -1759,7 +1768,7 @@ xfs_dir2_node_to_leaf(
* Now see if the leafn and free data will fit in a leaf1.
* If not, release the buffer and give up.
*/
- if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > mp->m_dirblksize) {
+ if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) {
xfs_trans_brelse(tp, fbp);
return 0;
}
@@ -1779,7 +1788,7 @@ xfs_dir2_node_to_leaf(
/*
* Set up the leaf tail from the freespace block.
*/
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
ltp->bestcount = cpu_to_be32(freehdr.nvalid);
/*
@@ -1789,22 +1798,24 @@ xfs_dir2_node_to_leaf(
freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
- xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
- xfs_dir3_leaf_log_tail(tp, lbp);
+ xfs_dir3_leaf_log_header(args, lbp);
+ xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+ xfs_dir3_leaf_log_tail(args, lbp);
xfs_dir3_leaf_check(dp, lbp);
/*
* Get rid of the freespace block.
*/
- error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
+ error = xfs_dir2_shrink_inode(args,
+ xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET),
+ fbp);
if (error) {
/*
* This can't fail here because it can only happen when
* punching out the middle of an extent, and this is an
* isolated block.
*/
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
return error;
}
fbp = NULL;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 56369d4509d5..2ae6ac2c11ae 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -115,13 +115,14 @@ xfs_dir3_free_read_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DIR3_FREE_CRC_OFF)) ||
- !xfs_dir3_free_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_dir3_free_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -133,8 +134,8 @@ xfs_dir3_free_write_verify(
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_free_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -144,7 +145,7 @@ xfs_dir3_free_write_verify(
if (bip)
hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
}
const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
@@ -194,17 +195,18 @@ xfs_dir2_free_try_read(
static int
xfs_dir3_free_get_buf(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ xfs_da_args_t *args,
xfs_dir2_db_t fbno,
struct xfs_buf **bpp)
{
+ struct xfs_trans *tp = args->trans;
+ struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
struct xfs_buf *bp;
int error;
struct xfs_dir3_icfree_hdr hdr;
- error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fbno),
+ error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno),
-1, &bp, XFS_DATA_FORK);
if (error)
return error;
@@ -239,8 +241,7 @@ xfs_dir3_free_get_buf(
*/
STATIC void
xfs_dir2_free_log_bests(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
int first, /* first entry to log */
int last) /* last entry to log */
@@ -249,10 +250,10 @@ xfs_dir2_free_log_bests(
__be16 *bests;
free = bp->b_addr;
- bests = dp->d_ops->free_bests_p(free);
+ bests = args->dp->d_ops->free_bests_p(free);
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
- xfs_trans_log_buf(tp, bp,
+ xfs_trans_log_buf(args->trans, bp,
(uint)((char *)&bests[first] - (char *)free),
(uint)((char *)&bests[last] - (char *)free +
sizeof(bests[0]) - 1));
@@ -263,8 +264,7 @@ xfs_dir2_free_log_bests(
*/
static void
xfs_dir2_free_log_header(
- struct xfs_trans *tp,
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
struct xfs_buf *bp)
{
#ifdef DEBUG
@@ -274,7 +274,8 @@ xfs_dir2_free_log_header(
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
#endif
- xfs_trans_log_buf(tp, bp, 0, dp->d_ops->free_hdr_size - 1);
+ xfs_trans_log_buf(args->trans, bp, 0,
+ args->dp->d_ops->free_hdr_size - 1);
}
/*
@@ -314,20 +315,20 @@ xfs_dir2_leaf_to_node(
if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) {
return error;
}
- ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp));
+ ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
/*
* Get the buffer for the new freespace block.
*/
- error = xfs_dir3_free_get_buf(tp, dp, fdb, &fbp);
+ error = xfs_dir3_free_get_buf(args, fdb, &fbp);
if (error)
return error;
free = fbp->b_addr;
dp->d_ops->free_hdr_from_disk(&freehdr, free);
leaf = lbp->b_addr;
- ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
ASSERT(be32_to_cpu(ltp->bestcount) <=
- (uint)dp->i_d.di_size / mp->m_dirblksize);
+ (uint)dp->i_d.di_size / args->geo->blksize);
/*
* Copy freespace entries from the leaf block to the new block.
@@ -348,8 +349,8 @@ xfs_dir2_leaf_to_node(
freehdr.nvalid = be32_to_cpu(ltp->bestcount);
dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
- xfs_dir2_free_log_bests(tp, dp, fbp, 0, freehdr.nvalid - 1);
- xfs_dir2_free_log_header(tp, dp, fbp);
+ xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1);
+ xfs_dir2_free_log_header(args, fbp);
/*
* Converting the leaf to a leafnode is just a matter of changing the
@@ -363,7 +364,7 @@ xfs_dir2_leaf_to_node(
leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
lbp->b_ops = &xfs_dir3_leafn_buf_ops;
xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF);
- xfs_dir3_leaf_log_header(tp, dp, lbp);
+ xfs_dir3_leaf_log_header(args, lbp);
xfs_dir3_leaf_check(dp, lbp);
return 0;
}
@@ -405,7 +406,7 @@ xfs_dir2_leafn_add(
* into other peoples memory
*/
if (index < 0)
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
/*
* If there are already the maximum number of leaf entries in
@@ -414,9 +415,9 @@ xfs_dir2_leafn_add(
* a compact.
*/
- if (leafhdr.count == dp->d_ops->leaf_max_ents(mp)) {
+ if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) {
if (!leafhdr.stale)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
compact = leafhdr.stale > 1;
} else
compact = 0;
@@ -449,12 +450,12 @@ xfs_dir2_leafn_add(
highstale, &lfloglow, &lfloghigh);
lep->hashval = cpu_to_be32(args->hashval);
- lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
+ lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo,
args->blkno, args->index));
dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, bp);
- xfs_dir3_leaf_log_ents(tp, dp, bp, lfloglow, lfloghigh);
+ xfs_dir3_leaf_log_header(args, bp);
+ xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh);
xfs_dir3_leaf_check(dp, bp);
return 0;
}
@@ -470,7 +471,8 @@ xfs_dir2_free_hdr_check(
dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr);
- ASSERT((hdr.firstdb % dp->d_ops->free_max_bests(dp->i_mount)) == 0);
+ ASSERT((hdr.firstdb %
+ dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0);
ASSERT(hdr.firstdb <= db);
ASSERT(db < hdr.firstdb + hdr.nvalid);
}
@@ -575,7 +577,8 @@ xfs_dir2_leafn_lookup_for_addname(
/*
* Pull the data block number from the entry.
*/
- newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+ newdb = xfs_dir2_dataptr_to_db(args->geo,
+ be32_to_cpu(lep->address));
/*
* For addname, we're looking for a place to put the new entry.
* We want to use a data block with an entry of equal
@@ -592,7 +595,7 @@ xfs_dir2_leafn_lookup_for_addname(
* Convert the data block to the free block
* holding its freespace information.
*/
- newfdb = dp->d_ops->db_to_fdb(mp, newdb);
+ newfdb = dp->d_ops->db_to_fdb(args->geo, newdb);
/*
* If it's not the one we have in hand, read it in.
*/
@@ -604,7 +607,8 @@ xfs_dir2_leafn_lookup_for_addname(
xfs_trans_brelse(tp, curbp);
error = xfs_dir2_free_read(tp, dp,
- xfs_dir2_db_to_da(mp, newfdb),
+ xfs_dir2_db_to_da(args->geo,
+ newfdb),
&curbp);
if (error)
return error;
@@ -615,7 +619,7 @@ xfs_dir2_leafn_lookup_for_addname(
/*
* Get the index for our entry.
*/
- fi = dp->d_ops->db_to_fdindex(mp, curdb);
+ fi = dp->d_ops->db_to_fdindex(args->geo, curdb);
/*
* If it has room, return it.
*/
@@ -625,7 +629,7 @@ xfs_dir2_leafn_lookup_for_addname(
XFS_ERRLEVEL_LOW, mp);
if (curfdb != newfdb)
xfs_trans_brelse(tp, curbp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
curfdb = newfdb;
if (be16_to_cpu(bests[fi]) >= length)
@@ -656,7 +660,7 @@ out:
* Return the index, that will be the insertion point.
*/
*indexp = index;
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
/*
@@ -720,7 +724,8 @@ xfs_dir2_leafn_lookup_for_entry(
/*
* Pull the data block number from the entry.
*/
- newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+ newdb = xfs_dir2_dataptr_to_db(args->geo,
+ be32_to_cpu(lep->address));
/*
* Not adding a new entry, so we really want to find
* the name given to us.
@@ -745,7 +750,8 @@ xfs_dir2_leafn_lookup_for_entry(
curbp = state->extrablk.bp;
} else {
error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(mp, newdb),
+ xfs_dir2_db_to_da(args->geo,
+ newdb),
-1, &curbp);
if (error)
return error;
@@ -757,7 +763,8 @@ xfs_dir2_leafn_lookup_for_entry(
* Point to the data entry.
*/
dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr +
- xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ xfs_dir2_dataptr_to_off(args->geo,
+ be32_to_cpu(lep->address)));
/*
* Compare the entry and if it's an exact match, return
* EEXIST immediately. If it's the first case-insensitive
@@ -782,7 +789,7 @@ xfs_dir2_leafn_lookup_for_entry(
curbp->b_ops = &xfs_dir3_data_buf_ops;
xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
if (cmp == XFS_CMP_EXACT)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
}
}
ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT));
@@ -805,7 +812,7 @@ xfs_dir2_leafn_lookup_for_entry(
state->extravalid = 0;
}
*indexp = index;
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
/*
@@ -843,7 +850,6 @@ xfs_dir3_leafn_moveents(
int start_d,/* destination leaf index */
int count) /* count of leaves to copy */
{
- struct xfs_trans *tp = args->trans;
int stale; /* count stale leaves copied */
trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
@@ -862,7 +868,7 @@ xfs_dir3_leafn_moveents(
if (start_d < dhdr->count) {
memmove(&dents[start_d + count], &dents[start_d],
(dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, start_d + count,
+ xfs_dir3_leaf_log_ents(args, bp_d, start_d + count,
count + dhdr->count - 1);
}
/*
@@ -884,8 +890,7 @@ xfs_dir3_leafn_moveents(
*/
memcpy(&dents[start_d], &sents[start_s],
count * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(tp, args->dp, bp_d,
- start_d, start_d + count - 1);
+ xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1);
/*
* If there are source entries after the ones we copied,
@@ -894,8 +899,7 @@ xfs_dir3_leafn_moveents(
if (start_s + count < shdr->count) {
memmove(&sents[start_s], &sents[start_s + count],
count * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(tp, args->dp, bp_s,
- start_s, start_s + count - 1);
+ xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1);
}
/*
@@ -1031,8 +1035,8 @@ xfs_dir2_leafn_rebalance(
/* log the changes made when moving the entries */
dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1);
dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2);
- xfs_dir3_leaf_log_header(args->trans, dp, blk1->bp);
- xfs_dir3_leaf_log_header(args->trans, dp, blk2->bp);
+ xfs_dir3_leaf_log_header(args, blk1->bp);
+ xfs_dir3_leaf_log_header(args, blk2->bp);
xfs_dir3_leaf_check(dp, blk1->bp);
xfs_dir3_leaf_check(dp, blk2->bp);
@@ -1075,7 +1079,6 @@ xfs_dir3_data_block_free(
struct xfs_buf *fbp,
int longest)
{
- struct xfs_trans *tp = args->trans;
int logfree = 0;
__be16 *bests;
struct xfs_dir3_icfree_hdr freehdr;
@@ -1089,7 +1092,7 @@ xfs_dir3_data_block_free(
* value.
*/
bests[findex] = cpu_to_be16(longest);
- xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+ xfs_dir2_free_log_bests(args, fbp, findex, findex);
return 0;
}
@@ -1117,7 +1120,7 @@ xfs_dir3_data_block_free(
}
dp->d_ops->free_hdr_to_disk(free, &freehdr);
- xfs_dir2_free_log_header(tp, dp, fbp);
+ xfs_dir2_free_log_header(args, fbp);
/*
* If there are no useful entries left in the block, get rid of the
@@ -1130,7 +1133,7 @@ xfs_dir3_data_block_free(
if (error == 0) {
fbp = NULL;
logfree = 0;
- } else if (error != ENOSPC || args->total != 0)
+ } else if (error != -ENOSPC || args->total != 0)
return error;
/*
* It's possible to get ENOSPC if there is no
@@ -1141,7 +1144,7 @@ xfs_dir3_data_block_free(
/* Log the free entry that changed, unless we got rid of it. */
if (logfree)
- xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+ xfs_dir2_free_log_bests(args, fbp, findex, findex);
return 0;
}
@@ -1192,9 +1195,9 @@ xfs_dir2_leafn_remove(
/*
* Extract the data block and offset from the entry.
*/
- db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+ db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
ASSERT(dblk->blkno == db);
- off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
+ off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address));
ASSERT(dblk->index == off);
/*
@@ -1203,10 +1206,10 @@ xfs_dir2_leafn_remove(
*/
leafhdr.stale++;
dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
- xfs_dir3_leaf_log_header(tp, dp, bp);
+ xfs_dir3_leaf_log_header(args, bp);
lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
- xfs_dir3_leaf_log_ents(tp, dp, bp, index, index);
+ xfs_dir3_leaf_log_ents(args, bp, index, index);
/*
* Make the data entry free. Keep track of the longest freespace
@@ -1218,7 +1221,7 @@ xfs_dir2_leafn_remove(
bf = dp->d_ops->data_bestfree_p(hdr);
longest = be16_to_cpu(bf[0].length);
needlog = needscan = 0;
- xfs_dir2_data_make_free(tp, dp, dbp, off,
+ xfs_dir2_data_make_free(args, dbp, off,
dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
/*
* Rescan the data block freespaces for bestfree.
@@ -1227,7 +1230,7 @@ xfs_dir2_leafn_remove(
if (needscan)
xfs_dir2_data_freescan(dp, hdr, &needlog);
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(args, dbp);
xfs_dir3_data_check(dp, dbp);
/*
* If the longest data block freespace changes, need to update
@@ -1244,8 +1247,9 @@ xfs_dir2_leafn_remove(
* Convert the data block number to a free block,
* read in the free block.
*/
- fdb = dp->d_ops->db_to_fdb(mp, db);
- error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb),
+ fdb = dp->d_ops->db_to_fdb(args->geo, db);
+ error = xfs_dir2_free_read(tp, dp,
+ xfs_dir2_db_to_da(args->geo, fdb),
&fbp);
if (error)
return error;
@@ -1254,20 +1258,21 @@ xfs_dir2_leafn_remove(
{
struct xfs_dir3_icfree_hdr freehdr;
dp->d_ops->free_hdr_from_disk(&freehdr, free);
- ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(mp) *
- (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
+ ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) *
+ (fdb - xfs_dir2_byte_to_db(args->geo,
+ XFS_DIR2_FREE_OFFSET)));
}
#endif
/*
* Calculate which entry we need to fix.
*/
- findex = dp->d_ops->db_to_fdindex(mp, db);
+ findex = dp->d_ops->db_to_fdindex(args->geo, db);
longest = be16_to_cpu(bf[0].length);
/*
* If the data block is now empty we can get rid of it
* (usually).
*/
- if (longest == mp->m_dirblksize -
+ if (longest == args->geo->blksize -
dp->d_ops->data_entry_offset) {
/*
* Try to punch out the data block.
@@ -1282,7 +1287,7 @@ xfs_dir2_leafn_remove(
* In this case just drop the buffer and some one else
* will eventually get rid of the empty block.
*/
- else if (!(error == ENOSPC && args->total == 0))
+ else if (!(error == -ENOSPC && args->total == 0))
return error;
}
/*
@@ -1302,7 +1307,7 @@ xfs_dir2_leafn_remove(
*/
*rval = (dp->d_ops->leaf_hdr_size +
(uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) <
- mp->m_dir_magicpct;
+ args->geo->magicpct;
return 0;
}
@@ -1335,7 +1340,7 @@ xfs_dir2_leafn_split(
/*
* Initialize the new leaf block.
*/
- error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(mp, blkno),
+ error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno),
&newblk->bp, XFS_DIR2_LEAFN_MAGIC);
if (error)
return error;
@@ -1409,7 +1414,7 @@ xfs_dir2_leafn_toosmall(
count = leafhdr.count - leafhdr.stale;
bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]);
- if (bytes > (state->blocksize >> 1)) {
+ if (bytes > (state->args->geo->blksize >> 1)) {
/*
* Blk over 50%, don't try to join.
*/
@@ -1462,7 +1467,8 @@ xfs_dir2_leafn_toosmall(
* Count bytes in the two blocks combined.
*/
count = leafhdr.count - leafhdr.stale;
- bytes = state->blocksize - (state->blocksize >> 2);
+ bytes = state->args->geo->blksize -
+ (state->args->geo->blksize >> 2);
leaf = bp->b_addr;
dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf);
@@ -1559,8 +1565,8 @@ xfs_dir2_leafn_unbalance(
/* log the changes made when moving the entries */
dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr);
dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr);
- xfs_dir3_leaf_log_header(args->trans, dp, save_blk->bp);
- xfs_dir3_leaf_log_header(args->trans, dp, drop_blk->bp);
+ xfs_dir3_leaf_log_header(args, save_blk->bp);
+ xfs_dir3_leaf_log_header(args, drop_blk->bp);
xfs_dir3_leaf_check(dp, save_blk->bp);
xfs_dir3_leaf_check(dp, drop_blk->bp);
@@ -1586,8 +1592,6 @@ xfs_dir2_node_addname(
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
- state->blocksize = state->mp->m_dirblksize;
- state->node_ents = state->mp->m_dir_node_ents;
/*
* Look up the name. We're not supposed to find it, but
* this gives us the insertion point.
@@ -1595,7 +1599,7 @@ xfs_dir2_node_addname(
error = xfs_da3_node_lookup_int(state, &rval);
if (error)
rval = error;
- if (rval != ENOENT) {
+ if (rval != -ENOENT) {
goto done;
}
/*
@@ -1624,7 +1628,7 @@ xfs_dir2_node_addname(
* It didn't work, we need to split the leaf block.
*/
if (args->total == 0) {
- ASSERT(rval == ENOSPC);
+ ASSERT(rval == -ENOSPC);
goto done;
}
/*
@@ -1726,9 +1730,9 @@ xfs_dir2_node_addname_int(
if (dbno == -1) {
xfs_fileoff_t fo; /* freespace block number */
- if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
+ if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))
return error;
- lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
+ lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
fbno = ifbno;
}
/*
@@ -1746,7 +1750,8 @@ xfs_dir2_node_addname_int(
* us a freespace block to start with.
*/
if (++fbno == 0)
- fbno = XFS_DIR2_FREE_FIRSTDB(mp);
+ fbno = xfs_dir2_byte_to_db(args->geo,
+ XFS_DIR2_FREE_OFFSET);
/*
* If it's ifbno we already looked at it.
*/
@@ -1764,8 +1769,8 @@ xfs_dir2_node_addname_int(
* to avoid it.
*/
error = xfs_dir2_free_try_read(tp, dp,
- xfs_dir2_db_to_da(mp, fbno),
- &fbp);
+ xfs_dir2_db_to_da(args->geo, fbno),
+ &fbp);
if (error)
return error;
if (!fbp)
@@ -1810,7 +1815,7 @@ xfs_dir2_node_addname_int(
* Not allowed to allocate, return failure.
*/
if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
/*
* Allocate and initialize the new data block.
@@ -1833,10 +1838,10 @@ xfs_dir2_node_addname_int(
* Get the freespace block corresponding to the data block
* that was just allocated.
*/
- fbno = dp->d_ops->db_to_fdb(mp, dbno);
+ fbno = dp->d_ops->db_to_fdb(args->geo, dbno);
error = xfs_dir2_free_try_read(tp, dp,
- xfs_dir2_db_to_da(mp, fbno),
- &fbp);
+ xfs_dir2_db_to_da(args->geo, fbno),
+ &fbp);
if (error)
return error;
@@ -1850,12 +1855,13 @@ xfs_dir2_node_addname_int(
if (error)
return error;
- if (unlikely(dp->d_ops->db_to_fdb(mp, dbno) != fbno)) {
+ if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) {
xfs_alert(mp,
"%s: dir ino %llu needed freesp block %lld for\n"
" data block %lld, got %lld ifbno %llu lastfbno %d",
__func__, (unsigned long long)dp->i_ino,
- (long long)dp->d_ops->db_to_fdb(mp, dbno),
+ (long long)dp->d_ops->db_to_fdb(
+ args->geo, dbno),
(long long)dbno, (long long)fbno,
(unsigned long long)ifbno, lastfbno);
if (fblk) {
@@ -1870,13 +1876,13 @@ xfs_dir2_node_addname_int(
}
XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
/*
* Get a buffer for the new block.
*/
- error = xfs_dir3_free_get_buf(tp, dp, fbno, &fbp);
+ error = xfs_dir3_free_get_buf(args, fbno, &fbp);
if (error)
return error;
free = fbp->b_addr;
@@ -1886,8 +1892,10 @@ xfs_dir2_node_addname_int(
/*
* Remember the first slot as our empty slot.
*/
- freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
- dp->d_ops->free_max_bests(mp);
+ freehdr.firstdb =
+ (fbno - xfs_dir2_byte_to_db(args->geo,
+ XFS_DIR2_FREE_OFFSET)) *
+ dp->d_ops->free_max_bests(args->geo);
} else {
free = fbp->b_addr;
bests = dp->d_ops->free_bests_p(free);
@@ -1897,13 +1905,13 @@ xfs_dir2_node_addname_int(
/*
* Set the freespace block index from the data block number.
*/
- findex = dp->d_ops->db_to_fdindex(mp, dbno);
+ findex = dp->d_ops->db_to_fdindex(args->geo, dbno);
/*
* If it's after the end of the current entries in the
* freespace block, extend that table.
*/
if (findex >= freehdr.nvalid) {
- ASSERT(findex < dp->d_ops->free_max_bests(mp));
+ ASSERT(findex < dp->d_ops->free_max_bests(args->geo));
freehdr.nvalid = findex + 1;
/*
* Tag new entry so nused will go up.
@@ -1917,7 +1925,7 @@ xfs_dir2_node_addname_int(
if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
freehdr.nused++;
dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
- xfs_dir2_free_log_header(tp, dp, fbp);
+ xfs_dir2_free_log_header(args, fbp);
}
/*
* Update the real value in the table.
@@ -1942,7 +1950,8 @@ xfs_dir2_node_addname_int(
/*
* Read the data block in.
*/
- error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno),
+ error = xfs_dir3_data_read(tp, dp,
+ xfs_dir2_db_to_da(args->geo, dbno),
-1, &dbp);
if (error)
return error;
@@ -1960,7 +1969,7 @@ xfs_dir2_node_addname_int(
/*
* Mark the first part of the unused space, inuse for us.
*/
- xfs_dir2_data_use_free(tp, dp, dbp, dup,
+ xfs_dir2_data_use_free(args, dbp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
/*
@@ -1973,7 +1982,7 @@ xfs_dir2_node_addname_int(
dp->d_ops->data_put_ftype(dep, args->filetype);
tagp = dp->d_ops->data_entry_tag_p(dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
- xfs_dir2_data_log_entry(tp, dp, dbp, dep);
+ xfs_dir2_data_log_entry(args, dbp, dep);
/*
* Rescan the block for bestfree if needed.
*/
@@ -1983,7 +1992,7 @@ xfs_dir2_node_addname_int(
* Log the data block header if needed.
*/
if (needlog)
- xfs_dir2_data_log_header(tp, dp, dbp);
+ xfs_dir2_data_log_header(args, dbp);
/*
* If the freespace entry is now wrong, update it.
*/
@@ -1996,7 +2005,7 @@ xfs_dir2_node_addname_int(
* Log the freespace entry if needed.
*/
if (logfree)
- xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex);
+ xfs_dir2_free_log_bests(args, fbp, findex, findex);
/*
* Return the data block and offset in args, then drop the data block.
*/
@@ -2027,16 +2036,14 @@ xfs_dir2_node_lookup(
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
- state->blocksize = state->mp->m_dirblksize;
- state->node_ents = state->mp->m_dir_node_ents;
/*
* Fill in the path to the entry in the cursor.
*/
error = xfs_da3_node_lookup_int(state, &rval);
if (error)
rval = error;
- else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
- /* If a CI match, dup the actual name and return EEXIST */
+ else if (rval == -ENOENT && args->cmpresult == XFS_CMP_CASE) {
+ /* If a CI match, dup the actual name and return -EEXIST */
xfs_dir2_data_entry_t *dep;
dep = (xfs_dir2_data_entry_t *)
@@ -2067,12 +2074,12 @@ xfs_dir2_node_lookup(
*/
int /* error */
xfs_dir2_node_removename(
- xfs_da_args_t *args) /* operation arguments */
+ struct xfs_da_args *args) /* operation arguments */
{
- xfs_da_state_blk_t *blk; /* leaf block */
+ struct xfs_da_state_blk *blk; /* leaf block */
int error; /* error return value */
int rval; /* operation return value */
- xfs_da_state_t *state; /* btree cursor */
+ struct xfs_da_state *state; /* btree cursor */
trace_xfs_dir2_node_removename(args);
@@ -2082,21 +2089,18 @@ xfs_dir2_node_removename(
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
- state->blocksize = state->mp->m_dirblksize;
- state->node_ents = state->mp->m_dir_node_ents;
- /*
- * Look up the entry we're deleting, set up the cursor.
- */
+
+ /* Look up the entry we're deleting, set up the cursor. */
error = xfs_da3_node_lookup_int(state, &rval);
if (error)
- rval = error;
- /*
- * Didn't find it, upper layer screwed up.
- */
- if (rval != EEXIST) {
- xfs_da_state_free(state);
- return rval;
+ goto out_free;
+
+ /* Didn't find it, upper layer screwed up. */
+ if (rval != -EEXIST) {
+ error = rval;
+ goto out_free;
}
+
blk = &state->path.blk[state->path.active - 1];
ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
ASSERT(state->extravalid);
@@ -2107,7 +2111,7 @@ xfs_dir2_node_removename(
error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
&state->extrablk, &rval);
if (error)
- return error;
+ goto out_free;
/*
* Fix the hash values up the btree.
*/
@@ -2122,6 +2126,7 @@ xfs_dir2_node_removename(
*/
if (!error)
error = xfs_dir2_node_to_leaf(state);
+out_free:
xfs_da_state_free(state);
return error;
}
@@ -2152,8 +2157,6 @@ xfs_dir2_node_replace(
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
- state->blocksize = state->mp->m_dirblksize;
- state->node_ents = state->mp->m_dir_node_ents;
inum = args->inumber;
/*
* Lookup the entry to change in the btree.
@@ -2166,7 +2169,7 @@ xfs_dir2_node_replace(
* It should be found, since the vnodeops layer has looked it up
* and locked it. But paranoia is good.
*/
- if (rval == EEXIST) {
+ if (rval == -EEXIST) {
struct xfs_dir2_leaf_entry *ents;
/*
* Find the leaf entry.
@@ -2185,15 +2188,15 @@ xfs_dir2_node_replace(
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
dep = (xfs_dir2_data_entry_t *)
((char *)hdr +
- xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
+ xfs_dir2_dataptr_to_off(args->geo,
+ be32_to_cpu(lep->address)));
ASSERT(inum != be64_to_cpu(dep->inumber));
/*
* Fill in the new inode number and log the entry.
*/
dep->inumber = cpu_to_be64(inum);
args->dp->d_ops->data_put_ftype(dep, args->filetype);
- xfs_dir2_data_log_entry(args->trans, args->dp,
- state->extrablk.bp, dep);
+ xfs_dir2_data_log_entry(args, state->extrablk.bp, dep);
rval = 0;
}
/*
@@ -2261,15 +2264,15 @@ xfs_dir2_node_trim_free(
/*
* Blow the block away.
*/
- if ((error =
- xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo),
- bp))) {
+ error = xfs_dir2_shrink_inode(args,
+ xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp);
+ if (error) {
/*
* Can't fail with ENOSPC since that only happens with no
* space reservation, when breaking up an extent into two
* pieces. This is the last block of an extent.
*/
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
xfs_trans_brelse(tp, bp);
return error;
}
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 8b9d2281f85b..27ce0794d196 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -20,6 +20,140 @@
struct dir_context;
+/*
+ * Directory offset/block conversion functions.
+ *
+ * DB blocks here are logical directory block numbers, not filesystem blocks.
+ */
+
+/*
+ * Convert dataptr to byte in file space
+ *
+ * The dataptr is cast to xfs_dir2_off_t first and then shifted left by
+ * XFS_DIR2_DATA_ALIGN_LOG, so the shift is carried out in the offset type
+ * rather than in the dataptr type. No directory geometry is needed.
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp)
+{
+ return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
+}
+
+/*
+ * Convert byte in file space to dataptr. It had better be aligned.
+ *
+ * The byte offset is shifted right by XFS_DIR2_DATA_ALIGN_LOG and the result
+ * is cast to xfs_dir2_dataptr_t. Nothing here checks the alignment: the low
+ * XFS_DIR2_DATA_ALIGN_LOG bits of an unaligned offset are simply dropped.
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by)
+{
+ return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
+}
+
+/*
+ * Convert byte in space to (DB) block
+ *
+ * Shifts the byte offset right by geo->blklog and casts the result to
+ * xfs_dir2_db_t; the offset within the directory block is discarded.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+ return (xfs_dir2_db_t)(by >> geo->blklog);
+}
+
+/*
+ * Convert dataptr to a block number
+ *
+ * Composition of two helpers: the dataptr is first turned into a byte offset
+ * with xfs_dir2_dataptr_to_byte(), and that offset is then reduced to a
+ * directory (DB) block number with xfs_dir2_byte_to_db().
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
+{
+ return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp));
+}
+
+/*
+ * Convert byte in space to offset in a block
+ *
+ * Masks the byte offset with (geo->blksize - 1) and casts the result to
+ * xfs_dir2_data_aoff_t.
+ * NOTE(review): the mask only yields the in-block offset if geo->blksize is
+ * a power of two (presumably 1 << geo->blklog) -- confirm where the geometry
+ * is initialised.
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+ return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1));
+}
+
+/*
+ * Convert dataptr to a byte offset in a block
+ *
+ * Composition of two helpers: xfs_dir2_dataptr_to_byte() expands the dataptr
+ * to a byte offset, then xfs_dir2_byte_to_off() keeps only the part of that
+ * offset that lies within one directory block.
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
+{
+ return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp));
+}
+
+/*
+ * Convert block and offset to byte in space
+ *
+ * db is cast to xfs_dir2_off_t before being shifted left by geo->blklog, so
+ * the shift is done in the offset type; the in-block offset o is then added.
+ * o is not range-checked against the block size here.
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
+ xfs_dir2_data_aoff_t o)
+{
+ return ((xfs_dir2_off_t)db << geo->blklog) + o;
+}
+
+/*
+ * Convert block (DB) to block (dablk)
+ *
+ * Shifts db left by (geo->blklog - geo->fsblog). The shift is applied to db
+ * in its own type; only the shifted result is cast to xfs_dablk_t.
+ * NOTE(review): assumes geo->blklog >= geo->fsblog, otherwise the shift
+ * count would be negative -- confirm against the geometry setup code.
+ */
+static inline xfs_dablk_t
+xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+ return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog));
+}
+
+/*
+ * Convert byte in space to (DA) block
+ *
+ * Composition of two helpers: xfs_dir2_byte_to_db() reduces the byte offset
+ * to a directory (DB) block number, and xfs_dir2_db_to_da() converts that
+ * block number to a dablk. Because the offset passes through a whole DB
+ * block number, the in-block part of the byte offset does not affect the
+ * result.
+ */
+static inline xfs_dablk_t
+xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+ return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by));
+}
+
+/*
+ * Convert block and offset to dataptr
+ *
+ * Composition of two helpers: xfs_dir2_db_off_to_byte() builds the byte
+ * offset for (db, o), and xfs_dir2_byte_to_dataptr() shrinks that offset to
+ * a dataptr. The second step drops low-order bits, so o should already be
+ * suitably aligned.
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
+ xfs_dir2_data_aoff_t o)
+{
+ return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o));
+}
+
+/*
+ * Convert block (dablk) to block (DB)
+ *
+ * Shifts da right by (geo->blklog - geo->fsblog) and casts the result to
+ * xfs_dir2_db_t; the low-order bits of da that are shifted out are lost.
+ * NOTE(review): assumes geo->blklog >= geo->fsblog -- confirm against the
+ * geometry setup code.
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da)
+{
+ return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog));
+}
+
+/*
+ * Convert block (dablk) to byte offset in space
+ *
+ * Converts da to a directory (DB) block number with xfs_dir2_da_to_db() and
+ * then asks xfs_dir2_db_off_to_byte() for the byte offset of in-block offset
+ * 0 in that block, i.e. the result is the byte address of the start of the
+ * DB block that xfs_dir2_da_to_db() returns for da.
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da)
+{
+ return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0);
+}
+
+/*
+ * Directory tail pointer accessor functions. Based on block geometry.
+ *
+ * Both helpers return a pointer to a tail structure that occupies the final
+ * bytes of a region of geo->blksize bytes starting at the pointer passed in:
+ *
+ * xfs_dir2_block_tail_p() treats the address (hdr + geo->blksize) as a
+ * struct xfs_dir2_block_tail pointer and steps back by one element.
+ *
+ * xfs_dir2_leaf_tail_p() computes (lp + geo->blksize) minus
+ * sizeof(struct xfs_dir2_leaf_tail) using byte arithmetic.
+ *
+ * Neither helper validates its pointer argument or the block contents.
+ * NOTE(review): hdr/lp are presumably the start of a full directory block
+ * buffer -- confirm at the call sites.
+ */
+static inline struct xfs_dir2_block_tail *
+xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr)
+{
+ return ((struct xfs_dir2_block_tail *)
+ ((char *)hdr + geo->blksize)) - 1;
+}
+
+static inline struct xfs_dir2_leaf_tail *
+xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
+{
+ return (struct xfs_dir2_leaf_tail *)
+ ((char *)lp + geo->blksize -
+ sizeof(struct xfs_dir2_leaf_tail));
+}
+
/* xfs_dir2.c */
extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
@@ -54,8 +188,8 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
-extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, xfs_daddr_t mapped_bno);
+extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
+ xfs_daddr_t mapped_bno);
extern struct xfs_dir2_data_free *
xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
@@ -77,9 +211,9 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
struct xfs_buf **bpp, __uint16_t magic);
-extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
struct xfs_buf *bp, int first, int last);
-extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, struct xfs_inode *dp,
+extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
struct xfs_buf *bp);
extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index aafc6e46cb58..5079e051ef08 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -51,10 +51,9 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
#else
#define xfs_dir2_sf_check(args)
#endif /* DEBUG */
-#if XFS_BIG_INUMS
+
static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
-#endif /* XFS_BIG_INUMS */
/*
* Given a block directory (dp/block), calculate its size as a shortform (sf)
@@ -82,8 +81,10 @@ xfs_dir2_block_sfsize(
xfs_ino_t parent = 0; /* parent inode number */
int size=0; /* total computed size */
int has_ftype;
+ struct xfs_da_geometry *geo;
mp = dp->i_mount;
+ geo = mp->m_dir_geo;
/*
* if there is a filetype field, add the extra byte to the namelen
@@ -92,7 +93,7 @@ xfs_dir2_block_sfsize(
has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0;
count = i8count = namelen = 0;
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
@@ -104,8 +105,8 @@ xfs_dir2_block_sfsize(
/*
* Calculate the pointer to the entry at hand.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+ xfs_dir2_dataptr_to_off(geo, addr));
/*
* Detect . and .., so we can special-case them.
* . is not included in sf directories.
@@ -115,10 +116,10 @@ xfs_dir2_block_sfsize(
isdotdot =
dep->namelen == 2 &&
dep->name[0] == '.' && dep->name[1] == '.';
-#if XFS_BIG_INUMS
+
if (!isdot)
i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
-#endif
+
/* take into account the file type field */
if (!isdot && !isdotdot) {
count++;
@@ -170,6 +171,7 @@ xfs_dir2_block_to_sf(
char *ptr; /* current data pointer */
xfs_dir2_sf_entry_t *sfep; /* shortform entry */
xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */
+ xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */
trace_xfs_dir2_block_to_sf(args);
@@ -177,39 +179,24 @@ xfs_dir2_block_to_sf(
mp = dp->i_mount;
/*
- * Make a copy of the block data, so we can shrink the inode
- * and add local data.
+ * allocate a temporary destination buffer the size of the inode
+ * to format the data into. Once we have formatted the data, we
+ * can free the block and copy the formatted data into the inode literal
+ * area.
*/
- hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
- memcpy(hdr, bp->b_addr, mp->m_dirblksize);
- logflags = XFS_ILOG_CORE;
- if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
- ASSERT(error != ENOSPC);
- goto out;
- }
+ dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP);
+ hdr = bp->b_addr;
/*
- * The buffer is now unconditionally gone, whether
- * xfs_dir2_shrink_inode worked or not.
- *
- * Convert the inode to local format.
- */
- dp->i_df.if_flags &= ~XFS_IFEXTENTS;
- dp->i_df.if_flags |= XFS_IFINLINE;
- dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
- ASSERT(dp->i_df.if_bytes == 0);
- xfs_idata_realloc(dp, size, XFS_DATA_FORK);
- logflags |= XFS_ILOG_DDATA;
- /*
* Copy the header into the newly allocate local space.
*/
- sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dst;
memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
- dp->i_d.di_size = size;
+
/*
* Set up to loop over the block's entries.
*/
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(args->geo, hdr);
ptr = (char *)dp->d_ops->data_entry_p(hdr);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
sfep = xfs_dir2_sf_firstentry(sfp);
@@ -258,10 +245,34 @@ xfs_dir2_block_to_sf(
ptr += dp->d_ops->data_entsize(dep->namelen);
}
ASSERT((char *)sfep - (char *)sfp == size);
+
+ /* now we are done with the block, we can shrink the inode */
+ logflags = XFS_ILOG_CORE;
+ error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp);
+ if (error) {
+ ASSERT(error != -ENOSPC);
+ goto out;
+ }
+
+ /*
+ * The buffer is now unconditionally gone, whether
+ * xfs_dir2_shrink_inode worked or not.
+ *
+ * Convert the inode to local format and copy the data in.
+ */
+ dp->i_df.if_flags &= ~XFS_IFEXTENTS;
+ dp->i_df.if_flags |= XFS_IFINLINE;
+ dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+ ASSERT(dp->i_df.if_bytes == 0);
+ xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+
+ logflags |= XFS_ILOG_DDATA;
+ memcpy(dp->i_df.if_u1.if_data, dst, size);
+ dp->i_d.di_size = size;
xfs_dir2_sf_check(args);
out:
xfs_trans_log_inode(args->trans, dp, logflags);
- kmem_free(hdr);
+ kmem_free(dst);
return error;
}
@@ -275,21 +286,19 @@ int /* error */
xfs_dir2_sf_addname(
xfs_da_args_t *args) /* operation arguments */
{
- int add_entsize; /* size of the new entry */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
int incr_isize; /* total change in size */
int new_isize; /* di_size after adding name */
int objchange; /* changing to 8-byte inodes */
xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */
- int old_isize; /* di_size before adding name */
int pick; /* which algorithm to use */
xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
trace_xfs_dir2_sf_addname(args);
- ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
+ ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT);
dp = args->dp;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
/*
@@ -297,7 +306,7 @@ xfs_dir2_sf_addname(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -306,29 +315,24 @@ xfs_dir2_sf_addname(
/*
* Compute entry (and change in) size.
*/
- add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
- incr_isize = add_entsize;
+ incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
objchange = 0;
-#if XFS_BIG_INUMS
+
/*
* Do we have to change to 8 byte inodes?
*/
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
/*
- * Yes, adjust the entry size and the total size.
+ * Yes, adjust the inode size. old count + (parent + new)
*/
- add_entsize +=
- (uint)sizeof(xfs_dir2_ino8_t) -
- (uint)sizeof(xfs_dir2_ino4_t);
incr_isize +=
(sfp->count + 2) *
((uint)sizeof(xfs_dir2_ino8_t) -
(uint)sizeof(xfs_dir2_ino4_t));
objchange = 1;
}
-#endif
- old_isize = (int)dp->i_d.di_size;
- new_isize = old_isize + incr_isize;
+
+ new_isize = (int)dp->i_d.di_size + incr_isize;
/*
* Won't fit as shortform any more (due to size),
* or the pick routine says it won't (due to offset values).
@@ -340,7 +344,7 @@ xfs_dir2_sf_addname(
* Just checking or no space reservation, it doesn't fit.
*/
if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
/*
* Convert to block form then add the name.
*/
@@ -365,10 +369,8 @@ xfs_dir2_sf_addname(
*/
else {
ASSERT(pick == 2);
-#if XFS_BIG_INUMS
if (objchange)
xfs_dir2_sf_toino8(args);
-#endif
xfs_dir2_sf_addname_hard(args, objchange, new_isize);
}
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -420,10 +422,8 @@ xfs_dir2_sf_addname_easy(
* Update the header and inode.
*/
sfp->count++;
-#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
sfp->i8count++;
-#endif
dp->i_d.di_size = new_isize;
xfs_dir2_sf_check(args);
}
@@ -511,10 +511,8 @@ xfs_dir2_sf_addname_hard(
dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
dp->d_ops->sf_put_ftype(sfep, args->filetype);
sfp->count++;
-#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
sfp->i8count++;
-#endif
/*
* If there's more left to copy, do that.
*/
@@ -583,22 +581,17 @@ xfs_dir2_sf_addname_pick(
* we'll go back, convert to block, then try the insert and convert
* to leaf.
*/
- if (used + (holefit ? 0 : size) > mp->m_dirblksize)
+ if (used + (holefit ? 0 : size) > args->geo->blksize)
return 0;
/*
* If changing the inode number size, do it the hard way.
*/
-#if XFS_BIG_INUMS
- if (objchange) {
+ if (objchange)
return 2;
- }
-#else
- ASSERT(objchange == 0);
-#endif
/*
* If it won't fit at the end then do it the hard way (use the hole).
*/
- if (used + size > mp->m_dirblksize)
+ if (used + size > args->geo->blksize)
return 2;
/*
* Do it the easy way.
@@ -645,11 +638,10 @@ xfs_dir2_sf_check(
ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX);
}
ASSERT(i8count == sfp->i8count);
- ASSERT(XFS_BIG_INUMS || i8count == 0);
ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
ASSERT(offset +
(sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
- (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dirblksize);
+ (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize);
}
#endif /* DEBUG */
@@ -733,7 +725,7 @@ xfs_dir2_sf_lookup(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -746,7 +738,7 @@ xfs_dir2_sf_lookup(
args->inumber = dp->i_ino;
args->cmpresult = XFS_CMP_EXACT;
args->filetype = XFS_DIR3_FT_DIR;
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
}
/*
* Special case for ..
@@ -756,7 +748,7 @@ xfs_dir2_sf_lookup(
args->inumber = dp->d_ops->sf_get_parent_ino(sfp);
args->cmpresult = XFS_CMP_EXACT;
args->filetype = XFS_DIR3_FT_DIR;
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
}
/*
* Loop over all the entries trying to match ours.
@@ -776,20 +768,20 @@ xfs_dir2_sf_lookup(
args->inumber = dp->d_ops->sf_get_ino(sfp, sfep);
args->filetype = dp->d_ops->sf_get_ftype(sfep);
if (cmp == XFS_CMP_EXACT)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
ci_sfep = sfep;
}
}
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
/*
* Here, we can only be doing a lookup (not a rename or replace).
- * If a case-insensitive match was not found, return ENOENT.
+ * If a case-insensitive match was not found, return -ENOENT.
*/
if (!ci_sfep)
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
/* otherwise process the CI match as required by the caller */
error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -819,7 +811,7 @@ xfs_dir2_sf_removename(
*/
if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == oldsize);
ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -842,7 +834,7 @@ xfs_dir2_sf_removename(
* Didn't find it.
*/
if (i == sfp->count)
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
/*
* Calculate sizes.
*/
@@ -865,7 +857,6 @@ xfs_dir2_sf_removename(
*/
xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-#if XFS_BIG_INUMS
/*
* Are we changing inode number size?
*/
@@ -875,7 +866,6 @@ xfs_dir2_sf_removename(
else
sfp->i8count--;
}
-#endif
xfs_dir2_sf_check(args);
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
return 0;
@@ -890,12 +880,8 @@ xfs_dir2_sf_replace(
{
xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
-#if XFS_BIG_INUMS || defined(DEBUG)
xfs_ino_t ino=0; /* entry old inode number */
-#endif
-#if XFS_BIG_INUMS
int i8elevated; /* sf_toino8 set i8count=1 */
-#endif
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
@@ -909,13 +895,13 @@ xfs_dir2_sf_replace(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
-#if XFS_BIG_INUMS
+
/*
* New inode number is large, and need to convert to 8-byte inodes.
*/
@@ -946,17 +932,15 @@ xfs_dir2_sf_replace(
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
} else
i8elevated = 0;
-#endif
+
ASSERT(args->namelen != 1 || args->name[0] != '.');
/*
* Replace ..'s entry.
*/
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
-#if XFS_BIG_INUMS || defined(DEBUG)
ino = dp->d_ops->sf_get_parent_ino(sfp);
ASSERT(args->inumber != ino);
-#endif
dp->d_ops->sf_put_parent_ino(sfp, args->inumber);
}
/*
@@ -967,10 +951,8 @@ xfs_dir2_sf_replace(
i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
-#if XFS_BIG_INUMS || defined(DEBUG)
ino = dp->d_ops->sf_get_ino(sfp, sfep);
ASSERT(args->inumber != ino);
-#endif
dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
dp->d_ops->sf_put_ftype(sfep, args->filetype);
break;
@@ -981,14 +963,11 @@ xfs_dir2_sf_replace(
*/
if (i == sfp->count) {
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-#if XFS_BIG_INUMS
if (i8elevated)
xfs_dir2_sf_toino4(args);
-#endif
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
}
-#if XFS_BIG_INUMS
/*
* See if the old number was large, the new number is small.
*/
@@ -1015,13 +994,11 @@ xfs_dir2_sf_replace(
if (!i8elevated)
sfp->i8count++;
}
-#endif
xfs_dir2_sf_check(args);
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
return 0;
}
-#if XFS_BIG_INUMS
/*
* Convert from 8-byte inode numbers to 4-byte inode numbers.
* The last 8-byte inode number is gone, but the count is still 1.
@@ -1100,9 +1077,9 @@ xfs_dir2_sf_toino4(
}
/*
- * Convert from 4-byte inode numbers to 8-byte inode numbers.
- * The new 8-byte inode number is not there yet, we leave with the
- * count 1 but no corresponding entry.
+ * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers.
+ * The new entry w/ an 8-byte inode number is not there yet; we leave with
+ * i8count set to 1, but no corresponding 8-byte entry.
*/
static void
xfs_dir2_sf_toino8(
@@ -1135,7 +1112,7 @@ xfs_dir2_sf_toino8(
ASSERT(oldsfp->i8count == 0);
memcpy(buf, oldsfp, oldsize);
/*
- * Compute the new inode size.
+ * Compute the new inode size (nb: entry count + 1 for parent)
*/
newsize =
oldsize +
@@ -1176,4 +1153,3 @@ xfs_dir2_sf_toino8(
dp->i_d.di_size = newsize;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
-#endif /* XFS_BIG_INUMS */
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index d401457d2f25..bb969337efc8 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -35,7 +35,6 @@
int
xfs_calc_dquots_per_chunk(
- struct xfs_mount *mp,
unsigned int nbblks) /* basic block units */
{
unsigned int ndquots;
@@ -194,7 +193,7 @@ xfs_dquot_buf_verify_crc(
if (mp->m_quotainfo)
ndquots = mp->m_quotainfo->qi_dqperchunk;
else
- ndquots = xfs_calc_dquots_per_chunk(mp,
+ ndquots = xfs_calc_dquots_per_chunk(
XFS_BB_TO_FSB(mp, bp->b_length));
for (i = 0; i < ndquots; i++, d++) {
@@ -225,7 +224,7 @@ xfs_dquot_buf_verify(
if (mp->m_quotainfo)
ndquots = mp->m_quotainfo->qi_dqperchunk;
else
- ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length);
+ ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
/*
* On the first read of the buffer, verify that each dquot is valid.
@@ -257,10 +256,13 @@ xfs_dquot_buf_read_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (!xfs_dquot_buf_verify_crc(mp, bp))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_dquot_buf_verify(mp, bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
/*
@@ -275,8 +277,8 @@ xfs_dquot_buf_write_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify(mp, bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
}
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index b6ab5a3cfa12..7e42bba9a420 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -68,11 +68,7 @@ struct xfs_ifork;
#define XFS_RTLOBIT(w) xfs_lowbit32(w)
#define XFS_RTHIBIT(w) xfs_highbit32(w)
-#if XFS_BIG_BLKNOS
#define XFS_RTBLOCKLOG(b) xfs_highbit64(b)
-#else
-#define XFS_RTBLOCKLOG(b) xfs_highbit32(b)
-#endif
/*
* Dquot and dquot block format definitions
@@ -145,6 +141,8 @@ struct xfs_dsymlink_hdr {
__be64 sl_lsn;
};
+#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc)
+
/*
* The maximum pathlen is 1024 bytes. Since the minimum file system
* blocksize is 512 bytes, we can get a max of 3 extents back from
@@ -200,6 +198,8 @@ typedef __be32 xfs_alloc_ptr_t;
*/
#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
+#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */
+#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */
typedef __uint64_t xfs_inofree_t;
#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
@@ -242,7 +242,17 @@ typedef __be32 xfs_inobt_ptr_t;
* block numbers in the AG.
*/
#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
-#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+
+/*
+ * The first data block of an AG depends on whether the filesystem was formatted
+ * with the finobt feature. If so, account for the finobt reserved root btree
+ * block.
+ */
+#define XFS_PREALLOC_BLOCKS(mp) \
+ (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
+ XFS_FIBT_BLOCK(mp) + 1 : \
+ XFS_IBT_BLOCK(mp) + 1)
@@ -290,23 +300,15 @@ typedef struct xfs_bmbt_rec_host {
* Values and macros for delayed-allocation startblock fields.
*/
#define STARTBLOCKVALBITS 17
-#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20)
-#define DSTARTBLOCKMASKBITS (15 + 20)
+#define STARTBLOCKMASKBITS (15 + 20)
#define STARTBLOCKMASK \
(((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
-#define DSTARTBLOCKMASK \
- (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
static inline int isnullstartblock(xfs_fsblock_t x)
{
return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK;
}
-static inline int isnulldstartblock(xfs_dfsbno_t x)
-{
- return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK;
-}
-
static inline xfs_fsblock_t nullstartblock(int k)
{
ASSERT(k < (1 << STARTBLOCKVALBITS));
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index e87719c5bebe..b62771f1f4b5 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -52,7 +52,7 @@ xfs_ialloc_cluster_alignment(
{
if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
args->mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
+ XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size))
return args->mp->m_sb.sb_inoalignmt;
return 1;
}
@@ -112,6 +112,66 @@ xfs_inobt_get_rec(
}
/*
+ * Insert a single inobt record. Cursor must already point to desired location.
+ *
+ * Stores freecount and free into the cursor's record
+ * (cur->bc_rec.i.ir_freecount and cur->bc_rec.i.ir_free) and then calls
+ * xfs_btree_insert(). The return value and *stat are whatever
+ * xfs_btree_insert() produces.
+ * NOTE(review): ir_startino is not written here; presumably it is already
+ * set by the lookup that positioned the cursor -- confirm.
+ */
+STATIC int
+xfs_inobt_insert_rec(
+ struct xfs_btree_cur *cur,
+ __int32_t freecount,
+ xfs_inofree_t free,
+ int *stat)
+{
+ cur->bc_rec.i.ir_freecount = freecount;
+ cur->bc_rec.i.ir_free = free;
+ return xfs_btree_insert(cur, stat);
+}
+
+/*
+ * Insert records describing a newly allocated inode chunk into the inobt.
+ *
+ * A cursor is created with xfs_inobt_init_cursor() for the btree selected by
+ * btnum; the AG number is read from the AGI held in agbp (agi_seqno).
+ *
+ * The range [newino, newino + newlen) is walked in steps of
+ * XFS_INODES_PER_CHUNK. For each step the function does an XFS_LOOKUP_EQ
+ * lookup, which is asserted to find nothing (i == 0), and then inserts a
+ * record with a free count of XFS_INODES_PER_CHUNK and a free mask of
+ * XFS_INOBT_ALL_FREE, which is asserted to succeed (i == 1).
+ *
+ * Returns 0 on success. If a lookup or insert returns an error, the cursor
+ * is torn down with XFS_BTREE_ERROR and that error is returned immediately;
+ * on success the cursor is torn down with XFS_BTREE_NOERROR.
+ */
+STATIC int
+xfs_inobt_insert(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agino_t newino,
+ xfs_agino_t newlen,
+ xfs_btnum_t btnum)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
+ xfs_agino_t thisino;
+ int i;
+ int error;
+
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
+
+ for (thisino = newino;
+ thisino < newino + newlen;
+ thisino += XFS_INODES_PER_CHUNK) {
+ error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
+ if (error) {
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+ }
+ ASSERT(i == 0);
+
+ error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
+ XFS_INOBT_ALL_FREE, &i);
+ if (error) {
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+ }
+ ASSERT(i == 1);
+ }
+
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+
+ return 0;
+}
+
+/*
* Verify that the number of free inodes in the AGI is correct.
*/
#ifdef DEBUG
@@ -170,27 +230,20 @@ xfs_ialloc_inode_init(
{
struct xfs_buf *fbuf;
struct xfs_dinode *free;
- int blks_per_cluster, nbufs, ninodes;
+ int nbufs, blks_per_cluster, inodes_per_cluster;
int version;
int i, j;
xfs_daddr_t d;
xfs_ino_t ino = 0;
/*
- * Loop over the new block(s), filling in the inodes.
- * For small block sizes, manipulate the inodes in buffers
- * which are multiples of the blocks size.
+ * Loop over the new block(s), filling in the inodes. For small block
+ * sizes, manipulate the inodes in buffers which are multiples of the
+ * block size.
*/
- if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
- blks_per_cluster = 1;
- nbufs = length;
- ninodes = mp->m_sb.sb_inopblock;
- } else {
- blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
- mp->m_sb.sb_blocksize;
- nbufs = length / blks_per_cluster;
- ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
- }
+ blks_per_cluster = xfs_icluster_size_fsb(mp);
+ inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
+ nbufs = length / blks_per_cluster;
/*
* Figure out what version number to use in the inodes we create. If
@@ -225,12 +278,10 @@ xfs_ialloc_inode_init(
* they track in the AIL as if they were physically logged.
*/
if (tp)
- xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp),
+ xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
mp->m_sb.sb_inodesize, length, gen);
- } else if (xfs_sb_version_hasnlink(&mp->m_sb))
+ } else
version = 2;
- else
- version = 1;
for (j = 0; j < nbufs; j++) {
/*
@@ -241,12 +292,12 @@ xfs_ialloc_inode_init(
mp->m_bsize * blks_per_cluster,
XBF_UNMAPPED);
if (!fbuf)
- return ENOMEM;
+ return -ENOMEM;
/* Initialize the inode buffers and log them appropriately. */
fbuf->b_ops = &xfs_inode_buf_ops;
xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
- for (i = 0; i < ninodes; i++) {
+ for (i = 0; i < inodes_per_cluster; i++) {
int ioffset = i << mp->m_sb.sb_inodelog;
uint isize = xfs_dinode_size(version);
@@ -310,13 +361,10 @@ xfs_ialloc_ag_alloc(
{
xfs_agi_t *agi; /* allocation group header */
xfs_alloc_arg_t args; /* allocation argument structure */
- xfs_btree_cur_t *cur; /* inode btree cursor */
xfs_agnumber_t agno;
int error;
- int i;
xfs_agino_t newino; /* new first inode's number */
xfs_agino_t newlen; /* new number of inodes */
- xfs_agino_t thisino; /* current inode number, for loop */
int isaligned = 0; /* inode allocation at stripe unit */
/* boundary */
struct xfs_perag *pag;
@@ -329,11 +377,11 @@ xfs_ialloc_ag_alloc(
* Locking will ensure that we don't have two callers in here
* at one time.
*/
- newlen = XFS_IALLOC_INODES(args.mp);
+ newlen = args.mp->m_ialloc_inos;
if (args.mp->m_maxicount &&
args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
- return XFS_ERROR(ENOSPC);
- args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
+ return -ENOSPC;
+ args.minlen = args.maxlen = args.mp->m_ialloc_blks;
/*
* First try to allocate inodes contiguous with the last-allocated
* chunk of inodes. If the filesystem is striped, this will fill
@@ -343,7 +391,7 @@ xfs_ialloc_ag_alloc(
newino = be32_to_cpu(agi->agi_newino);
agno = be32_to_cpu(agi->agi_seqno);
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
- XFS_IALLOC_BLOCKS(args.mp);
+ args.mp->m_ialloc_blks;
if (likely(newino != NULLAGINO &&
(args.agbno < be32_to_cpu(agi->agi_length)))) {
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
@@ -370,6 +418,18 @@ xfs_ialloc_ag_alloc(
args.minleft = args.mp->m_in_maxlevels - 1;
if ((error = xfs_alloc_vextent(&args)))
return error;
+
+ /*
+ * This request might have dirtied the transaction if the AG can
+ * satisfy the request, but the exact block was not available.
+ * If the allocation did fail, subsequent requests will relax
+ * the exact agbno requirement and increase the alignment
+ * instead. It is critical that the total size of the request
+ * (len + alignment + slop) does not increase from this point
+ * on, so reset minalignslop to ensure it is not included in
+ * subsequent requests.
+ */
+ args.minalignslop = 0;
} else
args.fsbno = NULLFSBLOCK;
@@ -454,29 +514,19 @@ xfs_ialloc_ag_alloc(
agi->agi_newino = cpu_to_be32(newino);
/*
- * Insert records describing the new inode chunk into the btree.
+ * Insert records describing the new inode chunk into the btrees.
*/
- cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
- for (thisino = newino;
- thisino < newino + newlen;
- thisino += XFS_INODES_PER_CHUNK) {
- cur->bc_rec.i.ir_startino = thisino;
- cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
- cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
- error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
- if (error) {
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- return error;
- }
- ASSERT(i == 0);
- error = xfs_btree_insert(cur, &i);
- if (error) {
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+ XFS_BTNUM_INO);
+ if (error)
+ return error;
+
+ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+ error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+ XFS_BTNUM_FINO);
+ if (error)
return error;
- }
- ASSERT(i == 1);
}
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
/*
* Log allocation group header fields
*/
@@ -585,7 +635,7 @@ xfs_ialloc_ag_select(
* Is there enough free space for the file plus a block of
* inodes? (if we need to allocate some)?
*/
- ineed = XFS_IALLOC_BLOCKS(mp);
+ ineed = mp->m_ialloc_blks;
longest = pag->pagf_longest;
if (!longest)
longest = pag->pagf_flcount > 0;
@@ -670,13 +720,10 @@ xfs_ialloc_get_rec(
}
/*
- * Allocate an inode.
- *
- * The caller selected an AG for us, and made sure that free inodes are
- * available.
+ * Allocate an inode using the inobt-only algorithm.
*/
STATIC int
-xfs_dialloc_ag(
+xfs_dialloc_ag_inobt(
struct xfs_trans *tp,
struct xfs_buf *agbp,
xfs_ino_t parent,
@@ -702,7 +749,7 @@ xfs_dialloc_ag(
ASSERT(pag->pagi_freecount > 0);
restart_pagno:
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
/*
* If pagino is 0 (this is the root inode allocation) use newino.
* This must work because we've just allocated some.
@@ -935,6 +982,294 @@ error0:
}
/*
+ * Use the free inode btree to allocate an inode based on distance from the
+ * parent. Note that the provided cursor may be deleted and replaced.
+ *
+ * pagino is the search target (the caller passes the parent's AG inode
+ * number). *ocur is the finobt cursor on entry and is used as the "left"
+ * search cursor; *rec receives the record that was chosen.
+ *
+ * The search first does a less-or-equal lookup of pagino. If that record
+ * covers pagino it is returned straight away. Otherwise the cursor is
+ * duplicated, a greater-or-equal lookup is done with the duplicate, and one
+ * of the two candidate records is picked. When the right-hand record wins,
+ * the original cursor is deleted and *ocur is replaced with the duplicate;
+ * when the left-hand record wins, the duplicate is deleted and *ocur is left
+ * alone.
+ *
+ * On every error return *ocur still refers to the cursor that was passed
+ * in: failures before the duplication return directly, and failures after it
+ * delete only the duplicate. Disposing of *ocur is left to the caller.
+ */
+STATIC int
+xfs_dialloc_ag_finobt_near(
+ xfs_agino_t pagino,
+ struct xfs_btree_cur **ocur,
+ struct xfs_inobt_rec_incore *rec)
+{
+ struct xfs_btree_cur *lcur = *ocur; /* left search cursor */
+ struct xfs_btree_cur *rcur; /* right search cursor */
+ struct xfs_inobt_rec_incore rrec;
+ int error;
+ int i, j;
+
+ error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
+ if (error)
+ return error;
+
+ if (i == 1) {
+ error = xfs_inobt_get_rec(lcur, rec, &i);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+ /*
+ * See if we've landed in the parent inode record. The finobt
+ * only tracks chunks with at least one free inode, so record
+ * existence is enough.
+ */
+ if (pagino >= rec->ir_startino &&
+ pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
+ return 0;
+ }
+
+ error = xfs_btree_dup_cursor(lcur, &rcur);
+ if (error)
+ return error;
+
+ error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
+ if (error)
+ goto error_rcur;
+ if (j == 1) {
+ error = xfs_inobt_get_rec(rcur, &rrec, &j);
+ if (error)
+ goto error_rcur;
+ XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
+ }
+
+ XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
+ if (i == 1 && j == 1) {
+ /*
+ * Both the left and right records are valid. Choose the closer
+ * inode chunk to the target.
+ *
+ * NOTE(review): the left-hand term adds the chunk span
+ * (XFS_INODES_PER_CHUNK - 1) to (pagino - startino) instead of
+ * subtracting it, which biases the choice towards the right
+ * record. Confirm that this weighting is intended.
+ */
+ if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
+ (rrec.ir_startino - pagino)) {
+ *rec = rrec;
+ xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
+ *ocur = rcur;
+ } else {
+ xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
+ }
+ } else if (j == 1) {
+ /* only the right record is valid */
+ *rec = rrec;
+ xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
+ *ocur = rcur;
+ } else if (i == 1) {
+ /* only the left record is valid */
+ xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
+ }
+
+ return 0;
+
+error_rcur:
+ xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/*
+ * Use the free inode btree to find a free inode based on a newino hint. If
+ * the hint is NULL, find the first free inode in the AG.
+ *
+ * agi supplies the agi_newino hint, cur is the finobt cursor to search with
+ * and rec receives the record that was found.
+ *
+ * If the hint is set, an exact-match lookup is tried first. When the hinted
+ * chunk has no record in the finobt the lookup simply misses and the search
+ * falls back to the first record in the tree, the same as when there is no
+ * hint at all.
+ *
+ * Returns 0 with *rec filled in, or an error from the btree calls. The
+ * cursor is never deleted here; it remains owned by the caller.
+ */
+STATIC int
+xfs_dialloc_ag_finobt_newino(
+ struct xfs_agi *agi,
+ struct xfs_btree_cur *cur,
+ struct xfs_inobt_rec_incore *rec)
+{
+ int error;
+ int i;
+
+ if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
+ /*
+ * agi_newino is kept in on-disk (big-endian) byte order, while
+ * the lookup takes a CPU-endian inode number. Convert it so
+ * that the hint is searched for correctly on little-endian
+ * machines as well.
+ */
+ error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
+ XFS_LOOKUP_EQ, &i);
+ if (error)
+ return error;
+ if (i == 1) {
+ error = xfs_inobt_get_rec(cur, rec, &i);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+ return 0;
+ }
+ }
+
+ /*
+ * Find the first inode available in the AG.
+ */
+ error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+ error = xfs_inobt_get_rec(cur, rec, &i);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+ return 0;
+}
+
+/*
+ * Update the inobt based on a modification made to the finobt. Also ensure that
+ * the records from both trees are equivalent post-modification.
+ *
+ * frec is the finobt record as it looks after the allocation, and offset is
+ * the index within the chunk of the inode that was allocated.
+ *
+ * The inobt record with the same start inode is looked up and must exist.
+ * The same change is then applied to it independently (clear the bit for
+ * offset in the free mask, decrement the free count) and the result is
+ * required to match frec before the record is written back.
+ *
+ * Returns 0 on success or an error from the btree calls. A missing record
+ * or a mismatch between the two trees fails the XFS_WANT_CORRUPTED_RETURN
+ * checks. The cursor is left for the caller to delete.
+ */
+STATIC int
+xfs_dialloc_ag_update_inobt(
+ struct xfs_btree_cur *cur, /* inobt cursor */
+ struct xfs_inobt_rec_incore *frec, /* finobt record */
+ int offset) /* inode offset */
+{
+ struct xfs_inobt_rec_incore rec;
+ int error;
+ int i;
+
+ error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
+ ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
+ XFS_INODES_PER_CHUNK) == 0);
+
+ rec.ir_free &= ~XFS_INOBT_MASK(offset);
+ rec.ir_freecount--;
+
+ XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
+ (rec.ir_freecount == frec->ir_freecount));
+
+ error = xfs_inobt_update(cur, &rec);
+ if (error)
+ return error;
+
+ return 0;
+}
+
+/*
+ * Allocate an inode using the free inode btree, if available. Otherwise, fall
+ * back to the inobt search algorithm.
+ *
+ * The caller selected an AG for us, and made sure that free inodes are
+ * available.
+ *
+ * tp is the transaction, agbp the AGI buffer of the chosen AG, parent the
+ * inode number of the parent and *inop receives the allocated inode number.
+ *
+ * Without the finobt feature this just calls xfs_dialloc_ag_inobt().
+ * Otherwise the steps are:
+ * - pick a finobt record, near the parent if the parent lives in this AG,
+ * else via the agi_newino hint / first record;
+ * - take the lowest set bit of the record's free mask as the inode;
+ * - update the finobt record, or delete it when no free inodes remain;
+ * - apply the equivalent change to the inobt and cross-check the records;
+ * - decrement the free counts in the AGI, the perag and the superblock.
+ *
+ * Returns 0 with *inop set on success. On failure *inop is not written,
+ * whichever cursors exist are deleted with XFS_BTREE_ERROR (error_icur falls
+ * through into error_cur) and the perag reference is dropped.
+ */
+STATIC int
+xfs_dialloc_ag(
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_ino_t parent,
+ xfs_ino_t *inop)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
+ xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
+ xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
+ struct xfs_perag *pag;
+ struct xfs_btree_cur *cur; /* finobt cursor */
+ struct xfs_btree_cur *icur; /* inobt cursor */
+ struct xfs_inobt_rec_incore rec;
+ xfs_ino_t ino;
+ int error;
+ int offset;
+ int i;
+
+ if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+ return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);
+
+ pag = xfs_perag_get(mp, agno);
+
+ /*
+ * If pagino is 0 (this is the root inode allocation) use newino.
+ * This must work because we've just allocated some.
+ */
+ if (!pagino)
+ pagino = be32_to_cpu(agi->agi_newino);
+
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
+
+ error = xfs_check_agi_freecount(cur, agi);
+ if (error)
+ goto error_cur;
+
+ /*
+ * The search algorithm depends on whether we're in the same AG as the
+ * parent. If so, find the closest available inode to the parent. If
+ * not, consider the agi hint or find the first free inode in the AG.
+ *
+ * The near search takes &cur because it may replace the cursor.
+ */
+ if (agno == pagno)
+ error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
+ else
+ error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
+ if (error)
+ goto error_cur;
+
+ offset = xfs_lowbit64(rec.ir_free);
+ ASSERT(offset >= 0);
+ ASSERT(offset < XFS_INODES_PER_CHUNK);
+ ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
+ XFS_INODES_PER_CHUNK) == 0);
+ ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
+
+ /*
+ * Modify or remove the finobt record.
+ */
+ rec.ir_free &= ~XFS_INOBT_MASK(offset);
+ rec.ir_freecount--;
+ if (rec.ir_freecount)
+ error = xfs_inobt_update(cur, &rec);
+ else
+ error = xfs_btree_delete(cur, &i);
+ if (error)
+ goto error_cur;
+
+ /*
+ * The finobt has now been updated appropriately. We haven't updated the
+ * agi and superblock yet, so we can create an inobt cursor and validate
+ * the original freecount. If all is well, make the equivalent update to
+ * the inobt using the finobt record and offset information.
+ */
+ icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
+
+ error = xfs_check_agi_freecount(icur, agi);
+ if (error)
+ goto error_icur;
+
+ error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
+ if (error)
+ goto error_icur;
+
+ /*
+ * Both trees have now been updated. We must update the perag and
+ * superblock before we can check the freecount for each btree.
+ */
+ be32_add_cpu(&agi->agi_freecount, -1);
+ xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+ pag->pagi_freecount--;
+
+ xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+
+ error = xfs_check_agi_freecount(icur, agi);
+ if (error)
+ goto error_icur;
+ error = xfs_check_agi_freecount(cur, agi);
+ if (error)
+ goto error_icur;
+
+ xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ xfs_perag_put(pag);
+ *inop = ino;
+ return 0;
+
+error_icur:
+ xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
+error_cur:
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ xfs_perag_put(pag);
+ return error;
+}
+
+/*
* Allocate an inode on disk.
*
* Mode is used to tell whether the new inode will need space, and whether it
@@ -999,7 +1334,7 @@ xfs_dialloc(
* inode.
*/
if (mp->m_maxicount &&
- mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
+ mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
noroom = 1;
okalloc = 0;
}
@@ -1050,7 +1385,7 @@ xfs_dialloc(
if (error) {
xfs_trans_brelse(tp, agbp);
- if (error != ENOSPC)
+ if (error != -ENOSPC)
goto out_error;
xfs_perag_put(pag);
@@ -1081,7 +1416,7 @@ nextag:
agno = 0;
if (agno == start_agno) {
*inop = NULLFSINO;
- return noroom ? ENOSPC : 0;
+ return noroom ? -ENOSPC : 0;
}
}
@@ -1090,81 +1425,37 @@ out_alloc:
return xfs_dialloc_ag(tp, agbp, parent, inop);
out_error:
xfs_perag_put(pag);
- return XFS_ERROR(error);
+ return error;
}
-/*
- * Free disk inode. Carefully avoids touching the incore inode, all
- * manipulations incore are the caller's responsibility.
- * The on-disk inode is not changed by this operation, only the
- * btree (free inode mask) is changed.
- */
-int
-xfs_difree(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_ino_t inode, /* inode to be freed */
- xfs_bmap_free_t *flist, /* extents to free */
- int *delete, /* set if inode cluster was deleted */
- xfs_ino_t *first_ino) /* first inode in deleted cluster */
+STATIC int
+xfs_difree_inobt(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agino_t agino,
+ struct xfs_bmap_free *flist,
+ int *deleted,
+ xfs_ino_t *first_ino,
+ struct xfs_inobt_rec_incore *orec)
{
- /* REFERENCED */
- xfs_agblock_t agbno; /* block number containing inode */
- xfs_buf_t *agbp; /* buffer containing allocation group header */
- xfs_agino_t agino; /* inode number relative to allocation group */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_agi_t *agi; /* allocation group header */
- xfs_btree_cur_t *cur; /* inode btree cursor */
- int error; /* error return value */
- int i; /* result code */
- int ilen; /* inodes in an inode cluster */
- xfs_mount_t *mp; /* mount structure for filesystem */
- int off; /* offset of inode in inode chunk */
- xfs_inobt_rec_incore_t rec; /* btree record */
- struct xfs_perag *pag;
-
- mp = tp->t_mountp;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
+ struct xfs_perag *pag;
+ struct xfs_btree_cur *cur;
+ struct xfs_inobt_rec_incore rec;
+ int ilen;
+ int error;
+ int i;
+ int off;
- /*
- * Break up inode number into its components.
- */
- agno = XFS_INO_TO_AGNO(mp, inode);
- if (agno >= mp->m_sb.sb_agcount) {
- xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
- __func__, agno, mp->m_sb.sb_agcount);
- ASSERT(0);
- return XFS_ERROR(EINVAL);
- }
- agino = XFS_INO_TO_AGINO(mp, inode);
- if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
- xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
- __func__, (unsigned long long)inode,
- (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
- ASSERT(0);
- return XFS_ERROR(EINVAL);
- }
- agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (agbno >= mp->m_sb.sb_agblocks) {
- xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
- __func__, agbno, mp->m_sb.sb_agblocks);
- ASSERT(0);
- return XFS_ERROR(EINVAL);
- }
- /*
- * Get the allocation group header.
- */
- error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
- if (error) {
- xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
- __func__, error);
- return error;
- }
- agi = XFS_BUF_TO_AGI(agbp);
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
- ASSERT(agbno < be32_to_cpu(agi->agi_length));
+ ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
+
/*
* Initialize the cursor.
*/
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
error = xfs_check_agi_freecount(cur, agi);
if (error)
@@ -1202,9 +1493,9 @@ xfs_difree(
* When an inode cluster is free, it becomes eligible for removal
*/
if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
- (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
+ (rec.ir_freecount == mp->m_ialloc_inos)) {
- *delete = 1;
+ *deleted = 1;
*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
/*
@@ -1212,7 +1503,7 @@ xfs_difree(
* AGI and Superblock inode counts, and mark the disk space
* to be freed when the transaction is committed.
*/
- ilen = XFS_IALLOC_INODES(mp);
+ ilen = mp->m_ialloc_inos;
be32_add_cpu(&agi->agi_count, -ilen);
be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
@@ -1228,11 +1519,11 @@ xfs_difree(
goto error0;
}
- xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
- agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
- XFS_IALLOC_BLOCKS(mp), flist, mp);
+ xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
+ XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
+ mp->m_ialloc_blks, flist, mp);
} else {
- *delete = 0;
+ *deleted = 0;
error = xfs_inobt_update(cur, &rec);
if (error) {
@@ -1256,6 +1547,7 @@ xfs_difree(
if (error)
goto error0;
+ *orec = rec;
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
return 0;
@@ -1264,6 +1556,182 @@ error0:
return error;
}
+/*
+ * Free an inode in the free inode btree.
+ *
+ * agbp is the AGI buffer, agino the AG-relative number of the inode being
+ * freed and ibtrec the inobt record for its chunk, already reflecting the
+ * free. The bit position of the inode is agino - ibtrec->ir_startino.
+ *
+ * Three outcomes are possible:
+ * - no finobt record exists for the chunk: insert one carrying ibtrec's
+ * free count and free mask (the free count is required to be 1);
+ * - the chunk becomes completely free and XFS_MOUNT_IKEEP is not set:
+ * delete the finobt record;
+ * - otherwise: update the finobt record in place.
+ *
+ * Returns 0 on success. On any failure the cursor is deleted with
+ * XFS_BTREE_ERROR and the error is returned.
+ */
+STATIC int
+xfs_difree_finobt(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agino_t agino,
+ struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
+{
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
+ struct xfs_btree_cur *cur;
+ struct xfs_inobt_rec_incore rec;
+ int offset = agino - ibtrec->ir_startino;
+ int error;
+ int i;
+
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
+
+ error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
+ if (error)
+ goto error;
+ if (i == 0) {
+ /*
+ * If the record does not exist in the finobt, we must have just
+ * freed an inode in a previously fully allocated chunk. If not,
+ * something is out of sync.
+ */
+ XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
+
+ error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
+ ibtrec->ir_free, &i);
+ if (error)
+ goto error;
+ ASSERT(i == 1);
+
+ goto out;
+ }
+
+ /*
+ * Read and update the existing record. We could just copy the ibtrec
+ * across here, but that would defeat the purpose of having redundant
+ * metadata. By making the modifications independently, we can catch
+ * corruptions that we wouldn't see if we just copied from one record
+ * to another.
+ */
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (error)
+ goto error;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error);
+
+ rec.ir_free |= XFS_INOBT_MASK(offset);
+ rec.ir_freecount++;
+
+ XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
+ (rec.ir_freecount == ibtrec->ir_freecount),
+ error);
+
+ /*
+ * The content of inobt records should always match between the inobt
+ * and finobt. The lifecycle of records in the finobt is different from
+ * the inobt in that the finobt only tracks records with at least one
+ * free inode. Hence, if all of the inodes are free and we aren't
+ * keeping inode chunks permanently on disk, remove the record.
+ * Otherwise, update the record with the new information.
+ */
+ if (rec.ir_freecount == mp->m_ialloc_inos &&
+ !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+ error = xfs_btree_delete(cur, &i);
+ if (error)
+ goto error;
+ ASSERT(i == 1);
+ } else {
+ error = xfs_inobt_update(cur, &rec);
+ if (error)
+ goto error;
+ }
+
+out:
+ error = xfs_check_agi_freecount(cur, agi);
+ if (error)
+ goto error;
+
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ return 0;
+
+error:
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/*
+ * Free disk inode. Carefully avoids touching the incore inode, all
+ * manipulations incore are the caller's responsibility.
+ * The on-disk inode is not changed by this operation, only the
+ * btree (free inode mask) is changed.
+ *
+ * The inode number is first split into AG number, AG inode number and AG
+ * block number, and each piece is range checked against the superblock; a
+ * failed check warns, asserts and returns -EINVAL. The AGI of that AG is
+ * then read and the work is handed to xfs_difree_inobt(). If the filesystem
+ * has a free inode btree, the inobt record handed back by that call is
+ * passed on to xfs_difree_finobt() so the second tree gets the matching
+ * update.
+ *
+ * deleted and first_ino are passed through to xfs_difree_inobt().
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+int
+xfs_difree(
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_ino_t inode, /* inode to be freed */
+ struct xfs_bmap_free *flist, /* extents to free */
+ int *deleted,/* set if inode cluster was deleted */
+ xfs_ino_t *first_ino)/* first inode in deleted cluster */
+{
+ /* REFERENCED */
+ xfs_agblock_t agbno; /* block number containing inode */
+ struct xfs_buf *agbp; /* buffer for allocation group header */
+ xfs_agino_t agino; /* allocation group inode number */
+ xfs_agnumber_t agno; /* allocation group number */
+ int error; /* error return value */
+ struct xfs_mount *mp; /* mount structure for filesystem */
+ struct xfs_inobt_rec_incore rec;/* btree record */
+
+ mp = tp->t_mountp;
+
+ /*
+ * Break up inode number into its components.
+ */
+ agno = XFS_INO_TO_AGNO(mp, inode);
+ if (agno >= mp->m_sb.sb_agcount) {
+ xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
+ __func__, agno, mp->m_sb.sb_agcount);
+ ASSERT(0);
+ return -EINVAL;
+ }
+ agino = XFS_INO_TO_AGINO(mp, inode);
+ if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
+ xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
+ __func__, (unsigned long long)inode,
+ (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
+ ASSERT(0);
+ return -EINVAL;
+ }
+ agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+ if (agbno >= mp->m_sb.sb_agblocks) {
+ xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
+ __func__, agbno, mp->m_sb.sb_agblocks);
+ ASSERT(0);
+ return -EINVAL;
+ }
+ /*
+ * Get the allocation group header.
+ */
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ if (error) {
+ xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
+ __func__, error);
+ return error;
+ }
+
+ /*
+ * Fix up the inode allocation btree.
+ */
+ error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
+ &rec);
+ if (error)
+ goto error0;
+
+ /*
+ * Fix up the free inode btree.
+ */
+ if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
+ if (error)
+ goto error0;
+ }
+
+ return 0;
+
+error0:
+ return error;
+}
+
STATIC int
xfs_imap_lookup(
struct xfs_mount *mp,
@@ -1295,13 +1763,13 @@ xfs_imap_lookup(
* we have a record, we need to ensure it contains the inode number
* we are looking up.
*/
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
if (!error) {
if (i)
error = xfs_inobt_get_rec(cur, &rec, &i);
if (!error && i == 0)
- error = EINVAL;
+ error = -EINVAL;
}
xfs_trans_brelse(tp, agbp);
@@ -1311,13 +1779,13 @@ xfs_imap_lookup(
/* check that the returned record contains the required inode */
if (rec.ir_startino > agino ||
- rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
- return EINVAL;
+ rec.ir_startino + mp->m_ialloc_inos <= agino)
+ return -EINVAL;
/* for untrusted inodes check it is allocated first */
if ((flags & XFS_IGET_UNTRUSTED) &&
(rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
- return EINVAL;
+ return -EINVAL;
*chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
*offset_agbno = agbno - *chunk_agbno;
@@ -1361,7 +1829,7 @@ xfs_imap(
* as they can be invalid without implying corruption.
*/
if (flags & XFS_IGET_UNTRUSTED)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if (agno >= mp->m_sb.sb_agcount) {
xfs_alert(mp,
"%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
@@ -1381,10 +1849,10 @@ xfs_imap(
}
xfs_stack_trace();
#endif /* DEBUG */
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
- blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+ blks_per_cluster = xfs_icluster_size_fsb(mp);
/*
* For bulkstat and handle lookups, we have an untrusted inode number
@@ -1405,7 +1873,7 @@ xfs_imap(
* If the inode cluster size is the same as the blocksize or
* smaller we get to the buffer by simple arithmetics.
*/
- if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) {
+ if (blks_per_cluster == 1) {
offset = XFS_INO_TO_OFFSET(mp, ino);
ASSERT(offset < mp->m_sb.sb_inopblock);
@@ -1454,7 +1922,7 @@ out_map:
__func__, (unsigned long long) imap->im_blkno,
(unsigned long long) imap->im_len,
XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
return 0;
}
@@ -1483,7 +1951,16 @@ xfs_ialloc_compute_maxlevels(
}
/*
- * Log specified fields for the ag hdr (inode section)
+ * Log specified fields for the ag hdr (inode section). The growth of the agi
+ * structure over time requires that we interpret the buffer as two logical
+ * regions delineated by the end of the unlinked list. This is due to the size
+ * of the hash table and its location in the middle of the agi.
+ *
+ * For example, a request to log a field before agi_unlinked and a field after
+ * agi_unlinked could cause us to log the entire hash table and use an excessive
+ * amount of log space. To avoid this behavior, log the region up through
+ * agi_unlinked in one call and the region after agi_unlinked through the end of
+ * the structure in another.
*/
void
xfs_ialloc_log_agi(
@@ -1506,6 +1983,8 @@ xfs_ialloc_log_agi(
offsetof(xfs_agi_t, agi_newino),
offsetof(xfs_agi_t, agi_dirino),
offsetof(xfs_agi_t, agi_unlinked),
+ offsetof(xfs_agi_t, agi_free_root),
+ offsetof(xfs_agi_t, agi_free_level),
sizeof(xfs_agi_t)
};
#ifdef DEBUG
@@ -1514,15 +1993,30 @@ xfs_ialloc_log_agi(
agi = XFS_BUF_TO_AGI(bp);
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
#endif
+
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
+
/*
- * Compute byte offsets for the first and last fields.
+ * Compute byte offsets for the first and last fields in the first
+ * region and log the agi buffer. This only logs up through
+ * agi_unlinked.
*/
- xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
+ if (fields & XFS_AGI_ALL_BITS_R1) {
+ xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
+ &first, &last);
+ xfs_trans_log_buf(tp, bp, first, last);
+ }
+
/*
- * Log the allocation group inode header buffer.
+ * Mask off the bits in the first region and calculate the first and
+ * last field offsets for any bits in the second region.
*/
- xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
- xfs_trans_log_buf(tp, bp, first, last);
+ fields &= ~XFS_AGI_ALL_BITS_R1;
+ if (fields) {
+ xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
+ &first, &last);
+ xfs_trans_log_buf(tp, bp, first, last);
+ }
}
#ifdef DEBUG
@@ -1575,18 +2069,17 @@ xfs_agi_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- int agi_ok = 1;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agi, agi_crc));
- agi_ok = agi_ok && xfs_agi_verify(bp);
-
- if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
- XFS_RANDOM_IALLOC_READ_AGI))) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
+ XFS_ERRTAG_IALLOC_READ_AGI,
+ XFS_RANDOM_IALLOC_READ_AGI))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -1597,8 +2090,8 @@ xfs_agi_write_verify(
struct xfs_buf_log_item *bip = bp->b_fspriv;
if (!xfs_agi_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -1607,8 +2100,7 @@ xfs_agi_write_verify(
if (bip)
XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agi, agi_crc));
+ xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
}
const struct xfs_buf_ops xfs_agi_buf_ops = {
@@ -1637,7 +2129,6 @@ xfs_read_agi(
if (error)
return error;
- ASSERT(!xfs_buf_geterror(*bpp));
xfs_buf_set_ref(*bpp, XFS_AGI_REF);
return 0;
}
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index a8f76a5ff418..95ad1c002d60 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -25,17 +25,18 @@ struct xfs_mount;
struct xfs_trans;
struct xfs_btree_cur;
-/*
- * Allocation parameters for inode allocation.
- */
-#define XFS_IALLOC_INODES(mp) (mp)->m_ialloc_inos
-#define XFS_IALLOC_BLOCKS(mp) (mp)->m_ialloc_blks
-
-/*
- * Move inodes in clusters of this size.
- */
+/* Move inodes in clusters of this size */
#define XFS_INODE_BIG_CLUSTER_SIZE 8192
-#define XFS_INODE_CLUSTER_SIZE(mp) (mp)->m_inode_cluster_size
+
+/*
+ * Calculate and return the number of filesystem blocks per inode cluster.
+ *
+ * When a filesystem block is at least as large as the inode cluster size the
+ * answer is 1; otherwise it is the cluster size in bytes shifted right by
+ * sb_blocklog.
+ */
+static inline int
+xfs_icluster_size_fsb(
+ struct xfs_mount *mp)
+{
+ if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size)
+ return 1;
+ return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
+}
/*
* Make an inode pointer out of the buffer/offset.
@@ -89,7 +90,7 @@ xfs_difree(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
- int *delete, /* set if inode cluster was deleted */
+ int *deleted, /* set if inode cluster was deleted */
xfs_ino_t *first_ino); /* first inode in deleted cluster */
/*
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index c8fa5bbb36de..c9b06f30fe86 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -49,7 +49,8 @@ xfs_inobt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agbp, cur->bc_private.a.agno);
+ cur->bc_private.a.agbp, cur->bc_private.a.agno,
+ cur->bc_btnum);
}
STATIC void
@@ -66,12 +67,26 @@ xfs_inobt_set_root(
xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
}
+STATIC void
+xfs_finobt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *nptr,
+ int inc) /* level change */
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ /*
+ * Free inode btree counterpart of xfs_inobt_set_root(): store the new
+ * root pointer in agi_free_root, add inc to agi_free_level, and log
+ * only those two AGI fields through the cursor's transaction.
+ */
+ agi->agi_free_root = nptr->s;
+ be32_add_cpu(&agi->agi_free_level, inc);
+ xfs_ialloc_log_agi(cur->bc_tp, agbp,
+ XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
+}
+
STATIC int
xfs_inobt_alloc_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
- int length,
int *stat)
{
xfs_alloc_arg_t args; /* block allocation args */
@@ -173,6 +188,17 @@ xfs_inobt_init_ptr_from_cur(
ptr->s = agi->agi_root;
}
+STATIC void
+xfs_finobt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
+
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
+ ptr->s = agi->agi_free_root;
+}
+
STATIC __int64_t
xfs_inobt_key_diff(
struct xfs_btree_cur *cur,
@@ -203,6 +229,7 @@ xfs_inobt_verify(
*/
switch (block->bb_magic) {
case cpu_to_be32(XFS_IBT_CRC_MAGIC):
+ case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
@@ -214,6 +241,7 @@ xfs_inobt_verify(
return false;
/* fall through */
case cpu_to_be32(XFS_IBT_MAGIC):
+ case cpu_to_be32(XFS_FIBT_MAGIC):
break;
default:
return 0;
@@ -243,12 +271,14 @@ static void
xfs_inobt_read_verify(
struct xfs_buf *bp)
{
- if (!(xfs_btree_sblock_verify_crc(bp) &&
- xfs_inobt_verify(bp))) {
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_inobt_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
}
}
@@ -258,9 +288,9 @@ xfs_inobt_write_verify(
{
if (!xfs_inobt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
}
xfs_btree_sblock_calc_crc(bp);
@@ -315,6 +345,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
#endif
};
+static const struct xfs_btree_ops xfs_finobt_ops = {
+ .rec_len = sizeof(xfs_inobt_rec_t),
+ .key_len = sizeof(xfs_inobt_key_t),
+
+ .dup_cursor = xfs_inobt_dup_cursor,
+ .set_root = xfs_finobt_set_root,
+ .alloc_block = xfs_inobt_alloc_block,
+ .free_block = xfs_inobt_free_block,
+ .get_minrecs = xfs_inobt_get_minrecs,
+ .get_maxrecs = xfs_inobt_get_maxrecs,
+ .init_key_from_rec = xfs_inobt_init_key_from_rec,
+ .init_rec_from_key = xfs_inobt_init_rec_from_key,
+ .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
+ .key_diff = xfs_inobt_key_diff,
+ .buf_ops = &xfs_inobt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+ .keys_inorder = xfs_inobt_keys_inorder,
+ .recs_inorder = xfs_inobt_recs_inorder,
+#endif
+};
+
/*
* Allocate a new inode btree cursor.
*/
@@ -323,7 +375,8 @@ xfs_inobt_init_cursor(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
struct xfs_buf *agbp, /* buffer for agi structure */
- xfs_agnumber_t agno) /* allocation group number */
+ xfs_agnumber_t agno, /* allocation group number */
+ xfs_btnum_t btnum) /* ialloc or free ino btree */
{
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
struct xfs_btree_cur *cur;
@@ -332,11 +385,17 @@ xfs_inobt_init_cursor(
cur->bc_tp = tp;
cur->bc_mp = mp;
- cur->bc_nlevels = be32_to_cpu(agi->agi_level);
- cur->bc_btnum = XFS_BTNUM_INO;
+ cur->bc_btnum = btnum;
+ if (btnum == XFS_BTNUM_INO) {
+ cur->bc_nlevels = be32_to_cpu(agi->agi_level);
+ cur->bc_ops = &xfs_inobt_ops;
+ } else {
+ cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
+ cur->bc_ops = &xfs_finobt_ops;
+ }
+
cur->bc_blocklog = mp->m_sb.sb_blocklog;
- cur->bc_ops = &xfs_inobt_ops;
if (xfs_sb_version_hascrc(&mp->m_sb))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index f38b22011c4e..d7ebea72c2d0 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -58,7 +58,8 @@ struct xfs_mount;
((index) - 1) * sizeof(xfs_inobt_ptr_t)))
extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
- struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
+ struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t,
+ xfs_btnum_t);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 4fc9f39dd89e..f18fd2da49f7 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -101,9 +101,8 @@ xfs_inode_buf_verify(
return;
}
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
- mp, dip);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
#ifdef DEBUG
xfs_alert(mp,
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -175,14 +174,14 @@ xfs_imap_to_bp(
(int)imap->im_len, buf_flags, &bp,
&xfs_inode_buf_ops);
if (error) {
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
ASSERT(buf_flags & XBF_TRYLOCK);
return error;
}
- if (error == EFSCORRUPTED &&
+ if (error == -EFSCORRUPTED &&
(iget_flags & XFS_IGET_UNTRUSTED))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
__func__, error);
@@ -306,7 +305,7 @@ xfs_dinode_verify(
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
- offsetof(struct xfs_dinode, di_crc)))
+ XFS_DINODE_CRC_OFF))
return false;
if (be64_to_cpu(dip->di_ino) != ip->i_ino)
return false;
@@ -327,7 +326,7 @@ xfs_dinode_calc_crc(
ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
- offsetof(struct xfs_dinode, di_crc));
+ XFS_DINODE_CRC_OFF);
dip->di_crc = xfs_end_cksum(crc);
}
@@ -391,7 +390,7 @@ xfs_iread(
__func__, ip->i_ino);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto out_brelse;
}
@@ -438,17 +437,16 @@ xfs_iread(
}
/*
- * The inode format changed when we moved the link count and
- * made it 32 bits long. If this is an old format inode,
- * convert it in memory to look like a new one. If it gets
- * flushed to disk we will convert back before flushing or
- * logging it. We zero out the new projid field and the old link
- * count field. We'll handle clearing the pad field (the remains
- * of the old uuid field) when we actually convert the inode to
- * the new format. We don't change the version number so that we
- * can distinguish this from a real new format inode.
+ * Automatically convert version 1 inode formats in memory to version 2
+ * inode format. If the inode is modified, it will get logged and
+ * rewritten as a version 2 inode. We can do this because we set the
+ * superblock feature bit for v2 inodes unconditionally during mount
+ * and it means the rest of the code can assume the inode version is 2
+ * or higher.
*/
if (ip->i_d.di_version == 1) {
+ ip->i_d.di_version = 2;
+ memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
ip->i_d.di_nlink = ip->i_d.di_onlink;
ip->i_d.di_onlink = 0;
xfs_set_projid(ip, 0);
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 9308c47f2a52..9308c47f2a52 100644
--- a/fs/xfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index cfee14a83cfe..6a00f7fed69d 100644
--- a/fs/xfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -102,7 +102,7 @@ xfs_iformat_fork(
be64_to_cpu(dip->di_nblocks));
XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
@@ -111,7 +111,7 @@ xfs_iformat_fork(
dip->di_forkoff);
XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
@@ -121,7 +121,7 @@ xfs_iformat_fork(
ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
XFS_ERRLEVEL_LOW, ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
switch (ip->i_d.di_mode & S_IFMT) {
@@ -132,7 +132,7 @@ xfs_iformat_fork(
if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
ip->i_d.di_size = 0;
ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
@@ -153,7 +153,7 @@ xfs_iformat_fork(
XFS_CORRUPTION_ERROR("xfs_iformat(4)",
XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
di_size = be64_to_cpu(dip->di_size);
@@ -166,7 +166,7 @@ xfs_iformat_fork(
XFS_CORRUPTION_ERROR("xfs_iformat(5)",
XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
size = (int)di_size;
@@ -181,13 +181,13 @@ xfs_iformat_fork(
default:
XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
break;
default:
XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (error) {
return error;
@@ -211,7 +211,7 @@ xfs_iformat_fork(
XFS_CORRUPTION_ERROR("xfs_iformat(8)",
XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
@@ -223,7 +223,7 @@ xfs_iformat_fork(
error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
break;
default:
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
break;
}
if (error) {
@@ -266,7 +266,7 @@ xfs_iformat_local(
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
ifp = XFS_IFORK_PTR(ip, whichfork);
real_size = 0;
@@ -322,7 +322,7 @@ xfs_iformat_extents(
(unsigned long long) ip->i_ino, nex);
XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
ifp->if_real_bytes = 0;
@@ -350,7 +350,7 @@ xfs_iformat_extents(
XFS_ERROR_REPORT("xfs_iformat_extents(2)",
XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
}
ifp->if_flags |= XFS_IFEXTENTS;
@@ -399,7 +399,7 @@ xfs_iformat_btree(
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
mp, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
ifp->if_broot_bytes = size;
@@ -431,10 +431,12 @@ xfs_iread_extents(
xfs_ifork_t *ifp;
xfs_extnum_t nextents;
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -526,7 +528,7 @@ xfs_iroot_realloc(
ifp->if_broot_bytes = (int)new_size;
ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
XFS_IFORK_SIZE(ip, whichfork));
- memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
+ memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
return;
}
@@ -573,7 +575,7 @@ xfs_iroot_realloc(
ifp->if_broot_bytes);
np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
(int)new_size);
- memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
+ memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
}
kmem_free(ifp->if_broot);
ifp->if_broot = new_broot;
@@ -721,15 +723,16 @@ xfs_idestroy_fork(
}
/*
- * xfs_iextents_copy()
+ * Convert in-core extents to on-disk form
*
- * This is called to copy the REAL extents (as opposed to the delayed
- * allocation extents) from the inode into the given buffer. It
- * returns the number of bytes copied into the buffer.
+ * For either the data or attr fork in extent format, we need to endian convert
+ * the in-core extents as we place them into the on-disk inode.
*
- * If there are no delayed allocation extents, then we can just
- * memcpy() the extents into the buffer. Otherwise, we need to
- * examine each extent in turn and skip those which are delayed.
+ * In the case of the data fork, the in-core and on-disk fork sizes can be
+ * different due to delayed allocation extents. We only copy on-disk extents
+ * here, so callers must always use the physical fork size to determine the
+ * size of the buffer passed to this routine. We will return the size actually
+ * used.
*/
int
xfs_iextents_copy(
@@ -795,8 +798,7 @@ xfs_iflush_fork(
xfs_inode_t *ip,
xfs_dinode_t *dip,
xfs_inode_log_item_t *iip,
- int whichfork,
- xfs_buf_t *bp)
+ int whichfork)
{
char *cp;
xfs_ifork_t *ifp;
@@ -1690,7 +1692,7 @@ xfs_iext_idx_to_irec(
}
*idxp = page_idx;
*erp_idxp = erp_idx;
- return(erp);
+ return erp;
}
/*
diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index eb329a1ea888..7d3b1ed6dcbe 100644
--- a/fs/xfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -127,8 +127,7 @@ typedef struct xfs_ifork {
int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
- struct xfs_inode_log_item *, int,
- struct xfs_buf *);
+ struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_inode *, int);
void xfs_idata_realloc(struct xfs_inode *, int, int);
void xfs_iroot_realloc(struct xfs_inode *, int, int);
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/libxfs/xfs_inum.h
index 90efdaf1706f..4ff2278e147a 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/libxfs/xfs_inum.h
@@ -54,11 +54,7 @@ struct xfs_mount;
#define XFS_OFFBNO_TO_AGINO(mp,b,o) \
((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
-#if XFS_BIG_INUMS
#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL))
-#else
-#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL))
-#endif
#define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL))
#endif /* __XFS_INUM_H__ */
diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index f0969c77bdbe..aff12f2d4428 100644
--- a/fs/xfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -380,7 +380,7 @@ typedef struct xfs_icdinode {
xfs_ictimestamp_t di_mtime; /* time last modified */
xfs_ictimestamp_t di_ctime; /* time created/inode modified */
xfs_fsize_t di_size; /* number of bytes in file */
- xfs_drfsbno_t di_nblocks; /* # of direct & btree blocks used */
+ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
xfs_extnum_t di_nextents; /* number of extents in data fork */
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
@@ -516,7 +516,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
* EFI/EFD log format definitions
*/
typedef struct xfs_extent {
- xfs_dfsbno_t ext_start;
+ xfs_fsblock_t ext_start;
xfs_extlen_t ext_len;
} xfs_extent_t;
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 1c55ccbb379d..1c55ccbb379d 100644
--- a/fs/xfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index 2af1a0a4d0f1..ee7e0e80246b 100644
--- a/fs/xfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -42,7 +42,7 @@ xfs_log_calc_max_attrsetm_res(
int size;
int nblks;
- size = xfs_attr_leaf_entsize_local_max(mp->m_sb.sb_blocksize) -
+ size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) -
MAXNAMELEN - 1;
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
nblks += XFS_B_TO_FSB(mp, size);
diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index b3b2b1065c0f..1b0a08379759 100644
--- a/fs/xfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -98,8 +98,6 @@ typedef __uint16_t xfs_qwarncnt_t;
#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
XFS_GQUOTA_ACTIVE | \
XFS_PQUOTA_ACTIVE))
-#define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
- XFS_PQUOTA_ACTIVE))
#define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
#define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
@@ -156,6 +154,6 @@ typedef __uint16_t xfs_qwarncnt_t;
extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
xfs_dqid_t id, uint type, uint flags, char *str);
-extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks);
+extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index b1f2fe8af4a8..f4dd697cac08 100644
--- a/fs/xfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -74,7 +74,6 @@ xfs_rtbuf_get(
mp->m_bsize, 0, &bp, NULL);
if (error)
return error;
- ASSERT(!xfs_buf_geterror(bp));
*bpp = bp;
return 0;
}
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index b7c9aea77f8f..ad525a5623a4 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -186,13 +186,13 @@ xfs_mount_validate_sb(
*/
if (sbp->sb_magicnum != XFS_SB_MAGIC) {
xfs_warn(mp, "bad magic number");
- return XFS_ERROR(EWRONGFS);
+ return -EWRONGFS;
}
if (!xfs_sb_good_version(sbp)) {
xfs_warn(mp, "bad version");
- return XFS_ERROR(EWRONGFS);
+ return -EWRONGFS;
}
/*
@@ -201,10 +201,6 @@ xfs_mount_validate_sb(
* write validation, we don't need to check feature masks.
*/
if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
- xfs_alert(mp,
-"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
-"Use of these features in this kernel is at your own risk!");
-
if (xfs_sb_has_compat_feature(sbp,
XFS_SB_FEAT_COMPAT_UNKNOWN)) {
xfs_warn(mp,
@@ -224,7 +220,7 @@ xfs_mount_validate_sb(
xfs_warn(mp,
"Attempted to mount read-only compatible filesystem read-write.\n"
"Filesystem can only be safely mounted read only.");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
if (xfs_sb_has_incompat_feature(sbp,
@@ -234,7 +230,7 @@ xfs_mount_validate_sb(
"Filesystem can not be safely mounted by this kernel.",
(sbp->sb_features_incompat &
XFS_SB_FEAT_INCOMPAT_UNKNOWN));
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -242,13 +238,13 @@ xfs_mount_validate_sb(
if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {
xfs_notice(mp,
"Version 5 of Super block has XFS_OQUOTA bits.");
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
xfs_notice(mp,
"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.");
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely(
@@ -256,7 +252,7 @@ xfs_mount_validate_sb(
xfs_warn(mp,
"filesystem is marked as having an external log; "
"specify logdev on the mount command line.");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
if (unlikely(
@@ -264,7 +260,7 @@ xfs_mount_validate_sb(
xfs_warn(mp,
"filesystem is marked as having an internal log; "
"do not specify logdev on the mount command line.");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
@@ -288,16 +284,17 @@ xfs_mount_validate_sb(
sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
+ sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
(sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) ||
sbp->sb_dblocks == 0 ||
sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) ||
- sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
- XFS_CORRUPTION_ERROR("SB sanity check failed",
- XFS_ERRLEVEL_LOW, mp, sbp);
- return XFS_ERROR(EFSCORRUPTED);
+ sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) ||
+ sbp->sb_shared_vn != 0)) {
+ xfs_notice(mp, "SB sanity check failed");
+ return -EFSCORRUPTED;
}
/*
@@ -308,7 +305,7 @@ xfs_mount_validate_sb(
"File system with blocksize %d bytes. "
"Only pagesize (%ld) or less will currently work.",
sbp->sb_blocksize, PAGE_SIZE);
- return XFS_ERROR(ENOSYS);
+ return -ENOSYS;
}
/*
@@ -323,29 +320,20 @@ xfs_mount_validate_sb(
default:
xfs_warn(mp, "inode size of %d bytes not supported",
sbp->sb_inodesize);
- return XFS_ERROR(ENOSYS);
+ return -ENOSYS;
}
if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
xfs_warn(mp,
"file system too large to be mounted on this system.");
- return XFS_ERROR(EFBIG);
+ return -EFBIG;
}
if (check_inprogress && sbp->sb_inprogress) {
xfs_warn(mp, "Offline file system operation in progress!");
- return XFS_ERROR(EFSCORRUPTED);
- }
-
- /*
- * Version 1 directory format has never worked on Linux.
- */
- if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
- xfs_warn(mp, "file system using version 1 directory format");
- return XFS_ERROR(ENOSYS);
+ return -EFSCORRUPTED;
}
-
return 0;
}
@@ -398,10 +386,11 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp)
}
}
-void
-xfs_sb_from_disk(
+static void
+__xfs_sb_from_disk(
struct xfs_sb *to,
- xfs_dsb_t *from)
+ xfs_dsb_t *from,
+ bool convert_xquota)
{
to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
@@ -457,6 +446,17 @@ xfs_sb_from_disk(
to->sb_pad = 0;
to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
to->sb_lsn = be64_to_cpu(from->sb_lsn);
+ /* Convert on-disk flags to in-memory flags? */
+ if (convert_xquota)
+ xfs_sb_quota_from_disk(to);
+}
+
+void
+xfs_sb_from_disk(
+ struct xfs_sb *to,
+ xfs_dsb_t *from)
+{
+ __xfs_sb_from_disk(to, from, true);
}
static inline void
@@ -495,10 +495,16 @@ xfs_sb_quota_to_disk(
}
/*
- * GQUOTINO and PQUOTINO cannot be used together in versions
- * of superblock that do not have pquotino. from->sb_flags
- * tells us which quota is active and should be copied to
- * disk.
+ * GQUOTINO and PQUOTINO cannot be used together in versions of
+ * superblock that do not have pquotino. from->sb_flags tells us which
+ * quota is active and should be copied to disk. If neither is active,
+ * make sure we write NULLFSINO to the sb_gquotino field as a quota
+ * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
+ * bit is set.
+ *
+ * Note that we don't need to handle the sb_uquotino or sb_pquotino here
+ * as they do not require any translation. Hence the main sb field loop
+ * will write them appropriately from the in-core superblock.
*/
if ((*fields & XFS_SB_GQUOTINO) &&
(from->sb_qflags & XFS_GQUOTA_ACCT))
@@ -506,6 +512,17 @@ xfs_sb_quota_to_disk(
else if ((*fields & XFS_SB_PQUOTINO) &&
(from->sb_qflags & XFS_PQUOTA_ACCT))
to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
+ else {
+ /*
+ * We can't rely on just the fields being logged to tell us
+ * that it is safe to write NULLFSINO - we should only do that
+ * if quotas are not actually enabled. Hence only write
+ * NULLFSINO if both in-core quota inodes are NULL.
+ */
+ if (from->sb_gquotino == NULLFSINO &&
+ from->sb_pquotino == NULLFSINO)
+ to->sb_gquotino = cpu_to_be64(NULLFSINO);
+ }
*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
}
@@ -572,7 +589,11 @@ xfs_sb_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_sb sb;
- xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
+ /*
+ * Use call variant which doesn't convert quota flags from disk
+ * format, because xfs_mount_validate_sb checks the on-disk flags.
+ */
+ __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false);
/*
* Only check the in progress field for the primary superblock as
@@ -611,12 +632,11 @@ xfs_sb_read_verify(
XFS_SB_VERSION_5) ||
dsb->sb_crc != 0)) {
- if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
- offsetof(struct xfs_sb, sb_crc))) {
+ if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
/* Only fail bad secondaries on a known V5 filesystem */
- if (bp->b_bn != XFS_SB_DADDR &&
+ if (bp->b_bn == XFS_SB_DADDR ||
xfs_sb_version_hascrc(&mp->m_sb)) {
- error = EFSCORRUPTED;
+ error = -EFSBADCRC;
goto out_error;
}
}
@@ -625,10 +645,9 @@ xfs_sb_read_verify(
out_error:
if (error) {
- if (error != EWRONGFS)
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- mp, bp->b_addr);
xfs_buf_ioerror(bp, error);
+ if (error == -EFSCORRUPTED || error == -EFSBADCRC)
+ xfs_verifier_error(bp);
}
}
@@ -644,14 +663,13 @@ xfs_sb_quiet_read_verify(
{
struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
-
if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
/* XFS filesystem, verify noisily! */
xfs_sb_read_verify(bp);
return;
}
/* quietly fail */
- xfs_buf_ioerror(bp, EWRONGFS);
+ xfs_buf_ioerror(bp, -EWRONGFS);
}
static void
@@ -664,9 +682,8 @@ xfs_sb_write_verify(
error = xfs_sb_verify(bp, false);
if (error) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- mp, bp->b_addr);
xfs_buf_ioerror(bp, error);
+ xfs_verifier_error(bp);
return;
}
@@ -676,8 +693,7 @@ xfs_sb_write_verify(
if (bip)
XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_sb, sb_crc));
+ xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
}
const struct xfs_buf_ops xfs_sb_buf_ops = {
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 35061d4b614c..2e739708afd3 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -36,8 +36,6 @@ struct xfs_trans;
#define XFS_SB_VERSION_5 5 /* CRC enabled filesystem */
#define XFS_SB_VERSION_NUMBITS 0x000f
#define XFS_SB_VERSION_ALLFBITS 0xfff0
-#define XFS_SB_VERSION_SASHFBITS 0xf000
-#define XFS_SB_VERSION_REALFBITS 0x0ff0
#define XFS_SB_VERSION_ATTRBIT 0x0010
#define XFS_SB_VERSION_NLINKBIT 0x0020
#define XFS_SB_VERSION_QUOTABIT 0x0040
@@ -50,24 +48,15 @@ struct xfs_trans;
#define XFS_SB_VERSION_DIRV2BIT 0x2000
#define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */
#define XFS_SB_VERSION_MOREBITSBIT 0x8000
-#define XFS_SB_VERSION_OKSASHFBITS \
- (XFS_SB_VERSION_EXTFLGBIT | \
- XFS_SB_VERSION_DIRV2BIT | \
- XFS_SB_VERSION_BORGBIT)
-#define XFS_SB_VERSION_OKREALFBITS \
- (XFS_SB_VERSION_ATTRBIT | \
- XFS_SB_VERSION_NLINKBIT | \
- XFS_SB_VERSION_QUOTABIT | \
- XFS_SB_VERSION_ALIGNBIT | \
- XFS_SB_VERSION_DALIGNBIT | \
- XFS_SB_VERSION_SHAREDBIT | \
- XFS_SB_VERSION_LOGV2BIT | \
- XFS_SB_VERSION_SECTORBIT | \
- XFS_SB_VERSION_MOREBITSBIT)
-#define XFS_SB_VERSION_OKREALBITS \
- (XFS_SB_VERSION_NUMBITS | \
- XFS_SB_VERSION_OKREALFBITS | \
- XFS_SB_VERSION_OKSASHFBITS)
+
+/*
+ * Supported feature bit list is just all bits in the versionnum field because
+ * we've used them all up and understand them all. Except, of course, for the
+ * shared superblock bit, which nobody knows what it does and so is unsupported.
+ */
+#define XFS_SB_VERSION_OKBITS \
+ ((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \
+ ~XFS_SB_VERSION_SHAREDBIT)
/*
* There are two words to hold XFS "feature" bits: the original
@@ -76,7 +65,6 @@ struct xfs_trans;
*
* These defines represent bits in sb_features2.
*/
-#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */
#define XFS_SB_VERSION2_RESERVED1BIT 0x00000001
#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */
#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
@@ -86,16 +74,11 @@ struct xfs_trans;
#define XFS_SB_VERSION2_CRCBIT 0x00000100 /* metadata CRCs */
#define XFS_SB_VERSION2_FTYPE 0x00000200 /* inode type in dir */
-#define XFS_SB_VERSION2_OKREALFBITS \
+#define XFS_SB_VERSION2_OKBITS \
(XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
XFS_SB_VERSION2_ATTR2BIT | \
XFS_SB_VERSION2_PROJID32BIT | \
XFS_SB_VERSION2_FTYPE)
-#define XFS_SB_VERSION2_OKSASHFBITS \
- (0)
-#define XFS_SB_VERSION2_OKREALBITS \
- (XFS_SB_VERSION2_OKREALFBITS | \
- XFS_SB_VERSION2_OKSASHFBITS )
/*
* Superblock - in core version. Must match the ondisk version below.
@@ -104,11 +87,11 @@ struct xfs_trans;
typedef struct xfs_sb {
__uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
__uint32_t sb_blocksize; /* logical block size, bytes */
- xfs_drfsbno_t sb_dblocks; /* number of data blocks */
- xfs_drfsbno_t sb_rblocks; /* number of realtime blocks */
- xfs_drtbno_t sb_rextents; /* number of realtime extents */
+ xfs_rfsblock_t sb_dblocks; /* number of data blocks */
+ xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */
+ xfs_rtblock_t sb_rextents; /* number of realtime extents */
uuid_t sb_uuid; /* file system unique id */
- xfs_dfsbno_t sb_logstart; /* starting block of log if internal */
+ xfs_fsblock_t sb_logstart; /* starting block of log if internal */
xfs_ino_t sb_rootino; /* root inode number */
xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */
xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */
@@ -182,6 +165,8 @@ typedef struct xfs_sb {
/* must be padded to 64 bit alignment */
} xfs_sb_t;
+#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
+
/*
* Superblock - on disk version. Must match the in core version above.
* Must be padded to 64 bit alignment.
@@ -343,214 +328,140 @@ typedef enum {
#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
-static inline int xfs_sb_good_version(xfs_sb_t *sbp)
-{
- /* We always support version 1-3 */
- if (sbp->sb_versionnum >= XFS_SB_VERSION_1 &&
- sbp->sb_versionnum <= XFS_SB_VERSION_3)
- return 1;
-
- /* We support version 4 if all feature bits are supported */
- if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) {
- if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) ||
- ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
- (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS)))
- return 0;
-
- if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
- return 0;
- return 1;
- }
- if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
- return 1;
-
- return 0;
-}
-
/*
- * Detect a mismatched features2 field. Older kernels read/wrote
- * this into the wrong slot, so to be safe we keep them in sync.
+ * The first XFS version we support is a v4 superblock with V2 directories.
*/
-static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp)
+static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp)
{
- return (sbp->sb_bad_features2 != sbp->sb_features2);
-}
-
-static inline unsigned xfs_sb_version_tonew(unsigned v)
-{
- if (v == XFS_SB_VERSION_1)
- return XFS_SB_VERSION_4;
-
- if (v == XFS_SB_VERSION_2)
- return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
+ if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
+ return false;
- return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT |
- XFS_SB_VERSION_NLINKBIT;
-}
+ /* check for unknown features in the fs */
+ if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
+ ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
+ (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS)))
+ return false;
-static inline unsigned xfs_sb_version_toold(unsigned v)
-{
- if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT))
- return 0;
- if (v & XFS_SB_VERSION_NLINKBIT)
- return XFS_SB_VERSION_3;
- if (v & XFS_SB_VERSION_ATTRBIT)
- return XFS_SB_VERSION_2;
- return XFS_SB_VERSION_1;
+ return true;
}
-static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
+static inline bool xfs_sb_good_version(struct xfs_sb *sbp)
{
- return sbp->sb_versionnum == XFS_SB_VERSION_2 ||
- sbp->sb_versionnum == XFS_SB_VERSION_3 ||
- (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
+ if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
+ return true;
+ if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
+ return xfs_sb_good_v4_features(sbp);
+ return false;
}
-static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
+/*
+ * Detect a mismatched features2 field. Older kernels read/wrote
+ * this into the wrong slot, so to be safe we keep them in sync.
+ */
+static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)
{
- if (sbp->sb_versionnum == XFS_SB_VERSION_1)
- sbp->sb_versionnum = XFS_SB_VERSION_2;
- else if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
- sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
- else
- sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
+ return sbp->sb_bad_features2 != sbp->sb_features2;
}
-static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp)
{
- return sbp->sb_versionnum == XFS_SB_VERSION_3 ||
- (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
+ return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT);
}
-static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
+static inline void xfs_sb_version_addattr(struct xfs_sb *sbp)
{
- if (sbp->sb_versionnum <= XFS_SB_VERSION_2)
- sbp->sb_versionnum = XFS_SB_VERSION_3;
- else
- sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
+ sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
}
-static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp)
{
- return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
+ return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
}
-static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
+static inline void xfs_sb_version_addquota(struct xfs_sb *sbp)
{
- if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
- sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
- else
- sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) |
- XFS_SB_VERSION_QUOTABIT;
+ sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
}
-static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp)
{
- return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
- (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
+ return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
(sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT));
}
-static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
-{
- return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
-}
-
-static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp)
{
- return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT);
+ return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
}
-static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp)
{
- return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
- (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT));
+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
+ (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
}
-static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp)
{
- return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
- (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT));
+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
+ (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
}
-static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp)
{
- return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
- (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT));
+ return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
}
-static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp)
{
- return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
+ return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
}
-static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
{
- return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
-}
-
-static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
-{
- return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
- (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
- (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT));
+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
+ (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
}
/*
* sb_features2 bit version macros.
- *
- * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro:
- *
- * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp)
- * ((xfs_sb_version_hasmorebits(sbp) &&
- * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
*/
-
-static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
(xfs_sb_version_hasmorebits(sbp) &&
(sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
}
-static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
(xfs_sb_version_hasmorebits(sbp) &&
(sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT));
}
-static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
+static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp)
{
sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT;
+ sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT;
}
-static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
+static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp)
{
sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
+ sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
if (!sbp->sb_features2)
sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
}
-static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
(xfs_sb_version_hasmorebits(sbp) &&
(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
}
-static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp)
+static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
{
sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT;
@@ -585,7 +496,9 @@ xfs_sb_has_compat_feature(
return (sbp->sb_features_compat & feature) != 0;
}
-#define XFS_SB_FEAT_RO_COMPAT_ALL 0
+#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
+#define XFS_SB_FEAT_RO_COMPAT_ALL \
+ (XFS_SB_FEAT_RO_COMPAT_FINOBT)
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
@@ -621,12 +534,12 @@ xfs_sb_has_incompat_log_feature(
/*
* V5 superblock specific feature checks
*/
-static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
+static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
}
-static inline int xfs_sb_version_has_pquotino(xfs_sb_t *sbp)
+static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
}
@@ -639,6 +552,12 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
(sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
}
+static inline int xfs_sb_version_hasfinobt(struct xfs_sb *sbp)
+{
+ return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && /* v5 only */
+ (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
+}
+
/*
* end of superblock version macros
*/
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 8c5035a13df1..82404da2ca67 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
#define XFS_TRANS_SB_COUNT 41
#define XFS_TRANS_CHECKPOINT 42
#define XFS_TRANS_ICREATE 43
-#define XFS_TRANS_TYPE_MAX 43
+#define XFS_TRANS_CREATE_TMPFILE 44
+#define XFS_TRANS_TYPE_MAX 44
/* new transaction types need to be reflected in xfs_logprint(8) */
#define XFS_TRANS_TYPES \
@@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
{ XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
{ XFS_TRANS_INACTIVE, "INACTIVE" }, \
{ XFS_TRANS_CREATE, "CREATE" }, \
+ { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
{ XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
{ XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
{ XFS_TRANS_REMOVE, "REMOVE" }, \
@@ -236,7 +238,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
uint32_t size, struct xfs_buf *bp);
-bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
+bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
uint32_t size, struct xfs_buf *bp);
void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_inode *ip, struct xfs_ifork *ifp);
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index bf59a2b45f8c..5782f037eab4 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -80,7 +80,6 @@ xfs_symlink_hdr_set(
*/
bool
xfs_symlink_hdr_ok(
- struct xfs_mount *mp,
xfs_ino_t ino,
uint32_t offset,
uint32_t size,
@@ -133,12 +132,13 @@ xfs_symlink_read_verify(
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
- !xfs_symlink_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_symlink_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -153,8 +153,8 @@ xfs_symlink_write_verify(
return;
if (!xfs_symlink_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -162,8 +162,7 @@ xfs_symlink_write_verify(
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
}
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_dsymlink_hdr, sl_crc));
+ xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
}
const struct xfs_buf_ops xfs_symlink_buf_ops = {
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 2fd59c0dae66..f2bda7c76b8a 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -26,6 +26,7 @@
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
@@ -81,20 +82,69 @@ xfs_calc_buf_res(
* on disk. Hence we need an inode reservation function that calculates all this
* correctly. So, we log:
*
- * - log op headers for object
+ * - 4 log op headers for object
+ * - for the ilf, the inode core and 2 forks
* - inode log format object
- * - the entire inode contents (core + 2 forks)
- * - two bmap btree block headers
+ * - the inode core
+ * - two inode forks containing bmap btree root blocks.
+ * - the btree data contained by both forks will fit into the inode size,
+ * hence when combined with the inode core above, we have a total of the
+ * actual inode size.
+ * - the BMBT headers need to be accounted separately, as they are
+ * additional to the records and pointers that fit inside the inode
+ * forks.
*/
STATIC uint
xfs_calc_inode_res(
struct xfs_mount *mp,
uint ninodes)
{
- return ninodes * (sizeof(struct xlog_op_header) +
- sizeof(struct xfs_inode_log_format) +
- mp->m_sb.sb_inodesize +
- 2 * XFS_BMBT_BLOCK_LEN(mp));
+ return ninodes *
+ (4 * sizeof(struct xlog_op_header) +
+ sizeof(struct xfs_inode_log_format) +
+ mp->m_sb.sb_inodesize +
+ 2 * XFS_BMBT_BLOCK_LEN(mp));
+}
+
+/*
+ * The free inode btree is a conditional feature and the log reservation
+ * requirements differ slightly from that of the traditional inode allocation
+ * btree. The finobt tracks records for inode chunks with at least one free
+ * inode. A record can be removed from the tree for an inode allocation
+ * or free and thus the finobt reservation is unconditional across:
+ *
+ * - inode allocation
+ * - inode free
+ * - inode chunk allocation
+ *
+ * The 'modify' param indicates to include the record modification scenario. The
+ * 'alloc' param indicates to include the reservation for free space btree
+ * modifications on behalf of finobt modifications. This is required only for
+ * transactions that do not already account for free space btree modifications.
+ *
+ * the free inode btree: max depth * block size
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the free inode btree entry: block size
+ */
+STATIC uint
+xfs_calc_finobt_res(
+ struct xfs_mount *mp,
+ int alloc,
+ int modify)
+{
+ uint res;
+
+ if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+ return 0;
+
+ res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
+ if (alloc)
+ res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+ if (modify)
+ res += (uint)XFS_FSB_TO_B(mp, 1);
+
+ return res;
}
/*
@@ -174,7 +224,7 @@ xfs_calc_itruncate_reservation(
xfs_calc_buf_res(5, 0) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
+ xfs_calc_buf_res(2 + mp->m_ialloc_blks +
mp->m_in_maxlevels, 0)));
}
@@ -204,6 +254,19 @@ xfs_calc_rename_reservation(
}
/*
+ * For removing an inode from unlinked list at first, we can modify:
+ * the agi hash list and counters: sector size
+ * the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+ max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
+/*
* For creating a link to an inode:
* the parent directory inode: inode size
* the linked inode: inode size
@@ -220,6 +283,7 @@ xfs_calc_link_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
+ xfs_calc_iunlink_remove_reservation(mp) +
MAX((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
@@ -229,6 +293,18 @@ xfs_calc_link_reservation(
}
/*
+ * For adding an inode to unlinked list we can modify:
+ * the agi hash list: sector size
+ * the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+ xfs_calc_inode_res(mp, 1);
+}
+
+/*
* For removing a directory entry we can modify:
* the parent directory inode: inode size
* the removed inode: inode size
@@ -245,10 +321,11 @@ xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((xfs_calc_inode_res(mp, 2) +
+ xfs_calc_iunlink_add_reservation(mp) +
+ MAX((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
@@ -267,6 +344,7 @@ xfs_calc_remove_reservation(
* the superblock for the nlink flag: sector size
* the directory btree: (max depth + v2) * dir block size
* the directory inode's bmap btree: (max depth + v2) * block size
+ * the finobt (record modification and allocation btrees)
*/
STATIC uint
xfs_calc_create_resv_modify(
@@ -275,14 +353,15 @@ xfs_calc_create_resv_modify(
return xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
(uint)XFS_FSB_TO_B(mp, 1) +
- xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_finobt_res(mp, 1, 1);
}
/*
* For create we can allocate some inodes giving:
* the agi and agf of the ag getting the new inodes: 2 * sectorsize
* the superblock for the nlink flag: sector size
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode blocks allocated: mp->m_ialloc_blks * blocksize
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
*/
@@ -292,7 +371,7 @@ xfs_calc_create_resv_alloc(
{
return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
mp->m_sb.sb_sectsize +
- xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
XFS_FSB_TO_B(mp, 1));
@@ -313,6 +392,7 @@ __xfs_calc_create_reservation(
* the superblock for the nlink flag: sector size
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the finobt (record insertion)
*/
STATIC uint
xfs_calc_icreate_resv_alloc(
@@ -322,7 +402,8 @@ xfs_calc_icreate_resv_alloc(
mp->m_sb.sb_sectsize +
xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
- XFS_FSB_TO_B(mp, 1));
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_finobt_res(mp, 0, 0);
}
STATIC uint
@@ -343,6 +424,20 @@ xfs_calc_create_reservation(
}
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+ struct xfs_mount *mp)
+{
+ uint res = XFS_DQUOT_LOGRES(mp);
+
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ res += xfs_calc_icreate_resv_alloc(mp);
+ else
+ res += xfs_calc_create_resv_alloc(mp);
+
+ return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
/*
* Making a new directory is the same as creating a new file.
*/
@@ -376,6 +471,7 @@ xfs_calc_symlink_reservation(
* the on disk inode before ours in the agi hash list: inode cluster size
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the finobt (record insertion, removal or modification)
*/
STATIC uint
xfs_calc_ifree_reservation(
@@ -383,14 +479,15 @@ xfs_calc_ifree_reservation(
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
- max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) +
+ xfs_calc_iunlink_remove_reservation(mp) +
xfs_calc_buf_res(1, 0) +
- xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
+ xfs_calc_buf_res(2 + mp->m_ialloc_blks +
mp->m_in_maxlevels, 0) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
- XFS_FSB_TO_B(mp, 1));
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_finobt_res(mp, 0, 1);
}
/*
@@ -513,7 +610,7 @@ xfs_calc_addafork_reservation(
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(1, mp->m_dirblksize) +
+ xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
@@ -644,15 +741,14 @@ xfs_calc_qm_setqlim_reservation(
/*
* Allocating quota on disk if needed.
- * the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ * the write transaction log space for quota file extent allocation
* the unit of quota allocation: one system block size
*/
STATIC uint
xfs_calc_qm_dqalloc_reservation(
struct xfs_mount *mp)
{
- ASSERT(M_RES(mp)->tr_write.tr_logres);
- return M_RES(mp)->tr_write.tr_logres +
+ return xfs_calc_write_reservation(mp) +
xfs_calc_buf_res(1,
XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
}
@@ -729,6 +825,11 @@ xfs_trans_resv_calc(
resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ resp->tr_create_tmpfile.tr_logres =
+ xfs_calc_create_tmpfile_reservation(mp);
+ resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+ resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -784,7 +885,6 @@ xfs_trans_resv_calc(
/* The following transaction are logged in logical format */
resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
- resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index de7de9aaad8a..1097d14cd583 100644
--- a/fs/xfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -38,11 +38,11 @@ struct xfs_trans_resv {
struct xfs_trans_res tr_remove; /* unlink trans */
struct xfs_trans_res tr_symlink; /* symlink trans */
struct xfs_trans_res tr_create; /* create trans */
+ struct xfs_trans_res tr_create_tmpfile; /* create O_TMPFILE trans */
struct xfs_trans_res tr_mkdir; /* mkdir trans */
struct xfs_trans_res tr_ifree; /* inode free trans */
struct xfs_trans_res tr_ichange; /* inode update trans */
struct xfs_trans_res tr_growdata; /* fs data section grow trans */
- struct xfs_trans_res tr_swrite; /* sync write inode trans */
struct xfs_trans_res tr_addafork; /* add inode attr fork trans */
struct xfs_trans_res tr_writeid; /* write setuid/setgid file */
struct xfs_trans_res tr_attrinval; /* attr fork buffer
@@ -100,6 +100,7 @@ struct xfs_trans_resv {
#define XFS_ITRUNCATE_LOG_COUNT 2
#define XFS_INACTIVE_LOG_COUNT 2
#define XFS_CREATE_LOG_COUNT 2
+#define XFS_CREATE_TMPFILE_LOG_COUNT 2
#define XFS_MKDIR_LOG_COUNT 3
#define XFS_SYMLINK_LOG_COUNT 3
#define XFS_REMOVE_LOG_COUNT 2
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 7d2c920dfb9c..bf9c4579334d 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -28,7 +28,8 @@
(((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
XFS_EXTENTADD_SPACE_RES(mp,w))
-#define XFS_DAENTER_1B(mp,w) ((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1)
+#define XFS_DAENTER_1B(mp,w) \
+ ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
#define XFS_DAENTER_DBS(mp,w) \
(XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0))
#define XFS_DAENTER_BLOCKS(mp,w) \
@@ -47,13 +48,15 @@
#define XFS_DIRREMOVE_SPACE_RES(mp) \
XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
#define XFS_IALLOC_SPACE_RES(mp) \
- (XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels - 1)
+ ((mp)->m_ialloc_blks + \
+ ((xfs_sb_version_hasfinobt(&(mp)->m_sb) ? 2 : 1) * /* 2x with finobt */ \
+ ((mp)->m_in_maxlevels - 1)))
/*
* Space reservation values for various transactions.
*/
#define XFS_ADDAFORK_SPACE_RES(mp) \
- ((mp)->m_dirblkfsbs + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
+ ((mp)->m_dir_geo->fsbcount + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
#define XFS_ATTRRM_SPACE_RES(mp) \
XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK)
/* This macro is not used - see inline code in xfs_attr_set */
@@ -82,5 +85,8 @@
(XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
+#define XFS_IFREE_SPACE_RES(mp) \
+ (xfs_sb_version_hasfinobt(&(mp)->m_sb) ? (mp)->m_in_maxlevels : 0)
+
#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 370eb3e121d1..a65fa5dde6e9 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -124,16 +124,12 @@ struct posix_acl *
xfs_get_acl(struct inode *inode, int type)
{
struct xfs_inode *ip = XFS_I(inode);
- struct posix_acl *acl;
+ struct posix_acl *acl = NULL;
struct xfs_acl *xfs_acl;
unsigned char *ea_name;
int error;
int len;
- acl = get_cached_acl(inode, type);
- if (acl != ACL_NOT_CACHED)
- return acl;
-
trace_xfs_get_acl(ip);
switch (type) {
@@ -156,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type)
if (!xfs_acl)
return ERR_PTR(-ENOMEM);
- error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+ error = xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
&len, ATTR_ROOT);
if (error) {
/*
@@ -164,10 +160,8 @@ xfs_get_acl(struct inode *inode, int type)
* cache entry, for any other error assume it is transient and
* leave the cache entry as ACL_NOT_CACHED.
*/
- if (error == -ENOATTR) {
- acl = NULL;
+ if (error == -ENOATTR)
goto out_update_cache;
- }
goto out;
}
@@ -183,15 +177,12 @@ out:
}
STATIC int
-xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
{
struct xfs_inode *ip = XFS_I(inode);
unsigned char *ea_name;
int error;
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
switch (type) {
case ACL_TYPE_ACCESS:
ea_name = SGI_ACL_FILE;
@@ -219,7 +210,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
len -= sizeof(struct xfs_acl_entry) *
(XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
- error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
+ error = xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
len, ATTR_ROOT);
kmem_free(xfs_acl);
@@ -227,7 +218,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
/*
* A NULL ACL argument means we want to remove the ACL.
*/
- error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+ error = xfs_attr_remove(ip, ea_name, ATTR_ROOT);
/*
* If the attribute didn't exist to start with that's fine.
@@ -253,7 +244,7 @@ xfs_set_mode(struct inode *inode, umode_t mode)
iattr.ia_mode = mode;
iattr.ia_ctime = current_fs_time(inode->i_sb);
- error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+ error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
}
return error;
@@ -282,131 +273,23 @@ posix_acl_default_exists(struct inode *inode)
return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
}
-/*
- * No need for i_mutex because the inode is not yet exposed to the VFS.
- */
int
-xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
+xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
- umode_t mode = inode->i_mode;
- int error = 0, inherit = 0;
-
- if (S_ISDIR(inode->i_mode)) {
- error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
- if (error)
- goto out;
- }
-
- error = posix_acl_create(&acl, GFP_KERNEL, &mode);
- if (error < 0)
- return error;
-
- /*
- * If posix_acl_create returns a positive value we need to
- * inherit a permission that can't be represented using the Unix
- * mode bits and we actually need to set an ACL.
- */
- if (error > 0)
- inherit = 1;
-
- error = xfs_set_mode(inode, mode);
- if (error)
- goto out;
-
- if (inherit)
- error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-
-out:
- posix_acl_release(acl);
- return error;
-}
-
-int
-xfs_acl_chmod(struct inode *inode)
-{
- struct posix_acl *acl;
- int error;
-
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
- acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl) || !acl)
- return PTR_ERR(acl);
-
- error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
- if (error)
- return error;
-
- error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
- posix_acl_release(acl);
- return error;
-}
-
-static int
-xfs_xattr_acl_get(struct dentry *dentry, const char *name,
- void *value, size_t size, int type)
-{
- struct posix_acl *acl;
- int error;
-
- acl = xfs_get_acl(dentry->d_inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl == NULL)
- return -ENODATA;
-
- error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
- posix_acl_release(acl);
-
- return error;
-}
-
-static int
-xfs_xattr_acl_set(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl = NULL;
int error = 0;
- if (flags & XATTR_CREATE)
- return -EINVAL;
- if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
- return value ? -EACCES : 0;
- if (!inode_owner_or_capable(inode))
- return -EPERM;
-
- if (!value)
+ if (!acl)
goto set_acl;
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (!acl) {
- /*
- * acl_set_file(3) may request that we set default ACLs with
- * zero length -- defend (gracefully) against that here.
- */
- goto out;
- }
- if (IS_ERR(acl)) {
- error = PTR_ERR(acl);
- goto out;
- }
-
- error = posix_acl_valid(acl);
- if (error)
- goto out_release;
-
- error = -EINVAL;
+ error = -E2BIG;
if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
- goto out_release;
+ return error;
if (type == ACL_TYPE_ACCESS) {
umode_t mode = inode->i_mode;
error = posix_acl_equiv_mode(acl, &mode);
if (error <= 0) {
- posix_acl_release(acl);
acl = NULL;
if (error < 0)
@@ -415,27 +298,9 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
error = xfs_set_mode(inode, mode);
if (error)
- goto out_release;
+ return error;
}
set_acl:
- error = xfs_set_acl(inode, type, acl);
- out_release:
- posix_acl_release(acl);
- out:
- return error;
+ return __xfs_set_acl(inode, type, acl);
}
-
-const struct xattr_handler xfs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .get = xfs_xattr_acl_get,
- .set = xfs_xattr_acl_set,
-};
-
-const struct xattr_handler xfs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .get = xfs_xattr_acl_get,
- .set = xfs_xattr_acl_set,
-};
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 4016a567b83c..5dc163744511 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -60,20 +60,15 @@ struct xfs_acl {
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
-extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
-extern int xfs_acl_chmod(struct inode *inode);
+extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
extern int posix_acl_access_exists(struct inode *inode);
extern int posix_acl_default_exists(struct inode *inode);
-
-extern const struct xattr_handler xfs_xattr_acl_access_handler;
-extern const struct xattr_handler xfs_xattr_acl_default_handler;
#else
static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
{
return NULL;
}
-# define xfs_inherit_acl(inode, default_acl) 0
-# define xfs_acl_chmod(inode) 0
+# define xfs_set_acl NULL
# define posix_acl_access_exists(inode) 0
# define posix_acl_default_exists(inode) 0
#endif /* CONFIG_XFS_POSIX_ACL */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 71c8c9d2b882..b984647c24db 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -240,7 +240,7 @@ xfs_end_io(
done:
if (error)
- ioend->io_error = -error;
+ ioend->io_error = error;
xfs_destroy_ioend(ioend);
}
@@ -308,14 +308,14 @@ xfs_map_blocks(
int nimaps = 1;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
if (type == XFS_IO_UNWRITTEN)
bmapi_flags |= XFS_BMAPI_IGSTATE;
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
if (nonblocking)
- return -XFS_ERROR(EAGAIN);
+ return -EAGAIN;
xfs_ilock(ip, XFS_ILOCK_SHARED);
}
@@ -332,14 +332,14 @@ xfs_map_blocks(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (error)
- return -XFS_ERROR(error);
+ return error;
if (type == XFS_IO_DELALLOC &&
(!nimaps || isnullstartblock(imap->br_startblock))) {
error = xfs_iomap_write_allocate(ip, offset, imap);
if (!error)
trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
- return -XFS_ERROR(error);
+ return error;
}
#ifdef DEBUG
@@ -407,7 +407,7 @@ xfs_alloc_ioend_bio(
struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
ASSERT(bio->bi_private == NULL);
- bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
return bio;
}
@@ -502,7 +502,7 @@ xfs_submit_ioend(
* time.
*/
if (fail) {
- ioend->io_error = -fail;
+ ioend->io_error = fail;
xfs_finish_ioend(ioend);
continue;
}
@@ -632,38 +632,46 @@ xfs_map_at_offset(
}
/*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
+ * Test if a given page contains at least one buffer of a given @type.
+ * If @check_all_buffers is true, then we walk all the buffers in the page to
+ * try to find one of the type passed in. If it is not set, then the caller only
+ * needs to check the first buffer on the page for a match.
*/
-STATIC int
+STATIC bool
xfs_check_page_type(
struct page *page,
- unsigned int type)
+ unsigned int type,
+ bool check_all_buffers)
{
- if (PageWriteback(page))
- return 0;
+ struct buffer_head *bh;
+ struct buffer_head *head;
- if (page->mapping && page_has_buffers(page)) {
- struct buffer_head *bh, *head;
- int acceptable = 0;
+ if (PageWriteback(page))
+ return false;
+ if (!page->mapping)
+ return false;
+ if (!page_has_buffers(page))
+ return false;
- bh = head = page_buffers(page);
- do {
- if (buffer_unwritten(bh))
- acceptable += (type == XFS_IO_UNWRITTEN);
- else if (buffer_delay(bh))
- acceptable += (type == XFS_IO_DELALLOC);
- else if (buffer_dirty(bh) && buffer_mapped(bh))
- acceptable += (type == XFS_IO_OVERWRITE);
- else
- break;
- } while ((bh = bh->b_this_page) != head);
+ bh = head = page_buffers(page);
+ do {
+ if (buffer_unwritten(bh)) {
+ if (type == XFS_IO_UNWRITTEN)
+ return true;
+ } else if (buffer_delay(bh)) {
+ if (type == XFS_IO_DELALLOC)
+ return true;
+ } else if (buffer_dirty(bh) && buffer_mapped(bh)) {
+ if (type == XFS_IO_OVERWRITE)
+ return true;
+ }
- if (acceptable)
- return 1;
- }
+ /* If we are only checking the first buffer, we are done now. */
+ if (!check_all_buffers)
+ break;
+ } while ((bh = bh->b_this_page) != head);
- return 0;
+ return false;
}
/*
@@ -697,7 +705,7 @@ xfs_convert_page(
goto fail_unlock_page;
if (page->mapping != inode->i_mapping)
goto fail_unlock_page;
- if (!xfs_check_page_type(page, (*ioendp)->io_type))
+ if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
goto fail_unlock_page;
/*
@@ -742,6 +750,15 @@ xfs_convert_page(
p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
page_dirty = p_offset / len;
+ /*
+ * The moment we find a buffer that doesn't match our current type
+ * specification or can't be written, abort the loop and start
+ * writeback. As per the above xfs_imap_valid() check, only
+ * xfs_vm_writepage() can handle partial page writeback fully - we are
+ * limited here to the buffers that are contiguous with the current
+ * ioend, and hence a buffer we can't write breaks that contiguity and
+ * we have to defer the rest of the IO to xfs_vm_writepage().
+ */
bh = head = page_buffers(page);
do {
if (offset >= end_offset)
@@ -750,7 +767,7 @@ xfs_convert_page(
uptodate = 0;
if (!(PageUptodate(page) || buffer_uptodate(bh))) {
done = 1;
- continue;
+ break;
}
if (buffer_unwritten(bh) || buffer_delay(bh) ||
@@ -762,10 +779,11 @@ xfs_convert_page(
else
type = XFS_IO_OVERWRITE;
- if (!xfs_imap_valid(inode, imap, offset)) {
- done = 1;
- continue;
- }
+ /*
+ * imap should always be valid because of the above
+ * partial page end_offset check on the imap.
+ */
+ ASSERT(xfs_imap_valid(inode, imap, offset));
lock_buffer(bh);
if (type != XFS_IO_OVERWRITE)
@@ -777,6 +795,7 @@ xfs_convert_page(
count++;
} else {
done = 1;
+ break;
}
} while (offset += len, (bh = bh->b_this_page) != head);
@@ -868,7 +887,7 @@ xfs_aops_discard_page(
struct buffer_head *bh, *head;
loff_t offset = page_offset(page);
- if (!xfs_check_page_type(page, XFS_IO_DELALLOC))
+ if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
goto out_invalidate;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -956,14 +975,39 @@ xfs_vm_writepage(
* Given that we do not allow direct reclaim to call us, we should
* never be called while in a filesystem transaction.
*/
- if (WARN_ON(current->flags & PF_FSTRANS))
+ if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
goto redirty;
/* Is this page beyond the end of the file? */
offset = i_size_read(inode);
end_index = offset >> PAGE_CACHE_SHIFT;
last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
- if (page->index >= end_index) {
+
+ /*
+ * The page index is less than the end_index, adjust the end_offset
+ * to the highest offset that this page should represent.
+ * -----------------------------------------------------
+ * | file mapping | <EOF> |
+ * -----------------------------------------------------
+ * | Page ... | Page N-2 | Page N-1 | Page N | |
+ * ^--------------------------------^----------|--------
+ * | desired writeback range | see else |
+ * ---------------------------------^------------------|
+ */
+ if (page->index < end_index)
+ end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+ else {
+ /*
+ * Check whether the page to write out is beyond or straddles
+ * i_size or not.
+ * -------------------------------------------------------
+ * | file mapping | <EOF> |
+ * -------------------------------------------------------
+ * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
+ * ^--------------------------------^-----------|---------
+ * | | Straddles |
+ * ---------------------------------^-----------|--------|
+ */
unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
/*
@@ -971,24 +1015,36 @@ xfs_vm_writepage(
* truncate operation that is in progress. We must redirty the
* page so that reclaim stops reclaiming it. Otherwise
* xfs_vm_releasepage() is called on it and gets confused.
+ *
+ * Note that the end_index is unsigned long, it would overflow
+ * if the given offset is greater than 16TB on 32-bit system
+ * and if we do check the page is fully outside i_size or not
+ * via "if (page->index >= end_index + 1)" as "end_index + 1"
+ * will be evaluated to 0. Hence this page will be redirtied
+ * and be written out repeatedly which would result in an
+ * infinite loop, the user program that performs this operation
+ * will hang. Instead, we can verify this situation by checking
+ * if the page to write is totally beyond the i_size or if its
+ * offset is just equal to the EOF.
*/
- if (page->index >= end_index + 1 || offset_into_page == 0)
+ if (page->index > end_index ||
+ (page->index == end_index && offset_into_page == 0))
goto redirty;
/*
* The page straddles i_size. It must be zeroed out on each
* and every writepage invocation because it may be mmapped.
* "A file is mapped in multiples of the page size. For a file
- * that is not a multiple of the page size, the remaining
+ * that is not a multiple of the page size, the remaining
* memory is zeroed when mapped, and writes to that region are
* not written out to the file."
*/
zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+
+ /* Adjust the end_offset to the end of file */
+ end_offset = offset;
}
- end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
- offset);
len = 1 << inode->i_blkbits;
bh = head = page_buffers(page);
@@ -1169,9 +1225,9 @@ xfs_vm_releasepage(
xfs_count_page_state(page, &delalloc, &unwritten);
- if (WARN_ON(delalloc))
+ if (WARN_ON_ONCE(delalloc))
return 0;
- if (WARN_ON(unwritten))
+ if (WARN_ON_ONCE(unwritten))
return 0;
return try_to_free_buffers(page);
@@ -1197,7 +1253,7 @@ __xfs_get_blocks(
int new = 0;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
offset = (xfs_off_t)iblock << inode->i_blkbits;
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1217,7 +1273,7 @@ __xfs_get_blocks(
lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, lockmode);
} else {
- lockmode = xfs_ilock_map_shared(ip);
+ lockmode = xfs_ilock_data_map_shared(ip);
}
ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -1246,7 +1302,7 @@ __xfs_get_blocks(
error = xfs_iomap_write_direct(ip, offset, size,
&imap, nimaps);
if (error)
- return -error;
+ return error;
new = 1;
} else {
/*
@@ -1325,6 +1381,14 @@ __xfs_get_blocks(
/*
* If this is O_DIRECT or the mpage code calling tell them how large
* the mapping is, so that we can avoid repeated get_blocks calls.
+ *
+ * If the mapping spans EOF, then we have to break the mapping up as the
+ * mapping for blocks beyond EOF must be marked new so that sub block
+ * regions can be correctly zeroed. We can't do this for mappings within
+ * EOF unless the mapping was just allocated or is unwritten, otherwise
+ * the callers would overwrite existing data with zeros. Hence we have
+ * to split the mapping into a range up to and including EOF, and a
+ * second mapping for beyond EOF.
*/
if (direct || size > (1 << inode->i_blkbits)) {
xfs_off_t mapping_size;
@@ -1335,6 +1399,12 @@ __xfs_get_blocks(
ASSERT(mapping_size > 0);
if (mapping_size > size)
mapping_size = size;
+ if (offset < i_size_read(inode) &&
+ offset + mapping_size >= i_size_read(inode)) {
+ /* limit mapping to block that spans EOF */
+ mapping_size = roundup_64(i_size_read(inode) - offset,
+ 1 << inode->i_blkbits);
+ }
if (mapping_size > LONG_MAX)
mapping_size = LONG_MAX;
@@ -1345,7 +1415,7 @@ __xfs_get_blocks(
out_unlock:
xfs_iunlock(ip, lockmode);
- return -error;
+ return error;
}
int
@@ -1416,9 +1486,8 @@ STATIC ssize_t
xfs_vm_direct_IO(
int rw,
struct kiocb *iocb,
- const struct iovec *iov,
- loff_t offset,
- unsigned long nr_segs)
+ struct iov_iter *iter,
+ loff_t offset)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct block_device *bdev = xfs_find_bdev_for_inode(inode);
@@ -1426,7 +1495,7 @@ xfs_vm_direct_IO(
ssize_t ret;
if (rw & WRITE) {
- size_t size = iov_length(iov, nr_segs);
+ size_t size = iov_iter_count(iter);
/*
* We cannot preallocate a size update transaction here as we
@@ -1438,16 +1507,15 @@ xfs_vm_direct_IO(
if (offset + size > XFS_I(inode)->i_d.di_size)
ioend->io_isdirect = 1;
- ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
- offset, nr_segs,
- xfs_get_blocks_direct,
- xfs_end_io_direct_write, NULL, 0);
+ ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+ offset, xfs_get_blocks_direct,
+ xfs_end_io_direct_write, NULL,
+ DIO_ASYNC_EXTEND);
if (ret != -EIOCBQUEUED && iocb->private)
goto out_destroy_ioend;
} else {
- ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
- offset, nr_segs,
- xfs_get_blocks_direct,
+ ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+ offset, xfs_get_blocks_direct,
NULL, NULL, 0);
}
@@ -1546,6 +1614,16 @@ xfs_vm_write_failed(
xfs_vm_kill_delalloc_range(inode, block_offset,
block_offset + bh->b_size);
+
+ /*
+ * This buffer does not contain data anymore. Make sure anyone
+ * who finds it knows that for certain.
+ */
+ clear_buffer_delay(bh);
+ clear_buffer_uptodate(bh);
+ clear_buffer_mapped(bh);
+ clear_buffer_new(bh);
+ clear_buffer_dirty(bh);
}
}
@@ -1579,12 +1657,21 @@ xfs_vm_write_begin(
status = __block_write_begin(page, pos, len, xfs_get_blocks);
if (unlikely(status)) {
struct inode *inode = mapping->host;
+ size_t isize = i_size_read(inode);
xfs_vm_write_failed(inode, page, pos, len);
unlock_page(page);
- if (pos + len > i_size_read(inode))
- truncate_pagecache(inode, i_size_read(inode));
+ /*
+ * If the write is beyond EOF, we only want to kill blocks
+ * allocated in this write, not blocks that were previously
+ * written successfully.
+ */
+ if (pos + len > isize) {
+ ssize_t start = max_t(ssize_t, pos, isize);
+
+ truncate_pagecache_range(inode, start, pos + len);
+ }
page_cache_release(page);
page = NULL;
@@ -1595,9 +1682,12 @@ xfs_vm_write_begin(
}
/*
- * On failure, we only need to kill delalloc blocks beyond EOF because they
- * will never be written. For blocks within EOF, generic_write_end() zeros them
- * so they are safe to leave alone and be written with all the other valid data.
+ * On failure, we only need to kill delalloc blocks beyond EOF in the range of
+ * this specific write because they will never be written. Previous writes
+ * beyond EOF where block allocation succeeded do not need to be trashed, so
+ * only new blocks from this write should be trashed. For blocks within
+ * EOF, generic_write_end() zeros them so they are safe to leave alone and be
+ * written with all the other valid data.
*/
STATIC int
xfs_vm_write_end(
@@ -1620,8 +1710,11 @@ xfs_vm_write_end(
loff_t to = pos + len;
if (to > isize) {
- truncate_pagecache(inode, isize);
+ /* only kill blocks in this write beyond EOF */
+ if (pos > isize)
+ isize = pos;
xfs_vm_kill_delalloc_range(inode, isize, to);
+ truncate_pagecache_range(inode, isize, to);
}
}
return ret;
@@ -1660,11 +1753,72 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
+/*
+ * This is basically a copy of __set_page_dirty_buffers() with one
+ * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
+ * dirty, we'll never be able to clean them because we don't write buffers
+ * beyond EOF, and that means we can't invalidate pages that span EOF
+ * that have been marked dirty. Further, the dirty state can leak into
+ * the file interior if the file is extended, resulting in all sorts of
+ * bad things happening as the state does not match the underlying data.
+ *
+ * XXX: this really indicates that bufferheads in XFS need to die. Warts like
+ * this only exist because of bufferheads and how the generic code manages them.
+ */
+STATIC int
+xfs_vm_set_page_dirty(
+ struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ loff_t end_offset;
+ loff_t offset;
+ int newly_dirty;
+
+ if (unlikely(!mapping))
+ return !TestSetPageDirty(page);
+
+ end_offset = i_size_read(inode);
+ offset = page_offset(page);
+
+ spin_lock(&mapping->private_lock);
+ if (page_has_buffers(page)) {
+ struct buffer_head *head = page_buffers(page);
+ struct buffer_head *bh = head;
+
+ do {
+ if (offset < end_offset)
+ set_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ offset += 1 << inode->i_blkbits;
+ } while (bh != head);
+ }
+ newly_dirty = !TestSetPageDirty(page);
+ spin_unlock(&mapping->private_lock);
+
+ if (newly_dirty) {
+ /* sigh - __set_page_dirty() is static, so copy it here, too */
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(!PageUptodate(page));
+ account_page_dirtied(page, mapping);
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
+ }
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+ }
+ return newly_dirty;
+}
+
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
.writepage = xfs_vm_writepage,
.writepages = xfs_vm_writepages,
+ .set_page_dirty = xfs_vm_set_page_dirty,
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
.write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 09480c57f069..aa2a8b1838a2 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -76,7 +76,7 @@ xfs_attr3_leaf_freextent(
error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
&map, &nmap, XFS_BMAPI_ATTRFORK);
if (error) {
- return(error);
+ return error;
}
ASSERT(nmap == 1);
ASSERT(map.br_startblock != DELAYSTARTBLOCK);
@@ -95,21 +95,21 @@ xfs_attr3_leaf_freextent(
dp->i_mount->m_ddev_targp,
dblkno, dblkcnt, 0);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
xfs_trans_binval(*trans, bp);
/*
* Roll to next transaction.
*/
error = xfs_trans_roll(trans, dp);
if (error)
- return (error);
+ return error;
}
tblkno += map.br_blockcount;
tblkcnt -= map.br_blockcount;
}
- return(0);
+ return 0;
}
/*
@@ -227,7 +227,7 @@ xfs_attr3_node_inactive(
*/
if (level > XFS_DA_NODE_MAXDEPTH) {
xfs_trans_brelse(*trans, bp); /* no locks for later trans */
- return XFS_ERROR(EIO);
+ return -EIO;
}
node = bp->b_addr;
@@ -256,7 +256,7 @@ xfs_attr3_node_inactive(
error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
XFS_ATTR_FORK);
if (error)
- return(error);
+ return error;
if (child_bp) {
/* save for re-read later */
child_blkno = XFS_BUF_ADDR(child_bp);
@@ -277,7 +277,7 @@ xfs_attr3_node_inactive(
child_bp);
break;
default:
- error = XFS_ERROR(EIO);
+ error = -EIO;
xfs_trans_brelse(*trans, child_bp);
break;
}
@@ -360,7 +360,7 @@ xfs_attr3_root_inactive(
error = xfs_attr3_leaf_inactive(trans, dp, bp);
break;
default:
- error = XFS_ERROR(EIO);
+ error = -EIO;
xfs_trans_brelse(*trans, bp);
break;
}
@@ -414,7 +414,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
if (error) {
xfs_trans_cancel(trans, 0);
- return(error);
+ return error;
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -443,10 +443,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error);
+ return error;
out:
xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error);
+ return error;
}
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 2d174b128153..62db83ab6cbc 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -50,11 +50,11 @@ xfs_attr_shortform_compare(const void *a, const void *b)
sa = (xfs_attr_sf_sort_t *)a;
sb = (xfs_attr_sf_sort_t *)b;
if (sa->hash < sb->hash) {
- return(-1);
+ return -1;
} else if (sa->hash > sb->hash) {
- return(1);
+ return 1;
} else {
- return(sa->entno - sb->entno);
+ return sa->entno - sb->entno;
}
}
@@ -86,7 +86,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
ASSERT(sf != NULL);
if (!sf->hdr.count)
- return(0);
+ return 0;
cursor = context->cursor;
ASSERT(cursor != NULL);
@@ -124,7 +124,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
}
trace_xfs_attr_list_sf_all(context);
- return(0);
+ return 0;
}
/* do no more for a search callback */
@@ -150,7 +150,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
XFS_ERRLEVEL_LOW,
context->dp->i_mount, sfe);
kmem_free(sbuf);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
sbp->entno = i;
@@ -188,7 +188,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
}
if (i == nsbuf) {
kmem_free(sbuf);
- return(0);
+ return 0;
}
/*
@@ -213,7 +213,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
}
kmem_free(sbuf);
- return(0);
+ return 0;
}
STATIC int
@@ -243,8 +243,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (cursor->blkno > 0) {
error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
- if ((error != 0) && (error != EFSCORRUPTED))
- return(error);
+ if ((error != 0) && (error != -EFSCORRUPTED))
+ return error;
if (bp) {
struct xfs_attr_leaf_entry *entries;
@@ -295,7 +295,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
cursor->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error)
- return(error);
+ return error;
node = bp->b_addr;
magic = be16_to_cpu(node->hdr.info.magic);
if (magic == XFS_ATTR_LEAF_MAGIC ||
@@ -308,7 +308,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
context->dp->i_mount,
node);
xfs_trans_brelse(NULL, bp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
dp->d_ops->node_hdr_from_disk(&nodehdr, node);
@@ -444,9 +444,11 @@ xfs_attr3_leaf_list_int(
xfs_da_args_t args;
memset((char *)&args, 0, sizeof(args));
+ args.geo = context->dp->i_mount->m_attr_geo;
args.dp = context->dp;
args.whichfork = XFS_ATTR_FORK;
args.valuelen = valuelen;
+ args.rmtvaluelen = valuelen;
args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
args.rmtblkcnt = xfs_attr3_rmt_blocks(
@@ -494,11 +496,11 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
context->cursor->blkno = 0;
error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
if (error)
- return XFS_ERROR(error);
+ return error;
error = xfs_attr3_leaf_list_int(bp, context);
xfs_trans_brelse(NULL, bp);
- return XFS_ERROR(error);
+ return error;
}
int
@@ -507,17 +509,17 @@ xfs_attr_list_int(
{
int error;
xfs_inode_t *dp = context->dp;
+ uint lock_mode;
XFS_STATS_INC(xs_attr_list);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return EIO;
-
- xfs_ilock(dp, XFS_ILOCK_SHARED);
+ return -EIO;
/*
* Decide on what work routines to call based on the inode size.
*/
+ lock_mode = xfs_ilock_attr_map_shared(dp);
if (!xfs_inode_hasattr(dp)) {
error = 0;
} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
@@ -527,9 +529,7 @@ xfs_attr_list_int(
} else {
error = xfs_attr_node_list(context);
}
-
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
-
+ xfs_iunlock(dp, lock_mode);
return error;
}
@@ -616,16 +616,16 @@ xfs_attr_list(
* Validate the cursor.
*/
if (cursor->pad1 || cursor->pad2)
- return(XFS_ERROR(EINVAL));
+ return -EINVAL;
if ((cursor->initted == 0) &&
(cursor->hashval || cursor->blkno || cursor->offset))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Check for a properly aligned buffer.
*/
if (((long)buffer) & (sizeof(int)-1))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
if (flags & ATTR_KERNOVAL)
bufsize = 0;
@@ -648,6 +648,6 @@ xfs_attr_list(
alist->al_offset[0] = context.bufsize;
error = xfs_attr_list_int(&context);
- ASSERT(error >= 0);
+ ASSERT(error <= 0);
return error;
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5887e41c0323..1707980f9a4b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -133,7 +133,7 @@ xfs_bmap_finish(
mp = ntp->t_mountp;
if (!XFS_FORCED_SHUTDOWN(mp))
xfs_force_shutdown(mp,
- (error == EFSCORRUPTED) ?
+ (error == -EFSCORRUPTED) ?
SHUTDOWN_CORRUPT_INCORE :
SHUTDOWN_META_IO_ERROR);
return error;
@@ -249,48 +249,6 @@ xfs_bmap_rtalloc(
}
/*
- * Stack switching interfaces for allocation
- */
-static void
-xfs_bmapi_allocate_worker(
- struct work_struct *work)
-{
- struct xfs_bmalloca *args = container_of(work,
- struct xfs_bmalloca, work);
- unsigned long pflags;
-
- /* we are in a transaction context here */
- current_set_flags_nested(&pflags, PF_FSTRANS);
-
- args->result = __xfs_bmapi_allocate(args);
- complete(args->done);
-
- current_restore_flags_nested(&pflags, PF_FSTRANS);
-}
-
-/*
- * Some allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Otherwise just
- * call directly to avoid the context switch overhead here.
- */
-int
-xfs_bmapi_allocate(
- struct xfs_bmalloca *args)
-{
- DECLARE_COMPLETION_ONSTACK(done);
-
- if (!args->stack_switch)
- return __xfs_bmapi_allocate(args);
-
-
- args->done = &done;
- INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
- queue_work(xfs_alloc_wq, &args->work);
- wait_for_completion(&done);
- return args->result;
-}
-
-/*
* Check if the endoff is outside the last extent. If so the caller will grow
* the allocation to a stripe unit boundary. All offsets are considered outside
* the end of file for an empty fork, so 1 is returned in *eof in that case.
@@ -407,7 +365,7 @@ xfs_bmap_count_tree(
xfs_trans_brelse(tp, bp);
XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
xfs_trans_brelse(tp, bp);
} else {
@@ -467,14 +425,14 @@ xfs_bmap_count_blocks(
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLDFSBNO);
+ ASSERT(bno != NULLFSBLOCK);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
@@ -566,13 +524,13 @@ xfs_getbmap(
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else if (unlikely(
ip->i_d.di_aformat != 0 &&
ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
prealloced = 0;
@@ -581,7 +539,7 @@ xfs_getbmap(
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if (xfs_get_extsz_hint(ip) ||
ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
@@ -601,38 +559,43 @@ xfs_getbmap(
bmv->bmv_entries = 0;
return 0;
} else if (bmv->bmv_length < 0) {
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
nex = bmv->bmv_count - 1;
if (nex <= 0)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
bmvend = bmv->bmv_offset + bmv->bmv_length;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
- if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
- if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
- error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ if (whichfork == XFS_DATA_FORK) {
+ if (!(iflags & BMV_IF_DELALLOC) &&
+ (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
if (error)
goto out_unlock_iolock;
+
+ /*
+ * Even after flushing the inode, there can still be
+ * delalloc blocks on the inode beyond EOF due to
+ * speculative preallocation. These are not removed
+ * until the release function is called or the inode
+ * is inactivated. Hence we cannot assert here that
+ * ip->i_delayed_blks == 0.
+ */
}
- /*
- * even after flushing the inode, there can still be delalloc
- * blocks on the inode beyond EOF due to speculative
- * preallocation. These are not removed until the release
- * function is called or the inode is inactivated. Hence we
- * cannot assert here that ip->i_delayed_blks == 0.
- */
- }
- lock = xfs_ilock_map_shared(ip);
+ lock = xfs_ilock_data_map_shared(ip);
+ } else {
+ lock = xfs_ilock_attr_map_shared(ip);
+ }
/*
* Don't let nex be bigger than the number of extents
@@ -648,7 +611,7 @@ xfs_getbmap(
/*
* Allocate enough space to handle "subnex" maps at a time.
*/
- error = ENOMEM;
+ error = -ENOMEM;
subnex = 16;
map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
if (!map)
@@ -737,7 +700,7 @@ xfs_getbmap(
out_free_map:
kmem_free(map);
out_unlock_ilock:
- xfs_iunlock_map_shared(ip, lock);
+ xfs_iunlock(ip, lock);
out_unlock_iolock:
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -846,7 +809,7 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
* have speculative prealloc/delalloc blocks to remove.
*/
if (VFS_I(ip)->i_size == 0 &&
- VN_CACHED(VFS_I(ip)) == 0 &&
+ VFS_I(ip)->i_mapping->nrpages == 0 &&
ip->i_delayed_blks == 0)
return false;
@@ -919,7 +882,7 @@ xfs_free_eofblocks(
if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0);
- return EAGAIN;
+ return -EAGAIN;
}
}
@@ -992,14 +955,14 @@ xfs_alloc_file_space(
trace_xfs_alloc_file_space(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
if (len <= 0)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
@@ -1065,7 +1028,7 @@ xfs_alloc_file_space(
/*
* Free the transaction structure.
*/
- ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
break;
}
@@ -1102,7 +1065,7 @@ xfs_alloc_file_space(
allocated_fsb = imapp->br_blockcount;
if (nimaps == 0) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
break;
}
@@ -1163,14 +1126,20 @@ xfs_zero_remaining_bytes(
mp->m_rtdev_targp : mp->m_ddev_targp,
BTOBB(mp->m_sb.sb_blocksize), 0);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
xfs_buf_unlock(bp);
for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
+ uint lock_mode;
+
offset_fsb = XFS_B_TO_FSBT(mp, offset);
nimap = 1;
+
+ lock_mode = xfs_ilock_data_map_shared(ip);
error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
+ xfs_iunlock(ip, lock_mode);
+
if (error || nimap < 1)
break;
ASSERT(imap.br_blockcount >= 1);
@@ -1187,7 +1156,12 @@ xfs_zero_remaining_bytes(
XFS_BUF_UNWRITE(bp);
XFS_BUF_READ(bp);
XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
- xfsbdstrat(mp, bp);
+
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ error = -EIO;
+ break;
+ }
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp,
@@ -1200,7 +1174,12 @@ xfs_zero_remaining_bytes(
XFS_BUF_UNDONE(bp);
XFS_BUF_UNREAD(bp);
XFS_BUF_WRITE(bp);
- xfsbdstrat(mp, bp);
+
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ error = -EIO;
+ break;
+ }
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp,
@@ -1255,7 +1234,7 @@ xfs_free_file_space(
rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
ioffset = offset & ~(rounding - 1);
- error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ioffset, -1);
if (error)
goto out;
@@ -1327,7 +1306,6 @@ xfs_free_file_space(
* the freeing of the space succeeds at ENOSPC.
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
- tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
/*
@@ -1337,7 +1315,7 @@ xfs_free_file_space(
/*
* Free the transaction structure.
*/
- ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
break;
}
@@ -1397,6 +1375,8 @@ xfs_zero_file_space(
xfs_off_t end_boundary;
int error;
+ trace_xfs_zero_file_space(ip);
+
granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
/*
@@ -1411,9 +1391,18 @@ xfs_zero_file_space(
ASSERT(end_boundary <= offset + len);
if (start_boundary < end_boundary - 1) {
- /* punch out the page cache over the conversion range */
+ /*
+ * punch out delayed allocation blocks and the page cache over
+ * the conversion range
+ */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_punch_delalloc_range(ip,
+ XFS_B_TO_FSBT(mp, start_boundary),
+ XFS_B_TO_FSB(mp, end_boundary - start_boundary));
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
truncate_pagecache_range(VFS_I(ip), start_boundary,
end_boundary - 1);
+
/* convert the blocks */
error = xfs_alloc_file_space(ip, start_boundary,
end_boundary - start_boundary - 1,
@@ -1446,6 +1435,120 @@ out:
}
/*
+ * xfs_collapse_file_space()
+ * This routine frees disk space and shifts extents for the given file.
+ * The first thing we do is free the data blocks in the specified range
+ * by calling xfs_free_file_space(), which also syncs dirty data and
+ * invalidates the page cache over the region being collapsed. We then
+ * shift the following extent records to the left to cover the hole.
+ * RETURNS:
+ * 0 on success
+ * negative errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t len)
+{
+ int done = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+ xfs_extnum_t current_ext = 0;
+ struct xfs_bmap_free free_list;
+ xfs_fsblock_t first_block;
+ int committed;
+ xfs_fileoff_t start_fsb;
+ xfs_fileoff_t shift_fsb;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+ trace_xfs_collapse_file_space(ip);
+
+ start_fsb = XFS_B_TO_FSB(mp, offset + len);
+ shift_fsb = XFS_B_TO_FSB(mp, len);
+
+ /*
+ * Writeback the entire file and force remove any post-eof blocks. The
+ * writeback prevents changes to the extent list via concurrent
+ * writeback and the eofblocks trim prevents the extent shift algorithm
+ * from running into a post-eof delalloc extent.
+ *
+ * XXX: This is a temporary fix until the extent shift loop below is
+ * converted to use offsets and lookups within the ILOCK rather than
+ * carrying around the index into the extent list for the next
+ * iteration.
+ */
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ if (error)
+ return error;
+ if (xfs_can_free_eofblocks(ip, true)) {
+ error = xfs_free_eofblocks(mp, ip, false);
+ if (error)
+ return error;
+ }
+
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+ return error;
+
+ while (!error && !done) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+ /*
+ * We need a block reservation for this transaction: after
+ * shifting an extent into the hole we may find that adjacent
+ * extents can be merged, which may lead to a block being
+ * freed during the record update.
+ */
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ break;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+ ip->i_gdquot, ip->i_pdquot,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out;
+
+ xfs_trans_ijoin(tp, ip, 0);
+
+ xfs_bmap_init(&free_list, &first_block);
+
+ /*
+ * We are using the write transaction reservation, in which at
+ * most 2 bmbt updates are allowed, so each pass is bounded.
+ */
+ error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
+ shift_fsb, &current_ext,
+ &first_block, &free_list,
+ XFS_BMAP_MAX_SHIFT_EXTENTS);
+ if (error)
+ goto out;
+
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
+ if (error)
+ goto out;
+
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+
+ return error;
+
+out:
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
+
+/*
* We need to check that the format of the data fork in the temporary inode is
* valid for the target inode before doing the swap. This is not a problem with
* attr1 because of the fixed fork offset, but attr2 has a dynamically sized
@@ -1474,14 +1577,14 @@ xfs_swap_extents_check_format(
/* Should never get a local format */
if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- return EINVAL;
+ return -EINVAL;
/*
* if the target inode has less extents that then temporary inode then
* why did userspace call us?
*/
if (ip->i_d.di_nextents < tip->i_d.di_nextents)
- return EINVAL;
+ return -EINVAL;
/*
* if the target inode is in extent form and the temp inode is in btree
@@ -1490,19 +1593,19 @@ xfs_swap_extents_check_format(
*/
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
- return EINVAL;
+ return -EINVAL;
/* Check temp in extent form to max in target */
if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
/* Check target in extent form to max in temp */
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
/*
* If we are in a btree format, check that the temp root block will fit
@@ -1516,26 +1619,50 @@ xfs_swap_extents_check_format(
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(ip) &&
XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
- return EINVAL;
+ return -EINVAL;
if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
}
/* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(tip) &&
XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
- return EINVAL;
+ return -EINVAL;
if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
}
return 0;
}
int
+xfs_swap_extent_flush(
+ struct xfs_inode *ip)
+{
+ int error;
+
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ if (error)
+ return error;
+ truncate_pagecache_range(VFS_I(ip), 0, -1);
+
+ /* Fail if any pages are still cached after the truncate above */
+ if (VFS_I(ip)->i_mapping->nrpages)
+ return -EINVAL;
+
+ /*
+ * Don't try to swap extents on mmap()d files because we can't
+ * safely lock out races against page faults.
+ */
+ if (mapping_mapped(VFS_I(ip)->i_mapping))
+ return -EBUSY;
+ return 0;
+}
+
+int
xfs_swap_extents(
xfs_inode_t *ip, /* target inode */
xfs_inode_t *tip, /* tmp inode */
@@ -1550,51 +1677,57 @@ xfs_swap_extents(
int aforkblks = 0;
int taforkblks = 0;
__uint64_t tmp;
+ int lock_flags;
tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
if (!tempifp) {
- error = XFS_ERROR(ENOMEM);
+ error = -ENOMEM;
goto out;
}
/*
- * we have to do two separate lock calls here to keep lockdep
- * happy. If we try to get all the locks in one call, lock will
- * report false positives when we drop the ILOCK and regain them
- * below.
+ * Lock up the inodes against other IO and truncate to begin with.
+ * Then we can ensure the inodes are flushed and have no page cache
+ * safely. Once we have done this we can take the ilocks and do the rest
+ * of the checks.
*/
+ lock_flags = XFS_IOLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
/* Verify that both files have the same format */
if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
/* Verify both files are either real-time or non-realtime */
if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
- error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
+ error = xfs_swap_extent_flush(ip);
+ if (error)
+ goto out_unlock;
+ error = xfs_swap_extent_flush(tip);
if (error)
goto out_unlock;
- truncate_pagecache_range(VFS_I(tip), 0, -1);
- /* Verify O_DIRECT for ftmp */
- if (VN_CACHED(VFS_I(tip)) != 0) {
- error = XFS_ERROR(EINVAL);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
goto out_unlock;
}
+ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
+ lock_flags |= XFS_ILOCK_EXCL;
/* Verify all data are being swapped */
if (sxp->sx_offset != 0 ||
sxp->sx_length != ip->i_d.di_size ||
sxp->sx_length != tip->i_d.di_size) {
- error = XFS_ERROR(EFAULT);
- goto out_unlock;
+ error = -EFAULT;
+ goto out_trans_cancel;
}
trace_xfs_swap_extent_before(ip, 0);
@@ -1606,7 +1739,7 @@ xfs_swap_extents(
xfs_notice(mp,
"%s: inode 0x%llx format is incompatible for exchanging.",
__func__, ip->i_ino);
- goto out_unlock;
+ goto out_trans_cancel;
}
/*
@@ -1620,43 +1753,9 @@ xfs_swap_extents(
(sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
(sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
(sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
- error = XFS_ERROR(EBUSY);
- goto out_unlock;
+ error = -EBUSY;
+ goto out_trans_cancel;
}
-
- /* We need to fail if the file is memory mapped. Once we have tossed
- * all existing pages, the page fault will have no option
- * but to go to the filesystem for pages. By making the page fault call
- * vop_read (or write in the case of autogrow) they block on the iolock
- * until we have switched the extents.
- */
- if (VN_MAPPED(VFS_I(ip))) {
- error = XFS_ERROR(EBUSY);
- goto out_unlock;
- }
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_iunlock(tip, XFS_ILOCK_EXCL);
-
- /*
- * There is a race condition here since we gave up the
- * ilock. However, the data fork will not change since
- * we have the iolock (locked for truncation too) so we
- * are safe. We don't really care if non-io related
- * fields change.
- */
- truncate_pagecache_range(VFS_I(ip), 0, -1);
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- xfs_iunlock(tip, XFS_IOLOCK_EXCL);
- xfs_trans_cancel(tp, 0);
- goto out;
- }
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
-
/*
* Count the number of extended attribute blocks
*/
@@ -1674,8 +1773,8 @@ xfs_swap_extents(
goto out_trans_cancel;
}
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, lock_flags);
+ xfs_trans_ijoin(tp, tip, lock_flags);
/*
* Before we've swapped the forks, lets set the owners of the forks
@@ -1804,8 +1903,8 @@ out:
return error;
out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_iunlock(ip, lock_flags);
+ xfs_iunlock(tip, lock_flags);
goto out;
out_trans_cancel:
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 900747b25772..2fdb72d2c908 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -50,12 +50,11 @@ struct xfs_bmalloca {
xfs_extlen_t total; /* total blocks needed for xaction */
xfs_extlen_t minlen; /* minimum allocation size (blocks) */
xfs_extlen_t minleft; /* amount must be left after alloc */
- char eof; /* set if allocating past last extent */
- char wasdel; /* replacing a delayed allocation */
- char userdata;/* set if is user data */
- char aeof; /* allocated space at eof */
- char conv; /* overwriting unwritten extents */
- char stack_switch;
+ bool eof; /* set if allocating past last extent */
+ bool wasdel; /* replacing a delayed allocation */
+ bool userdata;/* set if is user data */
+ bool aeof; /* allocated space at eof */
+ bool conv; /* overwriting unwritten extents */
int flags;
struct completion *done;
struct work_struct work;
@@ -65,8 +64,6 @@ struct xfs_bmalloca {
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
-int xfs_bmapi_allocate(struct xfs_bmalloca *args);
-int __xfs_bmapi_allocate(struct xfs_bmalloca *args);
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
@@ -99,6 +96,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
+int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
+ xfs_off_t len);
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c7f0b77dcb00..cd7b8ca9b064 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -130,7 +130,7 @@ xfs_buf_get_maps(
bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
KM_NOFS);
if (!bp->b_maps)
- return ENOMEM;
+ return -ENOMEM;
return 0;
}
@@ -216,8 +216,7 @@ _xfs_buf_alloc(
STATIC int
_xfs_buf_get_pages(
xfs_buf_t *bp,
- int page_count,
- xfs_buf_flags_t flags)
+ int page_count)
{
/* Make sure that we have a page list */
if (bp->b_pages == NULL) {
@@ -330,7 +329,7 @@ use_alloc_page:
end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
>> PAGE_SHIFT;
page_count = end - start;
- error = _xfs_buf_get_pages(bp, page_count, flags);
+ error = _xfs_buf_get_pages(bp, page_count);
if (unlikely(error))
return error;
@@ -345,7 +344,7 @@ retry:
if (unlikely(page == NULL)) {
if (flags & XBF_READ_AHEAD) {
bp->b_page_count = i;
- error = ENOMEM;
+ error = -ENOMEM;
goto out_free_pages;
}
@@ -396,7 +395,17 @@ _xfs_buf_map_pages(
bp->b_addr = NULL;
} else {
int retried = 0;
+ unsigned noio_flag;
+ /*
+ * vm_map_ram() will allocate auxiliary structures (e.g.
+ * pagetables) with GFP_KERNEL, yet we are likely to be under
+ * GFP_NOFS context here. Hence we need to tell memory reclaim
+ * that we are in such a context via PF_MEMALLOC_NOIO to prevent
+ * memory reclaim re-entering the filesystem here and
+ * potentially deadlocking.
+ */
+ noio_flag = memalloc_noio_save();
do {
bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-1, PAGE_KERNEL);
@@ -404,6 +413,7 @@ _xfs_buf_map_pages(
break;
vm_unmap_aliases();
} while (retried++ <= 1);
+ memalloc_noio_restore(noio_flag);
if (!bp->b_addr)
return -ENOMEM;
@@ -445,8 +455,8 @@ _xfs_buf_find(
numbytes = BBTOB(numblks);
/* Check for IOs smaller than the sector size / not sector aligned */
- ASSERT(!(numbytes < (1 << btp->bt_sshift)));
- ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
+ ASSERT(!(numbytes < btp->bt_meta_sectorsize));
+ ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask));
/*
* Corrupted block numbers can get through to here, unfortunately, so we
@@ -455,7 +465,7 @@ _xfs_buf_find(
eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
if (blkno >= eofs) {
/*
- * XXX (dgc): we should really be returning EFSCORRUPTED here,
+ * XXX (dgc): we should really be returning -EFSCORRUPTED here,
* but none of the higher level infrastructure supports
* returning a specific error on buffer lookup failures.
*/
@@ -698,7 +708,11 @@ xfs_buf_read_uncached(
bp->b_flags |= XBF_READ;
bp->b_ops = ops;
- xfsbdstrat(target->bt_mount, bp);
+ if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+ xfs_buf_relse(bp);
+ return NULL;
+ }
+ xfs_buf_iorequest(bp);
xfs_buf_iowait(bp);
return bp;
}
@@ -763,7 +777,7 @@ xfs_buf_associate_memory(
bp->b_pages = NULL;
bp->b_addr = mem;
- rval = _xfs_buf_get_pages(bp, page_count, 0);
+ rval = _xfs_buf_get_pages(bp, page_count);
if (rval)
return rval;
@@ -796,7 +810,7 @@ xfs_buf_get_uncached(
goto fail;
page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
- error = _xfs_buf_get_pages(bp, page_count, 0);
+ error = _xfs_buf_get_pages(bp, page_count);
if (error)
goto fail_free_buf;
@@ -1038,8 +1052,8 @@ xfs_buf_ioerror(
xfs_buf_t *bp,
int error)
{
- ASSERT(error >= 0 && error <= 0xffff);
- bp->b_error = (unsigned short)error;
+ ASSERT(error <= 0 && error >= -1000);
+ bp->b_error = error;
trace_xfs_buf_ioerror(bp, error, _RET_IP_);
}
@@ -1050,7 +1064,7 @@ xfs_buf_ioerror_alert(
{
xfs_alert(bp->b_target->bt_mount,
"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
- (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
+ (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
}
/*
@@ -1069,7 +1083,7 @@ xfs_bioerror(
/*
* No need to wait until the buffer is unpinned, we aren't flushing it.
*/
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
/*
* We're calling xfs_buf_ioend, so delete XBF_DONE flag.
@@ -1080,7 +1094,7 @@ xfs_bioerror(
xfs_buf_ioend(bp, 0);
- return EIO;
+ return -EIO;
}
/*
@@ -1089,7 +1103,7 @@ xfs_bioerror(
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
-STATIC int
+int
xfs_bioerror_relse(
struct xfs_buf *bp)
{
@@ -1113,13 +1127,13 @@ xfs_bioerror_relse(
* There's no reason to mark error for
* ASYNC buffers.
*/
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
complete(&bp->b_iowait);
} else {
xfs_buf_relse(bp);
}
- return EIO;
+ return -EIO;
}
STATIC int
@@ -1152,7 +1166,7 @@ xfs_bwrite(
ASSERT(xfs_buf_islocked(bp));
bp->b_flags |= XBF_WRITE;
- bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+ bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
xfs_bdstrat_cb(bp);
@@ -1164,25 +1178,6 @@ xfs_bwrite(
return error;
}
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem. Typically user data goes thru this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- if (XFS_FORCED_SHUTDOWN(mp)) {
- trace_xfs_bdstrat_shut(bp, _RET_IP_);
- xfs_bioerror_relse(bp);
- return;
- }
-
- xfs_buf_iorequest(bp);
-}
-
STATIC void
_xfs_buf_ioend(
xfs_buf_t *bp,
@@ -1204,7 +1199,7 @@ xfs_buf_bio_end_io(
* buffers that require multiple bios to complete.
*/
if (!bp->b_error)
- xfs_buf_ioerror(bp, -error);
+ xfs_buf_ioerror(bp, error);
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
@@ -1255,7 +1250,7 @@ next_chunk:
bio = bio_alloc(GFP_NOIO, nr_pages);
bio->bi_bdev = bp->b_target->bt_bdev;
- bio->bi_sector = sector;
+ bio->bi_iter.bi_sector = sector;
bio->bi_end_io = xfs_buf_bio_end_io;
bio->bi_private = bp;
@@ -1277,7 +1272,7 @@ next_chunk:
total_nr_pages--;
}
- if (likely(bio->bi_size)) {
+ if (likely(bio->bi_iter.bi_size)) {
if (xfs_buf_is_vmapped(bp)) {
flush_kernel_vmap_range(bp->b_addr,
xfs_buf_vmap_len(bp));
@@ -1291,7 +1286,7 @@ next_chunk:
* because the caller (xfs_buf_iorequest) holds a count itself.
*/
atomic_dec(&bp->b_io_remaining);
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
bio_put(bio);
}
@@ -1335,6 +1330,20 @@ _xfs_buf_ioapply(
SHUTDOWN_CORRUPT_INCORE);
return;
}
+ } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ /*
+ * non-crc filesystems don't attach verifiers during
+ * log recovery, so don't warn for such filesystems.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_warn(mp,
+ "%s: no ops on block 0x%llx/0x%x",
+ __func__, bp->b_bn, bp->b_length);
+ xfs_hex_dump(bp->b_addr, 64);
+ dump_stack();
+ }
}
} else if (bp->b_flags & XBF_READ_AHEAD) {
rw = READA;
@@ -1376,21 +1385,29 @@ xfs_buf_iorequest(
xfs_buf_wait_unpin(bp);
xfs_buf_hold(bp);
- /* Set the count to 1 initially, this will stop an I/O
+ /*
+ * Set the count to 1 initially, this will stop an I/O
* completion callout which happens before we have started
* all the I/O from calling xfs_buf_ioend too early.
*/
atomic_set(&bp->b_io_remaining, 1);
_xfs_buf_ioapply(bp);
- _xfs_buf_ioend(bp, 1);
+ /*
+ * If _xfs_buf_ioapply failed, we'll get back here with
+ * only the reference we took above. _xfs_buf_ioend will
+ * drop it to zero, so we'd better not queue it for later,
+ * or we'll free it before it's done.
+ */
+ _xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
xfs_buf_rele(bp);
}
/*
* Waits for I/O to complete on the buffer supplied. It returns immediately if
- * no I/O is pending or there is already a pending error on the buffer. It
- * returns the I/O error code, if any, or 0 if there was no error.
+ * no I/O is pending or there is already a pending error on the buffer, in which
+ * case nothing will ever complete. It returns the I/O error code, if any, or
+ * 0 if there was no error.
*/
int
xfs_buf_iowait(
@@ -1516,6 +1533,12 @@ xfs_wait_buftarg(
struct xfs_buf *bp;
bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
list_del_init(&bp->b_lru);
+ if (bp->b_flags & XBF_WRITE_FAIL) {
+ xfs_alert(btp->bt_mount,
+"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
+"Please run xfs_repair to determine the extent of the problem.",
+ (long long)bp->b_bn);
+ }
xfs_buf_rele(bp);
}
if (loop++ != 0)
@@ -1602,16 +1625,14 @@ xfs_free_buftarg(
kmem_free(btp);
}
-STATIC int
-xfs_setsize_buftarg_flags(
+int
+xfs_setsize_buftarg(
xfs_buftarg_t *btp,
- unsigned int blocksize,
- unsigned int sectorsize,
- int verbose)
+ unsigned int sectorsize)
{
- btp->bt_bsize = blocksize;
- btp->bt_sshift = ffs(sectorsize) - 1;
- btp->bt_smask = sectorsize - 1;
+ /* Set up metadata sector size info */
+ btp->bt_meta_sectorsize = sectorsize;
+ btp->bt_meta_sectormask = sectorsize - 1;
if (set_blocksize(btp->bt_bdev, sectorsize)) {
char name[BDEVNAME_SIZE];
@@ -1621,41 +1642,33 @@ xfs_setsize_buftarg_flags(
xfs_warn(btp->bt_mount,
"Cannot set_blocksize to %u on device %s",
sectorsize, name);
- return EINVAL;
+ return -EINVAL;
}
+ /* Set up device logical sector size mask */
+ btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
+ btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
+
return 0;
}
/*
- * When allocating the initial buffer target we have not yet
- * read in the superblock, so don't know what sized sectors
- * are being used at this early stage. Play safe.
+ * When allocating the initial buffer target we have not yet
+ * read in the superblock, so don't know what sized sectors
+ * are being used at this early stage. Play safe.
*/
STATIC int
xfs_setsize_buftarg_early(
xfs_buftarg_t *btp,
struct block_device *bdev)
{
- return xfs_setsize_buftarg_flags(btp,
- PAGE_SIZE, bdev_logical_block_size(bdev), 0);
-}
-
-int
-xfs_setsize_buftarg(
- xfs_buftarg_t *btp,
- unsigned int blocksize,
- unsigned int sectorsize)
-{
- return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
+ return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}
xfs_buftarg_t *
xfs_alloc_buftarg(
struct xfs_mount *mp,
- struct block_device *bdev,
- int external,
- const char *fsname)
+ struct block_device *bdev)
{
xfs_buftarg_t *btp;
@@ -1799,7 +1812,7 @@ __xfs_buf_delwri_submit(
blk_start_plug(&plug);
list_for_each_entry_safe(bp, n, io_list, b_list) {
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
+ bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
bp->b_flags |= XBF_WRITE;
if (!wait) {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index e65683361017..c753183900b3 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -45,6 +45,7 @@ typedef enum {
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
+#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */
/* I/O hints for the BIO layer */
#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
@@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_ASYNC, "ASYNC" }, \
{ XBF_DONE, "DONE" }, \
{ XBF_STALE, "STALE" }, \
+ { XBF_WRITE_FAIL, "WRITE_FAIL" }, \
{ XBF_SYNCIO, "SYNCIO" }, \
{ XBF_FUA, "FUA" }, \
{ XBF_FLUSH, "FLUSH" }, \
@@ -80,19 +82,34 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
{ _XBF_COMPOUND, "COMPOUND" }
+
/*
* Internal state flags.
*/
#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
+/*
+ * The xfs_buftarg contains 2 notions of "sector size" -
+ *
+ * 1) The metadata sector size, which is the minimum unit and
+ * alignment of IO which will be performed by metadata operations.
+ * 2) The device logical sector size
+ *
+ * The first is specified at mkfs time, and is stored on-disk in the
+ * superblock's sb_sectsize.
+ *
+ * The latter is derived from the underlying device, and controls direct IO
+ * alignment constraints.
+ */
typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
struct backing_dev_info *bt_bdi;
struct xfs_mount *bt_mount;
- unsigned int bt_bsize;
- unsigned int bt_sshift;
- size_t bt_smask;
+ unsigned int bt_meta_sectorsize;
+ size_t bt_meta_sectormask;
+ size_t bt_logical_sectorsize;
+ size_t bt_logical_sectormask;
/* LRU control structures */
struct shrinker bt_shrinker;
@@ -161,7 +178,7 @@ typedef struct xfs_buf {
atomic_t b_io_remaining; /* #outstanding I/O requests */
unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset in first page */
- unsigned short b_error; /* error code on I/O */
+ int b_error; /* error code on I/O */
const struct xfs_buf_ops *b_ops;
#ifdef XFS_BUF_LOCK_TRACKING
@@ -269,9 +286,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_buf *bp);
-
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-
extern void xfs_buf_ioend(xfs_buf_t *, int);
extern void xfs_buf_ioerror(xfs_buf_t *, int);
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
@@ -282,10 +296,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
#define xfs_buf_zero(bp, off, len) \
xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-static inline int xfs_buf_geterror(xfs_buf_t *bp)
-{
- return bp ? bp->b_error : ENOMEM;
-}
+extern int xfs_bioerror_relse(struct xfs_buf *);
/* Buffer Utility Routines */
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
@@ -301,7 +312,8 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_ZEROFLAGS(bp) \
((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
- XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+ XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
+ XBF_WRITE_FAIL))
void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
@@ -352,14 +364,28 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
xfs_buf_rele(bp);
}
+static inline int
+xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ cksum_offset);
+}
+
+static inline void
+xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+ cksum_offset);
+}
+
/*
* Handling of buftargs.
*/
extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
- struct block_device *, int, const char *);
+ struct block_device *);
extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a64f67ba25d3..76007deed31f 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -182,21 +182,47 @@ xfs_buf_item_size(
trace_xfs_buf_item_size(bip);
}
-static struct xfs_log_iovec *
+static inline void
+xfs_buf_item_copy_iovec(
+ struct xfs_log_vec *lv,
+ struct xfs_log_iovec **vecp,
+ struct xfs_buf *bp,
+ uint offset,
+ int first_bit,
+ uint nbits)
+{
+ offset += first_bit * XFS_BLF_CHUNK;
+ xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK,
+ xfs_buf_offset(bp, offset),
+ nbits * XFS_BLF_CHUNK);
+}
+
+static inline bool
+xfs_buf_item_straddle(
+ struct xfs_buf *bp,
+ uint offset,
+ int next_bit,
+ int last_bit)
+{
+ return xfs_buf_offset(bp, offset + (next_bit << XFS_BLF_SHIFT)) !=
+ (xfs_buf_offset(bp, offset + (last_bit << XFS_BLF_SHIFT)) +
+ XFS_BLF_CHUNK);
+}
+
+static void
xfs_buf_item_format_segment(
struct xfs_buf_log_item *bip,
- struct xfs_log_iovec *vecp,
+ struct xfs_log_vec *lv,
+ struct xfs_log_iovec **vecp,
uint offset,
struct xfs_buf_log_format *blfp)
{
struct xfs_buf *bp = bip->bli_buf;
uint base_size;
- uint nvecs;
int first_bit;
int last_bit;
int next_bit;
uint nbits;
- uint buffer_offset;
/* copy the flags across from the base format item */
blfp->blf_flags = bip->__bli_format.blf_flags;
@@ -208,21 +234,17 @@ xfs_buf_item_format_segment(
*/
base_size = xfs_buf_log_format_size(blfp);
- nvecs = 0;
first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
/*
* If the map is not be dirty in the transaction, mark
* the size as zero and do not advance the vector pointer.
*/
- goto out;
+ return;
}
- vecp->i_addr = blfp;
- vecp->i_len = base_size;
- vecp->i_type = XLOG_REG_TYPE_BFORMAT;
- vecp++;
- nvecs = 1;
+ blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size);
+ blfp->blf_size = 1;
if (bip->bli_flags & XFS_BLI_STALE) {
/*
@@ -232,14 +254,13 @@ xfs_buf_item_format_segment(
*/
trace_xfs_buf_item_format_stale(bip);
ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
- goto out;
+ return;
}
/*
* Fill in an iovec for each set of contiguous chunks.
*/
-
last_bit = first_bit;
nbits = 1;
for (;;) {
@@ -252,42 +273,22 @@ xfs_buf_item_format_segment(
next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
(uint)last_bit + 1);
/*
- * If we run out of bits fill in the last iovec and get
- * out of the loop.
- * Else if we start a new set of bits then fill in the
- * iovec for the series we were looking at and start
- * counting the bits in the new one.
- * Else we're still in the same set of bits so just
- * keep counting and scanning.
+ * If we run out of bits fill in the last iovec and get out of
+ * the loop. Else if we start a new set of bits then fill in
+ * the iovec for the series we were looking at and start
+ * counting the bits in the new one. Else we're still in the
+ * same set of bits so just keep counting and scanning.
*/
if (next_bit == -1) {
- buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
- vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
- vecp->i_len = nbits * XFS_BLF_CHUNK;
- vecp->i_type = XLOG_REG_TYPE_BCHUNK;
- nvecs++;
+ xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
+ first_bit, nbits);
+ blfp->blf_size++;
break;
- } else if (next_bit != last_bit + 1) {
- buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
- vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
- vecp->i_len = nbits * XFS_BLF_CHUNK;
- vecp->i_type = XLOG_REG_TYPE_BCHUNK;
- nvecs++;
- vecp++;
- first_bit = next_bit;
- last_bit = next_bit;
- nbits = 1;
- } else if (xfs_buf_offset(bp, offset +
- (next_bit << XFS_BLF_SHIFT)) !=
- (xfs_buf_offset(bp, offset +
- (last_bit << XFS_BLF_SHIFT)) +
- XFS_BLF_CHUNK)) {
- buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
- vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
- vecp->i_len = nbits * XFS_BLF_CHUNK;
- vecp->i_type = XLOG_REG_TYPE_BCHUNK;
- nvecs++;
- vecp++;
+ } else if (next_bit != last_bit + 1 ||
+ xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) {
+ xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
+ first_bit, nbits);
+ blfp->blf_size++;
first_bit = next_bit;
last_bit = next_bit;
nbits = 1;
@@ -296,9 +297,6 @@ xfs_buf_item_format_segment(
nbits++;
}
}
-out:
- blfp->blf_size = nvecs;
- return vecp;
}
/*
@@ -310,10 +308,11 @@ out:
STATIC void
xfs_buf_item_format(
struct xfs_log_item *lip,
- struct xfs_log_iovec *vecp)
+ struct xfs_log_vec *lv)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
+ struct xfs_log_iovec *vecp = NULL;
uint offset = 0;
int i;
@@ -354,8 +353,8 @@ xfs_buf_item_format(
}
for (i = 0; i < bip->bli_format_count; i++) {
- vecp = xfs_buf_item_format_segment(bip, vecp, offset,
- &bip->bli_formats[i]);
+ xfs_buf_item_format_segment(bip, lv, &vecp, offset,
+ &bip->bli_formats[i]);
offset += bp->b_maps[i].bm_len;
}
@@ -489,13 +488,21 @@ xfs_buf_item_unpin(
xfs_buf_lock(bp);
xfs_buf_hold(bp);
bp->b_flags |= XBF_ASYNC;
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0);
}
}
+/*
+ * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
+ * seconds so as to not spam logs too much on repeated detection of the same
+ * buffer being bad.
+ */
+
+DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
+
STATIC uint
xfs_buf_item_push(
struct xfs_log_item *lip,
@@ -524,6 +531,14 @@ xfs_buf_item_push(
trace_xfs_buf_item_push(bip);
+ /* has a previous flush failed due to IO errors? */
+ if ((bp->b_flags & XBF_WRITE_FAIL) &&
+ ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
+ xfs_warn(bp->b_target->bt_mount,
+"Detected failing async write on buffer block 0x%llx. Retrying async write.\n",
+ (long long)bp->b_bn);
+ }
+
if (!xfs_buf_delwri_queue(bp, buffer_list))
rval = XFS_ITEM_FLUSHING;
xfs_buf_unlock(bp);
@@ -710,7 +725,7 @@ xfs_buf_item_get_format(
bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
KM_SLEEP);
if (!bip->bli_formats)
- return ENOMEM;
+ return -ENOMEM;
return 0;
}
@@ -781,20 +796,6 @@ xfs_buf_item_init(
bip->bli_formats[i].blf_map_size = map_size;
}
-#ifdef XFS_TRANS_DEBUG
- /*
- * Allocate the arrays for tracking what needs to be logged
- * and what our callers request to be logged. bli_orig
- * holds a copy of the original, clean buffer for comparison
- * against, and bli_logged keeps a 1 bit flag per byte in
- * the buffer to indicate which bytes the callers have asked
- * to have logged.
- */
- bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
- memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
- bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
-#endif
-
/*
* Put the buf item into the list of items attached to the
* buffer at the front.
@@ -811,7 +812,6 @@ xfs_buf_item_init(
*/
static void
xfs_buf_item_log_segment(
- struct xfs_buf_log_item *bip,
uint first,
uint last,
uint *map)
@@ -919,7 +919,7 @@ xfs_buf_item_log(
if (end > last)
end = last;
- xfs_buf_item_log_segment(bip, first, end,
+ xfs_buf_item_log_segment(first, end,
&bip->bli_formats[i].blf_data_map[0]);
start += bp->b_maps[i].bm_len;
@@ -942,11 +942,6 @@ STATIC void
xfs_buf_item_free(
xfs_buf_log_item_t *bip)
{
-#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig);
- kmem_free(bip->bli_logged);
-#endif /* XFS_TRANS_DEBUG */
-
xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip);
}
@@ -1057,7 +1052,7 @@ xfs_buf_iodone_callbacks(
static ulong lasttime;
static xfs_buftarg_t *lasttarg;
- if (likely(!xfs_buf_geterror(bp)))
+ if (likely(!bp->b_error))
goto do_callbacks;
/*
@@ -1096,8 +1091,9 @@ xfs_buf_iodone_callbacks(
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
- if (!XFS_BUF_ISSTALE(bp)) {
- bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
+ if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
+ bp->b_flags |= XBF_WRITE | XBF_ASYNC |
+ XBF_DONE | XBF_WRITE_FAIL;
xfs_buf_iorequest(bp);
} else {
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index c4e50c6ed584..f1b69edcdf31 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -76,27 +76,26 @@ const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = {
STATIC int
xfs_dir2_sf_getdents(
- xfs_inode_t *dp, /* incore directory inode */
+ struct xfs_da_args *args,
struct dir_context *ctx)
{
int i; /* shortform entry number */
- xfs_mount_t *mp; /* filesystem mount point */
+ struct xfs_inode *dp = args->dp; /* incore directory inode */
xfs_dir2_dataptr_t off; /* current entry's offset */
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
xfs_dir2_dataptr_t dot_offset;
xfs_dir2_dataptr_t dotdot_offset;
xfs_ino_t ino;
-
- mp = dp->i_mount;
+ struct xfs_da_geometry *geo = args->geo;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
/*
* Give up if the directory is way too short.
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
- return XFS_ERROR(EIO);
+ ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
@@ -109,18 +108,18 @@ xfs_dir2_sf_getdents(
/*
* If the block number in the offset is out of range, we're done.
*/
- if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
+ if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
return 0;
/*
* Precalculate offsets for . and .. as we will always need them.
*
* XXX(hch): the second argument is sometimes 0 and sometimes
- * mp->m_dirdatablk.
+ * geo->datablk
*/
- dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+ dot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
dp->d_ops->data_dot_offset);
- dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+ dotdot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
dp->d_ops->data_dotdot_offset);
/*
@@ -149,7 +148,7 @@ xfs_dir2_sf_getdents(
for (i = 0; i < sfp->count; i++) {
__uint8_t filetype;
- off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+ off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
xfs_dir2_sf_get_offset(sfep));
if (ctx->pos > off) {
@@ -161,13 +160,13 @@ xfs_dir2_sf_getdents(
filetype = dp->d_ops->sf_get_ftype(sfep);
ctx->pos = off & 0x7fffffff;
if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
- xfs_dir3_get_dtype(mp, filetype)))
+ xfs_dir3_get_dtype(dp->i_mount, filetype)))
return 0;
sfep = dp->d_ops->sf_nextentry(sfp, sfep);
}
- ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
- 0x7fffffff;
+ ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
+ 0x7fffffff;
return 0;
}
@@ -176,9 +175,10 @@ xfs_dir2_sf_getdents(
*/
STATIC int
xfs_dir2_block_getdents(
- xfs_inode_t *dp, /* incore inode */
+ struct xfs_da_args *args,
struct dir_context *ctx)
{
+ struct xfs_inode *dp = args->dp; /* incore directory inode */
xfs_dir2_data_hdr_t *hdr; /* block header */
struct xfs_buf *bp; /* buffer for block */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -186,16 +186,15 @@ xfs_dir2_block_getdents(
xfs_dir2_data_unused_t *dup; /* block unused entry */
char *endptr; /* end of the data entries */
int error; /* error return value */
- xfs_mount_t *mp; /* filesystem mount point */
char *ptr; /* current data entry */
int wantoff; /* starting block offset */
xfs_off_t cook;
+ struct xfs_da_geometry *geo = args->geo;
- mp = dp->i_mount;
/*
* If the block number in the offset is out of range, we're done.
*/
- if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
+ if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
return 0;
error = xfs_dir3_block_read(NULL, dp, &bp);
@@ -206,13 +205,13 @@ xfs_dir2_block_getdents(
* Extract the byte offset we start at from the seek pointer.
* We'll skip entries before this.
*/
- wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos);
+ wantoff = xfs_dir2_dataptr_to_off(geo, ctx->pos);
hdr = bp->b_addr;
xfs_dir3_data_check(dp, bp);
/*
* Set up values for the loop.
*/
- btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp = xfs_dir2_block_tail_p(geo, hdr);
ptr = (char *)dp->d_ops->data_entry_p(hdr);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
@@ -244,7 +243,7 @@ xfs_dir2_block_getdents(
if ((char *)dep - (char *)hdr < wantoff)
continue;
- cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+ cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
(char *)dep - (char *)hdr);
ctx->pos = cook & 0x7fffffff;
@@ -254,7 +253,7 @@ xfs_dir2_block_getdents(
*/
if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
be64_to_cpu(dep->inumber),
- xfs_dir3_get_dtype(mp, filetype))) {
+ xfs_dir3_get_dtype(dp->i_mount, filetype))) {
xfs_trans_brelse(NULL, bp);
return 0;
}
@@ -264,8 +263,8 @@ xfs_dir2_block_getdents(
* Reached the end of the block.
* Set the offset to a non-existent block 1 and return.
*/
- ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
- 0x7fffffff;
+ ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
+ 0x7fffffff;
xfs_trans_brelse(NULL, bp);
return 0;
}
@@ -286,13 +285,13 @@ struct xfs_dir2_leaf_map_info {
STATIC int
xfs_dir2_leaf_readbuf(
- struct xfs_inode *dp,
+ struct xfs_da_args *args,
size_t bufsize,
struct xfs_dir2_leaf_map_info *mip,
xfs_dir2_off_t *curoff,
struct xfs_buf **bpp)
{
- struct xfs_mount *mp = dp->i_mount;
+ struct xfs_inode *dp = args->dp;
struct xfs_buf *bp = *bpp;
struct xfs_bmbt_irec *map = mip->map;
struct blk_plug plug;
@@ -300,6 +299,7 @@ xfs_dir2_leaf_readbuf(
int length;
int i;
int j;
+ struct xfs_da_geometry *geo = args->geo;
/*
* If we have a buffer, we need to release it and
@@ -309,12 +309,12 @@ xfs_dir2_leaf_readbuf(
if (bp) {
xfs_trans_brelse(NULL, bp);
bp = NULL;
- mip->map_blocks -= mp->m_dirblkfsbs;
+ mip->map_blocks -= geo->fsbcount;
/*
* Loop to get rid of the extents for the
* directory block.
*/
- for (i = mp->m_dirblkfsbs; i > 0; ) {
+ for (i = geo->fsbcount; i > 0; ) {
j = min_t(int, map->br_blockcount, i);
map->br_blockcount -= j;
map->br_startblock += j;
@@ -333,8 +333,7 @@ xfs_dir2_leaf_readbuf(
/*
* Recalculate the readahead blocks wanted.
*/
- mip->ra_want = howmany(bufsize + mp->m_dirblksize,
- mp->m_sb.sb_blocksize) - 1;
+ mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1;
ASSERT(mip->ra_want >= 0);
/*
@@ -342,14 +341,14 @@ xfs_dir2_leaf_readbuf(
* run out of data blocks, get some more mappings.
*/
if (1 + mip->ra_want > mip->map_blocks &&
- mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
+ mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) {
/*
* Get more bmaps, fill in after the ones
* we already have in the table.
*/
mip->nmap = mip->map_size - mip->map_valid;
error = xfs_bmapi_read(dp, mip->map_off,
- xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
+ xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) -
mip->map_off,
&map[mip->map_valid], &mip->nmap, 0);
@@ -370,7 +369,7 @@ xfs_dir2_leaf_readbuf(
i = mip->map_valid + mip->nmap - 1;
mip->map_off = map[i].br_startoff + map[i].br_blockcount;
} else
- mip->map_off = xfs_dir2_byte_to_da(mp,
+ mip->map_off = xfs_dir2_byte_to_da(geo,
XFS_DIR2_LEAF_OFFSET);
/*
@@ -396,18 +395,18 @@ xfs_dir2_leaf_readbuf(
* No valid mappings, so no more data blocks.
*/
if (!mip->map_valid) {
- *curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
+ *curoff = xfs_dir2_da_to_byte(geo, mip->map_off);
goto out;
}
/*
* Read the directory block starting at the first mapping.
*/
- mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
+ mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff);
error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
- map->br_blockcount >= mp->m_dirblkfsbs ?
- XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
-
+ map->br_blockcount >= geo->fsbcount ?
+ XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) :
+ -1, &bp);
/*
* Should just skip over the data block instead of giving up.
*/
@@ -419,7 +418,7 @@ xfs_dir2_leaf_readbuf(
* was previously ra.
*/
if (mip->ra_current)
- mip->ra_current -= mp->m_dirblkfsbs;
+ mip->ra_current -= geo->fsbcount;
/*
* Do we need more readahead?
@@ -427,16 +426,16 @@ xfs_dir2_leaf_readbuf(
blk_start_plug(&plug);
for (mip->ra_index = mip->ra_offset = i = 0;
mip->ra_want > mip->ra_current && i < mip->map_blocks;
- i += mp->m_dirblkfsbs) {
+ i += geo->fsbcount) {
ASSERT(mip->ra_index < mip->map_valid);
/*
* Read-ahead a contiguous directory block.
*/
if (i > mip->ra_current &&
- map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
- xfs_dir3_data_readahead(NULL, dp,
+ map[mip->ra_index].br_blockcount >= geo->fsbcount) {
+ xfs_dir3_data_readahead(dp,
map[mip->ra_index].br_startoff + mip->ra_offset,
- XFS_FSB_TO_DADDR(mp,
+ XFS_FSB_TO_DADDR(dp->i_mount,
map[mip->ra_index].br_startblock +
mip->ra_offset));
mip->ra_current = i;
@@ -447,7 +446,7 @@ xfs_dir2_leaf_readbuf(
* use our mapping, but this is a very rare case.
*/
else if (i > mip->ra_current) {
- xfs_dir3_data_readahead(NULL, dp,
+ xfs_dir3_data_readahead(dp,
map[mip->ra_index].br_startoff +
mip->ra_offset, -1);
mip->ra_current = i;
@@ -456,15 +455,14 @@ xfs_dir2_leaf_readbuf(
/*
* Advance offset through the mapping table.
*/
- for (j = 0; j < mp->m_dirblkfsbs; j++) {
+ for (j = 0; j < geo->fsbcount; j += length ) {
/*
* The rest of this extent but not more than a dir
* block.
*/
- length = min_t(int, mp->m_dirblkfsbs,
+ length = min_t(int, geo->fsbcount,
map[mip->ra_index].br_blockcount -
mip->ra_offset);
- j += length;
mip->ra_offset += length;
/*
@@ -489,22 +487,23 @@ out:
*/
STATIC int
xfs_dir2_leaf_getdents(
- xfs_inode_t *dp, /* incore directory inode */
+ struct xfs_da_args *args,
struct dir_context *ctx,
size_t bufsize)
{
+ struct xfs_inode *dp = args->dp;
struct xfs_buf *bp = NULL; /* data block buffer */
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
int error = 0; /* error return value */
int length; /* temporary length value */
- xfs_mount_t *mp; /* filesystem mount point */
int byteoff; /* offset in current block */
xfs_dir2_off_t curoff; /* current overall offset */
xfs_dir2_off_t newoff; /* new curoff after new blk */
char *ptr = NULL; /* pointer to current data */
struct xfs_dir2_leaf_map_info *map_info;
+ struct xfs_da_geometry *geo = args->geo;
/*
* If the offset is at or past the largest allowed value,
@@ -513,15 +512,12 @@ xfs_dir2_leaf_getdents(
if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)
return 0;
- mp = dp->i_mount;
-
/*
* Set up to bmap a number of blocks based on the caller's
* buffer size, the directory block size, and the filesystem
* block size.
*/
- length = howmany(bufsize + mp->m_dirblksize,
- mp->m_sb.sb_blocksize);
+ length = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
(length * sizeof(struct xfs_bmbt_irec)),
KM_SLEEP | KM_NOFS);
@@ -531,14 +527,14 @@ xfs_dir2_leaf_getdents(
* Inside the loop we keep the main offset value as a byte offset
* in the directory file.
*/
- curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);
+ curoff = xfs_dir2_dataptr_to_byte(ctx->pos);
/*
* Force this conversion through db so we truncate the offset
* down to get the start of the data block.
*/
- map_info->map_off = xfs_dir2_db_to_da(mp,
- xfs_dir2_byte_to_db(mp, curoff));
+ map_info->map_off = xfs_dir2_db_to_da(geo,
+ xfs_dir2_byte_to_db(geo, curoff));
/*
* Loop over directory entries until we reach the end offset.
@@ -551,9 +547,9 @@ xfs_dir2_leaf_getdents(
* If we have no buffer, or we're off the end of the
* current buffer, need to get another one.
*/
- if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
+ if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
- error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
+ error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
&curoff, &bp);
if (error || !map_info->map_valid)
break;
@@ -561,7 +557,8 @@ xfs_dir2_leaf_getdents(
/*
* Having done a read, we need to set a new offset.
*/
- newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
+ newoff = xfs_dir2_db_off_to_byte(geo,
+ map_info->curdb, 0);
/*
* Start of the current block.
*/
@@ -571,7 +568,7 @@ xfs_dir2_leaf_getdents(
* Make sure we're in the right block.
*/
else if (curoff > newoff)
- ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
+ ASSERT(xfs_dir2_byte_to_db(geo, curoff) ==
map_info->curdb);
hdr = bp->b_addr;
xfs_dir3_data_check(dp, bp);
@@ -579,7 +576,7 @@ xfs_dir2_leaf_getdents(
* Find our position in the block.
*/
ptr = (char *)dp->d_ops->data_entry_p(hdr);
- byteoff = xfs_dir2_byte_to_off(mp, curoff);
+ byteoff = xfs_dir2_byte_to_off(geo, curoff);
/*
* Skip past the header.
*/
@@ -608,10 +605,10 @@ xfs_dir2_leaf_getdents(
* Now set our real offset.
*/
curoff =
- xfs_dir2_db_off_to_byte(mp,
- xfs_dir2_byte_to_db(mp, curoff),
+ xfs_dir2_db_off_to_byte(geo,
+ xfs_dir2_byte_to_db(geo, curoff),
(char *)ptr - (char *)hdr);
- if (ptr >= (char *)hdr + mp->m_dirblksize) {
+ if (ptr >= (char *)hdr + geo->blksize) {
continue;
}
}
@@ -635,10 +632,10 @@ xfs_dir2_leaf_getdents(
length = dp->d_ops->data_entsize(dep->namelen);
filetype = dp->d_ops->data_get_ftype(dep);
- ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
+ ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
be64_to_cpu(dep->inumber),
- xfs_dir3_get_dtype(mp, filetype)))
+ xfs_dir3_get_dtype(dp->i_mount, filetype)))
break;
/*
@@ -653,10 +650,10 @@ xfs_dir2_leaf_getdents(
/*
* All done. Set output offset value to current offset.
*/
- if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
+ if (curoff > xfs_dir2_dataptr_to_byte(XFS_DIR2_MAX_DATAPTR))
ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
else
- ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
+ ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
kmem_free(map_info);
if (bp)
xfs_trans_brelse(NULL, bp);
@@ -668,28 +665,36 @@ xfs_dir2_leaf_getdents(
*/
int
xfs_readdir(
- xfs_inode_t *dp,
- struct dir_context *ctx,
- size_t bufsize)
+ struct xfs_inode *dp,
+ struct dir_context *ctx,
+ size_t bufsize)
{
- int rval; /* return value */
- int v; /* type-checking value */
+ struct xfs_da_args args = { NULL };
+ int rval;
+ int v;
+ uint lock_mode;
trace_xfs_readdir(dp);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_getdents);
+ args.dp = dp;
+ args.geo = dp->i_mount->m_dir_geo;
+
+ lock_mode = xfs_ilock_data_map_shared(dp);
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_getdents(dp, ctx);
- else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))
+ rval = xfs_dir2_sf_getdents(&args, ctx);
+ else if ((rval = xfs_dir2_isblock(&args, &v)))
;
else if (v)
- rval = xfs_dir2_block_getdents(dp, ctx);
+ rval = xfs_dir2_block_getdents(&args, ctx);
else
- rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize);
+ rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
+ xfs_iunlock(dp, lock_mode);
+
return rval;
}
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 4f11ef011139..13d08a1b390e 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -124,7 +124,7 @@ xfs_trim_extents(
}
trace_xfs_discard_extent(mp, agno, fbno, flen);
- error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
+ error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
if (error)
goto out_del_cursor;
*blocks_trimmed += flen;
@@ -166,11 +166,11 @@ xfs_ioc_trim(
int error, last_error = 0;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (!blk_queue_discard(q))
- return -XFS_ERROR(EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (copy_from_user(&range, urange, sizeof(range)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
/*
* Truncating down the len isn't actually quite correct, but using
@@ -182,7 +182,7 @@ xfs_ioc_trim(
if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) ||
range.len < mp->m_sb.sb_blocksize)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
start = BTOBB(range.start);
end = start + BTOBBT(range.len) - 1;
@@ -195,7 +195,7 @@ xfs_ioc_trim(
end_agno = xfs_daddr_to_agno(mp, end);
for (agno = start_agno; agno <= end_agno; agno++) {
- error = -xfs_trim_extents(mp, agno, start, end, minlen,
+ error = xfs_trim_extents(mp, agno, start, end, minlen,
&blocks_trimmed);
if (error)
last_error = error;
@@ -206,7 +206,7 @@ xfs_ioc_trim(
range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
if (copy_to_user(urange, &range, sizeof(range)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -222,11 +222,11 @@ xfs_discard_extents(
trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
busyp->length);
- error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+ error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_NOFS, 0);
- if (error && error != EOPNOTSUPP) {
+ if (error && error != -EOPNOTSUPP) {
xfs_info(mp,
"discard failed for extent [0x%llu,%u], error %d",
(unsigned long long)busyp->bno,
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 6b1e695caf0e..63c2de49f61d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -327,7 +327,7 @@ xfs_qm_dqalloc(
*/
if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
- return (ESRCH);
+ return -ESRCH;
}
xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
@@ -353,10 +353,10 @@ xfs_qm_dqalloc(
dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen,
0);
-
- error = xfs_buf_geterror(bp);
- if (error)
+ if (!bp) {
+ error = -ENOMEM;
goto error1;
+ }
bp->b_ops = &xfs_dquot_buf_ops;
/*
@@ -400,7 +400,7 @@ xfs_qm_dqalloc(
error0:
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
- return (error);
+ return error;
}
STATIC int
@@ -426,7 +426,7 @@ xfs_qm_dqrepair(
if (error) {
ASSERT(*bpp == NULL);
- return XFS_ERROR(error);
+ return error;
}
(*bpp)->b_ops = &xfs_dquot_buf_ops;
@@ -442,7 +442,7 @@ xfs_qm_dqrepair(
if (error) {
/* repair failed, we're screwed */
xfs_trans_brelse(tp, *bpp);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -469,17 +469,18 @@ xfs_qm_dqtobp(
struct xfs_mount *mp = dqp->q_mount;
xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
struct xfs_trans *tp = (tpp ? *tpp : NULL);
+ uint lock_mode;
dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
- xfs_ilock(quotip, XFS_ILOCK_SHARED);
+ lock_mode = xfs_ilock_data_map_shared(quotip);
if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
/*
* Return if this type of quotas is turned off while we
* didn't have the quota inode lock.
*/
- xfs_iunlock(quotip, XFS_ILOCK_SHARED);
- return ESRCH;
+ xfs_iunlock(quotip, lock_mode);
+ return -ESRCH;
}
/*
@@ -488,7 +489,7 @@ xfs_qm_dqtobp(
error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
- xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+ xfs_iunlock(quotip, lock_mode);
if (error)
return error;
@@ -507,7 +508,7 @@ xfs_qm_dqtobp(
* We don't allocate unless we're asked to
*/
if (!(flags & XFS_QMOPT_DQALLOC))
- return ENOENT;
+ return -ENOENT;
ASSERT(tp);
error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
@@ -529,7 +530,7 @@ xfs_qm_dqtobp(
mp->m_quotainfo->qi_dqchunklen,
0, &bp, &xfs_dquot_buf_ops);
- if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
+ if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
mp->m_quotainfo->qi_dqperchunk;
ASSERT(bp == NULL);
@@ -538,7 +539,7 @@ xfs_qm_dqtobp(
if (error) {
ASSERT(bp == NULL);
- return XFS_ERROR(error);
+ return error;
}
}
@@ -546,7 +547,7 @@ xfs_qm_dqtobp(
*O_bpp = bp;
*O_ddpp = bp->b_addr + dqp->q_bufoffset;
- return (0);
+ return 0;
}
@@ -614,7 +615,7 @@ xfs_qm_dqread(
if (flags & XFS_QMOPT_DQALLOC) {
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm,
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
XFS_QM_DQALLOC_SPACE_RES(mp), 0);
if (error)
goto error1;
@@ -714,7 +715,7 @@ xfs_qm_dqget(
if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
(! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
(! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
- return (ESRCH);
+ return -ESRCH;
}
#ifdef DEBUG
@@ -722,7 +723,7 @@ xfs_qm_dqget(
if ((xfs_dqerror_target == mp->m_ddev_targp) &&
(xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
xfs_debug(mp, "Returning error in dqget");
- return (EIO);
+ return -EIO;
}
}
@@ -795,14 +796,14 @@ restart:
} else {
/* inode stays locked on return */
xfs_qm_dqdestroy(dqp);
- return XFS_ERROR(ESRCH);
+ return -ESRCH;
}
}
mutex_lock(&qi->qi_tree_lock);
- error = -radix_tree_insert(tree, id, dqp);
+ error = radix_tree_insert(tree, id, dqp);
if (unlikely(error)) {
- WARN_ON(error != EEXIST);
+ WARN_ON(error != -EEXIST);
/*
* Duplicate found. Just throw away the new dquot and start
@@ -828,48 +829,7 @@ restart:
ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
trace_xfs_dqget_miss(dqp);
*O_dqpp = dqp;
- return (0);
-}
-
-
-STATIC void
-xfs_qm_dqput_final(
- struct xfs_dquot *dqp)
-{
- struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
- struct xfs_dquot *gdqp;
- struct xfs_dquot *pdqp;
-
- trace_xfs_dqput_free(dqp);
-
- if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
- XFS_STATS_INC(xs_qm_dquot_unused);
-
- /*
- * If we just added a udquot to the freelist, then we want to release
- * the gdquot/pdquot reference that it (probably) has. Otherwise it'll
- * keep the gdquot/pdquot from getting reclaimed.
- */
- gdqp = dqp->q_gdquot;
- if (gdqp) {
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
-
- pdqp = dqp->q_pdquot;
- if (pdqp) {
- xfs_dqlock(pdqp);
- dqp->q_pdquot = NULL;
- }
- xfs_dqunlock(dqp);
-
- /*
- * If we had a group/project quota hint, release it now.
- */
- if (gdqp)
- xfs_qm_dqput(gdqp);
- if (pdqp)
- xfs_qm_dqput(pdqp);
+ return 0;
}
/*
@@ -887,10 +847,14 @@ xfs_qm_dqput(
trace_xfs_dqput(dqp);
- if (--dqp->q_nrefs > 0)
- xfs_dqunlock(dqp);
- else
- xfs_qm_dqput_final(dqp);
+ if (--dqp->q_nrefs == 0) {
+ struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
+ trace_xfs_dqput_free(dqp);
+
+ if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
+ XFS_STATS_INC(xs_qm_dquot_unused);
+ }
+ xfs_dqunlock(dqp);
}
/*
@@ -1002,7 +966,7 @@ xfs_qm_dqflush(
SHUTDOWN_CORRUPT_INCORE);
else
spin_unlock(&mp->m_ail->xa_lock);
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out_unlock;
}
@@ -1010,7 +974,8 @@ xfs_qm_dqflush(
* Get the buffer containing the on-disk dquot
*/
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL);
+ mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+ &xfs_dquot_buf_ops);
if (error)
goto out_unlock;
@@ -1028,7 +993,7 @@ xfs_qm_dqflush(
xfs_buf_relse(bp);
xfs_dqfunlock(dqp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* This is the only portion of data that needs to persist */
@@ -1081,7 +1046,7 @@ xfs_qm_dqflush(
out_unlock:
xfs_dqfunlock(dqp);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index d22ed0053c32..c24c67e22a2a 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -52,8 +52,6 @@ typedef struct xfs_dquot {
int q_bufoffset; /* off of dq in buffer (# dquots) */
xfs_fileoff_t q_fileoffset; /* offset in quotas file */
- struct xfs_dquot*q_gdquot; /* group dquot, hint only */
- struct xfs_dquot*q_pdquot; /* project dquot, hint only */
xfs_disk_dquot_t q_core; /* actual usage & quotas */
xfs_dq_logitem_t q_logitem; /* dquot log item */
xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
@@ -141,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
}
}
+/*
+ * Check whether a dquot is under low free space conditions. We assume the quota
+ * is enabled and enforced.
+ */
+static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
+{
+ int64_t freesp;
+
+ freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount;
+ if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
+ return true;
+
+ return false;
+}
+
#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 92e5f62eefc6..f33fbaaa4d8a 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -57,20 +57,24 @@ xfs_qm_dquot_logitem_size(
STATIC void
xfs_qm_dquot_logitem_format(
struct xfs_log_item *lip,
- struct xfs_log_iovec *logvec)
+ struct xfs_log_vec *lv)
{
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
-
- logvec->i_addr = &qlip->qli_format;
- logvec->i_len = sizeof(xfs_dq_logformat_t);
- logvec->i_type = XLOG_REG_TYPE_QFORMAT;
- logvec++;
- logvec->i_addr = &qlip->qli_dquot->q_core;
- logvec->i_len = sizeof(xfs_disk_dquot_t);
- logvec->i_type = XLOG_REG_TYPE_DQUOT;
-
- qlip->qli_format.qlf_size = 2;
-
+ struct xfs_log_iovec *vecp = NULL;
+ struct xfs_dq_logformat *qlf;
+
+ qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT);
+ qlf->qlf_type = XFS_LI_DQUOT;
+ qlf->qlf_size = 2;
+ qlf->qlf_id = be32_to_cpu(qlip->qli_dquot->q_core.d_id);
+ qlf->qlf_blkno = qlip->qli_dquot->q_blkno;
+ qlf->qlf_len = 1;
+ qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset;
+ xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat));
+
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT,
+ &qlip->qli_dquot->q_core,
+ sizeof(struct xfs_disk_dquot));
}
/*
@@ -257,18 +261,6 @@ xfs_qm_dquot_logitem_init(
xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
&xfs_dquot_item_ops);
lp->qli_dquot = dqp;
- lp->qli_format.qlf_type = XFS_LI_DQUOT;
- lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
- lp->qli_format.qlf_blkno = dqp->q_blkno;
- lp->qli_format.qlf_len = 1;
- /*
- * This is just the offset of this dquot within its buffer
- * (which is currently 1 FSB and probably won't change).
- * Hence 32 bits for this offset should be just fine.
- * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
- * here, and recompute it at recovery time.
- */
- lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
}
/*------------------ QUOTAOFF LOG ITEMS -------------------*/
@@ -294,26 +286,20 @@ xfs_qm_qoff_logitem_size(
*nbytes += sizeof(struct xfs_qoff_logitem);
}
-/*
- * This is called to fill in the vector of log iovecs for the
- * given quotaoff log item. We use only 1 iovec, and we point that
- * at the quotaoff_log_format structure embedded in the quotaoff item.
- * It is at this point that we assert that all of the extent
- * slots in the quotaoff item have been filled.
- */
STATIC void
xfs_qm_qoff_logitem_format(
struct xfs_log_item *lip,
- struct xfs_log_iovec *log_vector)
+ struct xfs_log_vec *lv)
{
struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
-
- ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
-
- log_vector->i_addr = &qflip->qql_format;
- log_vector->i_len = sizeof(xfs_qoff_logitem_t);
- log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
- qflip->qql_format.qf_size = 1;
+ struct xfs_log_iovec *vecp = NULL;
+ struct xfs_qoff_logformat *qlf;
+
+ qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF);
+ qlf->qf_type = XFS_LI_QUOTAOFF;
+ qlf->qf_size = 1;
+ qlf->qf_flags = qflip->qql_flags;
+ xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem));
}
/*
@@ -453,8 +439,7 @@ xfs_qm_qoff_logitem_init(
xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
&xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
qf->qql_item.li_mountp = mp;
- qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
- qf->qql_format.qf_flags = flags;
qf->qql_start_lip = start;
+ qf->qql_flags = flags;
return qf;
}
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index 5acae2ada70b..502e9464634a 100644
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -27,13 +27,12 @@ typedef struct xfs_dq_logitem {
xfs_log_item_t qli_item; /* common portion */
struct xfs_dquot *qli_dquot; /* dquot ptr */
xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
- xfs_dq_logformat_t qli_format; /* logged structure */
} xfs_dq_logitem_t;
typedef struct xfs_qoff_logitem {
xfs_log_item_t qql_item; /* common portion */
struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
- xfs_qoff_logformat_t qql_format; /* logged structure */
+ unsigned int qql_flags;
} xfs_qoff_logitem_t;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 9995b807d627..b92fd7bc49e3 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -27,29 +27,6 @@
#ifdef DEBUG
-int xfs_etrap[XFS_ERROR_NTRAP] = {
- 0,
-};
-
-int
-xfs_error_trap(int e)
-{
- int i;
-
- if (!e)
- return 0;
- for (i = 0; i < XFS_ERROR_NTRAP; i++) {
- if (xfs_etrap[i] == 0)
- break;
- if (e != xfs_etrap[i])
- continue;
- xfs_notice(NULL, "%s: error %d", __func__, e);
- BUG();
- break;
- }
- return e;
-}
-
int xfs_etest[XFS_NUM_INJECT_ERROR];
int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
@@ -156,7 +133,7 @@ xfs_error_report(
{
if (level <= xfs_error_level) {
xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
- "Internal error %s at line %d of file %s. Caller 0x%p",
+ "Internal error %s at line %d of file %s. Caller %pF",
tag, linenum, filename, ra);
xfs_stack_trace();
@@ -178,3 +155,28 @@ xfs_corruption_error(
xfs_error_report(tag, level, mp, filename, linenum, ra);
xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
}
+
+/*
+ * Warnings specifically for verifier errors. Differentiate CRC vs. invalid
+ * values, and omit the stack trace unless the error level is tuned high.
+ */
+void
+xfs_verifier_error(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
+ bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
+ __return_address, bp->b_bn);
+
+ xfs_alert(mp, "Unmount and run xfs_repair");
+
+ if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+ xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
+ xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
+ }
+
+ if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+ xfs_stack_trace();
+}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 079a367f44ee..279a76e52791 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,15 +18,6 @@
#ifndef __XFS_ERROR_H__
#define __XFS_ERROR_H__
-#ifdef DEBUG
-#define XFS_ERROR_NTRAP 10
-extern int xfs_etrap[XFS_ERROR_NTRAP];
-extern int xfs_error_trap(int);
-#define XFS_ERROR(e) xfs_error_trap(e)
-#else
-#define XFS_ERROR(e) (e)
-#endif
-
struct xfs_mount;
extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
@@ -34,6 +25,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
extern void xfs_corruption_error(const char *tag, int level,
struct xfs_mount *mp, void *p, const char *filename,
int linenum, inst_t *ra);
+extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERROR_REPORT(e, lvl, mp) \
xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
@@ -55,7 +47,7 @@ extern void xfs_corruption_error(const char *tag, int level,
if (unlikely(!fs_is_ok)) { \
XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
XFS_ERRLEVEL_LOW, NULL); \
- error = XFS_ERROR(EFSCORRUPTED); \
+ error = -EFSCORRUPTED; \
goto l; \
} \
}
@@ -67,7 +59,7 @@ extern void xfs_corruption_error(const char *tag, int level,
if (unlikely(!fs_is_ok)) { \
XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
XFS_ERRLEVEL_LOW, NULL); \
- return XFS_ERROR(EFSCORRUPTED); \
+ return -EFSCORRUPTED; \
} \
}
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 1399e187d425..5a6bd5d8779a 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -147,9 +147,9 @@ xfs_nfs_get_inode(
* We don't use ESTALE directly down the chain to not
* confuse applications using bulkstat that expect EINVAL.
*/
- if (error == EINVAL || error == ENOENT)
- error = ESTALE;
- return ERR_PTR(-error);
+ if (error == -EINVAL || error == -ENOENT)
+ error = -ESTALE;
+ return ERR_PTR(error);
}
if (ip->i_d.di_gen != generation) {
@@ -217,7 +217,7 @@ xfs_fs_get_parent(
error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
if (unlikely(error))
- return ERR_PTR(-error);
+ return ERR_PTR(error);
return d_obtain_alias(VFS_I(cip));
}
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 3680d04f973f..c4327419dc5c 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -26,6 +26,7 @@
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_extfree_item.h"
+#include "xfs_log.h"
kmem_zone_t *xfs_efi_zone;
@@ -101,9 +102,10 @@ xfs_efi_item_size(
STATIC void
xfs_efi_item_format(
struct xfs_log_item *lip,
- struct xfs_log_iovec *log_vector)
+ struct xfs_log_vec *lv)
{
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
ASSERT(atomic_read(&efip->efi_next_extent) ==
efip->efi_format.efi_nextents);
@@ -111,10 +113,9 @@ xfs_efi_item_format(
efip->efi_format.efi_type = XFS_LI_EFI;
efip->efi_format.efi_size = 1;
- log_vector->i_addr = &efip->efi_format;
- log_vector->i_len = xfs_efi_item_sizeof(efip);
- log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
- ASSERT(log_vector->i_len >= sizeof(xfs_efi_log_format_t));
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT,
+ &efip->efi_format,
+ xfs_efi_item_sizeof(efip));
}
@@ -297,7 +298,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
return 0;
}
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
/*
@@ -368,19 +369,19 @@ xfs_efd_item_size(
STATIC void
xfs_efd_item_format(
struct xfs_log_item *lip,
- struct xfs_log_iovec *log_vector)
+ struct xfs_log_vec *lv)
{
struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
efdp->efd_format.efd_type = XFS_LI_EFD;
efdp->efd_format.efd_size = 1;
- log_vector->i_addr = &efdp->efd_format;
- log_vector->i_len = xfs_efd_item_sizeof(efdp);
- log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
- ASSERT(log_vector->i_len >= sizeof(xfs_efd_log_format_t));
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT,
+ &efdp->efd_format,
+ xfs_efd_item_sizeof(efdp));
}
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 52c91e143725..de5368c803f9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,6 +38,7 @@
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_dinode.h"
+#include "xfs_icache.h"
#include <linux/aio.h>
#include <linux/dcache.h>
@@ -179,7 +180,7 @@ xfs_file_fsync(
return error;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
xfs_iflags_clear(ip, XFS_ITRUNCATED);
@@ -225,46 +226,40 @@ xfs_file_fsync(
!log_flushed)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
- return -error;
+ return error;
}
STATIC ssize_t
-xfs_file_aio_read(
+xfs_file_read_iter(
struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos)
+ struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- size_t size = 0;
+ size_t size = iov_iter_count(to);
ssize_t ret = 0;
int ioflags = 0;
xfs_fsize_t n;
+ loff_t pos = iocb->ki_pos;
XFS_STATS_INC(xs_read_calls);
- BUG_ON(iocb->ki_pos != pos);
-
if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
+ ioflags |= XFS_IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
- ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
- if (ret < 0)
- return ret;
-
- if (unlikely(ioflags & IO_ISDIRECT)) {
+ if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((pos & target->bt_smask) || (size & target->bt_smask)) {
+ /* DIO must be aligned to device logical sector size */
+ if ((pos | size) & target->bt_logical_sectormask) {
if (pos == i_size_read(inode))
return 0;
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -289,26 +284,36 @@ xfs_file_aio_read(
* proceeed concurrently without serialisation.
*/
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
+ if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
- ret = -filemap_write_and_wait_range(
+ ret = filemap_write_and_wait_range(
VFS_I(ip)->i_mapping,
- pos, -1);
+ pos, pos + size - 1);
if (ret) {
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
- truncate_pagecache_range(VFS_I(ip), pos, -1);
+
+ /*
+ * Invalidate whole pages. This can return an error if
+ * we fail to invalidate a page, but this should never
+ * happen on XFS. Warn if it does fail.
+ */
+ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + size - 1) >> PAGE_CACHE_SHIFT);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
trace_xfs_file_read(ip, size, pos, ioflags);
- ret = generic_file_aio_read(iocb, iovp, nr_segs, pos);
+ ret = generic_file_read_iter(iocb, to);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
@@ -331,7 +336,7 @@ xfs_file_splice_read(
XFS_STATS_INC(xs_read_calls);
if (infilp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
@@ -349,47 +354,6 @@ xfs_file_splice_read(
}
/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
- struct pipe_inode_info *pipe,
- struct file *outfilp,
- loff_t *ppos,
- size_t count,
- unsigned int flags)
-{
- struct inode *inode = outfilp->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- int ioflags = 0;
- ssize_t ret;
-
- XFS_STATS_INC(xs_write_calls);
-
- if (outfilp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
- ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return ret;
-}
-
-/*
* This routine is called to handle zeroing any space in the last block of the
* file that is beyond the EOF. We do this since the size is being increased
* without writing anything to that block and we don't want to read the
@@ -571,7 +535,7 @@ restart:
xfs_rw_ilock(ip, *iolock);
goto restart;
}
- error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
+ error = xfs_zero_eof(ip, *pos, i_size_read(inode));
if (error)
return error;
}
@@ -624,10 +588,7 @@ restart:
STATIC ssize_t
xfs_file_dio_aio_write(
struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos,
- size_t ocount)
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -635,15 +596,18 @@ xfs_file_dio_aio_write(
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
ssize_t ret = 0;
- size_t count = ocount;
int unaligned_io = 0;
int iolock;
+ size_t count = iov_iter_count(from);
+ loff_t pos = iocb->ki_pos;
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((pos & target->bt_smask) || (count & target->bt_smask))
- return -XFS_ERROR(EINVAL);
+ /* DIO must be aligned to device logical sector size */
+ if ((pos | count) & target->bt_logical_sectormask)
+ return -EINVAL;
+ /* "unaligned" here means not aligned to a filesystem block */
if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
unaligned_io = 1;
@@ -674,13 +638,23 @@ xfs_file_dio_aio_write(
ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
if (ret)
goto out;
+ iov_iter_truncate(from, count);
if (mapping->nrpages) {
- ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
- pos, -1);
+ ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ pos, pos + count - 1);
if (ret)
goto out;
- truncate_pagecache_range(VFS_I(ip), pos, -1);
+ /*
+ * Invalidate whole pages. This can return an error if
+ * we fail to invalidate a page, but this should never
+ * happen on XFS. Warn if it does fail.
+ */
+ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count - 1) >> PAGE_CACHE_SHIFT);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
/*
@@ -695,8 +669,7 @@ xfs_file_dio_aio_write(
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
- ret = generic_file_direct_write(iocb, iovp,
- &nr_segs, pos, &iocb->ki_pos, count, ocount);
+ ret = generic_file_direct_write(iocb, from, pos);
out:
xfs_rw_iunlock(ip, iolock);
@@ -709,10 +682,7 @@ out:
STATIC ssize_t
xfs_file_buffered_aio_write(
struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos,
- size_t ocount)
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -721,7 +691,8 @@ xfs_file_buffered_aio_write(
ssize_t ret;
int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
- size_t count = ocount;
+ loff_t pos = iocb->ki_pos;
+ size_t count = iov_iter_count(from);
xfs_rw_ilock(ip, iolock);
@@ -729,22 +700,37 @@ xfs_file_buffered_aio_write(
if (ret)
goto out;
+ iov_iter_truncate(from, count);
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
write_retry:
trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
- ret = generic_file_buffered_write(iocb, iovp, nr_segs,
- pos, &iocb->ki_pos, count, 0);
+ ret = generic_perform_write(file, from, pos);
+ if (likely(ret >= 0))
+ iocb->ki_pos = pos + ret;
/*
- * If we just got an ENOSPC, try to write back all dirty inodes to
- * convert delalloc space to free up some of the excess reserved
- * metadata space.
+ * If we hit a space limit, try to free up some lingering preallocated
+ * space before returning an error. In the case of ENOSPC, first try to
+ * write back all dirty inodes to free up some of the excess reserved
+ * metadata space. This reduces the chances that the eofblocks scan
+ * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
+ * also behaves as a filter to prevent too many eofblocks scans from
+ * running at the same time.
*/
- if (ret == -ENOSPC && !enospc) {
+ if (ret == -EDQUOT && !enospc) {
+ enospc = xfs_inode_free_quota_eofblocks(ip);
+ if (enospc)
+ goto write_retry;
+ } else if (ret == -ENOSPC && !enospc) {
+ struct xfs_eofblocks eofb = {0};
+
enospc = 1;
xfs_flush_inodes(ip->i_mount);
+ eofb.eof_scan_owner = ip->i_ino; /* for locking */
+ eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
+ xfs_icache_free_eofblocks(ip->i_mount, &eofb);
goto write_retry;
}
@@ -755,40 +741,29 @@ out:
}
STATIC ssize_t
-xfs_file_aio_write(
+xfs_file_write_iter(
struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos)
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
- size_t ocount = 0;
+ size_t ocount = iov_iter_count(from);
XFS_STATS_INC(xs_write_calls);
- BUG_ON(iocb->ki_pos != pos);
-
- ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
- if (ret)
- return ret;
-
if (ocount == 0)
return 0;
- if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- ret = -EIO;
- goto out;
- }
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
if (unlikely(file->f_flags & O_DIRECT))
- ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
+ ret = xfs_file_dio_aio_write(iocb, from);
else
- ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
- ocount);
+ ret = xfs_file_buffered_aio_write(iocb, from);
if (ret > 0) {
ssize_t err;
@@ -796,12 +771,10 @@ xfs_file_aio_write(
XFS_STATS_ADD(xs_write_bytes, ret);
/* Handle various SYNC-type writes */
- err = generic_write_sync(file, pos, ret);
+ err = generic_write_sync(file, iocb->ki_pos - ret, ret);
if (err < 0)
ret = err;
}
-
-out:
return ret;
}
@@ -820,7 +793,8 @@ xfs_file_fallocate(
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -828,17 +802,42 @@ xfs_file_fallocate(
error = xfs_free_file_space(ip, offset, len);
if (error)
goto out_unlock;
+ } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+ unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+ if (offset & blksize_mask || len & blksize_mask) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * There is no need to overlap collapse range with EOF,
+ * in which case it is effectively a truncate operation
+ */
+ if (offset + len >= i_size_read(inode)) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+
+ new_size = i_size_read(inode) - len;
+
+ error = xfs_collapse_file_space(ip, offset, len);
+ if (error)
+ goto out_unlock;
} else {
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
new_size = offset + len;
- error = -inode_newsize_ok(inode, new_size);
+ error = inode_newsize_ok(inode, new_size);
if (error)
goto out_unlock;
}
- error = xfs_alloc_file_space(ip, offset, len,
- XFS_BMAPI_PREALLOC);
+ if (mode & FALLOC_FL_ZERO_RANGE)
+ error = xfs_zero_file_space(ip, offset, len);
+ else
+ error = xfs_alloc_file_space(ip, offset, len,
+ XFS_BMAPI_PREALLOC);
if (error)
goto out_unlock;
}
@@ -856,7 +855,7 @@ xfs_file_fallocate(
if (ip->i_d.di_mode & S_IXGRP)
ip->i_d.di_mode &= ~S_ISGID;
- if (!(mode & FALLOC_FL_PUNCH_HOLE))
+ if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -879,7 +878,7 @@ xfs_file_fallocate(
out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return -error;
+ return error;
}
@@ -912,9 +911,9 @@ xfs_dir_open(
* If there are any blocks, read-ahead block 0 as we're almost
* certain to have the next operation be a read there.
*/
- mode = xfs_ilock_map_shared(ip);
+ mode = xfs_ilock_data_map_shared(ip);
if (ip->i_d.di_nextents > 0)
- xfs_dir3_data_readahead(NULL, ip, 0, -1);
+ xfs_dir3_data_readahead(ip, 0, -1);
xfs_iunlock(ip, mode);
return 0;
}
@@ -924,7 +923,7 @@ xfs_file_release(
struct inode *inode,
struct file *filp)
{
- return -xfs_release(XFS_I(inode));
+ return xfs_release(XFS_I(inode));
}
STATIC int
@@ -953,7 +952,7 @@ xfs_file_readdir(
error = xfs_readdir(ip, ctx, bufsize);
if (error)
- return -error;
+ return error;
return 0;
}
@@ -1215,11 +1214,11 @@ xfs_seek_data(
uint lock;
int error;
- lock = xfs_ilock_map_shared(ip);
+ lock = xfs_ilock_data_map_shared(ip);
isize = i_size_read(inode);
if (start >= isize) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1241,7 +1240,7 @@ xfs_seek_data(
/* No extents at given offset, must be beyond EOF */
if (nmap == 0) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1272,7 +1271,7 @@ xfs_seek_data(
* we are reading after EOF if nothing in map[1].
*/
if (nmap == 1) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1285,7 +1284,7 @@ xfs_seek_data(
fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
start = XFS_FSB_TO_B(mp, fsbno);
if (start >= isize) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
}
@@ -1294,10 +1293,10 @@ out:
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out_unlock:
- xfs_iunlock_map_shared(ip, lock);
+ xfs_iunlock(ip, lock);
if (error)
- return -error;
+ return error;
return offset;
}
@@ -1317,13 +1316,13 @@ xfs_seek_hole(
int error;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
- lock = xfs_ilock_map_shared(ip);
+ lock = xfs_ilock_data_map_shared(ip);
isize = i_size_read(inode);
if (start >= isize) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1342,7 +1341,7 @@ xfs_seek_hole(
/* No extents at given offset, must be beyond EOF */
if (nmap == 0) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1402,10 +1401,10 @@ out:
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out_unlock:
- xfs_iunlock_map_shared(ip, lock);
+ xfs_iunlock(ip, lock);
if (error)
- return -error;
+ return error;
return offset;
}
@@ -1431,12 +1430,12 @@ xfs_file_llseek(
const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = xfs_file_aio_read,
- .aio_write = xfs_file_aio_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = xfs_file_read_iter,
+ .write_iter = xfs_file_write_iter,
.splice_read = xfs_file_splice_read,
- .splice_write = xfs_file_splice_write,
+ .splice_write = iter_file_splice_write,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
@@ -1462,6 +1461,7 @@ const struct file_operations xfs_dir_file_operations = {
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = xfs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 12b6e7701985..e92730c1d3ca 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * Copyright (c) 2014 Christoph Hellwig.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
@@ -32,100 +33,20 @@
#include "xfs_filestream.h"
#include "xfs_trace.h"
-#ifdef XFS_FILESTREAMS_TRACE
-
-ktrace_t *xfs_filestreams_trace_buf;
-
-STATIC void
-xfs_filestreams_trace(
- xfs_mount_t *mp, /* mount point */
- int type, /* type of trace */
- const char *func, /* source function */
- int line, /* source line number */
- __psunsigned_t arg0,
- __psunsigned_t arg1,
- __psunsigned_t arg2,
- __psunsigned_t arg3,
- __psunsigned_t arg4,
- __psunsigned_t arg5)
-{
- ktrace_enter(xfs_filestreams_trace_buf,
- (void *)(__psint_t)(type | (line << 16)),
- (void *)func,
- (void *)(__psunsigned_t)current_pid(),
- (void *)mp,
- (void *)(__psunsigned_t)arg0,
- (void *)(__psunsigned_t)arg1,
- (void *)(__psunsigned_t)arg2,
- (void *)(__psunsigned_t)arg3,
- (void *)(__psunsigned_t)arg4,
- (void *)(__psunsigned_t)arg5,
- NULL, NULL, NULL, NULL, NULL, NULL);
-}
-
-#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0)
-#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0)
-#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0)
-#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0)
-#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0)
-#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0)
-#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
- xfs_filestreams_trace(mp, t, __func__, __LINE__, \
- (__psunsigned_t)a0, (__psunsigned_t)a1, \
- (__psunsigned_t)a2, (__psunsigned_t)a3, \
- (__psunsigned_t)a4, (__psunsigned_t)a5)
-
-#define TRACE_AG_SCAN(mp, ag, ag2) \
- TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
-#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
- TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
-#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
- TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
- cnt, free, scan, flag)
-#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
- TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
-#define TRACE_FREE(mp, ip, pip, ag, cnt) \
- TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
-#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
- TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
-#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
- TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
-#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
- TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
-#define TRACE_ORPHAN(mp, ip, ag) \
- TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
-
-
-#else
-#define TRACE_AG_SCAN(mp, ag, ag2)
-#define TRACE_AG_PICK1(mp, max_ag, maxfree)
-#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
-#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
-#define TRACE_FREE(mp, ip, pip, ag, cnt)
-#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
-#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
-#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
-#define TRACE_ORPHAN(mp, ip, ag)
-#endif
-
-static kmem_zone_t *item_zone;
+struct xfs_fstrm_item {
+ struct xfs_mru_cache_elem mru;
+ struct xfs_inode *ip;
+ xfs_agnumber_t ag; /* AG in use for this directory */
+};
-/*
- * Structure for associating a file or a directory with an allocation group.
- * The parent directory pointer is only needed for files, but since there will
- * generally be vastly more files than directories in the cache, using the same
- * data structure simplifies the code with very little memory overhead.
- */
-typedef struct fstrm_item
-{
- xfs_agnumber_t ag; /* AG currently in use for the file/directory. */
- xfs_inode_t *ip; /* inode self-pointer. */
- xfs_inode_t *pip; /* Parent directory inode pointer. */
-} fstrm_item_t;
+enum xfs_fstrm_alloc {
+ XFS_PICK_USERDATA = 1,
+ XFS_PICK_LOWSPACE = 2,
+};
/*
* Allocation group filestream associations are tracked with per-ag atomic
- * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
+ * counters. These counters allow xfs_filestream_pick_ag() to tell whether a
* particular AG already has active filestreams associated with it. The mount
* point's m_peraglock is used to protect these counters from per-ag array
* re-allocation during a growfs operation. When xfs_growfs_data_private() is
@@ -160,7 +81,7 @@ typedef struct fstrm_item
* the cache that reference per-ag array elements that have since been
* reallocated.
*/
-static int
+int
xfs_filestream_peek_ag(
xfs_mount_t *mp,
xfs_agnumber_t agno)
@@ -200,23 +121,40 @@ xfs_filestream_put_ag(
xfs_perag_put(pag);
}
+static void
+xfs_fstrm_free_func(
+ struct xfs_mru_cache_elem *mru)
+{
+ struct xfs_fstrm_item *item =
+ container_of(mru, struct xfs_fstrm_item, mru);
+
+ xfs_filestream_put_ag(item->ip->i_mount, item->ag);
+
+ trace_xfs_filestream_free(item->ip, item->ag);
+
+ kmem_free(item);
+}
+
/*
* Scan the AGs starting at startag looking for an AG that isn't in use and has
* at least minlen blocks free.
*/
static int
-_xfs_filestream_pick_ag(
- xfs_mount_t *mp,
- xfs_agnumber_t startag,
- xfs_agnumber_t *agp,
- int flags,
- xfs_extlen_t minlen)
+xfs_filestream_pick_ag(
+ struct xfs_inode *ip,
+ xfs_agnumber_t startag,
+ xfs_agnumber_t *agp,
+ int flags,
+ xfs_extlen_t minlen)
{
- int streams, max_streams;
- int err, trylock, nscan;
- xfs_extlen_t longest, free, minfree, maxfree = 0;
- xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
- struct xfs_perag *pag;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_fstrm_item *item;
+ struct xfs_perag *pag;
+ xfs_extlen_t longest, free = 0, minfree, maxfree = 0;
+ xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
+ int err, trylock, nscan;
+
+ ASSERT(S_ISDIR(ip->i_d.di_mode));
/* 2% of an AG's blocks must be free for it to be chosen. */
minfree = mp->m_sb.sb_agblocks / 50;
@@ -228,8 +166,9 @@ _xfs_filestream_pick_ag(
trylock = XFS_ALLOC_FLAG_TRYLOCK;
for (nscan = 0; 1; nscan++) {
+ trace_xfs_filestream_scan(ip, ag);
+
pag = xfs_perag_get(mp, ag);
- TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
if (!pag->pagf_init) {
err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
@@ -246,7 +185,6 @@ _xfs_filestream_pick_ag(
/* Keep track of the AG with the most free blocks. */
if (pag->pagf_freeblks > maxfree) {
maxfree = pag->pagf_freeblks;
- max_streams = atomic_read(&pag->pagf_fstrms);
max_ag = ag;
}
@@ -269,7 +207,6 @@ _xfs_filestream_pick_ag(
/* Break out, retaining the reference on the AG. */
free = pag->pagf_freeblks;
- streams = atomic_read(&pag->pagf_fstrms);
xfs_perag_put(pag);
*agp = ag;
break;
@@ -305,317 +242,98 @@ next_ag:
*/
if (max_ag != NULLAGNUMBER) {
xfs_filestream_get_ag(mp, max_ag);
- TRACE_AG_PICK1(mp, max_ag, maxfree);
- streams = max_streams;
free = maxfree;
*agp = max_ag;
break;
}
/* take AG 0 if none matched */
- TRACE_AG_PICK1(mp, max_ag, maxfree);
+ trace_xfs_filestream_pick(ip, *agp, free, nscan);
*agp = 0;
return 0;
}
- TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
-
- return 0;
-}
+ trace_xfs_filestream_pick(ip, *agp, free, nscan);
-/*
- * Set the allocation group number for a file or a directory, updating inode
- * references and per-AG references as appropriate.
- */
-static int
-_xfs_filestream_update_ag(
- xfs_inode_t *ip,
- xfs_inode_t *pip,
- xfs_agnumber_t ag)
-{
- int err = 0;
- xfs_mount_t *mp;
- xfs_mru_cache_t *cache;
- fstrm_item_t *item;
- xfs_agnumber_t old_ag;
- xfs_inode_t *old_pip;
-
- /*
- * Either ip is a regular file and pip is a directory, or ip is a
- * directory and pip is NULL.
- */
- ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
- S_ISDIR(pip->i_d.di_mode)) ||
- (S_ISDIR(ip->i_d.di_mode) && !pip)));
-
- mp = ip->i_mount;
- cache = mp->m_filestream;
-
- item = xfs_mru_cache_lookup(cache, ip->i_ino);
- if (item) {
- ASSERT(item->ip == ip);
- old_ag = item->ag;
- item->ag = ag;
- old_pip = item->pip;
- item->pip = pip;
- xfs_mru_cache_done(cache);
-
- /*
- * If the AG has changed, drop the old ref and take a new one,
- * effectively transferring the reference from old to new AG.
- */
- if (ag != old_ag) {
- xfs_filestream_put_ag(mp, old_ag);
- xfs_filestream_get_ag(mp, ag);
- }
-
- /*
- * If ip is a file and its pip has changed, drop the old ref and
- * take a new one.
- */
- if (pip && pip != old_pip) {
- IRELE(old_pip);
- IHOLD(pip);
- }
-
- TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
- ag, xfs_filestream_peek_ag(mp, ag));
+ if (*agp == NULLAGNUMBER)
return 0;
- }
- item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
+ err = -ENOMEM;
+ item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
if (!item)
- return ENOMEM;
+ goto out_put_ag;
- item->ag = ag;
+ item->ag = *agp;
item->ip = ip;
- item->pip = pip;
- err = xfs_mru_cache_insert(cache, ip->i_ino, item);
+ err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
if (err) {
- kmem_zone_free(item_zone, item);
- return err;
+ if (err == -EEXIST)
+ err = 0;
+ goto out_free_item;
}
- /* Take a reference on the AG. */
- xfs_filestream_get_ag(mp, ag);
-
- /*
- * Take a reference on the inode itself regardless of whether it's a
- * regular file or a directory.
- */
- IHOLD(ip);
-
- /*
- * In the case of a regular file, take a reference on the parent inode
- * as well to ensure it remains in-core.
- */
- if (pip)
- IHOLD(pip);
-
- TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
- ag, xfs_filestream_peek_ag(mp, ag));
-
return 0;
-}
-
-/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
-STATIC void
-xfs_fstrm_free_func(
- unsigned long ino,
- void *data)
-{
- fstrm_item_t *item = (fstrm_item_t *)data;
- xfs_inode_t *ip = item->ip;
-
- ASSERT(ip->i_ino == ino);
-
- xfs_iflags_clear(ip, XFS_IFILESTREAM);
-
- /* Drop the reference taken on the AG when the item was added. */
- xfs_filestream_put_ag(ip->i_mount, item->ag);
-
- TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
- xfs_filestream_peek_ag(ip->i_mount, item->ag));
-
- /*
- * _xfs_filestream_update_ag() always takes a reference on the inode
- * itself, whether it's a file or a directory. Release it here.
- * This can result in the inode being freed and so we must
- * not hold any inode locks when freeing filesstreams objects
- * otherwise we can deadlock here.
- */
- IRELE(ip);
-
- /*
- * In the case of a regular file, _xfs_filestream_update_ag() also
- * takes a ref on the parent inode to keep it in-core. Release that
- * too.
- */
- if (item->pip)
- IRELE(item->pip);
-
- /* Finally, free the memory allocated for the item. */
- kmem_zone_free(item_zone, item);
-}
-
-/*
- * xfs_filestream_init() is called at xfs initialisation time to set up the
- * memory zone that will be used for filestream data structure allocation.
- */
-int
-xfs_filestream_init(void)
-{
- item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
- if (!item_zone)
- return -ENOMEM;
-
- return 0;
-}
-
-/*
- * xfs_filestream_uninit() is called at xfs termination time to destroy the
- * memory zone that was used for filestream data structure allocation.
- */
-void
-xfs_filestream_uninit(void)
-{
- kmem_zone_destroy(item_zone);
-}
-
-/*
- * xfs_filestream_mount() is called when a file system is mounted with the
- * filestream option. It is responsible for allocating the data structures
- * needed to track the new file system's file streams.
- */
-int
-xfs_filestream_mount(
- xfs_mount_t *mp)
-{
- int err;
- unsigned int lifetime, grp_count;
-
- /*
- * The filestream timer tunable is currently fixed within the range of
- * one second to four minutes, with five seconds being the default. The
- * group count is somewhat arbitrary, but it'd be nice to adhere to the
- * timer tunable to within about 10 percent. This requires at least 10
- * groups.
- */
- lifetime = xfs_fstrm_centisecs * 10;
- grp_count = 10;
-
- err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
- xfs_fstrm_free_func);
+out_free_item:
+ kmem_free(item);
+out_put_ag:
+ xfs_filestream_put_ag(mp, *agp);
return err;
}
-/*
- * xfs_filestream_unmount() is called when a file system that was mounted with
- * the filestream option is unmounted. It drains the data structures created
- * to track the file system's file streams and frees all the memory that was
- * allocated.
- */
-void
-xfs_filestream_unmount(
- xfs_mount_t *mp)
+static struct xfs_inode *
+xfs_filestream_get_parent(
+ struct xfs_inode *ip)
{
- xfs_mru_cache_destroy(mp->m_filestream);
-}
+ struct inode *inode = VFS_I(ip), *dir = NULL;
+ struct dentry *dentry, *parent;
-/*
- * Return the AG of the filestream the file or directory belongs to, or
- * NULLAGNUMBER otherwise.
- */
-xfs_agnumber_t
-xfs_filestream_lookup_ag(
- xfs_inode_t *ip)
-{
- xfs_mru_cache_t *cache;
- fstrm_item_t *item;
- xfs_agnumber_t ag;
- int ref;
-
- if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
- ASSERT(0);
- return NULLAGNUMBER;
- }
+ dentry = d_find_alias(inode);
+ if (!dentry)
+ goto out;
- cache = ip->i_mount->m_filestream;
- item = xfs_mru_cache_lookup(cache, ip->i_ino);
- if (!item) {
- TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
- return NULLAGNUMBER;
- }
+ parent = dget_parent(dentry);
+ if (!parent)
+ goto out_dput;
- ASSERT(ip == item->ip);
- ag = item->ag;
- ref = xfs_filestream_peek_ag(ip->i_mount, ag);
- xfs_mru_cache_done(cache);
+ dir = igrab(parent->d_inode);
+ dput(parent);
- TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
- return ag;
+out_dput:
+ dput(dentry);
+out:
+ return dir ? XFS_I(dir) : NULL;
}
/*
- * xfs_filestream_associate() should only be called to associate a regular file
- * with its parent directory. Calling it with a child directory isn't
- * appropriate because filestreams don't apply to entire directory hierarchies.
- * Creating a file in a child directory of an existing filestream directory
- * starts a new filestream with its own allocation group association.
+ * Find the right allocation group for a file, either by finding an
+ * existing file stream or creating a new one.
*
- * Returns < 0 on error, 0 if successful association occurred, > 0 if
- * we failed to get an association because of locking issues.
+ * Returns NULLAGNUMBER in case of an error.
*/
-int
-xfs_filestream_associate(
- xfs_inode_t *pip,
- xfs_inode_t *ip)
+xfs_agnumber_t
+xfs_filestream_lookup_ag(
+ struct xfs_inode *ip)
{
- xfs_mount_t *mp;
- xfs_mru_cache_t *cache;
- fstrm_item_t *item;
- xfs_agnumber_t ag, rotorstep, startag;
- int err = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_inode *pip = NULL;
+ xfs_agnumber_t startag, ag = NULLAGNUMBER;
+ struct xfs_mru_cache_elem *mru;
- ASSERT(S_ISDIR(pip->i_d.di_mode));
ASSERT(S_ISREG(ip->i_d.di_mode));
- if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
- return -EINVAL;
- mp = pip->i_mount;
- cache = mp->m_filestream;
+ pip = xfs_filestream_get_parent(ip);
+ if (!pip)
+ return NULLAGNUMBER; /* no parent reference was taken, so skip the IRELE(pip) at out: */
- /*
- * We have a problem, Houston.
- *
- * Taking the iolock here violates inode locking order - we already
- * hold the ilock. Hence if we block getting this lock we may never
- * wake. Unfortunately, that means if we can't get the lock, we're
- * screwed in terms of getting a stream association - we can't spin
- * waiting for the lock because someone else is waiting on the lock we
- * hold and we cannot drop that as we are in a transaction here.
- *
- * Lucky for us, this inversion is not a problem because it's a
- * directory inode that we are trying to lock here.
- *
- * So, if we can't get the iolock without sleeping then just give up
- */
- if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
- return 1;
-
- /* If the parent directory is already in the cache, use its AG. */
- item = xfs_mru_cache_lookup(cache, pip->i_ino);
- if (item) {
- ASSERT(item->ip == pip);
- ag = item->ag;
- xfs_mru_cache_done(cache);
-
- TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
- err = _xfs_filestream_update_ag(ip, pip, ag);
+ mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
+ if (mru) {
+ ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
+ xfs_mru_cache_done(mp->m_filestream);
- goto exit;
+ trace_xfs_filestream_lookup(ip, ag);
+ goto out;
}
/*
@@ -623,202 +341,94 @@ xfs_filestream_associate(
* use the directory inode's AG.
*/
if (mp->m_flags & XFS_MOUNT_32BITINODES) {
- rotorstep = xfs_rotorstep;
+ xfs_agnumber_t rotorstep = xfs_rotorstep;
startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
mp->m_agfrotor = (mp->m_agfrotor + 1) %
(mp->m_sb.sb_agcount * rotorstep);
} else
startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
- /* Pick a new AG for the parent inode starting at startag. */
- err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
- if (err || ag == NULLAGNUMBER)
- goto exit_did_pick;
-
- /* Associate the parent inode with the AG. */
- err = _xfs_filestream_update_ag(pip, NULL, ag);
- if (err)
- goto exit_did_pick;
-
- /* Associate the file inode with the AG. */
- err = _xfs_filestream_update_ag(ip, pip, ag);
- if (err)
- goto exit_did_pick;
-
- TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
-
-exit_did_pick:
- /*
- * If _xfs_filestream_pick_ag() returned a valid AG, remove the
- * reference it took on it, since the file and directory will have taken
- * their own now if they were successfully cached.
- */
- if (ag != NULLAGNUMBER)
- xfs_filestream_put_ag(mp, ag);
-
-exit:
- xfs_iunlock(pip, XFS_IOLOCK_EXCL);
- return -err;
+ if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
+ ag = NULLAGNUMBER;
+out:
+ IRELE(pip);
+ return ag;
}
/*
- * Pick a new allocation group for the current file and its file stream. This
- * function is called by xfs_bmap_filestreams() with the mount point's per-ag
- * lock held.
+ * Pick a new allocation group for the current file and its file stream.
+ *
+ * This is called when the allocator can't find a suitable extent in the
+ * current AG, and we have to move the stream into a new AG with more space.
*/
int
xfs_filestream_new_ag(
struct xfs_bmalloca *ap,
xfs_agnumber_t *agp)
{
- int flags, err;
- xfs_inode_t *ip, *pip = NULL;
- xfs_mount_t *mp;
- xfs_mru_cache_t *cache;
- xfs_extlen_t minlen;
- fstrm_item_t *dir, *file;
- xfs_agnumber_t ag = NULLAGNUMBER;
-
- ip = ap->ip;
- mp = ip->i_mount;
- cache = mp->m_filestream;
- minlen = ap->length;
- *agp = NULLAGNUMBER;
+ struct xfs_inode *ip = ap->ip, *pip;
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_extlen_t minlen = ap->length;
+ xfs_agnumber_t startag = 0;
+ int flags, err = 0;
+ struct xfs_mru_cache_elem *mru;
- /*
- * Look for the file in the cache, removing it if it's found. Doing
- * this allows it to be held across the dir lookup that follows.
- */
- file = xfs_mru_cache_remove(cache, ip->i_ino);
- if (file) {
- ASSERT(ip == file->ip);
-
- /* Save the file's parent inode and old AG number for later. */
- pip = file->pip;
- ag = file->ag;
-
- /* Look for the file's directory in the cache. */
- dir = xfs_mru_cache_lookup(cache, pip->i_ino);
- if (dir) {
- ASSERT(pip == dir->ip);
-
- /*
- * If the directory has already moved on to a new AG,
- * use that AG as the new AG for the file. Don't
- * forget to twiddle the AG refcounts to match the
- * movement.
- */
- if (dir->ag != file->ag) {
- xfs_filestream_put_ag(mp, file->ag);
- xfs_filestream_get_ag(mp, dir->ag);
- *agp = file->ag = dir->ag;
- }
-
- xfs_mru_cache_done(cache);
- }
+ *agp = NULLAGNUMBER;
- /*
- * Put the file back in the cache. If this fails, the free
- * function needs to be called to tidy up in the same way as if
- * the item had simply expired from the cache.
- */
- err = xfs_mru_cache_insert(cache, ip->i_ino, file);
- if (err) {
- xfs_fstrm_free_func(ip->i_ino, file);
- return err;
- }
+ pip = xfs_filestream_get_parent(ip);
+ if (!pip)
+ goto exit;
- /*
- * If the file's AG was moved to the directory's new AG, there's
- * nothing more to be done.
- */
- if (*agp != NULLAGNUMBER) {
- TRACE_MOVEAG(mp, ip, pip,
- ag, xfs_filestream_peek_ag(mp, ag),
- *agp, xfs_filestream_peek_ag(mp, *agp));
- return 0;
- }
+ mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
+ if (mru) {
+ struct xfs_fstrm_item *item =
+ container_of(mru, struct xfs_fstrm_item, mru);
+ startag = (item->ag + 1) % mp->m_sb.sb_agcount;
}
- /*
- * If the file's parent directory is known, take its iolock in exclusive
- * mode to prevent two sibling files from racing each other to migrate
- * themselves and their parent to different AGs.
- *
- * Note that we lock the parent directory iolock inside the child
- * iolock here. That's fine as we never hold both parent and child
- * iolock in any other place. This is different from the ilock,
- * which requires locking of the child after the parent for namespace
- * operations.
- */
- if (pip)
- xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
-
- /*
- * A new AG needs to be found for the file. If the file's parent
- * directory is also known, it will be moved to the new AG as well to
- * ensure that files created inside it in future use the new AG.
- */
- ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
(ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0);
- err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
- if (err || *agp == NULLAGNUMBER)
- goto exit;
+ err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
/*
- * If the file wasn't found in the file cache, then its parent directory
- * inode isn't known. For this to have happened, the file must either
- * be pre-existing, or it was created long enough ago that its cache
- * entry has expired. This isn't the sort of usage that the filestreams
- * allocator is trying to optimise, so there's no point trying to track
- * its new AG somehow in the filestream data structures.
+ * Only free the item here so we skip over the old AG earlier.
*/
- if (!pip) {
- TRACE_ORPHAN(mp, ip, *agp);
- goto exit;
- }
-
- /* Associate the parent inode with the AG. */
- err = _xfs_filestream_update_ag(pip, NULL, *agp);
- if (err)
- goto exit;
-
- /* Associate the file inode with the AG. */
- err = _xfs_filestream_update_ag(ip, pip, *agp);
- if (err)
- goto exit;
-
- TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
- *agp, xfs_filestream_peek_ag(mp, *agp));
+ if (mru)
+ xfs_fstrm_free_func(mru);
+ IRELE(pip);
exit:
- /*
- * If _xfs_filestream_pick_ag() returned a valid AG, remove the
- * reference it took on it, since the file and directory will have taken
- * their own now if they were successfully cached.
- */
- if (*agp != NULLAGNUMBER)
- xfs_filestream_put_ag(mp, *agp);
- else
+ if (*agp == NULLAGNUMBER)
*agp = 0;
-
- if (pip)
- xfs_iunlock(pip, XFS_IOLOCK_EXCL);
-
return err;
}
-/*
- * Remove an association between an inode and a filestream object.
- * Typically this is done on last close of an unlinked file.
- */
void
xfs_filestream_deassociate(
- xfs_inode_t *ip)
+ struct xfs_inode *ip)
{
- xfs_mru_cache_t *cache = ip->i_mount->m_filestream;
+ xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
+}
+
+int
+xfs_filestream_mount(
+ xfs_mount_t *mp)
+{
+ /*
+ * The filestream timer tunable is currently fixed within the range of
+ * one second to four minutes, with five seconds being the default. The
+ * group count is somewhat arbitrary, but it'd be nice to adhere to the
+ * timer tunable to within about 10 percent. This requires at least 10
+ * groups.
+ */
+ return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10,
+ 10, xfs_fstrm_free_func);
+}
- xfs_mru_cache_delete(cache, ip->i_ino);
+void
+xfs_filestream_unmount(
+ xfs_mount_t *mp)
+{
+ xfs_mru_cache_destroy(mp->m_filestream);
}
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 6d61dbee8564..2ef43406e53b 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -20,50 +20,20 @@
struct xfs_mount;
struct xfs_inode;
-struct xfs_perag;
struct xfs_bmalloca;
-#ifdef XFS_FILESTREAMS_TRACE
-#define XFS_FSTRM_KTRACE_INFO 1
-#define XFS_FSTRM_KTRACE_AGSCAN 2
-#define XFS_FSTRM_KTRACE_AGPICK1 3
-#define XFS_FSTRM_KTRACE_AGPICK2 4
-#define XFS_FSTRM_KTRACE_UPDATE 5
-#define XFS_FSTRM_KTRACE_FREE 6
-#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7
-#define XFS_FSTRM_KTRACE_ASSOCIATE 8
-#define XFS_FSTRM_KTRACE_MOVEAG 9
-#define XFS_FSTRM_KTRACE_ORPHAN 10
-
-#define XFS_FSTRM_KTRACE_SIZE 16384
-extern ktrace_t *xfs_filestreams_trace_buf;
-
-#endif
-
-/* allocation selection flags */
-typedef enum xfs_fstrm_alloc {
- XFS_PICK_USERDATA = 1,
- XFS_PICK_LOWSPACE = 2,
-} xfs_fstrm_alloc_t;
-
-/* prototypes for filestream.c */
-int xfs_filestream_init(void);
-void xfs_filestream_uninit(void);
int xfs_filestream_mount(struct xfs_mount *mp);
void xfs_filestream_unmount(struct xfs_mount *mp);
-xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
-int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
void xfs_filestream_deassociate(struct xfs_inode *ip);
+xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
+int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno);
-
-/* filestreams for the inode? */
static inline int
xfs_inode_is_filestream(
struct xfs_inode *ip)
{
return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
- xfs_iflags_test(ip, XFS_IFILESTREAM) ||
(ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
}
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c5fc116dfaa3..18dc721ca19f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
+#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
/*
* Minimum and maximum sizes need for growth checks.
@@ -254,8 +255,8 @@ typedef struct xfs_fsop_resblks {
((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
/* Used for sanity checks on superblock */
-#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks)
-#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \
+#define XFS_MAX_DBLOCKS(s) ((xfs_rfsblock_t)(s)->sb_agcount * (s)->sb_agblocks)
+#define XFS_MIN_DBLOCKS(s) ((xfs_rfsblock_t)((s)->sb_agcount - 1) * \
(s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
/*
@@ -374,6 +375,9 @@ struct xfs_fs_eofblocks {
#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */
#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */
#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */
+#define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm;
+ * kernel only, not included in
+ * valid mask */
#define XFS_EOF_FLAGS_VALID \
(XFS_EOF_FLAGS_SYNC | \
XFS_EOF_FLAGS_UID | \
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 02fb943cbf22..f91de1ef05e1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -24,6 +24,8 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
@@ -74,23 +76,18 @@ xfs_fs_geometry(
}
if (new_version >= 3) {
geo->version = XFS_FSOP_GEOM_VERSION;
- geo->flags =
+ geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
+ XFS_FSOP_GEOM_FLAGS_DIRV2 |
(xfs_sb_version_hasattr(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
- (xfs_sb_version_hasnlink(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_NLINK : 0) |
(xfs_sb_version_hasquota(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
(xfs_sb_version_hasalign(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
(xfs_sb_version_hasdalign(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
- (xfs_sb_version_hasshared(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_SHARED : 0) |
(xfs_sb_version_hasextflgbit(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
- (xfs_sb_version_hasdirv2(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
(xfs_sb_version_hassector(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
(xfs_sb_version_hasasciici(&mp->m_sb) ?
@@ -104,11 +101,13 @@ xfs_fs_geometry(
(xfs_sb_version_hascrc(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
(xfs_sb_version_hasftype(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_FTYPE : 0);
+ XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
+ (xfs_sb_version_hasfinobt(&mp->m_sb) ?
+ XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
mp->m_sb.sb_logsectsize : BBSIZE;
geo->rtsectsize = mp->m_sb.sb_blocksize;
- geo->dirblocksize = mp->m_dirblksize;
+ geo->dirblocksize = mp->m_dir_geo->blksize;
}
if (new_version >= 4) {
geo->flags |=
@@ -169,7 +168,7 @@ xfs_growfs_data_private(
nb = in->newblocks;
pct = in->imaxpct;
if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
return error;
dpct = pct - mp->m_sb.sb_imax_pct;
@@ -177,7 +176,7 @@ xfs_growfs_data_private(
XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
XFS_FSS_TO_BB(mp, 1), 0, NULL);
if (!bp)
- return EIO;
+ return -EIO;
if (bp->b_error) {
error = bp->b_error;
xfs_buf_relse(bp);
@@ -192,7 +191,7 @@ xfs_growfs_data_private(
nagcount--;
nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
if (nb < mp->m_sb.sb_dblocks)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
new = nb - mp->m_sb.sb_dblocks;
oagcount = mp->m_sb.sb_agcount;
@@ -230,7 +229,7 @@ xfs_growfs_data_private(
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agf_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -271,7 +270,7 @@ xfs_growfs_data_private(
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agfl_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -299,7 +298,7 @@ xfs_growfs_data_private(
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agi_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -316,6 +315,10 @@ xfs_growfs_data_private(
agi->agi_dirino = cpu_to_be32(NULLAGINO);
if (xfs_sb_version_hascrc(&mp->m_sb))
uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
+ if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
+ agi->agi_free_level = cpu_to_be32(1);
+ }
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
@@ -333,7 +336,7 @@ xfs_growfs_data_private(
&xfs_allocbt_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -362,7 +365,7 @@ xfs_growfs_data_private(
BTOBB(mp->m_sb.sb_blocksize), 0,
&xfs_allocbt_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -392,7 +395,7 @@ xfs_growfs_data_private(
BTOBB(mp->m_sb.sb_blocksize), 0,
&xfs_inobt_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -407,6 +410,34 @@ xfs_growfs_data_private(
xfs_buf_relse(bp);
if (error)
goto error0;
+
+ /*
+ * FINO btree root block
+ */
+ if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ bp = xfs_growfs_get_hdr_buf(mp,
+ XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
+ BTOBB(mp->m_sb.sb_blocksize), 0,
+ &xfs_inobt_buf_ops);
+ if (!bp) {
+ error = -ENOMEM;
+ goto error0;
+ }
+
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
+ 0, 0, agno,
+ XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
+ 0, agno, 0);
+
+ error = xfs_bwrite(bp);
+ xfs_buf_relse(bp);
+ if (error)
+ goto error0;
+ }
+
}
xfs_trans_agblocks_delta(tp, nfree);
/*
@@ -500,7 +531,7 @@ xfs_growfs_data_private(
bp->b_ops = &xfs_sb_buf_ops;
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
} else
- error = ENOMEM;
+ error = -ENOMEM;
}
/*
@@ -545,17 +576,17 @@ xfs_growfs_log_private(
nb = in->newblocks;
if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if (nb == mp->m_sb.sb_logblocks &&
in->isint == (mp->m_sb.sb_logstart != 0))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Moving the log is hard, need new interfaces to sync
* the log first, hold off all activity while moving it.
* Can have shorter or longer log in the same space,
* or transform internal to external log or vice versa.
*/
- return XFS_ERROR(ENOSYS);
+ return -ENOSYS;
}
/*
@@ -573,9 +604,9 @@ xfs_growfs_data(
int error;
if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
+ return -EPERM;
if (!mutex_trylock(&mp->m_growlock))
- return XFS_ERROR(EWOULDBLOCK);
+ return -EWOULDBLOCK;
error = xfs_growfs_data_private(mp, in);
mutex_unlock(&mp->m_growlock);
return error;
@@ -589,9 +620,9 @@ xfs_growfs_log(
int error;
if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
+ return -EPERM;
if (!mutex_trylock(&mp->m_growlock))
- return XFS_ERROR(EWOULDBLOCK);
+ return -EWOULDBLOCK;
error = xfs_growfs_log_private(mp, in);
mutex_unlock(&mp->m_growlock);
return error;
@@ -643,7 +674,7 @@ xfs_reserve_blocks(
/* If inval is null, report current values and return */
if (inval == (__uint64_t *)NULL) {
if (!outval)
- return EINVAL;
+ return -EINVAL;
outval->resblks = mp->m_resblks;
outval->resblks_avail = mp->m_resblks_avail;
return 0;
@@ -726,7 +757,7 @@ out:
int error;
error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
fdblks_delta, 0);
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto retry;
}
return 0;
@@ -787,7 +818,7 @@ xfs_fs_goingdown(
SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
break;
default:
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
return 0;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 98d35244eecc..981b2cf51985 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,9 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -158,7 +161,7 @@ xfs_iget_cache_hit(
if (ip->i_ino != ino) {
trace_xfs_iget_skip(ip);
XFS_STATS_INC(xs_ig_frecycle);
- error = EAGAIN;
+ error = -EAGAIN;
goto out_error;
}
@@ -176,7 +179,7 @@ xfs_iget_cache_hit(
if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
trace_xfs_iget_skip(ip);
XFS_STATS_INC(xs_ig_frecycle);
- error = EAGAIN;
+ error = -EAGAIN;
goto out_error;
}
@@ -184,7 +187,7 @@ xfs_iget_cache_hit(
* If lookup is racing with unlink return an error immediately.
*/
if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
- error = ENOENT;
+ error = -ENOENT;
goto out_error;
}
@@ -206,7 +209,7 @@ xfs_iget_cache_hit(
spin_unlock(&ip->i_flags_lock);
rcu_read_unlock();
- error = -inode_init_always(mp->m_super, inode);
+ error = inode_init_always(mp->m_super, inode);
if (error) {
/*
* Re-initializing the inode failed, and we are in deep
@@ -243,7 +246,7 @@ xfs_iget_cache_hit(
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
trace_xfs_iget_skip(ip);
- error = EAGAIN;
+ error = -EAGAIN;
goto out_error;
}
@@ -285,7 +288,7 @@ xfs_iget_cache_miss(
ip = xfs_inode_alloc(mp, ino);
if (!ip)
- return ENOMEM;
+ return -ENOMEM;
error = xfs_iread(mp, tp, ip, flags);
if (error)
@@ -294,7 +297,7 @@ xfs_iget_cache_miss(
trace_xfs_iget_miss(ip);
if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
- error = ENOENT;
+ error = -ENOENT;
goto out_destroy;
}
@@ -305,7 +308,7 @@ xfs_iget_cache_miss(
* recurse into the file system.
*/
if (radix_tree_preload(GFP_NOFS)) {
- error = EAGAIN;
+ error = -EAGAIN;
goto out_destroy;
}
@@ -341,7 +344,7 @@ xfs_iget_cache_miss(
if (unlikely(error)) {
WARN_ON(error != -EEXIST);
XFS_STATS_INC(xs_ig_dup);
- error = EAGAIN;
+ error = -EAGAIN;
goto out_preload_end;
}
spin_unlock(&pag->pag_ici_lock);
@@ -408,7 +411,7 @@ xfs_iget(
/* reject inode numbers outside existing AGs */
if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
- return EINVAL;
+ return -EINVAL;
/* get the perag structure and ensure that it's inode capable */
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
@@ -445,7 +448,7 @@ again:
return 0;
out_error_or_again:
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
delay(1);
goto again;
}
@@ -489,26 +492,25 @@ xfs_inode_ag_walk_grab(
/* nothing to sync during shutdown */
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
/* If we can't grab the inode, it must on it's way to reclaim. */
if (!igrab(inode))
- return ENOENT;
+ return -ENOENT;
/* inode is valid */
return 0;
out_unlock_noent:
spin_unlock(&ip->i_flags_lock);
- return ENOENT;
+ return -ENOENT;
}
STATIC int
xfs_inode_ag_walk(
struct xfs_mount *mp,
struct xfs_perag *pag,
- int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags,
+ int (*execute)(struct xfs_inode *ip, int flags,
void *args),
int flags,
void *args,
@@ -582,18 +584,18 @@ restart:
for (i = 0; i < nr_found; i++) {
if (!batch[i])
continue;
- error = execute(batch[i], pag, flags, args);
+ error = execute(batch[i], flags, args);
IRELE(batch[i]);
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
skipped++;
continue;
}
- if (error && last_error != EFSCORRUPTED)
+ if (error && last_error != -EFSCORRUPTED)
last_error = error;
}
/* bail out if the filesystem is corrupted. */
- if (error == EFSCORRUPTED)
+ if (error == -EFSCORRUPTED)
break;
cond_resched();
@@ -636,8 +638,7 @@ xfs_eofblocks_worker(
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags,
+ int (*execute)(struct xfs_inode *ip, int flags,
void *args),
int flags,
void *args)
@@ -654,18 +655,17 @@ xfs_inode_ag_iterator(
xfs_perag_put(pag);
if (error) {
last_error = error;
- if (error == EFSCORRUPTED)
+ if (error == -EFSCORRUPTED)
break;
}
}
- return XFS_ERROR(last_error);
+ return last_error;
}
int
xfs_inode_ag_iterator_tag(
struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags,
+ int (*execute)(struct xfs_inode *ip, int flags,
void *args),
int flags,
void *args,
@@ -683,11 +683,11 @@ xfs_inode_ag_iterator_tag(
xfs_perag_put(pag);
if (error) {
last_error = error;
- if (error == EFSCORRUPTED)
+ if (error == -EFSCORRUPTED)
break;
}
}
- return XFS_ERROR(last_error);
+ return last_error;
}
/*
@@ -947,7 +947,7 @@ restart:
* see the stale flag set on the inode.
*/
error = xfs_iflush(ip, &bp);
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/* backoff longer than in xfs_ifree_cluster */
delay(2);
@@ -1000,7 +1000,7 @@ out:
xfs_iflags_clear(ip, XFS_IRECLAIM);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
- * We could return EAGAIN here to make reclaim rescan the inode tree in
+ * We could return -EAGAIN here to make reclaim rescan the inode tree in
* a short while. However, this just burns CPU time scanning the tree
* waiting for IO to complete and the reclaim work never goes back to
* the idle state. Instead, return 0 to let the next scheduled
@@ -1103,7 +1103,7 @@ restart:
if (!batch[i])
continue;
error = xfs_reclaim_inode(batch[i], pag, flags);
- if (error && last_error != EFSCORRUPTED)
+ if (error && last_error != -EFSCORRUPTED)
last_error = error;
}
@@ -1132,7 +1132,7 @@ restart:
trylock = 0;
goto restart;
}
- return XFS_ERROR(last_error);
+ return last_error;
}
int
@@ -1206,15 +1206,42 @@ xfs_inode_match_id(
return 1;
}
+/*
+ * A union-based inode filtering algorithm. Process the inode if any of the
+ * criteria match. This is for global/internal scans only.
+ */
+STATIC int
+xfs_inode_match_id_union(
+ struct xfs_inode *ip,
+ struct xfs_eofblocks *eofb)
+{
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
+ uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
+ return 1;
+
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
+ gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
+ return 1;
+
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
+ xfs_get_projid(ip) == eofb->eof_prid)
+ return 1;
+
+ return 0;
+}
+
STATIC int
xfs_inode_free_eofblocks(
struct xfs_inode *ip,
- struct xfs_perag *pag,
int flags,
void *args)
{
int ret;
struct xfs_eofblocks *eofb = args;
+ bool need_iolock = true;
+ int match;
+
+ ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
if (!xfs_can_free_eofblocks(ip, false)) {
/* inode could be preallocated or append-only */
@@ -1232,19 +1259,31 @@ xfs_inode_free_eofblocks(
return 0;
if (eofb) {
- if (!xfs_inode_match_id(ip, eofb))
+ if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
+ match = xfs_inode_match_id_union(ip, eofb);
+ else
+ match = xfs_inode_match_id(ip, eofb);
+ if (!match)
return 0;
/* skip the inode if the file size is too small */
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
+
+ /*
+ * A scan owner implies we already hold the iolock. Skip it in
+ * xfs_free_eofblocks() to avoid deadlock. This also eliminates
+ * the possibility of -EAGAIN being returned.
+ */
+ if (eofb->eof_scan_owner == ip->i_ino)
+ need_iolock = false;
}
- ret = xfs_free_eofblocks(ip->i_mount, ip, true);
+ ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
/* don't revisit the inode if we're not waiting */
- if (ret == EAGAIN && !(flags & SYNC_WAIT))
+ if (ret == -EAGAIN && !(flags & SYNC_WAIT))
ret = 0;
return ret;
@@ -1264,6 +1303,55 @@ xfs_icache_free_eofblocks(
eofb, XFS_ICI_EOFBLOCKS_TAG);
}
+/*
+ * Run eofblocks scans on the quotas applicable to the inode. For inodes with
+ * multiple quotas, we don't know exactly which quota caused an allocation
+ * failure. We make a best effort by including each quota under low free space
+ * conditions (less than 1% free space) in the scan.
+ */
+int
+xfs_inode_free_quota_eofblocks(
+ struct xfs_inode *ip)
+{
+ int scan = 0;
+ struct xfs_eofblocks eofb = {0};
+ struct xfs_dquot *dq;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+ /*
+ * Set the scan owner to avoid a potential livelock. Otherwise, the scan
+ * can repeatedly trylock on the inode we're currently processing. We
+ * run a sync scan to increase effectiveness and use the union filter to
+ * cover all applicable quotas in a single scan.
+ */
+ eofb.eof_scan_owner = ip->i_ino;
+ eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
+
+ if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
+ dq = xfs_inode_dquot(ip, XFS_DQ_USER);
+ if (dq && xfs_dquot_lowsp(dq)) {
+ eofb.eof_uid = VFS_I(ip)->i_uid;
+ eofb.eof_flags |= XFS_EOF_FLAGS_UID;
+ scan = 1;
+ }
+ }
+
+ if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
+ dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
+ if (dq && xfs_dquot_lowsp(dq)) {
+ eofb.eof_gid = VFS_I(ip)->i_gid;
+ eofb.eof_flags |= XFS_EOF_FLAGS_GID;
+ scan = 1;
+ }
+ }
+
+ if (scan)
+ xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+
+ return scan;
+}
+
void
xfs_inode_set_eofblocks_tag(
xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9ed68bb750f5..46748b86b12f 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,6 +27,7 @@ struct xfs_eofblocks {
kgid_t eof_gid;
prid_t eof_prid;
__u64 eof_min_file_size;
+ xfs_ino_t eof_scan_owner;
};
#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
@@ -57,15 +58,14 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
+int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip);
void xfs_eofblocks_worker(struct work_struct *);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
- int flags, void *args),
+ int (*execute)(struct xfs_inode *ip, int flags, void *args),
int flags, void *args);
int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
- int flags, void *args),
+ int (*execute)(struct xfs_inode *ip, int flags, void *args),
int flags, void *args, int tag);
static inline int
@@ -74,31 +74,32 @@ xfs_fs_eofblocks_from_user(
struct xfs_eofblocks *dst)
{
if (src->eof_version != XFS_EOFBLOCKS_VERSION)
- return EINVAL;
+ return -EINVAL;
if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
- return EINVAL;
+ return -EINVAL;
if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) ||
memchr_inv(src->pad64, 0, sizeof(src->pad64)))
- return EINVAL;
+ return -EINVAL;
dst->eof_flags = src->eof_flags;
dst->eof_prid = src->eof_prid;
dst->eof_min_file_size = src->eof_min_file_size;
+ dst->eof_scan_owner = NULLFSINO;
dst->eof_uid = INVALID_UID;
if (src->eof_flags & XFS_EOF_FLAGS_UID) {
dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid);
if (!uid_valid(dst->eof_uid))
- return EINVAL;
+ return -EINVAL;
}
dst->eof_gid = INVALID_GID;
if (src->eof_flags & XFS_EOF_FLAGS_GID) {
dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid);
if (!gid_valid(dst->eof_gid))
- return EINVAL;
+ return -EINVAL;
}
return 0;
}
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index d2eaccfa73f4..7e4549233251 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -28,6 +28,7 @@
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_icreate_item.h"
+#include "xfs_log.h"
kmem_zone_t *xfs_icreate_zone; /* inode create item zone */
@@ -58,13 +59,14 @@ xfs_icreate_item_size(
STATIC void
xfs_icreate_item_format(
struct xfs_log_item *lip,
- struct xfs_log_iovec *log_vector)
+ struct xfs_log_vec *lv)
{
struct xfs_icreate_item *icp = ICR_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
- log_vector->i_addr = (xfs_caddr_t)&icp->ic_format;
- log_vector->i_len = sizeof(struct xfs_icreate_log);
- log_vector->i_type = XLOG_REG_TYPE_ICREATE;
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE,
+ &icp->ic_format,
+ sizeof(struct xfs_icreate_log));
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 001aa893ed59..fea3c92fb3f0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,7 +42,6 @@
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_quota.h"
-#include "xfs_dinode.h"
#include "xfs_filestream.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
@@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone;
STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
+STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
+
/*
* helper function to extract extent size hint from inode
*/
@@ -77,48 +78,44 @@ xfs_get_extsz_hint(
}
/*
- * This is a wrapper routine around the xfs_ilock() routine used to centralize
- * some grungy code. It is used in places that wish to lock the inode solely
- * for reading the extents. The reason these places can't just call
- * xfs_ilock(SHARED) is that the inode lock also guards to bringing in of the
- * extents from disk for a file in b-tree format. If the inode is in b-tree
- * format, then we need to lock the inode exclusively until the extents are read
- * in. Locking it exclusively all the time would limit our parallelism
- * unnecessarily, though. What we do instead is check to see if the extents
- * have been read in yet, and only lock the inode exclusively if they have not.
+ * These two are wrapper routines around the xfs_ilock() routine used to
+ * centralize some grungy code. They are used in places that wish to lock the
+ * inode solely for reading the extents. The reason these places can't just
+ * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to
+ * bringing in of the extents from disk for a file in b-tree format. If the
+ * inode is in b-tree format, then we need to lock the inode exclusively until
+ * the extents are read in. Locking it exclusively all the time would limit
+ * our parallelism unnecessarily, though. What we do instead is check to see
+ * if the extents have been read in yet, and only lock the inode exclusively
+ * if they have not.
*
- * The function returns a value which should be given to the corresponding
- * xfs_iunlock_map_shared(). This value is the mode in which the lock was
- * actually taken.
+ * The functions return a value which should be given to the corresponding
+ * xfs_iunlock() call.
*/
uint
-xfs_ilock_map_shared(
- xfs_inode_t *ip)
+xfs_ilock_data_map_shared(
+ struct xfs_inode *ip)
{
- uint lock_mode;
+ uint lock_mode = XFS_ILOCK_SHARED;
- if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
- ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
+ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ (ip->i_df.if_flags & XFS_IFEXTENTS) == 0)
lock_mode = XFS_ILOCK_EXCL;
- } else {
- lock_mode = XFS_ILOCK_SHARED;
- }
-
xfs_ilock(ip, lock_mode);
-
return lock_mode;
}
-/*
- * This is simply the unlock routine to go with xfs_ilock_map_shared().
- * All it does is call xfs_iunlock() with the given lock_mode.
- */
-void
-xfs_iunlock_map_shared(
- xfs_inode_t *ip,
- unsigned int lock_mode)
+uint
+xfs_ilock_attr_map_shared(
+ struct xfs_inode *ip)
{
- xfs_iunlock(ip, lock_mode);
+ uint lock_mode = XFS_ILOCK_SHARED;
+
+ if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE &&
+ (ip->i_afp->if_flags & XFS_IFEXTENTS) == 0)
+ lock_mode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lock_mode);
+ return lock_mode;
}
/*
@@ -586,11 +583,11 @@ xfs_lookup(
trace_xfs_lookup(dp, name);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return XFS_ERROR(EIO);
+ return -EIO;
- lock_mode = xfs_ilock_map_shared(dp);
+ lock_mode = xfs_ilock_data_map_shared(dp);
error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
- xfs_iunlock_map_shared(dp, lock_mode);
+ xfs_iunlock(dp, lock_mode);
if (error)
goto out;
@@ -658,7 +655,6 @@ xfs_ialloc(
uint flags;
int error;
timespec_t tv;
- int filestreams = 0;
/*
* Call the space management code to pick
@@ -685,6 +681,14 @@ xfs_ialloc(
return error;
ASSERT(ip != NULL);
+ /*
+ * We always convert v1 inodes to v2 now - we only support filesystems
+ * with >= v2 inode capability, so there is no reason for ever leaving
+ * an inode in v1 format.
+ */
+ if (ip->i_d.di_version == 1)
+ ip->i_d.di_version = 2;
+
ip->i_d.di_mode = mode;
ip->i_d.di_onlink = 0;
ip->i_d.di_nlink = nlink;
@@ -694,27 +698,6 @@ xfs_ialloc(
xfs_set_projid(ip, prid);
memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
- /*
- * If the superblock version is up to where we support new format
- * inodes and this is currently an old format inode, then change
- * the inode version number now. This way we only do the conversion
- * here rather than here and in the flush/logging code.
- */
- if (xfs_sb_version_hasnlink(&mp->m_sb) &&
- ip->i_d.di_version == 1) {
- ip->i_d.di_version = 2;
- /*
- * We've already zeroed the old link count, the projid field,
- * and the pad field.
- */
- }
-
- /*
- * Project ids won't be stored on disk if we are using a version 1 inode.
- */
- if ((prid != 0) && (ip->i_d.di_version == 1))
- xfs_bump_ino_vers2(tp, ip);
-
if (pip && XFS_INHERIT_GID(pip)) {
ip->i_d.di_gid = pip->i_d.di_gid;
if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
@@ -775,13 +758,6 @@ xfs_ialloc(
flags |= XFS_ILOG_DEV;
break;
case S_IFREG:
- /*
- * we can't set up filestreams until after the VFS inode
- * is set up properly.
- */
- if (pip && xfs_inode_is_filestream(pip))
- filestreams = 1;
- /* fall through */
case S_IFDIR:
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
uint di_flags = 0;
@@ -847,15 +823,6 @@ xfs_ialloc(
/* now that we have an i_mode we can setup inode ops and unlock */
xfs_setup_inode(ip);
- /* now we have set up the vfs inode we can associate the filestream */
- if (filestreams) {
- error = xfs_filestream_associate(pip, ip);
- if (error < 0)
- return -error;
- if (!error)
- xfs_iflags_set(ip, XFS_IFILESTREAM);
- }
-
*ipp = ip;
return 0;
}
@@ -926,7 +893,7 @@ xfs_dir_ialloc(
}
if (!ialloc_context && !ip) {
*ipp = NULL;
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
/*
@@ -1076,40 +1043,6 @@ xfs_droplink(
}
/*
- * This gets called when the inode's version needs to be changed from 1 to 2.
- * Currently this happens when the nlink field overflows the old 16-bit value
- * or when chproj is called to change the project for the first time.
- * As a side effect the superblock version will also get rev'd
- * to contain the NLINK bit.
- */
-void
-xfs_bump_ino_vers2(
- xfs_trans_t *tp,
- xfs_inode_t *ip)
-{
- xfs_mount_t *mp;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_d.di_version == 1);
-
- ip->i_d.di_version = 2;
- ip->i_d.di_onlink = 0;
- memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
- mp = tp->t_mountp;
- if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
- spin_lock(&mp->m_sb_lock);
- if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
- xfs_sb_version_addnlink(&mp->m_sb);
- spin_unlock(&mp->m_sb_lock);
- xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
- } else {
- spin_unlock(&mp->m_sb_lock);
- }
- }
- /* Caller must log the inode */
-}
-
-/*
* Increment the link count on an inode & log the change.
*/
int
@@ -1119,22 +1052,10 @@ xfs_bumplink(
{
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- ASSERT(ip->i_d.di_nlink > 0);
+ ASSERT(ip->i_d.di_version > 1);
+ ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
ip->i_d.di_nlink++;
inc_nlink(VFS_I(ip));
- if ((ip->i_d.di_version == 1) &&
- (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
- /*
- * The inode has increased its number of links beyond
- * what can fit in an old format inode. It now needs
- * to be converted to a version 2 inode with a 32 bit
- * link count. If this is the first inode in the file
- * system to do this, then we need to bump the superblock
- * version number as well.
- */
- xfs_bump_ino_vers2(tp, ip);
- }
-
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
return 0;
}
@@ -1167,12 +1088,9 @@ xfs_create(
trace_xfs_create(dp, name);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
- if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
- prid = xfs_get_projid(dp);
- else
- prid = XFS_PROJID_DEFAULT;
+ prid = xfs_get_initial_prid(dp);
/*
* Make sure that we have allocated dquot(s) on disk.
@@ -1207,12 +1125,12 @@ xfs_create(
*/
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
error = xfs_trans_reserve(tp, &tres, resblks, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
/* flush outstanding delalloc blocks and retry */
xfs_flush_inodes(mp);
error = xfs_trans_reserve(tp, &tres, resblks, 0);
}
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
/* No space at all so try a "no-allocation" reservation */
resblks = 0;
error = xfs_trans_reserve(tp, &tres, 0, 0);
@@ -1247,7 +1165,7 @@ xfs_create(
error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
prid, resblks > 0, &ip, &committed);
if (error) {
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto out_trans_cancel;
goto out_trans_abort;
}
@@ -1266,7 +1184,7 @@ xfs_create(
&first_block, &free_list, resblks ?
resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
if (error) {
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
goto out_trans_abort;
}
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -1337,6 +1255,114 @@ xfs_create(
}
int
+xfs_create_tmpfile(
+ struct xfs_inode *dp,
+ struct dentry *dentry,
+ umode_t mode,
+ struct xfs_inode **ipp)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_inode *ip = NULL;
+ struct xfs_trans *tp = NULL;
+ int error;
+ uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+ prid_t prid;
+ struct xfs_dquot *udqp = NULL;
+ struct xfs_dquot *gdqp = NULL;
+ struct xfs_dquot *pdqp = NULL;
+ struct xfs_trans_res *tres;
+ uint resblks;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ prid = xfs_get_initial_prid(dp);
+
+ /*
+ * Make sure that we have allocated dquot(s) on disk.
+ */
+ error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
+ xfs_kgid_to_gid(current_fsgid()), prid,
+ XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+ &udqp, &gdqp, &pdqp);
+ if (error)
+ return error;
+
+ resblks = XFS_IALLOC_SPACE_RES(mp);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
+
+ tres = &M_RES(mp)->tr_create_tmpfile;
+ error = xfs_trans_reserve(tp, tres, resblks, 0);
+ if (error == -ENOSPC) {
+ /* No space at all so try a "no-allocation" reservation */
+ resblks = 0;
+ error = xfs_trans_reserve(tp, tres, 0, 0);
+ }
+ if (error) {
+ cancel_flags = 0;
+ goto out_trans_cancel;
+ }
+
+ error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
+ pdqp, resblks, 1, 0);
+ if (error)
+ goto out_trans_cancel;
+
+ error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
+ prid, resblks > 0, &ip, NULL);
+ if (error) {
+ if (error == -ENOSPC)
+ goto out_trans_cancel;
+ goto out_trans_abort;
+ }
+
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+
+ /*
+ * Attach the dquot(s) to the inodes and modify them incore.
+ * These ids of the inode couldn't have changed since the new
+ * inode has been locked ever since it was created.
+ */
+ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+
+ ip->i_d.di_nlink--;
+ error = xfs_iunlink(tp, ip);
+ if (error)
+ goto out_trans_abort;
+
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ if (error)
+ goto out_release_inode;
+
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ xfs_qm_dqrele(pdqp);
+
+ *ipp = ip;
+ return 0;
+
+ out_trans_abort:
+ cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
+ xfs_trans_cancel(tp, cancel_flags);
+ out_release_inode:
+ /*
+ * Wait until after the current transaction is aborted to
+ * release the inode. This prevents recursive transactions
+ * and deadlocks from xfs_inactive.
+ */
+ if (ip)
+ IRELE(ip);
+
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ xfs_qm_dqrele(pdqp);
+
+ return error;
+}
+
+int
xfs_link(
xfs_inode_t *tdp,
xfs_inode_t *sip,
@@ -1356,7 +1382,7 @@ xfs_link(
ASSERT(!S_ISDIR(sip->i_d.di_mode));
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_qm_dqattach(sip, 0);
if (error)
@@ -1370,7 +1396,7 @@ xfs_link(
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
}
@@ -1391,7 +1417,7 @@ xfs_link(
*/
if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
(xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
- error = XFS_ERROR(EXDEV);
+ error = -EXDEV;
goto error_return;
}
@@ -1401,6 +1427,12 @@ xfs_link(
xfs_bmap_init(&free_list, &first_block);
+ if (sip->i_d.di_nlink == 0) {
+ error = xfs_iunlink_remove(tp, sip);
+ if (error)
+ goto abort_return;
+ }
+
error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
&first_block, &free_list, resblks);
if (error)
@@ -1591,16 +1623,6 @@ xfs_release(
int truncated;
/*
- * If we are using filestreams, and we have an unlinked
- * file that we are processing the last close on, then nothing
- * will be able to reopen and write to this file. Purge this
- * inode from the filestreams cache so that it doesn't delay
- * teardown of the inode.
- */
- if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
- xfs_filestream_deassociate(ip);
-
- /*
* If we previously truncated this file and removed old data
* in the process, we want to initiate "early" writeout on
* the last close. This is an attempt to combat the notorious
@@ -1613,8 +1635,8 @@ xfs_release(
truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
if (truncated) {
xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
- if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
- error = -filemap_flush(VFS_I(ip)->i_mapping);
+ if (ip->i_delayed_blks > 0) {
+ error = filemap_flush(VFS_I(ip)->i_mapping);
if (error)
return error;
}
@@ -1651,7 +1673,7 @@ xfs_release(
return 0;
error = xfs_free_eofblocks(mp, ip, true);
- if (error && error != EAGAIN)
+ if (error && error != -EAGAIN)
return error;
/* delalloc blocks after truncation means it really is dirty */
@@ -1730,9 +1752,33 @@ xfs_inactive_ifree(
int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
+
+ /*
+ * The ifree transaction might need to allocate blocks for record
+ * insertion to the finobt. We don't want to fail here at ENOSPC, so
+ * allow ifree to dip into the reserved block pool if necessary.
+ *
+ * Freeing large sets of inodes generally means freeing inode chunks,
+ * directory and file data blocks, so this should be relatively safe.
+ * Only under severe circumstances should it be possible to free enough
+ * inodes to exhaust the reserve block pool via finobt expansion while
+ * at the same time not creating free space in the filesystem.
+ *
+ * Send a warning if the reservation does happen to fail, as the inode
+ * now remains allocated and sits on the unlinked list until the fs is
+ * repaired.
+ */
+ tp->t_flags |= XFS_TRANS_RESERVE;
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
+ XFS_IFREE_SPACE_RES(mp), 0);
if (error) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ if (error == -ENOSPC) {
+ xfs_warn_ratelimited(mp,
+ "Failed to remove inode(s) from unlinked list. "
+ "Please free space, unmount and run xfs_repair.");
+ } else {
+ ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ }
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
return error;
}
@@ -2141,8 +2187,8 @@ xfs_ifree_cluster(
{
xfs_mount_t *mp = free_ip->i_mount;
int blks_per_cluster;
+ int inodes_per_cluster;
int nbufs;
- int ninodes;
int i, j;
xfs_daddr_t blkno;
xfs_buf_t *bp;
@@ -2152,18 +2198,11 @@ xfs_ifree_cluster(
struct xfs_perag *pag;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
- if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
- blks_per_cluster = 1;
- ninodes = mp->m_sb.sb_inopblock;
- nbufs = XFS_IALLOC_BLOCKS(mp);
- } else {
- blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
- mp->m_sb.sb_blocksize;
- ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
- nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
- }
+ blks_per_cluster = xfs_icluster_size_fsb(mp);
+ inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
+ nbufs = mp->m_ialloc_blks / blks_per_cluster;
- for (j = 0; j < nbufs; j++, inum += ninodes) {
+ for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
@@ -2180,7 +2219,7 @@ xfs_ifree_cluster(
XBF_UNMAPPED);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
/*
* This buffer may not have been correctly initialised as we
@@ -2225,7 +2264,7 @@ xfs_ifree_cluster(
* transaction stale above, which means there is no point in
* even trying to lock them.
*/
- for (i = 0; i < ninodes; i++) {
+ for (i = 0; i < inodes_per_cluster; i++) {
retry:
rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root,
@@ -2452,7 +2491,7 @@ xfs_remove(
trace_xfs_remove(dp, name);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_qm_dqattach(dp, 0);
if (error)
@@ -2482,12 +2521,12 @@ xfs_remove(
*/
resblks = XFS_REMOVE_SPACE_RES(mp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
}
if (error) {
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
cancel_flags = 0;
goto out_trans_cancel;
}
@@ -2504,11 +2543,11 @@ xfs_remove(
if (is_dir) {
ASSERT(ip->i_d.di_nlink >= 2);
if (ip->i_d.di_nlink != 2) {
- error = XFS_ERROR(ENOTEMPTY);
+ error = -ENOTEMPTY;
goto out_trans_cancel;
}
if (!xfs_dir_isempty(ip)) {
- error = XFS_ERROR(ENOTEMPTY);
+ error = -ENOTEMPTY;
goto out_trans_cancel;
}
@@ -2543,7 +2582,7 @@ xfs_remove(
error = xfs_dir_removename(tp, dp, name, ip->i_ino,
&first_block, &free_list, resblks);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
goto out_bmap_cancel;
}
@@ -2563,13 +2602,7 @@ xfs_remove(
if (error)
goto std_return;
- /*
- * If we are using filestreams, kill the stream association.
- * If the file is still open it may get a new one but that
- * will get killed on last close in xfs_close() so we don't
- * have to worry about that.
- */
- if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
+ if (is_dir && xfs_inode_is_filestream(ip))
xfs_filestream_deassociate(ip);
return 0;
@@ -2669,7 +2702,7 @@ xfs_rename(
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
spaceres = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
}
@@ -2714,7 +2747,7 @@ xfs_rename(
*/
if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
(xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
- error = XFS_ERROR(EXDEV);
+ error = -EXDEV;
goto error_return;
}
@@ -2737,7 +2770,7 @@ xfs_rename(
error = xfs_dir_createname(tp, target_dp, target_name,
src_ip->i_ino, &first_block,
&free_list, spaceres);
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto error_return;
if (error)
goto abort_return;
@@ -2762,7 +2795,7 @@ xfs_rename(
*/
if (!(xfs_dir_isempty(target_ip)) ||
(target_ip->i_d.di_nlink > 2)) {
- error = XFS_ERROR(EEXIST);
+ error = -EEXIST;
goto error_return;
}
}
@@ -2814,7 +2847,7 @@ xfs_rename(
error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
target_dp->i_ino,
&first_block, &free_list, spaceres);
- ASSERT(error != EEXIST);
+ ASSERT(error != -EEXIST);
if (error)
goto abort_return;
}
@@ -2906,13 +2939,13 @@ xfs_iflush_cluster(
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
+ inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
if (!ilist)
goto out_put;
- mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+ mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
rcu_read_lock();
/* really need a gang lookup range call here */
@@ -3022,7 +3055,7 @@ cluster_corrupt_out:
if (bp->b_iodone) {
XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp);
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
xfs_buf_ioend(bp, 0);
} else {
xfs_buf_stale(bp);
@@ -3036,7 +3069,7 @@ cluster_corrupt_out:
xfs_iflush_abort(iq, false);
kmem_free(ilist);
xfs_perag_put(pag);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
/*
@@ -3091,7 +3124,7 @@ xfs_iflush(
* as we wait for an empty AIL as part of the unmount process.
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto abort_out;
}
@@ -3134,7 +3167,7 @@ corrupt_out:
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out:
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
abort_out:
/*
* Unlocks the flush lock
@@ -3157,6 +3190,7 @@ xfs_iflush_int(
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
ASSERT(iip != NULL && iip->ili_fields != 0);
+ ASSERT(ip->i_d.di_version > 1);
/* set *dip = inode's place in the buffer */
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -3217,7 +3251,7 @@ xfs_iflush_int(
}
/*
- * Inode item log recovery for v1/v2 inodes are dependent on the
+ * Inode item log recovery for v2 inodes are dependent on the
* di_flushiter count for correct sequencing. We bump the flush
* iteration count so we can detect flushes which postdate a log record
* during recovery. This is redundant as we now log every change and
@@ -3240,40 +3274,9 @@ xfs_iflush_int(
if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
ip->i_d.di_flushiter = 0;
- /*
- * If this is really an old format inode and the superblock version
- * has not been updated to support only new format inodes, then
- * convert back to the old inode format. If the superblock version
- * has been updated, then make the conversion permanent.
- */
- ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
- if (ip->i_d.di_version == 1) {
- if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
- /*
- * Convert it back.
- */
- ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
- dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
- } else {
- /*
- * The superblock version has already been bumped,
- * so just make the conversion to the new inode
- * format permanent.
- */
- ip->i_d.di_version = 2;
- dip->di_version = 2;
- ip->i_d.di_onlink = 0;
- dip->di_onlink = 0;
- memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
- memset(&(dip->di_pad[0]), 0,
- sizeof(dip->di_pad));
- ASSERT(xfs_get_projid(ip) == 0);
- }
- }
-
- xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
+ xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
if (XFS_IFORK_Q(ip))
- xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
+ xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
xfs_inobp_check(mp, bp);
/*
@@ -3328,5 +3331,5 @@ xfs_iflush_int(
return 0;
corrupt_out:
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9e6efccbae04..c10e3fadd9af 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -20,6 +20,7 @@
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
+#include "xfs_dinode.h"
/*
* Kernel only inode definitions
@@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip,
ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
}
+static inline prid_t
+xfs_get_initial_prid(struct xfs_inode *dp)
+{
+ if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+ return xfs_get_projid(dp);
+
+ return XFS_PROJID_DEFAULT;
+}
+
/*
* In-core inode flags.
*/
@@ -199,7 +209,6 @@ xfs_set_projid(struct xfs_inode *ip,
#define XFS_ISTALE (1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
#define XFS_INEW (1 << 3) /* inode has just been allocated */
-#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
@@ -215,8 +224,7 @@ xfs_set_projid(struct xfs_inode *ip,
*/
#define XFS_IRECLAIM_RESET_FLAGS \
(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
- XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
- XFS_IFILESTREAM);
+ XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
/*
* Synchronize processes attempting to flush the in-core inode back to disk.
@@ -323,6 +331,8 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
+int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
+ umode_t mode, struct xfs_inode **ipp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
@@ -337,8 +347,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint);
void xfs_iunlock(xfs_inode_t *, uint);
void xfs_ilock_demote(xfs_inode_t *, uint);
int xfs_isilocked(xfs_inode_t *, uint);
-uint xfs_ilock_map_shared(xfs_inode_t *);
-void xfs_iunlock_map_shared(xfs_inode_t *, uint);
+uint xfs_ilock_data_map_shared(struct xfs_inode *);
+uint xfs_ilock_attr_map_shared(struct xfs_inode *);
int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
xfs_nlink_t, xfs_dev_t, prid_t, int,
struct xfs_buf **, xfs_inode_t **);
@@ -367,7 +377,6 @@ int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
struct xfs_inode **, int *);
int xfs_droplink(struct xfs_trans *, struct xfs_inode *);
int xfs_bumplink(struct xfs_trans *, struct xfs_inode *);
-void xfs_bump_ino_vers2(struct xfs_trans *, struct xfs_inode *);
/* from xfs_file.c */
int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
@@ -389,4 +398,14 @@ do { \
extern struct kmem_zone *xfs_inode_zone;
+/*
+ * Flags for read/write calls
+ */
+#define XFS_IO_ISDIRECT 0x00001 /* bypass page cache */
+#define XFS_IO_INVIS 0x00002 /* don't update inode timestamps */
+
+#define XFS_IO_FLAGS \
+ { XFS_IO_ISDIRECT, "DIRECT" }, \
+ { XFS_IO_INVIS, "INVIS"}
+
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7c0d391f9a6e..de5a7be36e60 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -30,6 +30,7 @@
#include "xfs_trace.h"
#include "xfs_trans_priv.h"
#include "xfs_dinode.h"
+#include "xfs_log.h"
kmem_zone_t *xfs_ili_zone; /* inode log item zone */
@@ -39,27 +40,14 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_inode_log_item, ili_item);
}
-
-/*
- * This returns the number of iovecs needed to log the given inode item.
- *
- * We need one iovec for the inode log format structure, one for the
- * inode core, and possibly one for the inode data/extents/b-tree root
- * and one for the inode attribute data/extents/b-tree root.
- */
STATIC void
-xfs_inode_item_size(
- struct xfs_log_item *lip,
+xfs_inode_item_data_fork_size(
+ struct xfs_inode_log_item *iip,
int *nvecs,
int *nbytes)
{
- struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
- *nvecs += 2;
- *nbytes += sizeof(struct xfs_inode_log_format) +
- xfs_icdinode_size(ip->i_d.di_version);
-
switch (ip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
if ((iip->ili_fields & XFS_ILOG_DEXT) &&
@@ -70,7 +58,6 @@ xfs_inode_item_size(
*nvecs += 1;
}
break;
-
case XFS_DINODE_FMT_BTREE:
if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
ip->i_df.if_broot_bytes > 0) {
@@ -78,7 +65,6 @@ xfs_inode_item_size(
*nvecs += 1;
}
break;
-
case XFS_DINODE_FMT_LOCAL:
if ((iip->ili_fields & XFS_ILOG_DDATA) &&
ip->i_df.if_bytes > 0) {
@@ -90,19 +76,20 @@ xfs_inode_item_size(
case XFS_DINODE_FMT_DEV:
case XFS_DINODE_FMT_UUID:
break;
-
default:
ASSERT(0);
break;
}
+}
- if (!XFS_IFORK_Q(ip))
- return;
-
+STATIC void
+xfs_inode_item_attr_fork_size(
+ struct xfs_inode_log_item *iip,
+ int *nvecs,
+ int *nbytes)
+{
+ struct xfs_inode *ip = iip->ili_inode;
- /*
- * Log any necessary attribute data.
- */
switch (ip->i_d.di_aformat) {
case XFS_DINODE_FMT_EXTENTS:
if ((iip->ili_fields & XFS_ILOG_AEXT) &&
@@ -113,7 +100,6 @@ xfs_inode_item_size(
*nvecs += 1;
}
break;
-
case XFS_DINODE_FMT_BTREE:
if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
ip->i_afp->if_broot_bytes > 0) {
@@ -121,7 +107,6 @@ xfs_inode_item_size(
*nvecs += 1;
}
break;
-
case XFS_DINODE_FMT_LOCAL:
if ((iip->ili_fields & XFS_ILOG_ADATA) &&
ip->i_afp->if_bytes > 0) {
@@ -129,7 +114,6 @@ xfs_inode_item_size(
*nvecs += 1;
}
break;
-
default:
ASSERT(0);
break;
@@ -137,98 +121,39 @@ xfs_inode_item_size(
}
/*
- * xfs_inode_item_format_extents - convert in-core extents to on-disk form
- *
- * For either the data or attr fork in extent format, we need to endian convert
- * the in-core extent as we place them into the on-disk inode. In this case, we
- * need to do this conversion before we write the extents into the log. Because
- * we don't have the disk inode to write into here, we allocate a buffer and
- * format the extents into it via xfs_iextents_copy(). We free the buffer in
- * the unlock routine after the copy for the log has been made.
+ * This returns the number of iovecs needed to log the given inode item.
*
- * In the case of the data fork, the in-core and on-disk fork sizes can be
- * different due to delayed allocation extents. We only log on-disk extents
- * here, so always use the physical fork size to determine the size of the
- * buffer we need to allocate.
+ * We need one iovec for the inode log format structure, one for the
+ * inode core, and possibly one for the inode data/extents/b-tree root
+ * and one for the inode attribute data/extents/b-tree root.
*/
STATIC void
-xfs_inode_item_format_extents(
- struct xfs_inode *ip,
- struct xfs_log_iovec *vecp,
- int whichfork,
- int type)
+xfs_inode_item_size(
+ struct xfs_log_item *lip,
+ int *nvecs,
+ int *nbytes)
{
- xfs_bmbt_rec_t *ext_buffer;
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
- ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
- if (whichfork == XFS_DATA_FORK)
- ip->i_itemp->ili_extents_buf = ext_buffer;
- else
- ip->i_itemp->ili_aextents_buf = ext_buffer;
+ *nvecs += 2;
+ *nbytes += sizeof(struct xfs_inode_log_format) +
+ xfs_icdinode_size(ip->i_d.di_version);
- vecp->i_addr = ext_buffer;
- vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
- vecp->i_type = type;
+ xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
+ if (XFS_IFORK_Q(ip))
+ xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
}
-/*
- * This is called to fill in the vector of log iovecs for the
- * given inode log item. It fills the first item with an inode
- * log format structure, the second with the on-disk inode structure,
- * and a possible third and/or fourth with the inode data/extents/b-tree
- * root and inode attributes data/extents/b-tree root.
- */
STATIC void
-xfs_inode_item_format(
- struct xfs_log_item *lip,
- struct xfs_log_iovec *vecp)
+xfs_inode_item_format_data_fork(
+ struct xfs_inode_log_item *iip,
+ struct xfs_inode_log_format *ilf,
+ struct xfs_log_vec *lv,
+ struct xfs_log_iovec **vecp)
{
- struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
- uint nvecs;
size_t data_bytes;
- xfs_mount_t *mp;
-
- vecp->i_addr = &iip->ili_format;
- vecp->i_len = sizeof(xfs_inode_log_format_t);
- vecp->i_type = XLOG_REG_TYPE_IFORMAT;
- vecp++;
- nvecs = 1;
-
- vecp->i_addr = &ip->i_d;
- vecp->i_len = xfs_icdinode_size(ip->i_d.di_version);
- vecp->i_type = XLOG_REG_TYPE_ICORE;
- vecp++;
- nvecs++;
-
- /*
- * If this is really an old format inode, then we need to
- * log it as such. This means that we have to copy the link
- * count from the new field to the old. We don't have to worry
- * about the new fields, because nothing trusts them as long as
- * the old inode version number is there. If the superblock already
- * has a new version number, then we don't bother converting back.
- */
- mp = ip->i_mount;
- ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
- if (ip->i_d.di_version == 1) {
- if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
- /*
- * Convert it back.
- */
- ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
- ip->i_d.di_onlink = ip->i_d.di_nlink;
- } else {
- /*
- * The superblock version has already been bumped,
- * so just make the conversion to the new inode
- * format permanent.
- */
- ip->i_d.di_version = 2;
- ip->i_d.di_onlink = 0;
- memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
- }
- }
switch (ip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
@@ -239,36 +164,23 @@ xfs_inode_item_format(
if ((iip->ili_fields & XFS_ILOG_DEXT) &&
ip->i_d.di_nextents > 0 &&
ip->i_df.if_bytes > 0) {
+ struct xfs_bmbt_rec *p;
+
ASSERT(ip->i_df.if_u1.if_extents != NULL);
ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
- ASSERT(iip->ili_extents_buf == NULL);
-
-#ifdef XFS_NATIVE_HOST
- if (ip->i_d.di_nextents == ip->i_df.if_bytes /
- (uint)sizeof(xfs_bmbt_rec_t)) {
- /*
- * There are no delayed allocation
- * extents, so just point to the
- * real extents array.
- */
- vecp->i_addr = ip->i_df.if_u1.if_extents;
- vecp->i_len = ip->i_df.if_bytes;
- vecp->i_type = XLOG_REG_TYPE_IEXT;
- } else
-#endif
- {
- xfs_inode_item_format_extents(ip, vecp,
- XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
- }
- ASSERT(vecp->i_len <= ip->i_df.if_bytes);
- iip->ili_format.ilf_dsize = vecp->i_len;
- vecp++;
- nvecs++;
+
+ p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
+ data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
+ xlog_finish_iovec(lv, *vecp, data_bytes);
+
+ ASSERT(data_bytes <= ip->i_df.if_bytes);
+
+ ilf->ilf_dsize = data_bytes;
+ ilf->ilf_size++;
} else {
iip->ili_fields &= ~XFS_ILOG_DEXT;
}
break;
-
case XFS_DINODE_FMT_BTREE:
iip->ili_fields &=
~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
@@ -277,80 +189,70 @@ xfs_inode_item_format(
if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
ip->i_df.if_broot_bytes > 0) {
ASSERT(ip->i_df.if_broot != NULL);
- vecp->i_addr = ip->i_df.if_broot;
- vecp->i_len = ip->i_df.if_broot_bytes;
- vecp->i_type = XLOG_REG_TYPE_IBROOT;
- vecp++;
- nvecs++;
- iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
+ xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
+ ip->i_df.if_broot,
+ ip->i_df.if_broot_bytes);
+ ilf->ilf_dsize = ip->i_df.if_broot_bytes;
+ ilf->ilf_size++;
} else {
ASSERT(!(iip->ili_fields &
XFS_ILOG_DBROOT));
iip->ili_fields &= ~XFS_ILOG_DBROOT;
}
break;
-
case XFS_DINODE_FMT_LOCAL:
iip->ili_fields &=
~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
XFS_ILOG_DEV | XFS_ILOG_UUID);
if ((iip->ili_fields & XFS_ILOG_DDATA) &&
ip->i_df.if_bytes > 0) {
- ASSERT(ip->i_df.if_u1.if_data != NULL);
- ASSERT(ip->i_d.di_size > 0);
-
- vecp->i_addr = ip->i_df.if_u1.if_data;
/*
* Round i_bytes up to a word boundary.
* The underlying memory is guaranteed to
* to be there by xfs_idata_realloc().
*/
data_bytes = roundup(ip->i_df.if_bytes, 4);
- ASSERT((ip->i_df.if_real_bytes == 0) ||
- (ip->i_df.if_real_bytes == data_bytes));
- vecp->i_len = (int)data_bytes;
- vecp->i_type = XLOG_REG_TYPE_ILOCAL;
- vecp++;
- nvecs++;
- iip->ili_format.ilf_dsize = (unsigned)data_bytes;
+ ASSERT(ip->i_df.if_real_bytes == 0 ||
+ ip->i_df.if_real_bytes == data_bytes);
+ ASSERT(ip->i_df.if_u1.if_data != NULL);
+ ASSERT(ip->i_d.di_size > 0);
+ xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
+ ip->i_df.if_u1.if_data, data_bytes);
+ ilf->ilf_dsize = (unsigned)data_bytes;
+ ilf->ilf_size++;
} else {
iip->ili_fields &= ~XFS_ILOG_DDATA;
}
break;
-
case XFS_DINODE_FMT_DEV:
iip->ili_fields &=
~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
XFS_ILOG_DEXT | XFS_ILOG_UUID);
- if (iip->ili_fields & XFS_ILOG_DEV) {
- iip->ili_format.ilf_u.ilfu_rdev =
- ip->i_df.if_u2.if_rdev;
- }
+ if (iip->ili_fields & XFS_ILOG_DEV)
+ ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev;
break;
-
case XFS_DINODE_FMT_UUID:
iip->ili_fields &=
~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
XFS_ILOG_DEXT | XFS_ILOG_DEV);
- if (iip->ili_fields & XFS_ILOG_UUID) {
- iip->ili_format.ilf_u.ilfu_uuid =
- ip->i_df.if_u2.if_uuid;
- }
+ if (iip->ili_fields & XFS_ILOG_UUID)
+ ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid;
break;
-
default:
ASSERT(0);
break;
}
+}
- /*
- * If there are no attributes associated with the file, then we're done.
- */
- if (!XFS_IFORK_Q(ip)) {
- iip->ili_fields &=
- ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
- goto out;
- }
+STATIC void
+xfs_inode_item_format_attr_fork(
+ struct xfs_inode_log_item *iip,
+ struct xfs_inode_log_format *ilf,
+ struct xfs_log_vec *lv,
+ struct xfs_log_iovec **vecp)
+{
+ struct xfs_inode *ip = iip->ili_inode;
+ size_t data_bytes;
switch (ip->i_d.di_aformat) {
case XFS_DINODE_FMT_EXTENTS:
@@ -360,30 +262,22 @@ xfs_inode_item_format(
if ((iip->ili_fields & XFS_ILOG_AEXT) &&
ip->i_d.di_anextents > 0 &&
ip->i_afp->if_bytes > 0) {
+ struct xfs_bmbt_rec *p;
+
ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
ip->i_d.di_anextents);
ASSERT(ip->i_afp->if_u1.if_extents != NULL);
-#ifdef XFS_NATIVE_HOST
- /*
- * There are not delayed allocation extents
- * for attributes, so just point at the array.
- */
- vecp->i_addr = ip->i_afp->if_u1.if_extents;
- vecp->i_len = ip->i_afp->if_bytes;
- vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
-#else
- ASSERT(iip->ili_aextents_buf == NULL);
- xfs_inode_item_format_extents(ip, vecp,
- XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
-#endif
- iip->ili_format.ilf_asize = vecp->i_len;
- vecp++;
- nvecs++;
+
+ p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
+ data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
+ xlog_finish_iovec(lv, *vecp, data_bytes);
+
+ ilf->ilf_asize = data_bytes;
+ ilf->ilf_size++;
} else {
iip->ili_fields &= ~XFS_ILOG_AEXT;
}
break;
-
case XFS_DINODE_FMT_BTREE:
iip->ili_fields &=
~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
@@ -392,61 +286,89 @@ xfs_inode_item_format(
ip->i_afp->if_broot_bytes > 0) {
ASSERT(ip->i_afp->if_broot != NULL);
- vecp->i_addr = ip->i_afp->if_broot;
- vecp->i_len = ip->i_afp->if_broot_bytes;
- vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
- vecp++;
- nvecs++;
- iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
+ xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
+ ip->i_afp->if_broot,
+ ip->i_afp->if_broot_bytes);
+ ilf->ilf_asize = ip->i_afp->if_broot_bytes;
+ ilf->ilf_size++;
} else {
iip->ili_fields &= ~XFS_ILOG_ABROOT;
}
break;
-
case XFS_DINODE_FMT_LOCAL:
iip->ili_fields &=
~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
if ((iip->ili_fields & XFS_ILOG_ADATA) &&
ip->i_afp->if_bytes > 0) {
- ASSERT(ip->i_afp->if_u1.if_data != NULL);
-
- vecp->i_addr = ip->i_afp->if_u1.if_data;
/*
* Round i_bytes up to a word boundary.
* The underlying memory is guaranteed to
* to be there by xfs_idata_realloc().
*/
data_bytes = roundup(ip->i_afp->if_bytes, 4);
- ASSERT((ip->i_afp->if_real_bytes == 0) ||
- (ip->i_afp->if_real_bytes == data_bytes));
- vecp->i_len = (int)data_bytes;
- vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL;
- vecp++;
- nvecs++;
- iip->ili_format.ilf_asize = (unsigned)data_bytes;
+ ASSERT(ip->i_afp->if_real_bytes == 0 ||
+ ip->i_afp->if_real_bytes == data_bytes);
+ ASSERT(ip->i_afp->if_u1.if_data != NULL);
+ xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
+ ip->i_afp->if_u1.if_data,
+ data_bytes);
+ ilf->ilf_asize = (unsigned)data_bytes;
+ ilf->ilf_size++;
} else {
iip->ili_fields &= ~XFS_ILOG_ADATA;
}
break;
-
default:
ASSERT(0);
break;
}
-
-out:
- /*
- * Now update the log format that goes out to disk from the in-core
- * values. We always write the inode core to make the arithmetic
- * games in recovery easier, which isn't a big deal as just about any
- * transaction would dirty it anyway.
- */
- iip->ili_format.ilf_fields = XFS_ILOG_CORE |
- (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
- iip->ili_format.ilf_size = nvecs;
}
+/*
+ * This is called to fill in the vector of log iovecs for the given inode
+ * log item. It fills the first item with an inode log format structure,
+ * the second with the on-disk inode structure, and a possible third and/or
+ * fourth with the inode data/extents/b-tree root and inode attributes
+ * data/extents/b-tree root.
+ */
+STATIC void
+xfs_inode_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_vec *lv)
+{
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
+ struct xfs_inode_log_format *ilf;
+ struct xfs_log_iovec *vecp = NULL;
+
+ ASSERT(ip->i_d.di_version > 1);
+
+ ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
+ ilf->ilf_type = XFS_LI_INODE;
+ ilf->ilf_ino = ip->i_ino;
+ ilf->ilf_blkno = ip->i_imap.im_blkno;
+ ilf->ilf_len = ip->i_imap.im_len;
+ ilf->ilf_boffset = ip->i_imap.im_boffset;
+ ilf->ilf_fields = XFS_ILOG_CORE;
+ ilf->ilf_size = 2; /* format + core */
+ xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
+
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE,
+ &ip->i_d,
+ xfs_icdinode_size(ip->i_d.di_version));
+
+ xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
+ if (XFS_IFORK_Q(ip)) {
+ xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
+ } else {
+ iip->ili_fields &=
+ ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
+ }
+
+ /* update the format with the exact fields we actually logged */
+ ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
+}
/*
* This is called to pin the inode associated with the inode log
@@ -563,27 +485,6 @@ xfs_inode_item_unlock(
ASSERT(ip->i_itemp != NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- /*
- * If the inode needed a separate buffer with which to log
- * its extents, then free it now.
- */
- if (iip->ili_extents_buf != NULL) {
- ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
- ASSERT(ip->i_d.di_nextents > 0);
- ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
- ASSERT(ip->i_df.if_bytes > 0);
- kmem_free(iip->ili_extents_buf);
- iip->ili_extents_buf = NULL;
- }
- if (iip->ili_aextents_buf != NULL) {
- ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
- ASSERT(ip->i_d.di_anextents > 0);
- ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
- ASSERT(ip->i_afp->if_bytes > 0);
- kmem_free(iip->ili_aextents_buf);
- iip->ili_aextents_buf = NULL;
- }
-
lock_flags = iip->ili_lock_flags;
iip->ili_lock_flags = 0;
if (lock_flags)
@@ -670,11 +571,6 @@ xfs_inode_item_init(
iip->ili_inode = ip;
xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
&xfs_inode_item_ops);
- iip->ili_format.ilf_type = XFS_LI_INODE;
- iip->ili_format.ilf_ino = ip->i_ino;
- iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
- iip->ili_format.ilf_len = ip->i_imap.im_len;
- iip->ili_format.ilf_boffset = ip->i_imap.im_boffset;
}
/*
@@ -892,5 +788,5 @@ xfs_inode_item_format_convert(
in_f->ilf_boffset = in_f64->ilf_boffset;
return 0;
}
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index dce4d656768c..488d81254e28 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -34,11 +34,6 @@ typedef struct xfs_inode_log_item {
unsigned short ili_logged; /* flushed logged data */
unsigned int ili_last_fields; /* fields when flushed */
unsigned int ili_fields; /* fields to be logged */
- struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
- data exts */
- struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
- attr exts */
- xfs_inode_log_format_t ili_format; /* logged structure */
} xfs_inode_log_item_t;
static inline int xfs_inode_clean(xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 33ad9a77791f..3799695b9249 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -112,15 +112,11 @@ xfs_find_handle(
memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
hsize = sizeof(xfs_fsid_t);
} else {
- int lock_mode;
-
- lock_mode = xfs_ilock_map_shared(ip);
handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
sizeof(handle.ha_fid.fid_len);
handle.ha_fid.fid_pad = 0;
handle.ha_fid.fid_gen = ip->i_d.di_gen;
handle.ha_fid.fid_ino = ip->i_ino;
- xfs_iunlock_map_shared(ip, lock_mode);
hsize = XFS_HSIZE(handle);
}
@@ -211,7 +207,7 @@ xfs_open_by_handle(
struct path path;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
dentry = xfs_handlereq_to_dentry(parfilp, hreq);
if (IS_ERR(dentry))
@@ -220,7 +216,7 @@ xfs_open_by_handle(
/* Restrict xfs_open_by_handle to directories & regular files. */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
- error = -XFS_ERROR(EPERM);
+ error = -EPERM;
goto out_dput;
}
@@ -232,18 +228,18 @@ xfs_open_by_handle(
fmode = OPEN_FMODE(permflag);
if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
(fmode & FMODE_WRITE) && IS_APPEND(inode)) {
- error = -XFS_ERROR(EPERM);
+ error = -EPERM;
goto out_dput;
}
if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
- error = -XFS_ERROR(EACCES);
+ error = -EACCES;
goto out_dput;
}
/* Can't write directories. */
if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) {
- error = -XFS_ERROR(EISDIR);
+ error = -EISDIR;
goto out_dput;
}
@@ -275,32 +271,6 @@ xfs_open_by_handle(
return error;
}
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
- * unused first argument.
- */
-STATIC int
-do_readlink(
- char __user *buffer,
- int buflen,
- const char *link)
-{
- int len;
-
- len = PTR_ERR(link);
- if (IS_ERR(link))
- goto out;
-
- len = strlen(link);
- if (len > (unsigned) buflen)
- len = buflen;
- if (copy_to_user(buffer, link, len))
- len = -EFAULT;
- out:
- return len;
-}
-
-
int
xfs_readlink_by_handle(
struct file *parfilp,
@@ -312,7 +282,7 @@ xfs_readlink_by_handle(
int error;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
dentry = xfs_handlereq_to_dentry(parfilp, hreq);
if (IS_ERR(dentry))
@@ -320,25 +290,25 @@ xfs_readlink_by_handle(
/* Restrict this handle operation to symlinks only. */
if (!S_ISLNK(dentry->d_inode->i_mode)) {
- error = -XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_dput;
}
if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
- error = -XFS_ERROR(EFAULT);
+ error = -EFAULT;
goto out_dput;
}
link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
if (!link) {
- error = -XFS_ERROR(ENOMEM);
+ error = -ENOMEM;
goto out_dput;
}
- error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+ error = xfs_readlink(XFS_I(dentry->d_inode), link);
if (error)
goto out_kfree;
- error = do_readlink(hreq->ohandle, olen, link);
+ error = readlink_copy(hreq->ohandle, olen, link);
if (error)
goto out_kfree;
@@ -360,10 +330,10 @@ xfs_set_dmattrs(
int error;
if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
+ return -EPERM;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
@@ -394,9 +364,9 @@ xfs_fssetdm_by_handle(
struct dentry *dentry;
if (!capable(CAP_MKNOD))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(parfilp);
if (error)
@@ -409,16 +379,16 @@ xfs_fssetdm_by_handle(
}
if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
- error = -XFS_ERROR(EPERM);
+ error = -EPERM;
goto out;
}
if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
- error = -XFS_ERROR(EFAULT);
+ error = -EFAULT;
goto out;
}
- error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+ error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
fsd.fsd_dmstate);
out:
@@ -439,18 +409,18 @@ xfs_attrlist_by_handle(
char *kbuf;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (al_hreq.buflen < sizeof(struct attrlist) ||
al_hreq.buflen > XATTR_LIST_MAX)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Reject flags, only allow namespaces.
*/
if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
if (IS_ERR(dentry))
@@ -461,7 +431,7 @@ xfs_attrlist_by_handle(
goto out_dput;
cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
- error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+ error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
al_hreq.flags, cursor);
if (error)
goto out_kfree;
@@ -485,20 +455,20 @@ xfs_attrmulti_attr_get(
__uint32_t flags)
{
unsigned char *kbuf;
- int error = EFAULT;
+ int error = -EFAULT;
if (*len > XATTR_SIZE_MAX)
- return EINVAL;
+ return -EINVAL;
kbuf = kmem_zalloc_large(*len, KM_SLEEP);
if (!kbuf)
- return ENOMEM;
+ return -ENOMEM;
error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
if (error)
goto out_kfree;
if (copy_to_user(ubuf, kbuf, *len))
- error = EFAULT;
+ error = -EFAULT;
out_kfree:
kmem_free(kbuf);
@@ -514,20 +484,17 @@ xfs_attrmulti_attr_set(
__uint32_t flags)
{
unsigned char *kbuf;
- int error = EFAULT;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return EPERM;
+ return -EPERM;
if (len > XATTR_SIZE_MAX)
- return EINVAL;
+ return -EINVAL;
kbuf = memdup_user(ubuf, len);
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
- error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
-
- return error;
+ return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
}
int
@@ -537,7 +504,7 @@ xfs_attrmulti_attr_remove(
__uint32_t flags)
{
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return EPERM;
+ return -EPERM;
return xfs_attr_remove(XFS_I(inode), name, flags);
}
@@ -554,9 +521,9 @@ xfs_attrmulti_by_handle(
unsigned char *attr_name;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
/* overflow check */
if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
@@ -566,7 +533,7 @@ xfs_attrmulti_by_handle(
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- error = E2BIG;
+ error = -E2BIG;
size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
@@ -577,6 +544,7 @@ xfs_attrmulti_by_handle(
goto out_dput;
}
+ error = -ENOMEM;
attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
if (!attr_name)
goto out_kfree_ops;
@@ -617,19 +585,19 @@ xfs_attrmulti_by_handle(
mnt_drop_write_file(parfilp);
break;
default:
- ops[i].am_error = EINVAL;
+ ops[i].am_error = -EINVAL;
}
}
if (copy_to_user(am_hreq.ops, ops, size))
- error = XFS_ERROR(EFAULT);
+ error = -EFAULT;
kfree(attr_name);
out_kfree_ops:
kfree(ops);
out_dput:
dput(dentry);
- return -error;
+ return error;
}
int
@@ -654,16 +622,16 @@ xfs_ioc_space(
*/
if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (!(filp->f_mode & FMODE_WRITE))
- return -XFS_ERROR(EBADF);
+ return -EBADF;
if (!S_ISREG(inode->i_mode))
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
error = mnt_want_write_file(filp);
if (error)
@@ -681,7 +649,7 @@ xfs_ioc_space(
bf->l_start += XFS_ISIZE(ip);
break;
default:
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
@@ -698,7 +666,7 @@ xfs_ioc_space(
case XFS_IOC_UNRESVSP:
case XFS_IOC_UNRESVSP64:
if (bf->l_len <= 0) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
break;
@@ -711,7 +679,7 @@ xfs_ioc_space(
bf->l_start > mp->m_super->s_maxbytes ||
bf->l_start + bf->l_len < 0 ||
bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
@@ -752,7 +720,7 @@ xfs_ioc_space(
break;
default:
ASSERT(0);
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
}
if (error)
@@ -768,7 +736,7 @@ xfs_ioc_space(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- if (!(ioflags & IO_INVIS)) {
+ if (!(ioflags & XFS_IO_INVIS)) {
ip->i_d.di_mode &= ~S_ISUID;
if (ip->i_d.di_mode & S_IXGRP)
ip->i_d.di_mode &= ~S_ISGID;
@@ -788,7 +756,7 @@ xfs_ioc_space(
out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
STATIC int
@@ -810,41 +778,41 @@ xfs_ioc_bulkstat(
return -EPERM;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if ((count = bulkreq.icount) <= 0)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (bulkreq.ubuffer == NULL)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (cmd == XFS_IOC_FSINUMBERS)
error = xfs_inumbers(mp, &inlast, &count,
bulkreq.ubuffer, xfs_inumbers_fmt);
else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
- error = xfs_bulkstat_single(mp, &inlast,
- bulkreq.ubuffer, &done);
+ error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer,
+ sizeof(xfs_bstat_t), NULL, &done);
else /* XFS_IOC_FSBULKSTAT */
error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
sizeof(xfs_bstat_t), bulkreq.ubuffer,
&done);
if (error)
- return -error;
+ return error;
if (bulkreq.ocount != NULL) {
if (copy_to_user(bulkreq.lastip, &inlast,
sizeof(xfs_ino_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
}
return 0;
@@ -860,7 +828,7 @@ xfs_ioc_fsgeometry_v1(
error = xfs_fs_geometry(mp, &fsgeo, 3);
if (error)
- return -error;
+ return error;
/*
* Caller should have passed an argument of type
@@ -868,7 +836,7 @@ xfs_ioc_fsgeometry_v1(
* xfs_fsop_geom_t that xfs_fs_geometry() fills in.
*/
if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -882,10 +850,10 @@ xfs_ioc_fsgeometry(
error = xfs_fs_geometry(mp, &fsgeo, 4);
if (error)
- return -error;
+ return error;
if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1070,16 +1038,16 @@ xfs_ioctl_setattr(
trace_xfs_ioctl_setattr(ip);
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
+ return -EROFS;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
/*
* Disallow 32bit project ids when projid32bit feature is not enabled.
*/
if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* If disk quotas is on, we make sure that the dquots do exist on disk,
@@ -1117,7 +1085,7 @@ xfs_ioctl_setattr(
* CAP_FSETID capability is applicable.
*/
if (!inode_owner_or_capable(VFS_I(ip))) {
- code = XFS_ERROR(EPERM);
+ code = -EPERM;
goto error_return;
}
@@ -1128,7 +1096,7 @@ xfs_ioctl_setattr(
*/
if (mask & FSX_PROJID) {
if (current_user_ns() != &init_user_ns) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
@@ -1151,7 +1119,7 @@ xfs_ioctl_setattr(
if (ip->i_d.di_nextents &&
((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
fa->fsx_extsize)) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
+ code = -EINVAL; /* EFBIG? */
goto error_return;
}
@@ -1170,7 +1138,7 @@ xfs_ioctl_setattr(
extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
if (extsize_fsb > MAXEXTLEN) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
@@ -1182,13 +1150,13 @@ xfs_ioctl_setattr(
} else {
size = mp->m_sb.sb_blocksize;
if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
}
if (fa->fsx_extsize % size) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
}
@@ -1202,7 +1170,7 @@ xfs_ioctl_setattr(
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
(XFS_IS_REALTIME_INODE(ip)) !=
(fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
+ code = -EINVAL; /* EFBIG? */
goto error_return;
}
@@ -1213,7 +1181,7 @@ xfs_ioctl_setattr(
if ((mp->m_sb.sb_rblocks == 0) ||
(mp->m_sb.sb_rextsize == 0) ||
(ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
}
@@ -1227,7 +1195,7 @@ xfs_ioctl_setattr(
(fa->fsx_xflags &
(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
!capable(CAP_LINUX_IMMUTABLE)) {
- code = XFS_ERROR(EPERM);
+ code = -EPERM;
goto error_return;
}
}
@@ -1245,7 +1213,7 @@ xfs_ioctl_setattr(
* cleared upon successful return from chown()
*/
if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
- !inode_capable(VFS_I(ip), CAP_FSETID))
+ !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
/*
@@ -1257,15 +1225,8 @@ xfs_ioctl_setattr(
olddquot = xfs_qm_vop_chown(tp, ip,
&ip->i_pdquot, pdqp);
}
+ ASSERT(ip->i_d.di_version > 1);
xfs_set_projid(ip, fa->fsx_projid);
-
- /*
- * We may have to rev the inode as well as
- * the superblock version number since projids didn't
- * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
- */
- if (ip->i_d.di_version == 1)
- xfs_bump_ino_vers2(tp, ip);
}
}
@@ -1337,7 +1298,7 @@ xfs_ioc_fssetxattr(
return error;
error = xfs_ioctl_setattr(ip, &fa, mask);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
STATIC int
@@ -1382,7 +1343,7 @@ xfs_ioc_setxflags(
return error;
error = xfs_ioctl_setattr(ip, &fa, mask);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
STATIC int
@@ -1392,7 +1353,7 @@ xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
/* copy only getbmap portion (not getbmapx) */
if (copy_to_user(base, bmv, sizeof(struct getbmap)))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
*ap += sizeof(struct getbmap);
return 0;
@@ -1409,23 +1370,23 @@ xfs_ioc_getbmap(
int error;
if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (bmx.bmv_count < 2)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
- if (ioflags & IO_INVIS)
+ if (ioflags & XFS_IO_INVIS)
bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
(struct getbmap *)arg+1);
if (error)
- return -error;
+ return error;
/* copy back header - only size of getbmap */
if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1435,7 +1396,7 @@ xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
struct getbmapx __user *base = *ap;
if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
*ap += sizeof(struct getbmapx);
return 0;
@@ -1450,22 +1411,22 @@ xfs_ioc_getbmapx(
int error;
if (copy_from_user(&bmx, arg, sizeof(bmx)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (bmx.bmv_count < 2)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (bmx.bmv_iflags & (~BMV_IF_VALID))
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
(struct getbmapx *)arg+1);
if (error)
- return -error;
+ return error;
/* copy back header */
if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1481,33 +1442,33 @@ xfs_ioc_swapext(
/* Pull information for the target fd */
f = fdget((int)sxp->sx_fdtarget);
if (!f.file) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out;
}
if (!(f.file->f_mode & FMODE_WRITE) ||
!(f.file->f_mode & FMODE_READ) ||
(f.file->f_flags & O_APPEND)) {
- error = XFS_ERROR(EBADF);
+ error = -EBADF;
goto out_put_file;
}
tmp = fdget((int)sxp->sx_fdtmp);
if (!tmp.file) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_put_file;
}
if (!(tmp.file->f_mode & FMODE_WRITE) ||
!(tmp.file->f_mode & FMODE_READ) ||
(tmp.file->f_flags & O_APPEND)) {
- error = XFS_ERROR(EBADF);
+ error = -EBADF;
goto out_put_tmp_file;
}
if (IS_SWAPFILE(file_inode(f.file)) ||
IS_SWAPFILE(file_inode(tmp.file))) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_put_tmp_file;
}
@@ -1515,17 +1476,17 @@ xfs_ioc_swapext(
tip = XFS_I(file_inode(tmp.file));
if (ip->i_mount != tip->i_mount) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_put_tmp_file;
}
if (ip->i_ino == tip->i_ino) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_put_tmp_file;
}
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out_put_tmp_file;
}
@@ -1559,7 +1520,7 @@ xfs_file_ioctl(
int error;
if (filp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
trace_xfs_file_ioctl(ip);
@@ -1578,7 +1539,7 @@ xfs_file_ioctl(
xfs_flock64_t bf;
if (copy_from_user(&bf, arg, sizeof(bf)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
}
case XFS_IOC_DIOINFO: {
@@ -1587,11 +1548,11 @@ xfs_file_ioctl(
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+ da.d_mem = da.d_miniosz = target->bt_logical_sectorsize;
da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
if (copy_to_user(arg, &da, sizeof(da)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1624,7 +1585,7 @@ xfs_file_ioctl(
struct fsdmidata dmi;
if (copy_from_user(&dmi, arg, sizeof(dmi)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
@@ -1633,7 +1594,7 @@ xfs_file_ioctl(
error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
dmi.fsd_dmstate);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_GETBMAP:
@@ -1649,14 +1610,14 @@ xfs_file_ioctl(
xfs_fsop_handlereq_t hreq;
if (copy_from_user(&hreq, arg, sizeof(hreq)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_find_handle(cmd, &hreq);
}
case XFS_IOC_OPEN_BY_HANDLE: {
xfs_fsop_handlereq_t hreq;
if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_open_by_handle(filp, &hreq);
}
case XFS_IOC_FSSETDM_BY_HANDLE:
@@ -1666,7 +1627,7 @@ xfs_file_ioctl(
xfs_fsop_handlereq_t hreq;
if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_readlink_by_handle(filp, &hreq);
}
case XFS_IOC_ATTRLIST_BY_HANDLE:
@@ -1679,13 +1640,13 @@ xfs_file_ioctl(
struct xfs_swapext sxp;
if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_ioc_swapext(&sxp);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSCOUNTS: {
@@ -1693,10 +1654,10 @@ xfs_file_ioctl(
error = xfs_fs_counts(mp, &out);
if (error)
- return -error;
+ return error;
if (copy_to_user(arg, &out, sizeof(out)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1708,10 +1669,10 @@ xfs_file_ioctl(
return -EPERM;
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return -XFS_ERROR(EROFS);
+ return -EROFS;
if (copy_from_user(&inout, arg, sizeof(inout)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
@@ -1722,10 +1683,10 @@ xfs_file_ioctl(
error = xfs_reserve_blocks(mp, &in, &inout);
mnt_drop_write_file(filp);
if (error)
- return -error;
+ return error;
if (copy_to_user(arg, &inout, sizeof(inout)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1737,10 +1698,10 @@ xfs_file_ioctl(
error = xfs_reserve_blocks(mp, NULL, &out);
if (error)
- return -error;
+ return error;
if (copy_to_user(arg, &out, sizeof(out)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1749,42 +1710,42 @@ xfs_file_ioctl(
xfs_growfs_data_t in;
if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_data(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSGROWFSLOG: {
xfs_growfs_log_t in;
if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_log(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSGROWFSRT: {
xfs_growfs_rt_t in;
if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_rt(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_GOINGDOWN: {
@@ -1794,10 +1755,9 @@ xfs_file_ioctl(
return -EPERM;
if (get_user(in, (__uint32_t __user *)arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
- error = xfs_fs_goingdown(mp, in);
- return -error;
+ return xfs_fs_goingdown(mp, in);
}
case XFS_IOC_ERROR_INJECTION: {
@@ -1807,18 +1767,16 @@ xfs_file_ioctl(
return -EPERM;
if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
- error = xfs_errortag_add(in.errtag, mp);
- return -error;
+ return xfs_errortag_add(in.errtag, mp);
}
case XFS_IOC_ERROR_CLEARALL:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- error = xfs_errortag_clearall(mp, 1);
- return -error;
+ return xfs_errortag_clearall(mp, 1);
case XFS_IOC_FREE_EOFBLOCKS: {
struct xfs_fs_eofblocks eofb;
@@ -1828,16 +1786,16 @@ xfs_file_ioctl(
return -EPERM;
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return -XFS_ERROR(EROFS);
+ return -EROFS;
if (copy_from_user(&eofb, arg, sizeof(eofb)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = xfs_fs_eofblocks_from_user(&eofb, &keofb);
if (error)
- return -error;
+ return error;
- return -xfs_icache_free_eofblocks(mp, &keofb);
+ return xfs_icache_free_eofblocks(mp, &keofb);
}
default:
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index a7992f8de9d3..a554646ff141 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -28,7 +28,6 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_vnode.h"
#include "xfs_inode.h"
#include "xfs_itable.h"
#include "xfs_error.h"
@@ -56,7 +55,7 @@ xfs_compat_flock64_copyin(
get_user(bf->l_sysid, &arg32->l_sysid) ||
get_user(bf->l_pid, &arg32->l_pid) ||
copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -70,10 +69,10 @@ xfs_compat_ioc_fsgeometry_v1(
error = xfs_fs_geometry(mp, &fsgeo, 3);
if (error)
- return -error;
+ return error;
/* The 32-bit variant simply has some padding at the end */
if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -84,7 +83,7 @@ xfs_compat_growfs_data_copyin(
{
if (get_user(in->newblocks, &arg32->newblocks) ||
get_user(in->imaxpct, &arg32->imaxpct))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -95,14 +94,14 @@ xfs_compat_growfs_rt_copyin(
{
if (get_user(in->newblocks, &arg32->newblocks) ||
get_user(in->extsize, &arg32->extsize))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
STATIC int
xfs_inumbers_fmt_compat(
void __user *ubuffer,
- const xfs_inogrp_t *buffer,
+ const struct xfs_inogrp *buffer,
long count,
long *written)
{
@@ -113,7 +112,7 @@ xfs_inumbers_fmt_compat(
if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
}
*written = count * sizeof(*p32);
return 0;
@@ -132,7 +131,7 @@ xfs_ioctl32_bstime_copyin(
if (get_user(sec32, &bstime32->tv_sec) ||
get_user(bstime->tv_nsec, &bstime32->tv_nsec))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
bstime->tv_sec = sec32;
return 0;
}
@@ -164,7 +163,7 @@ xfs_ioctl32_bstat_copyin(
get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
get_user(bstat->bs_aextents, &bstat32->bs_aextents))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -180,7 +179,7 @@ xfs_bstime_store_compat(
sec32 = p->tv_sec;
if (put_user(sec32, &p32->tv_sec) ||
put_user(p->tv_nsec, &p32->tv_nsec))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -195,7 +194,7 @@ xfs_bulkstat_one_fmt_compat(
compat_xfs_bstat_t __user *p32 = ubuffer;
if (ubsize < sizeof(*p32))
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
if (put_user(buffer->bs_ino, &p32->bs_ino) ||
put_user(buffer->bs_mode, &p32->bs_mode) ||
@@ -218,7 +217,7 @@ xfs_bulkstat_one_fmt_compat(
put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
put_user(buffer->bs_aextents, &p32->bs_aextents))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
if (ubused)
*ubused = sizeof(*p32);
return 0;
@@ -256,30 +255,30 @@ xfs_compat_ioc_bulkstat(
/* should be called again (unused here, but used in dmapi) */
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
if (get_user(addr, &p32->lastip))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
bulkreq.lastip = compat_ptr(addr);
if (get_user(bulkreq.icount, &p32->icount) ||
get_user(addr, &p32->ubuffer))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
bulkreq.ubuffer = compat_ptr(addr);
if (get_user(addr, &p32->ocount))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
bulkreq.ocount = compat_ptr(addr);
if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if ((count = bulkreq.icount) <= 0)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (bulkreq.ubuffer == NULL)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (cmd == XFS_IOC_FSINUMBERS_32) {
error = xfs_inumbers(mp, &inlast, &count,
@@ -294,17 +293,17 @@ xfs_compat_ioc_bulkstat(
xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
bulkreq.ubuffer, &done);
} else
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
if (error)
- return -error;
+ return error;
if (bulkreq.ocount != NULL) {
if (copy_to_user(bulkreq.lastip, &inlast,
sizeof(xfs_ino_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
}
return 0;
@@ -318,7 +317,7 @@ xfs_compat_handlereq_copyin(
compat_xfs_fsop_handlereq_t hreq32;
if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
hreq->fd = hreq32.fd;
hreq->path = compat_ptr(hreq32.path);
@@ -352,19 +351,19 @@ xfs_compat_attrlist_by_handle(
char *kbuf;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&al_hreq, arg,
sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (al_hreq.buflen < sizeof(struct attrlist) ||
al_hreq.buflen > XATTR_LIST_MAX)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Reject flags, only allow namespaces.
*/
if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
if (IS_ERR(dentry))
@@ -376,7 +375,7 @@ xfs_compat_attrlist_by_handle(
goto out_dput;
cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
- error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+ error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
al_hreq.flags, cursor);
if (error)
goto out_kfree;
@@ -404,10 +403,10 @@ xfs_compat_attrmulti_by_handle(
unsigned char *attr_name;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&am_hreq, arg,
sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
/* overflow check */
if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
@@ -417,17 +416,18 @@ xfs_compat_attrmulti_by_handle(
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- error = E2BIG;
+ error = -E2BIG;
size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
ops = memdup_user(compat_ptr(am_hreq.ops), size);
if (IS_ERR(ops)) {
- error = PTR_ERR(ops);
+ error = PTR_ERR(ops); /* already a negative errno; do not negate */
goto out_dput;
}
+ error = -ENOMEM;
attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
if (!attr_name)
goto out_kfree_ops;
@@ -469,19 +469,19 @@ xfs_compat_attrmulti_by_handle(
mnt_drop_write_file(parfilp);
break;
default:
- ops[i].am_error = EINVAL;
+ ops[i].am_error = -EINVAL;
}
}
if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
- error = XFS_ERROR(EFAULT);
+ error = -EFAULT;
kfree(attr_name);
out_kfree_ops:
kfree(ops);
out_dput:
dput(dentry);
- return -error;
+ return error;
}
STATIC int
@@ -495,26 +495,26 @@ xfs_compat_fssetdm_by_handle(
struct dentry *dentry;
if (!capable(CAP_MKNOD))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&dmhreq, arg,
sizeof(compat_xfs_fsop_setdm_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
- error = -XFS_ERROR(EPERM);
+ error = -EPERM;
goto out;
}
if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
- error = -XFS_ERROR(EFAULT);
+ error = -EFAULT;
goto out;
}
- error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+ error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
fsd.fsd_dmstate);
out:
@@ -536,7 +536,7 @@ xfs_file_compat_ioctl(
int error;
if (filp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
trace_xfs_file_compat_ioctl(ip);
@@ -587,7 +587,7 @@ xfs_file_compat_ioctl(
struct xfs_flock64 bf;
if (xfs_compat_flock64_copyin(&bf, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
}
@@ -597,25 +597,25 @@ xfs_file_compat_ioctl(
struct xfs_growfs_data in;
if (xfs_compat_growfs_data_copyin(&in, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_data(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSGROWFSRT_32: {
struct xfs_growfs_rt in;
if (xfs_compat_growfs_rt_copyin(&in, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_rt(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
#endif
/* long changes size, but xfs only copiese out 32 bits */
@@ -632,13 +632,13 @@ xfs_file_compat_ioctl(
if (copy_from_user(&sxp, sxu,
offsetof(struct xfs_swapext, sx_stat)) ||
xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_ioc_swapext(&sxp);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSBULKSTAT_32:
case XFS_IOC_FSBULKSTAT_SINGLE_32:
@@ -650,7 +650,7 @@ xfs_file_compat_ioctl(
struct xfs_fsop_handlereq hreq;
if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
return xfs_find_handle(cmd, &hreq);
}
@@ -658,14 +658,14 @@ xfs_file_compat_ioctl(
struct xfs_fsop_handlereq hreq;
if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_open_by_handle(filp, &hreq);
}
case XFS_IOC_READLINK_BY_HANDLE_32: {
struct xfs_fsop_handlereq hreq;
if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_readlink_by_handle(filp, &hreq);
}
case XFS_IOC_ATTRLIST_BY_HANDLE_32:
@@ -675,6 +675,6 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSSETDM_BY_HANDLE_32:
return xfs_compat_fssetdm_by_handle(filp, arg);
default:
- return -XFS_ERROR(ENOIOCTLCMD);
+ return -ENOIOCTLCMD;
}
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 22d1cbea283d..e9c47b6f5e5a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -110,7 +110,7 @@ xfs_alert_fsblock_zero(
(unsigned long long)imap->br_startoff,
(unsigned long long)imap->br_blockcount,
imap->br_state);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
int
@@ -128,7 +128,6 @@ xfs_iomap_write_direct(
xfs_fsblock_t firstfsb;
xfs_extlen_t extsz, temp;
int nimaps;
- int bmapi_flag;
int quota_flag;
int rt;
xfs_trans_t *tp;
@@ -139,7 +138,7 @@ xfs_iomap_write_direct(
error = xfs_qm_dqattach(ip, 0);
if (error)
- return XFS_ERROR(error);
+ return error;
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
@@ -149,7 +148,7 @@ xfs_iomap_write_direct(
if ((offset + count) > XFS_ISIZE(ip)) {
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error)
- return XFS_ERROR(error);
+ return error;
} else {
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -189,7 +188,7 @@ xfs_iomap_write_direct(
*/
if (error) {
xfs_trans_cancel(tp, 0);
- return XFS_ERROR(error);
+ return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -200,18 +199,15 @@ xfs_iomap_write_direct(
xfs_trans_ijoin(tp, ip, 0);
- bmapi_flag = 0;
- if (offset < XFS_ISIZE(ip) || extsz)
- bmapi_flag |= XFS_BMAPI_PREALLOC;
-
/*
* From this point onwards we overwrite the imap pointer that the
* caller gave to us.
*/
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
- error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag,
- &firstfsb, 0, imap, &nimaps, &free_list);
+ error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+ XFS_BMAPI_PREALLOC, &firstfsb, 0,
+ imap, &nimaps, &free_list);
if (error)
goto out_bmap_cancel;
@@ -229,7 +225,7 @@ xfs_iomap_write_direct(
* Copy any maps to caller's array and return any error.
*/
if (nimaps == 0) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto out_unlock;
}
@@ -401,7 +397,8 @@ xfs_quota_calc_throttle(
struct xfs_inode *ip,
int type,
xfs_fsblock_t *qblocks,
- int *qshift)
+ int *qshift,
+ int64_t *qfreesp)
{
int64_t freesp;
int shift = 0;
@@ -410,6 +407,7 @@ xfs_quota_calc_throttle(
/* over hi wmark, squash the prealloc completely */
if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
*qblocks = 0;
+ *qfreesp = 0;
return;
}
@@ -422,6 +420,9 @@ xfs_quota_calc_throttle(
shift += 2;
}
+ if (freesp < *qfreesp)
+ *qfreesp = freesp;
+
/* only overwrite the throttle values if we are more aggressive */
if ((freesp >> shift) < (*qblocks >> *qshift)) {
*qblocks = freesp;
@@ -480,15 +481,18 @@ xfs_iomap_prealloc_size(
}
/*
- * Check each quota to cap the prealloc size and provide a shift
- * value to throttle with.
+ * Check each quota to cap the prealloc size, provide a shift value to
+ * throttle with and adjust amount of available space.
*/
if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
+ &freesp);
if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
+ &freesp);
if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
+ &freesp);
/*
* The final prealloc size is set to the minimum of free space available
@@ -556,7 +560,7 @@ xfs_iomap_write_delay(
*/
error = xfs_qm_dqattach_locked(ip, 0);
if (error)
- return XFS_ERROR(error);
+ return error;
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -600,11 +604,11 @@ retry:
imap, &nimaps, XFS_BMAPI_ENTIRE);
switch (error) {
case 0:
- case ENOSPC:
- case EDQUOT:
+ case -ENOSPC:
+ case -EDQUOT:
break;
default:
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -618,7 +622,7 @@ retry:
error = 0;
goto retry;
}
- return XFS_ERROR(error ? error : ENOSPC);
+ return error ? error : -ENOSPC;
}
if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
@@ -667,7 +671,7 @@ xfs_iomap_write_allocate(
*/
error = xfs_qm_dqattach(ip, 0);
if (error)
- return XFS_ERROR(error);
+ return error;
offset_fsb = XFS_B_TO_FSBT(mp, offset);
count_fsb = imap->br_blockcount;
@@ -694,7 +698,7 @@ xfs_iomap_write_allocate(
nres, 0);
if (error) {
xfs_trans_cancel(tp, 0);
- return XFS_ERROR(error);
+ return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
@@ -734,7 +738,7 @@ xfs_iomap_write_allocate(
*/
nimaps = 1;
end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
- error = xfs_bmap_last_offset(NULL, ip, &last_block,
+ error = xfs_bmap_last_offset(ip, &last_block,
XFS_DATA_FORK);
if (error)
goto trans_cancel;
@@ -743,7 +747,7 @@ xfs_iomap_write_allocate(
if ((map_start_fsb + count_fsb) > last_block) {
count_fsb = last_block - map_start_fsb;
if (count_fsb == 0) {
- error = EAGAIN;
+ error = -EAGAIN;
goto trans_cancel;
}
}
@@ -753,8 +757,7 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us.
*/
error = xfs_bmapi_write(tp, ip, map_start_fsb,
- count_fsb,
- XFS_BMAPI_STACK_SWITCH,
+ count_fsb, 0,
&first_block, 1,
imap, &nimaps, &free_list);
if (error)
@@ -798,7 +801,7 @@ trans_cancel:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
error0:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return XFS_ERROR(error);
+ return error;
}
int
@@ -858,7 +861,7 @@ xfs_iomap_write_unwritten(
resblks, 0);
if (error) {
xfs_trans_cancel(tp, 0);
- return XFS_ERROR(error);
+ return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -897,7 +900,7 @@ xfs_iomap_write_unwritten(
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
- return XFS_ERROR(error);
+ return error;
if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
return xfs_alert_fsblock_zero(ip, &imap);
@@ -920,5 +923,5 @@ error_on_bmapi_transaction:
xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return XFS_ERROR(error);
+ return error;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 27e0e544e963..72129493e9d3 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -39,6 +39,7 @@
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_dinode.h"
+#include "xfs_trans_space.h"
#include <linux/capability.h>
#include <linux/xattr.h>
@@ -48,6 +49,18 @@
#include <linux/fiemap.h>
#include <linux/slab.h>
+/*
+ * Directories have different lock order w.r.t. mmap_sem compared to regular
+ * files. This is due to readdir potentially triggering page faults on a user
+ * buffer inside filldir(), and this happens with the ilock on the directory
+ * held. For regular files, the lock order is the other way around - the
+ * mmap_sem is taken during the page fault, and then we lock the ilock to do
+ * block mapping. Hence we need a different class for the directory ilock so
+ * that lockdep can tell them apart.
+ */
+static struct lock_class_key xfs_nondir_ilock_class;
+static struct lock_class_key xfs_dir_ilock_class;
+
static int
xfs_initxattrs(
struct inode *inode,
@@ -60,7 +73,7 @@ xfs_initxattrs(
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
error = xfs_attr_set(ip, xattr->name, xattr->value,
- xattr->value_len, ATTR_SECURE);
+ xattr->value_len, ATTR_SECURE);
if (error < 0)
break;
}
@@ -81,7 +94,7 @@ xfs_init_security(
const struct qstr *qstr)
{
return security_inode_init_security(inode, dir, qstr,
- &xfs_initxattrs, NULL);
+ &xfs_initxattrs, NULL);
}
static void
@@ -111,19 +124,19 @@ xfs_cleanup_inode(
xfs_dentry_to_name(&teardown, dentry, 0);
xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
- iput(inode);
}
STATIC int
-xfs_vn_mknod(
+xfs_generic_create(
struct inode *dir,
struct dentry *dentry,
umode_t mode,
- dev_t rdev)
+ dev_t rdev,
+ bool tmpfile) /* unnamed file */
{
struct inode *inode;
struct xfs_inode *ip = NULL;
- struct posix_acl *default_acl = NULL;
+ struct posix_acl *default_acl, *acl;
struct xfs_name name;
int error;
@@ -139,17 +152,16 @@ xfs_vn_mknod(
rdev = 0;
}
- if (IS_POSIXACL(dir)) {
- default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(default_acl))
- return PTR_ERR(default_acl);
+ error = posix_acl_create(dir, &mode, &default_acl, &acl);
+ if (error)
+ return error;
- if (!default_acl)
- mode &= ~current_umask();
+ if (!tmpfile) {
+ xfs_dentry_to_name(&name, dentry, mode);
+ error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+ } else {
+ error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
}
-
- xfs_dentry_to_name(&name, dentry, mode);
- error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
if (unlikely(error))
goto out_free_acl;
@@ -159,22 +171,46 @@ xfs_vn_mknod(
if (unlikely(error))
goto out_cleanup_inode;
+#ifdef CONFIG_XFS_POSIX_ACL
if (default_acl) {
- error = -xfs_inherit_acl(inode, default_acl);
- default_acl = NULL;
- if (unlikely(error))
+ error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ if (error)
+ goto out_cleanup_inode;
+ }
+ if (acl) {
+ error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ if (error)
goto out_cleanup_inode;
}
+#endif
+ if (tmpfile)
+ d_tmpfile(dentry, inode);
+ else
+ d_instantiate(dentry, inode);
- d_instantiate(dentry, inode);
- return -error;
+ out_free_acl:
+ if (default_acl)
+ posix_acl_release(default_acl);
+ if (acl)
+ posix_acl_release(acl);
+ return error;
out_cleanup_inode:
- xfs_cleanup_inode(dir, inode, dentry);
- out_free_acl:
- posix_acl_release(default_acl);
- return -error;
+ if (!tmpfile)
+ xfs_cleanup_inode(dir, inode, dentry);
+ iput(inode);
+ goto out_free_acl;
+}
+
+STATIC int
+xfs_vn_mknod(
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode,
+ dev_t rdev)
+{
+ return xfs_generic_create(dir, dentry, mode, rdev, false);
}
STATIC int
@@ -212,8 +248,8 @@ xfs_vn_lookup(
xfs_dentry_to_name(&name, dentry, 0);
error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
if (unlikely(error)) {
- if (unlikely(error != ENOENT))
- return ERR_PTR(-error);
+ if (unlikely(error != -ENOENT))
+ return ERR_PTR(error);
d_add(dentry, NULL);
return NULL;
}
@@ -239,8 +275,8 @@ xfs_vn_ci_lookup(
xfs_dentry_to_name(&xname, dentry, 0);
error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
if (unlikely(error)) {
- if (unlikely(error != ENOENT))
- return ERR_PTR(-error);
+ if (unlikely(error != -ENOENT))
+ return ERR_PTR(error);
/*
* call d_add(dentry, NULL) here when d_drop_negative_children
* is called in xfs_vn_mknod (ie. allow negative dentries
@@ -275,7 +311,7 @@ xfs_vn_link(
error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
if (unlikely(error))
- return -error;
+ return error;
ihold(inode);
d_instantiate(dentry, inode);
@@ -292,7 +328,7 @@ xfs_vn_unlink(
xfs_dentry_to_name(&name, dentry, 0);
- error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+ error = xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
if (error)
return error;
@@ -337,8 +373,9 @@ xfs_vn_symlink(
out_cleanup_inode:
xfs_cleanup_inode(dir, inode, dentry);
+ iput(inode);
out:
- return -error;
+ return error;
}
STATIC int
@@ -355,8 +392,8 @@ xfs_vn_rename(
xfs_dentry_to_name(&oname, odentry, 0);
xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);
- return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
- XFS_I(ndir), &nname, new_inode ?
+ return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+ XFS_I(ndir), &nname, new_inode ?
XFS_I(new_inode) : NULL);
}
@@ -377,7 +414,7 @@ xfs_vn_follow_link(
if (!link)
goto out_err;
- error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+ error = xfs_readlink(XFS_I(dentry->d_inode), link);
if (unlikely(error))
goto out_kfree;
@@ -391,18 +428,6 @@ xfs_vn_follow_link(
return NULL;
}
-STATIC void
-xfs_vn_put_link(
- struct dentry *dentry,
- struct nameidata *nd,
- void *p)
-{
- char *s = nd_get_link(nd);
-
- if (!IS_ERR(s))
- kfree(s);
-}
-
STATIC int
xfs_vn_getattr(
struct vfsmount *mnt,
@@ -416,7 +441,7 @@ xfs_vn_getattr(
trace_xfs_getattr(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
stat->size = XFS_ISIZE(ip);
stat->dev = inode->i_sb->s_dev;
@@ -459,14 +484,12 @@ xfs_vn_getattr(
static void
xfs_setattr_mode(
- struct xfs_trans *tp,
struct xfs_inode *ip,
struct iattr *iattr)
{
- struct inode *inode = VFS_I(ip);
- umode_t mode = iattr->ia_mode;
+ struct inode *inode = VFS_I(ip);
+ umode_t mode = iattr->ia_mode;
- ASSERT(tp);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ip->i_d.di_mode &= S_IFMT;
@@ -476,6 +499,32 @@ xfs_setattr_mode(
inode->i_mode |= mode & ~S_IFMT;
}
+static void
+xfs_setattr_time(
+ struct xfs_inode *ip,
+ struct iattr *iattr)
+{
+ struct inode *inode = VFS_I(ip);
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+ if (iattr->ia_valid & ATTR_ATIME) {
+ inode->i_atime = iattr->ia_atime;
+ ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+ ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+ }
+ if (iattr->ia_valid & ATTR_CTIME) {
+ inode->i_ctime = iattr->ia_ctime;
+ ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+ }
+ if (iattr->ia_valid & ATTR_MTIME) {
+ inode->i_mtime = iattr->ia_mtime;
+ ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+ }
+}
+
int
xfs_setattr_nonsize(
struct xfs_inode *ip,
@@ -497,14 +546,14 @@ xfs_setattr_nonsize(
/* If acls are being inherited, we already have this checked */
if (!(flags & XFS_ATTR_NOACL)) {
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
+ return -EROFS;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
- error = -inode_change_ok(inode, iattr);
+ error = inode_change_ok(inode, iattr);
if (error)
- return XFS_ERROR(error);
+ return error;
}
ASSERT((mask & ATTR_SIZE) == 0);
@@ -618,7 +667,8 @@ xfs_setattr_nonsize(
}
if (!gid_eq(igid, gid)) {
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
- ASSERT(!XFS_IS_PQUOTA_ON(mp));
+ ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
+ !XFS_IS_PQUOTA_ON(mp));
ASSERT(mask & ATTR_GID);
ASSERT(gdqp);
olddquot2 = xfs_qm_vop_chown(tp, ip,
@@ -629,30 +679,10 @@ xfs_setattr_nonsize(
}
}
- /*
- * Change file access modes.
- */
if (mask & ATTR_MODE)
- xfs_setattr_mode(tp, ip, iattr);
-
- /*
- * Change file access or modified times.
- */
- if (mask & ATTR_ATIME) {
- inode->i_atime = iattr->ia_atime;
- ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
- }
- if (mask & ATTR_CTIME) {
- inode->i_ctime = iattr->ia_ctime;
- ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
- }
- if (mask & ATTR_MTIME) {
- inode->i_mtime = iattr->ia_mtime;
- ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
- }
+ xfs_setattr_mode(ip, iattr);
+ if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+ xfs_setattr_time(ip, iattr);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -673,7 +703,7 @@ xfs_setattr_nonsize(
xfs_qm_dqrele(gdqp);
if (error)
- return XFS_ERROR(error);
+ return error;
/*
* XXX(hch): Updating the ACL entries is not atomic vs the i_mode
@@ -683,9 +713,9 @@ xfs_setattr_nonsize(
* Posix ACL code seems to care about this issue either.
*/
if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
- error = -xfs_acl_chmod(inode);
+ error = posix_acl_chmod(inode, inode->i_mode);
if (error)
- return XFS_ERROR(error);
+ return error;
}
return 0;
@@ -709,7 +739,6 @@ xfs_setattr_size(
{
struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
- int mask = iattr->ia_valid;
xfs_off_t oldsize, newsize;
struct xfs_trans *tp;
int error;
@@ -719,19 +748,19 @@ xfs_setattr_size(
trace_xfs_setattr(ip);
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
+ return -EROFS;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
- error = -inode_change_ok(inode, iattr);
+ error = inode_change_ok(inode, iattr);
if (error)
- return XFS_ERROR(error);
+ return error;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(S_ISREG(ip->i_d.di_mode));
- ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
- ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+ ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+ ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
oldsize = inode->i_size;
newsize = iattr->ia_size;
@@ -740,7 +769,7 @@ xfs_setattr_size(
* Short circuit the truncate case for zero length files.
*/
if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
- if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
+ if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
return 0;
/*
@@ -789,7 +818,7 @@ xfs_setattr_size(
* care about here.
*/
if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
- error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
return error;
@@ -800,22 +829,34 @@ xfs_setattr_size(
*/
inode_dio_wait(inode);
- error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
+ /*
+ * Do all the page cache truncate work outside the transaction context
+ * as the "lock" order is page lock->log space reservation. i.e.
+ * locking pages inside the transaction can ABBA deadlock with
+ * writeback. We have to do the VFS inode size update before we truncate
+ * the pagecache, however, to avoid racing with page faults beyond the
+ * new EOF, as they are not serialised against truncate operations except by
+ * page locks and size updates.
+ *
+ * Hence we are in a situation where a truncate can fail with ENOMEM
+ * from xfs_trans_reserve(), but having already truncated the in-memory
+ * version of the file (i.e. made user visible changes). There's not
+ * much we can do about this, except to hope that the caller sees ENOMEM
+ * and retries the truncate operation.
+ */
+ error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
if (error)
return error;
+ truncate_setsize(inode, newsize);
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error)
goto out_trans_cancel;
- truncate_setsize(inode, newsize);
-
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
lock_flags |= XFS_ILOCK_EXCL;
-
xfs_ilock(ip, XFS_ILOCK_EXCL);
-
xfs_trans_ijoin(tp, ip, 0);
/*
@@ -828,10 +869,11 @@ xfs_setattr_size(
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+ if (newsize != oldsize &&
+ !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
iattr->ia_ctime = iattr->ia_mtime =
current_fs_time(inode->i_sb);
- mask |= ATTR_CTIME | ATTR_MTIME;
+ iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
}
/*
@@ -867,22 +909,10 @@ xfs_setattr_size(
xfs_inode_clear_eofblocks_tag(ip);
}
- /*
- * Change file access modes.
- */
- if (mask & ATTR_MODE)
- xfs_setattr_mode(tp, ip, iattr);
-
- if (mask & ATTR_CTIME) {
- inode->i_ctime = iattr->ia_ctime;
- ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
- }
- if (mask & ATTR_MTIME) {
- inode->i_mtime = iattr->ia_mtime;
- ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
- }
+ if (iattr->ia_valid & ATTR_MODE)
+ xfs_setattr_mode(ip, iattr);
+ if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+ xfs_setattr_time(ip, iattr);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -920,7 +950,7 @@ xfs_vn_setattr(
error = xfs_setattr_nonsize(ip, iattr, 0);
}
- return -error;
+ return error;
}
STATIC int
@@ -940,7 +970,7 @@ xfs_vn_update_time(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
- return -error;
+ return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -961,7 +991,7 @@ xfs_vn_update_time(
}
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
- return -xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp, 0);
}
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -1006,7 +1036,7 @@ xfs_fiemap_format(
*full = 1; /* user array now full */
}
- return -error;
+ return error;
}
STATIC int
@@ -1025,12 +1055,12 @@ xfs_vn_fiemap(
return error;
/* Set up bmap header for xfs internal routine */
- bm.bmv_offset = BTOBB(start);
+ bm.bmv_offset = BTOBBT(start);
/* Special case for whole file */
if (length == FIEMAP_MAX_OFFSET)
bm.bmv_length = -1LL;
else
- bm.bmv_length = BTOBB(length);
+ bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
/* We add one because in getbmap world count includes the header */
bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
@@ -1045,13 +1075,23 @@ xfs_vn_fiemap(
error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
if (error)
- return -error;
+ return error;
return 0;
}
+STATIC int
+xfs_vn_tmpfile(
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode)
+{
+ return xfs_generic_create(dir, dentry, mode, 0, true);
+}
+
static const struct inode_operations xfs_inode_operations = {
.get_acl = xfs_get_acl,
+ .set_acl = xfs_set_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -1079,6 +1119,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
.get_acl = xfs_get_acl,
+ .set_acl = xfs_set_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -1086,6 +1127,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
+ .tmpfile = xfs_vn_tmpfile,
};
static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1105,6 +1147,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
.get_acl = xfs_get_acl,
+ .set_acl = xfs_set_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -1112,13 +1155,13 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
+ .tmpfile = xfs_vn_tmpfile,
};
static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = xfs_vn_follow_link,
- .put_link = xfs_vn_put_link,
- .get_acl = xfs_get_acl,
+ .put_link = kfree_put_link,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -1205,6 +1248,7 @@ xfs_setup_inode(
xfs_diflags_to_iflags(inode, ip);
ip->d_ops = ip->i_mount->m_nondir_inode_ops;
+ lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &xfs_inode_operations;
@@ -1212,6 +1256,7 @@ xfs_setup_inode(
inode->i_mapping->a_ops = &xfs_address_space_operations;
break;
case S_IFDIR:
+ lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
inode->i_op = &xfs_dir_ci_inode_operations;
else
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index d2c5057b5cc4..1c34e4335920 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -30,7 +30,7 @@ extern void xfs_setup_inode(struct xfs_inode *);
/*
* Internal setattr interfaces.
*/
-#define XFS_ATTR_NOACL 0x01 /* Don't call xfs_acl_chmod */
+#define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */
extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
int flags);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c237ad15d500..f71be9c68017 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -67,19 +67,17 @@ xfs_bulkstat_one_int(
*stat = BULKSTAT_RV_NOTHING;
if (!buffer || xfs_internal_inum(mp, ino))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
if (!buf)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
error = xfs_iget(mp, NULL, ino,
(XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
XFS_ILOCK_SHARED, &ip);
- if (error) {
- *stat = BULKSTAT_RV_NOTHING;
+ if (error)
goto out_free;
- }
ASSERT(ip != NULL);
ASSERT(ip->i_imap.im_blkno != 0);
@@ -136,7 +134,6 @@ xfs_bulkstat_one_int(
IRELE(ip);
error = formatter(buffer, ubsize, ubused, buf);
-
if (!error)
*stat = BULKSTAT_RV_DIDONE;
@@ -154,9 +151,9 @@ xfs_bulkstat_one_fmt(
const xfs_bstat_t *buffer)
{
if (ubsize < sizeof(*buffer))
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
if (ubused)
*ubused = sizeof(*buffer);
return 0;
@@ -175,9 +172,170 @@ xfs_bulkstat_one(
xfs_bulkstat_one_fmt, ubused, stat);
}
+/*
+ * Loop over all clusters in a chunk for a given incore inode allocation btree
+ * record. Do a readahead if there are any allocated inodes in that cluster.
+ */
+STATIC void
+xfs_bulkstat_ichunk_ra(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irec)
+{
+ xfs_agblock_t agbno;
+ struct blk_plug plug;
+ int blks_per_cluster;
+ int inodes_per_cluster;
+ int i; /* inode chunk index */
+
+ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
+ blks_per_cluster = xfs_icluster_size_fsb(mp);
+ inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
+
+ blk_start_plug(&plug);
+ for (i = 0; i < XFS_INODES_PER_CHUNK;
+ i += inodes_per_cluster, agbno += blks_per_cluster) {
+ if (xfs_inobt_maskn(i, inodes_per_cluster) & ~irec->ir_free) {
+ xfs_btree_reada_bufs(mp, agno, agbno, blks_per_cluster,
+ &xfs_inode_buf_ops);
+ }
+ }
+ blk_finish_plug(&plug);
+}
+
+/*
+ * Lookup the inode chunk that the given inode lives in and then get the record
+ * if we found the chunk. If the inode was not the last in the chunk and there
+ * are some left allocated, update the data for the pointed-to record as well as
+ * return the count of grabbed inodes.
+ */
+STATIC int
+xfs_bulkstat_grab_ichunk(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agino_t agino, /* starting inode of chunk */
+ int *icount,/* return # of inodes grabbed */
+ struct xfs_inobt_rec_incore *irec) /* btree record */
+{
+ int idx; /* index into inode chunk */
+ int stat;
+ int error = 0;
+
+ /* Lookup the inode chunk that this inode lives in */
+ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat);
+ if (error)
+ return error;
+ if (!stat) {
+ *icount = 0;
+ return error;
+ }
+
+ /* Get the record, should always work */
+ error = xfs_inobt_get_rec(cur, irec, &stat);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(stat == 1);
+
+ /* Check if the record contains the inode in request */
+ if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
+ return -EINVAL;
+
+ idx = agino - irec->ir_startino + 1;
+ if (idx < XFS_INODES_PER_CHUNK &&
+ (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) {
+ int i;
+
+ /* We got the right chunk with some inodes left allocated in it.
+ * Grab the chunk record. Mark all the uninteresting inodes
+ * free -- because they're before our start point.
+ */
+ for (i = 0; i < idx; i++) {
+ if (XFS_INOBT_MASK(i) & ~irec->ir_free)
+ irec->ir_freecount++;
+ }
+
+ irec->ir_free |= xfs_inobt_maskn(0, idx);
+ *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount;
+ }
+
+ return 0;
+}
+
#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
/*
+ * Process inodes in chunk with a pointer to a formatter function
+ * that will iget the inode and fill in the appropriate structure.
+ */
+int
+xfs_bulkstat_ag_ichunk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irbp,
+ bulkstat_one_pf formatter,
+ size_t statstruct_size,
+ struct xfs_bulkstat_agichunk *acp)
+{
+ xfs_ino_t lastino = acp->ac_lastino;
+ char __user **ubufp = acp->ac_ubuffer;
+ int ubleft = acp->ac_ubleft;
+ int ubelem = acp->ac_ubelem;
+ int chunkidx, clustidx;
+ int error = 0;
+ xfs_agino_t agino;
+
+ for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
+ XFS_BULKSTAT_UBLEFT(ubleft) &&
+ irbp->ir_freecount < XFS_INODES_PER_CHUNK;
+ chunkidx++, clustidx++, agino++) {
+ int fmterror; /* bulkstat formatter result */
+ int ubused;
+ xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
+
+ ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
+
+ /* Skip if this inode is free */
+ if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
+ lastino = ino;
+ continue;
+ }
+
+ /*
+ * Count used inodes as free so we can tell when the
+ * chunk is used up.
+ */
+ irbp->ir_freecount++;
+
+ /* Get the inode and fill in a single buffer */
+ ubused = statstruct_size;
+ error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror);
+ if (fmterror == BULKSTAT_RV_NOTHING) {
+ if (error && error != -ENOENT && error != -EINVAL) {
+ ubleft = 0;
+ break;
+ }
+ lastino = ino;
+ continue;
+ }
+ if (fmterror == BULKSTAT_RV_GIVEUP) {
+ ubleft = 0;
+ ASSERT(error);
+ break;
+ }
+ if (*ubufp)
+ *ubufp += ubused;
+ ubleft -= ubused;
+ ubelem++;
+ lastino = ino;
+ }
+
+ acp->ac_lastino = lastino;
+ acp->ac_ubleft = ubleft;
+ acp->ac_ubelem = ubelem;
+
+ return error;
+}
+
+/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
int /* error status */
@@ -190,13 +348,10 @@ xfs_bulkstat(
char __user *ubuffer, /* buffer with inode stats */
int *done) /* 1 if there are more stats to get */
{
- xfs_agblock_t agbno=0;/* allocation group block number */
xfs_buf_t *agbp; /* agi header buffer */
xfs_agi_t *agi; /* agi header data */
xfs_agino_t agino; /* inode # in allocation group */
xfs_agnumber_t agno; /* allocation group number */
- int chunkidx; /* current index into inode chunk */
- int clustidx; /* current index into inode cluster */
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
int end_of_ag; /* set if we've seen the ag end */
int error; /* error code */
@@ -209,9 +364,6 @@ xfs_bulkstat(
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
xfs_ino_t lastino; /* last inode number returned */
- int nbcluster; /* # of blocks in a cluster */
- int nicluster; /* # of inodes in a cluster */
- int nimask; /* mask for inode clusters */
int nirbuf; /* size of irbuf */
int rval; /* return value error code */
int tmp; /* result value from btree calls */
@@ -219,7 +371,6 @@ xfs_bulkstat(
int ubleft; /* bytes left in user's buffer */
char __user *ubufp; /* pointer into user's buffer */
int ubelem; /* spaces used in user's buffer */
- int ubused; /* bytes used by formatter */
/*
* Get the last inode value, see if there's nothing to do.
@@ -234,23 +385,16 @@ xfs_bulkstat(
*ubcountp = 0;
return 0;
}
- if (!ubcountp || *ubcountp <= 0) {
- return EINVAL;
- }
+
ubcount = *ubcountp; /* statstruct's */
ubleft = ubcount * statstruct_size; /* bytes */
*ubcountp = ubelem = 0;
*done = 0;
fmterror = 0;
ubufp = ubuffer;
- nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ?
- mp->m_sb.sb_inopblock :
- (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
- nimask = ~(nicluster - 1);
- nbcluster = nicluster >> mp->m_sb.sb_inopblog;
irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
if (!irbuf)
- return ENOMEM;
+ return -ENOMEM;
nirbuf = irbsize / sizeof(*irbuf);
@@ -262,112 +406,50 @@ xfs_bulkstat(
while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
cond_resched();
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error) {
- /*
- * Skip this allocation group and go to the next one.
- */
- agno++;
- agino = 0;
- continue;
- }
+ if (error)
+ break;
agi = XFS_BUF_TO_AGI(agbp);
/*
* Allocate and initialize a btree cursor for ialloc btree.
*/
- cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
+ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
+ XFS_BTNUM_INO);
irbp = irbuf;
irbufend = irbuf + nirbuf;
end_of_ag = 0;
- /*
- * If we're returning in the middle of an allocation group,
- * we need to get the remainder of the chunk we're in.
- */
+ icount = 0;
if (agino > 0) {
- xfs_inobt_rec_incore_t r;
-
/*
- * Lookup the inode chunk that this inode lives in.
+ * In the middle of an allocation group, we need to get
+ * the remainder of the chunk we're in.
*/
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE,
- &tmp);
- if (!error && /* no I/O error */
- tmp && /* lookup succeeded */
- /* got the record, should always work */
- !(error = xfs_inobt_get_rec(cur, &r, &i)) &&
- i == 1 &&
- /* this is the right chunk */
- agino < r.ir_startino + XFS_INODES_PER_CHUNK &&
- /* lastino was not last in chunk */
- (chunkidx = agino - r.ir_startino + 1) <
- XFS_INODES_PER_CHUNK &&
- /* there are some left allocated */
- xfs_inobt_maskn(chunkidx,
- XFS_INODES_PER_CHUNK - chunkidx) &
- ~r.ir_free) {
- /*
- * Grab the chunk record. Mark all the
- * uninteresting inodes (because they're
- * before our start point) free.
- */
- for (i = 0; i < chunkidx; i++) {
- if (XFS_INOBT_MASK(i) & ~r.ir_free)
- r.ir_freecount++;
- }
- r.ir_free |= xfs_inobt_maskn(0, chunkidx);
+ struct xfs_inobt_rec_incore r;
+
+ error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
+ if (error)
+ break;
+ if (icount) {
irbp->ir_startino = r.ir_startino;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
agino = r.ir_startino + XFS_INODES_PER_CHUNK;
- icount = XFS_INODES_PER_CHUNK - r.ir_freecount;
- } else {
- /*
- * If any of those tests failed, bump the
- * inode number (just in case).
- */
- agino++;
- icount = 0;
}
- /*
- * In any case, increment to the next record.
- */
- if (!error)
- error = xfs_btree_increment(cur, 0, &tmp);
+ /* Increment to the next record */
+ error = xfs_btree_increment(cur, 0, &tmp);
} else {
- /*
- * Start of ag. Lookup the first inode chunk.
- */
+ /* Start of ag. Lookup the first inode chunk */
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
- icount = 0;
}
+ if (error)
+ break;
+
/*
* Loop through inode btree records in this ag,
* until we run out of inodes or space in the buffer.
*/
while (irbp < irbufend && icount < ubcount) {
- xfs_inobt_rec_incore_t r;
-
- /*
- * Loop as long as we're unable to read the
- * inode btree.
- */
- while (error) {
- agino += XFS_INODES_PER_CHUNK;
- if (XFS_AGINO_TO_AGBNO(mp, agino) >=
- be32_to_cpu(agi->agi_length))
- break;
- error = xfs_inobt_lookup(cur, agino,
- XFS_LOOKUP_GE, &tmp);
- cond_resched();
- }
- /*
- * If ran off the end of the ag either with an error,
- * or the normal way, set end and stop collecting.
- */
- if (error) {
- end_of_ag = 1;
- break;
- }
+ struct xfs_inobt_rec_incore r;
error = xfs_inobt_get_rec(cur, &r, &i);
if (error || i == 0) {
@@ -380,25 +462,7 @@ xfs_bulkstat(
* Also start read-ahead now for this chunk.
*/
if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
- struct blk_plug plug;
- /*
- * Loop over all clusters in the next chunk.
- * Do a readahead if there are any allocated
- * inodes in that cluster.
- */
- blk_start_plug(&plug);
- agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);
- for (chunkidx = 0;
- chunkidx < XFS_INODES_PER_CHUNK;
- chunkidx += nicluster,
- agbno += nbcluster) {
- if (xfs_inobt_maskn(chunkidx, nicluster)
- & ~r.ir_free)
- xfs_btree_reada_bufs(mp, agno,
- agbno, nbcluster,
- &xfs_inode_buf_ops);
- }
- blk_finish_plug(&plug);
+ xfs_bulkstat_ichunk_ra(mp, agno, &r);
irbp->ir_startino = r.ir_startino;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
@@ -425,57 +489,20 @@ xfs_bulkstat(
irbufend = irbp;
for (irbp = irbuf;
irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
- /*
- * Now process this chunk of inodes.
- */
- for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
- XFS_BULKSTAT_UBLEFT(ubleft) &&
- irbp->ir_freecount < XFS_INODES_PER_CHUNK;
- chunkidx++, clustidx++, agino++) {
- ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
-
- ino = XFS_AGINO_TO_INO(mp, agno, agino);
- /*
- * Skip if this inode is free.
- */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
- lastino = ino;
- continue;
- }
- /*
- * Count used inodes as free so we can tell
- * when the chunk is used up.
- */
- irbp->ir_freecount++;
-
- /*
- * Get the inode and fill in a single buffer.
- */
- ubused = statstruct_size;
- error = formatter(mp, ino, ubufp, ubleft,
- &ubused, &fmterror);
- if (fmterror == BULKSTAT_RV_NOTHING) {
- if (error && error != ENOENT &&
- error != EINVAL) {
- ubleft = 0;
- rval = error;
- break;
- }
- lastino = ino;
- continue;
- }
- if (fmterror == BULKSTAT_RV_GIVEUP) {
- ubleft = 0;
- ASSERT(error);
- rval = error;
- break;
- }
- if (ubufp)
- ubufp += ubused;
- ubleft -= ubused;
- ubelem++;
- lastino = ino;
- }
+ struct xfs_bulkstat_agichunk ac;
+
+ ac.ac_lastino = lastino;
+ ac.ac_ubuffer = &ubuffer;
+ ac.ac_ubleft = ubleft;
+ ac.ac_ubelem = ubelem;
+ error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
+ formatter, statstruct_size, &ac);
+ if (error)
+ rval = error;
+
+ lastino = ac.ac_lastino;
+ ubleft = ac.ac_ubleft;
+ ubelem = ac.ac_ubelem;
cond_resched();
}
@@ -515,58 +542,10 @@ xfs_bulkstat(
return rval;
}
-/*
- * Return stat information in bulk (by-inode) for the filesystem.
- * Special case for non-sequential one inode bulkstat.
- */
-int /* error status */
-xfs_bulkstat_single(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastinop, /* inode to return */
- char __user *buffer, /* buffer with inode stats */
- int *done) /* 1 if there are more stats to get */
-{
- int count; /* count value for bulkstat call */
- int error; /* return value */
- xfs_ino_t ino; /* filesystem inode number */
- int res; /* result from bs1 */
-
- /*
- * note that requesting valid inode numbers which are not allocated
- * to inodes will most likely cause xfs_imap_to_bp to generate warning
- * messages about bad magic numbers. This is ok. The fact that
- * the inode isn't actually an inode is handled by the
- * error check below. Done this way to make the usual case faster
- * at the expense of the error case.
- */
-
- ino = *lastinop;
- error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
- NULL, &res);
- if (error) {
- /*
- * Special case way failed, do it the "long" way
- * to see if that works.
- */
- (*lastinop)--;
- count = 1;
- if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
- sizeof(xfs_bstat_t), buffer, done))
- return error;
- if (count == 0 || (xfs_ino_t)*lastinop != ino)
- return error == EFSCORRUPTED ?
- XFS_ERROR(EINVAL) : error;
- else
- return 0;
- }
- *done = 0;
- return 0;
-}
-
int
xfs_inumbers_fmt(
void __user *ubuffer, /* buffer to write to */
- const xfs_inogrp_t *buffer, /* buffer to read from */
+ const struct xfs_inogrp *buffer, /* buffer to read from */
long count, /* # of elements to read */
long *written) /* # of bytes written */
{
@@ -581,126 +560,104 @@ xfs_inumbers_fmt(
*/
int /* error status */
xfs_inumbers(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastino, /* last inode returned */
- int *count, /* size of buffer/count returned */
- void __user *ubuffer,/* buffer with inode descriptions */
- inumbers_fmt_pf formatter)
+ struct xfs_mount *mp,/* mount point for filesystem */
+ xfs_ino_t *lastino,/* last inode returned */
+ int *count,/* size of buffer/count returned */
+ void __user *ubuffer,/* buffer with inode descriptions */
+ inumbers_fmt_pf formatter)
{
- xfs_buf_t *agbp;
- xfs_agino_t agino;
- xfs_agnumber_t agno;
- int bcount;
- xfs_inogrp_t *buffer;
- int bufidx;
- xfs_btree_cur_t *cur;
- int error;
- xfs_inobt_rec_incore_t r;
- int i;
- xfs_ino_t ino;
- int left;
- int tmp;
-
- ino = (xfs_ino_t)*lastino;
- agno = XFS_INO_TO_AGNO(mp, ino);
- agino = XFS_INO_TO_AGINO(mp, ino);
- left = *count;
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino);
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino);
+ struct xfs_btree_cur *cur = NULL;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_inogrp *buffer;
+ int bcount;
+ int left = *count;
+ int bufidx = 0;
+ int error = 0;
+
*count = 0;
+ if (agno >= mp->m_sb.sb_agcount ||
+ *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
+ return error;
+
bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
- error = bufidx = 0;
- cur = NULL;
- agbp = NULL;
- while (left > 0 && agno < mp->m_sb.sb_agcount) {
- if (agbp == NULL) {
+ do {
+ struct xfs_inobt_rec_incore r;
+ int stat;
+
+ if (!agbp) {
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error) {
- /*
- * If we can't read the AGI of this ag,
- * then just skip to the next one.
- */
- ASSERT(cur == NULL);
- agbp = NULL;
- agno++;
- agino = 0;
- continue;
- }
- cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
+ if (error)
+ break;
+
+ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
+ XFS_BTNUM_INO);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
- &tmp);
- if (error) {
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- cur = NULL;
- xfs_buf_relse(agbp);
- agbp = NULL;
- /*
- * Move up the last inode in the current
- * chunk. The lookup_ge will always get
- * us the first inode in the next chunk.
- */
- agino += XFS_INODES_PER_CHUNK - 1;
- continue;
- }
- }
- error = xfs_inobt_get_rec(cur, &r, &i);
- if (error || i == 0) {
- xfs_buf_relse(agbp);
- agbp = NULL;
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- cur = NULL;
- agno++;
- agino = 0;
- continue;
+ &stat);
+ if (error)
+ break;
+ if (!stat)
+ goto next_ag;
}
+
+ error = xfs_inobt_get_rec(cur, &r, &stat);
+ if (error)
+ break;
+ if (!stat)
+ goto next_ag;
+
agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
buffer[bufidx].xi_startino =
XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
buffer[bufidx].xi_alloccount =
XFS_INODES_PER_CHUNK - r.ir_freecount;
buffer[bufidx].xi_allocmask = ~r.ir_free;
- bufidx++;
- left--;
- if (bufidx == bcount) {
- long written;
- if (formatter(ubuffer, buffer, bufidx, &written)) {
- error = XFS_ERROR(EFAULT);
+ if (++bufidx == bcount) {
+ long written;
+
+ error = formatter(ubuffer, buffer, bufidx, &written);
+ if (error)
break;
- }
ubuffer += written;
*count += bufidx;
bufidx = 0;
}
- if (left) {
- error = xfs_btree_increment(cur, 0, &tmp);
- if (error) {
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- cur = NULL;
- xfs_buf_relse(agbp);
- agbp = NULL;
- /*
- * The agino value has already been bumped.
- * Just try to skip up to it.
- */
- agino += XFS_INODES_PER_CHUNK;
- continue;
- }
- }
- }
+ if (!--left)
+ break;
+
+ error = xfs_btree_increment(cur, 0, &stat);
+ if (error)
+ break;
+ if (stat)
+ continue;
+
+next_ag:
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ cur = NULL;
+ xfs_buf_relse(agbp);
+ agbp = NULL;
+ agino = 0;
+ } while (++agno < mp->m_sb.sb_agcount);
+
if (!error) {
if (bufidx) {
- long written;
- if (formatter(ubuffer, buffer, bufidx, &written))
- error = XFS_ERROR(EFAULT);
- else
+ long written;
+
+ error = formatter(ubuffer, buffer, bufidx, &written);
+ if (!error)
*count += bufidx;
}
*lastino = XFS_AGINO_TO_INO(mp, agno, agino);
}
+
kmem_free(buffer);
if (cur)
xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
XFS_BTREE_NOERROR));
if (agbp)
xfs_buf_relse(agbp);
+
return error;
}
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 97295d91d170..aaed08022eb9 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -30,6 +30,22 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
int *ubused,
int *stat);
+struct xfs_bulkstat_agichunk {
+ xfs_ino_t ac_lastino; /* last inode returned */
+ char __user **ac_ubuffer;/* pointer into user's buffer */
+ int ac_ubleft; /* bytes left in user's buffer */
+ int ac_ubelem; /* spaces used in user's buffer */
+};
+
+int
+xfs_bulkstat_ag_ichunk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irbp,
+ bulkstat_one_pf formatter,
+ size_t statstruct_size,
+ struct xfs_bulkstat_agichunk *acp);
+
/*
* Values for stat return value.
*/
@@ -50,13 +66,6 @@ xfs_bulkstat(
char __user *ubuffer,/* buffer with inode stats */
int *done); /* 1 if there are more stats to get */
-int
-xfs_bulkstat_single(
- xfs_mount_t *mp,
- xfs_ino_t *lastinop,
- char __user *buffer,
- int *done);
-
typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
void __user *ubuffer, /* buffer to write to */
int ubsize, /* remaining user buffer sz */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index f9bb590acc0e..d10dc8f397c9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -21,18 +21,6 @@
#include <linux/types.h>
/*
- * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
- */
-#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
-# define XFS_BIG_BLKNOS 1
-# define XFS_BIG_INUMS 1
-#else
-# define XFS_BIG_BLKNOS 0
-# define XFS_BIG_INUMS 0
-#endif
-
-/*
* Kernel specific type declarations for XFS
*/
typedef signed char __int8_t;
@@ -113,12 +101,13 @@ typedef __uint64_t __psunsigned_t;
#include <asm/byteorder.h>
#include <asm/unaligned.h>
-#include "xfs_vnode.h"
+#include "xfs_fs.h"
#include "xfs_stats.h"
#include "xfs_sysctl.h"
#include "xfs_iops.h"
#include "xfs_aops.h"
#include "xfs_super.h"
+#include "xfs_cksum.h"
#include "xfs_buf.h"
#include "xfs_message.h"
@@ -178,6 +167,7 @@ typedef __uint64_t __psunsigned_t;
#define ENOATTR ENODATA /* Attribute not found */
#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
+#define EFSBADCRC EBADMSG /* Bad CRC detected */
#define SYNCHRONIZE() barrier()
#define __return_address __builtin_return_address(0)
@@ -189,6 +179,17 @@ typedef __uint64_t __psunsigned_t;
#define MAX(a,b) (max(a,b))
#define howmany(x, y) (((x)+((y)-1))/(y))
+/*
+ * XFS wrapper structure for sysfs support. It depends on external data
+ * structures and is embedded in various internal data structures to implement
+ * the XFS sysfs object hierarchy. Define it here for broad access throughout
+ * the codebase.
+ */
+struct xfs_kobj {
+ struct kobject kobject;
+ struct completion complete;
+};
+
/* Kernel uid/gid conversion. These are used to convert to/from the on disk
* uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
* The conversion here is type only, the value will remain the same since we
@@ -329,7 +330,7 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
{
x += y - 1;
do_div(x, y);
- return(x * y);
+ return x * y;
}
static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8497a00e399d..ca4fd5bd8522 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -34,6 +34,7 @@
#include "xfs_trace.h"
#include "xfs_fsops.h"
#include "xfs_cksum.h"
+#include "xfs_sysfs.h"
kmem_zone_t *xfs_log_ticket_zone;
@@ -283,7 +284,7 @@ xlog_grant_head_wait(
return 0;
shutdown:
list_del_init(&tic->t_queue);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
@@ -377,7 +378,7 @@ xfs_log_regrant(
int error = 0;
if (XLOG_FORCED_SHUTDOWN(log))
- return XFS_ERROR(EIO);
+ return -EIO;
XFS_STATS_INC(xs_try_logspace);
@@ -446,7 +447,7 @@ xfs_log_reserve(
ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
if (XLOG_FORCED_SHUTDOWN(log))
- return XFS_ERROR(EIO);
+ return -EIO;
XFS_STATS_INC(xs_try_logspace);
@@ -454,7 +455,7 @@ xfs_log_reserve(
tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
KM_SLEEP | KM_MAYFAIL);
if (!tic)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
tic->t_trans_type = t_type;
*ticp = tic;
@@ -590,7 +591,7 @@ xfs_log_release_iclog(
{
if (xlog_state_release_iclog(mp->m_log, iclog)) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
- return EIO;
+ return -EIO;
}
return 0;
@@ -616,17 +617,19 @@ xfs_log_mount(
int error = 0;
int min_logfsbs;
- if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
- xfs_notice(mp, "Mounting Filesystem");
- else {
+ if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
+ xfs_notice(mp, "Mounting V%d Filesystem",
+ XFS_SB_VERSION_NUM(&mp->m_sb));
+ } else {
xfs_notice(mp,
-"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent.");
+"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
+ XFS_SB_VERSION_NUM(&mp->m_sb));
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
}
mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
if (IS_ERR(mp->m_log)) {
- error = -PTR_ERR(mp->m_log);
+ error = PTR_ERR(mp->m_log);
goto out;
}
@@ -650,18 +653,18 @@ xfs_log_mount(
xfs_warn(mp,
"Log size %d blocks too small, minimum size is %d blocks",
mp->m_sb.sb_logblocks, min_logfsbs);
- error = EINVAL;
+ error = -EINVAL;
} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
xfs_warn(mp,
"Log size %d blocks too large, maximum size is %lld blocks",
mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
- error = EINVAL;
+ error = -EINVAL;
} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
xfs_warn(mp,
"log size %lld bytes too large, maximum size is %lld bytes",
XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
XFS_MAX_LOG_BYTES);
- error = EINVAL;
+ error = -EINVAL;
}
if (error) {
if (xfs_sb_version_hascrc(&mp->m_sb)) {
@@ -705,6 +708,11 @@ xfs_log_mount(
}
}
+ error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
+ "log");
+ if (error)
+ goto out_destroy_ail;
+
/* Normal transactions can now occur */
mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
@@ -945,6 +953,9 @@ xfs_log_unmount(
xfs_log_quiesce(mp);
xfs_trans_ail_destroy(mp);
+
+ xfs_sysfs_del(&mp->m_log->l_kobj);
+
xlog_dealloc_log(mp->m_log);
}
@@ -1163,7 +1174,7 @@ xlog_iodone(xfs_buf_t *bp)
/*
* Race to shutdown the filesystem if we see an error.
*/
- if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp,
+ if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_stale(bp);
@@ -1181,11 +1192,14 @@ xlog_iodone(xfs_buf_t *bp)
/* log I/O is always issued ASYNC */
ASSERT(XFS_BUF_ISASYNC(bp));
xlog_state_done_syncing(iclog, aborted);
+
/*
- * do not reference the buffer (bp) here as we could race
- * with it being freed after writing the unmount record to the
- * log.
+ * drop the buffer lock now that we are done. Nothing references
+ * the buffer after this, so an unmount waiting on this lock can now
+ * tear it down safely. As such, it is unsafe to reference the buffer
+ * (bp) after the unlock as we could race with it being freed.
*/
+ xfs_buf_unlock(bp);
}
/*
@@ -1308,7 +1322,7 @@ xlog_alloc_log(
xlog_in_core_t *iclog, *prev_iclog=NULL;
xfs_buf_t *bp;
int i;
- int error = ENOMEM;
+ int error = -ENOMEM;
uint log2_size = 0;
log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
@@ -1335,7 +1349,7 @@ xlog_alloc_log(
xlog_grant_head_init(&log->l_reserve_head);
xlog_grant_head_init(&log->l_write_head);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
log2_size = mp->m_sb.sb_logsectlog;
if (log2_size < BBSHIFT) {
@@ -1364,12 +1378,26 @@ xlog_alloc_log(
xlog_get_iclog_buffer_size(mp, log);
- error = ENOMEM;
- bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
+ /*
+ * Use a NULL block for the extra log buffer used during splits so that
+ * it will trigger errors if we ever try to do IO on it without first
+ * having set it up properly.
+ */
+ error = -ENOMEM;
+ bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
+ BTOBB(log->l_iclog_size), 0);
if (!bp)
goto out_free_log;
- bp->b_iodone = xlog_iodone;
+
+ /*
+ * The iclogbuf buffer locks are held over IO but we are not going to do
+ * IO yet. Hence unlock the buffer so that the log IO path can grab it
+ * when appropriate.
+ */
ASSERT(xfs_buf_islocked(bp));
+ xfs_buf_unlock(bp);
+
+ bp->b_iodone = xlog_iodone;
log->l_xbuf = bp;
spin_lock_init(&log->l_icloglock);
@@ -1398,6 +1426,9 @@ xlog_alloc_log(
if (!bp)
goto out_free_iclog;
+ ASSERT(xfs_buf_islocked(bp));
+ xfs_buf_unlock(bp);
+
bp->b_iodone = xlog_iodone;
iclog->ic_bp = bp;
iclog->ic_data = bp->b_addr;
@@ -1422,7 +1453,6 @@ xlog_alloc_log(
iclog->ic_callback_tail = &(iclog->ic_callback);
iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
- ASSERT(xfs_buf_islocked(iclog->ic_bp));
init_waitqueue_head(&iclog->ic_force_wait);
init_waitqueue_head(&iclog->ic_write_wait);
@@ -1448,7 +1478,7 @@ out_free_iclog:
out_free_log:
kmem_free(log);
out:
- return ERR_PTR(-error);
+ return ERR_PTR(error);
} /* xlog_alloc_log */
@@ -1631,6 +1661,12 @@ xlog_cksum(
* we transition the iclogs to IOERROR state *after* flushing all existing
* iclogs to disk. This is because we don't want anymore new transactions to be
* started or completed afterwards.
+ *
+ * We lock the iclogbufs here so that we can serialise against IO completion
+ * during unmount. We might be processing a shutdown triggered during unmount,
+ * and that can occur asynchronously to the unmount thread, and hence we need to
+ * ensure that completes before tearing down the iclogbufs. Hence we need to
+ * hold the buffer lock across the log IO to achieve that.
*/
STATIC int
xlog_bdstrat(
@@ -1638,14 +1674,16 @@ xlog_bdstrat(
{
struct xlog_in_core *iclog = bp->b_fspriv;
+ xfs_buf_lock(bp);
if (iclog->ic_state & XLOG_STATE_IOERROR) {
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0);
/*
* It would seem logical to return EIO here, but we rely on
* the log state machine to propagate I/O errors instead of
- * doing it here.
+ * doing it here. Similarly, IO completion will unlock the
+ * buffer, so we don't do it here.
*/
return 0;
}
@@ -1847,14 +1885,28 @@ xlog_dealloc_log(
xlog_cil_destroy(log);
/*
- * always need to ensure that the extra buffer does not point to memory
- * owned by another log buffer before we free it.
+ * Cycle all the iclogbuf locks to make sure all log IO completion
+ * is done before we tear down these buffers.
*/
+ iclog = log->l_iclog;
+ for (i = 0; i < log->l_iclog_bufs; i++) {
+ xfs_buf_lock(iclog->ic_bp);
+ xfs_buf_unlock(iclog->ic_bp);
+ iclog = iclog->ic_next;
+ }
+
+ /*
+ * Always need to ensure that the extra buffer does not point to memory
+ * owned by another log buffer before we free it. Also, cycle the lock
+ * first to ensure we've completed IO on it.
+ */
+ xfs_buf_lock(log->l_xbuf);
+ xfs_buf_unlock(log->l_xbuf);
xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
xfs_buf_free(log->l_xbuf);
iclog = log->l_iclog;
- for (i=0; i<log->l_iclog_bufs; i++) {
+ for (i = 0; i < log->l_iclog_bufs; i++) {
xfs_buf_free(iclog->ic_bp);
next_iclog = iclog->ic_next;
kmem_free(iclog);
@@ -2323,7 +2375,7 @@ xlog_write(
ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
if (!ophdr)
- return XFS_ERROR(EIO);
+ return -EIO;
xlog_write_adv_cnt(&ptr, &len, &log_offset,
sizeof(struct xlog_op_header));
@@ -2822,7 +2874,7 @@ restart:
spin_lock(&log->l_icloglock);
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
iclog = log->l_iclog;
@@ -3010,7 +3062,7 @@ xlog_state_release_iclog(
int sync = 0; /* do we sync? */
if (iclog->ic_state & XLOG_STATE_IOERROR)
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
@@ -3018,7 +3070,7 @@ xlog_state_release_iclog(
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_WANT_SYNC);
@@ -3135,7 +3187,7 @@ _xfs_log_force(
iclog = log->l_iclog;
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* If the head iclog is not active nor dirty, we just attach
@@ -3173,7 +3225,7 @@ _xfs_log_force(
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
- return XFS_ERROR(EIO);
+ return -EIO;
if (log_flushed)
*log_flushed = 1;
@@ -3209,7 +3261,7 @@ maybe_sleep:
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
XFS_STATS_INC(xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
@@ -3219,7 +3271,7 @@ maybe_sleep:
* and the memory read should be atomic.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
- return XFS_ERROR(EIO);
+ return -EIO;
if (log_flushed)
*log_flushed = 1;
} else {
@@ -3287,7 +3339,7 @@ try_again:
iclog = log->l_iclog;
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
do {
@@ -3338,7 +3390,7 @@ try_again:
xlog_state_switch_iclogs(log, iclog, 0);
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
- return XFS_ERROR(EIO);
+ return -EIO;
if (log_flushed)
*log_flushed = 1;
spin_lock(&log->l_icloglock);
@@ -3353,7 +3405,7 @@ try_again:
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
XFS_STATS_INC(xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
@@ -3363,7 +3415,7 @@ try_again:
* and the memory read should be atomic.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
- return XFS_ERROR(EIO);
+ return -EIO;
if (log_flushed)
*log_flushed = 1;
@@ -3915,11 +3967,14 @@ xfs_log_force_umount(
retval = xlog_state_ioerror(log);
spin_unlock(&log->l_icloglock);
}
+
/*
- * Wake up everybody waiting on xfs_log_force.
- * Callback all log item committed functions as if the
- * log writes were completed.
+ * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
+ * as if the log writes were completed. The abort handling in the log
+ * item committed callback functions will do this again under lock to
+ * avoid races.
*/
+ wake_up_all(&log->l_cilp->xc_commit_wait);
xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
#ifdef XFSERRORDEBUG
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index e148719e0a5d..84e0deb95abd 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -24,12 +24,65 @@ struct xfs_log_vec {
struct xfs_log_iovec *lv_iovecp; /* iovec array */
struct xfs_log_item *lv_item; /* owner */
char *lv_buf; /* formatted buffer */
- int lv_buf_len; /* size of formatted buffer */
+ int lv_bytes; /* accounted space in buffer */
+ int lv_buf_len; /* aligned size of buffer */
int lv_size; /* size of allocated lv */
};
#define XFS_LOG_VEC_ORDERED (-1)
+static inline void *
+xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
+ uint type)
+{
+ struct xfs_log_iovec *vec = *vecp;
+
+ if (vec) {
+ ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
+ vec++;
+ } else {
+ vec = &lv->lv_iovecp[0];
+ }
+
+ vec->i_type = type;
+ vec->i_addr = lv->lv_buf + lv->lv_buf_len;
+
+ ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));
+
+ *vecp = vec;
+ return vec->i_addr;
+}
+
+/*
+ * We need to make sure the next buffer is naturally aligned for the biggest
+ * basic data type we put into it. We already accounted for this padding when
+ * sizing the buffer.
+ *
+ * However, this padding does not get written into the log, and hence we have to
+ * track the space used by the log vectors separately to prevent log space hangs
+ * due to inaccurate accounting (i.e. a leak) of the used log space through the
+ * CIL context ticket.
+ */
+static inline void
+xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
+{
+ lv->lv_buf_len += round_up(len, sizeof(uint64_t));
+ lv->lv_bytes += len;
+ vec->i_len = len;
+}
+
+static inline void *
+xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
+ uint type, void *data, int len)
+{
+ void *buf;
+
+ buf = xlog_prepare_iovec(lv, vecp, type);
+ memcpy(buf, data, len);
+ xlog_finish_iovec(lv, *vecp, len);
+ return buf;
+}
+
/*
* Structure used to pass callback function and the function's argument
* to the log manager.
@@ -129,7 +182,7 @@ void xlog_iodone(struct xfs_buf *);
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
-int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, int flags);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 5eb51fc5eb84..f6b79e5325dd 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -78,38 +78,6 @@ xlog_cil_init_post_recovery(
{
log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
log->l_cilp->xc_ctx->sequence = 1;
- log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
- log->l_curr_block);
-}
-
-STATIC int
-xlog_cil_lv_item_format(
- struct xfs_log_item *lip,
- struct xfs_log_vec *lv)
-{
- int index;
- char *ptr;
-
- /* format new vectors into array */
- lip->li_ops->iop_format(lip, lv->lv_iovecp);
-
- /* copy data into existing array */
- ptr = lv->lv_buf;
- for (index = 0; index < lv->lv_niovecs; index++) {
- struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
-
- memcpy(ptr, vec->i_addr, vec->i_len);
- vec->i_addr = ptr;
- ptr += vec->i_len;
- }
-
- /*
- * some size calculations for log vectors over-estimate, so the caller
- * doesn't know the amount of space actually used by the item. Return
- * the byte count to the caller so they can check and store it
- * appropriately.
- */
- return ptr - lv->lv_buf;
}
/*
@@ -127,7 +95,7 @@ xfs_cil_prepare_item(
{
/* Account for the new LV being passed in */
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
- *diff_len += lv->lv_buf_len;
+ *diff_len += lv->lv_bytes;
*diff_iovecs += lv->lv_niovecs;
}
@@ -141,7 +109,7 @@ xfs_cil_prepare_item(
else if (old_lv != lv) {
ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
- *diff_len -= old_lv->lv_buf_len;
+ *diff_len -= old_lv->lv_bytes;
*diff_iovecs -= old_lv->lv_niovecs;
kmem_free(old_lv);
}
@@ -232,12 +200,28 @@ xlog_cil_insert_format_items(
nbytes = 0;
}
+ /*
+ * We 64-bit align the length of each iovec so that the start
+ * of the next one is naturally aligned. We'll need to
+ * account for that slack space here. Then round nbytes up
+ * to 64-bit alignment so that the initial buffer alignment is
+ * easy to calculate and verify.
+ */
+ nbytes += niovecs * sizeof(uint64_t);
+ nbytes = round_up(nbytes, sizeof(uint64_t));
+
/* grab the old item if it exists for reservation accounting */
old_lv = lip->li_lv;
- /* calc buffer size */
- buf_size = sizeof(struct xfs_log_vec) + nbytes +
- niovecs * sizeof(struct xfs_log_iovec);
+ /*
+ * The data buffer needs to start 64-bit aligned, so round up
+ * that space to ensure we can align it appropriately and not
+ * overrun the buffer.
+ */
+ buf_size = nbytes +
+ round_up((sizeof(struct xfs_log_vec) +
+ niovecs * sizeof(struct xfs_log_iovec)),
+ sizeof(uint64_t));
/* compare to existing item size */
if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
@@ -253,35 +237,31 @@ xlog_cil_insert_format_items(
* that the space reservation accounting is correct.
*/
*diff_iovecs -= lv->lv_niovecs;
- *diff_len -= lv->lv_buf_len;
-
- /* Ensure the lv is set up according to ->iop_size */
- lv->lv_niovecs = niovecs;
- lv->lv_buf = (char *)lv + buf_size - nbytes;
-
- lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
- goto insert;
+ *diff_len -= lv->lv_bytes;
+ } else {
+ /* allocate new data chunk */
+ lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
+ lv->lv_item = lip;
+ lv->lv_size = buf_size;
+ if (ordered) {
+ /* track as an ordered logvec */
+ ASSERT(lip->li_lv == NULL);
+ lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+ goto insert;
+ }
+ lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
}
- /* allocate new data chunk */
- lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
- lv->lv_item = lip;
- lv->lv_size = buf_size;
+ /* Ensure the lv is set up according to ->iop_size */
lv->lv_niovecs = niovecs;
- if (ordered) {
- /* track as an ordered logvec */
- ASSERT(lip->li_lv == NULL);
- lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
- goto insert;
- }
-
- /* The allocated iovec region lies beyond the log vector. */
- lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
/* The allocated data region lies beyond the iovec region */
+ lv->lv_buf_len = 0;
+ lv->lv_bytes = 0;
lv->lv_buf = (char *)lv + buf_size - nbytes;
+ ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
- lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
+ lip->li_ops->iop_format(lip, lv);
insert:
ASSERT(lv->lv_buf_len <= nbytes);
xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
@@ -404,7 +384,15 @@ xlog_cil_committed(
xfs_extent_busy_clear(mp, &ctx->busy_extents,
(mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
+ /*
+ * If we are aborting the commit, wake up anyone waiting on the
+ * committing list. If we don't, then a shutdown can leave processes
+ * blocked in xlog_cil_force_lsn() waiting on a sequence commit that
+ * will never happen because we aborted it.
+ */
spin_lock(&ctx->cil->xc_push_lock);
+ if (abort)
+ wake_up_all(&ctx->cil->xc_commit_wait);
list_del(&ctx->committing);
spin_unlock(&ctx->cil->xc_push_lock);
@@ -518,13 +506,6 @@ xlog_cil_push(
cil->xc_ctx = new_ctx;
/*
- * mirror the new sequence into the cil structure so that we can do
- * unlocked checks against the current sequence in log forces without
- * risking deferencing a freed context pointer.
- */
- cil->xc_current_sequence = new_ctx->sequence;
-
- /*
* The switch is now done, so we can drop the context lock and move out
* of a shared context. We can't just go straight to the commit record,
* though - we need to synchronise with previous and future commits so
@@ -542,8 +523,15 @@ xlog_cil_push(
* Hence we need to add this context to the committing context list so
* that higher sequences will wait for us to write out a commit record
* before they do.
+ *
+ * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+ * structure atomically with the addition of this sequence to the
+ * committing list. This also ensures that we can do unlocked checks
+ * against the current sequence in log forces without risking
+ * dereferencing a freed context pointer.
*/
spin_lock(&cil->xc_push_lock);
+ cil->xc_current_sequence = new_ctx->sequence;
list_add(&ctx->committing, &cil->xc_committing);
spin_unlock(&cil->xc_push_lock);
up_write(&cil->xc_ctx_lock);
@@ -583,8 +571,18 @@ restart:
spin_lock(&cil->xc_push_lock);
list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
/*
+ * Avoid getting stuck in this loop because we were woken by the
+ * shutdown, but then went back to sleep once already in the
+ * shutdown state.
+ */
+ if (XLOG_FORCED_SHUTDOWN(log)) {
+ spin_unlock(&cil->xc_push_lock);
+ goto out_abort_free_ticket;
+ }
+
+ /*
* Higher sequences will wait for this one so skip them.
- * Don't wait for own own sequence, either.
+ * Don't wait for our own sequence, either.
*/
if (new_ctx->sequence >= ctx->sequence)
continue;
@@ -634,7 +632,7 @@ out_abort_free_ticket:
xfs_log_ticket_put(tic);
out_abort:
xlog_cil_committed(ctx, XFS_LI_ABORTED);
- return XFS_ERROR(EIO);
+ return -EIO;
}
static void
@@ -681,8 +679,14 @@ xlog_cil_push_background(
}
+/*
+ * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
+ * number that is passed. When it returns, the work will be queued for
+ * @push_seq, but it won't be completed. The caller is expected to do any
+ * waiting for push_seq to complete if it is required.
+ */
static void
-xlog_cil_push_foreground(
+xlog_cil_push_now(
struct xlog *log,
xfs_lsn_t push_seq)
{
@@ -707,10 +711,8 @@ xlog_cil_push_foreground(
}
cil->xc_push_seq = push_seq;
+ queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
spin_unlock(&cil->xc_push_lock);
-
- /* do the push now */
- xlog_cil_push(log);
}
bool
@@ -740,7 +742,7 @@ xlog_cil_empty(
* background commit, returns without it held once background commits are
* allowed again.
*/
-int
+void
xfs_log_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
@@ -786,7 +788,6 @@ xfs_log_commit_cil(
xlog_cil_push_background(log);
up_read(&cil->xc_ctx_lock);
- return 0;
}
/*
@@ -815,7 +816,8 @@ xlog_cil_force_lsn(
* xlog_cil_push() handles racing pushes for the same sequence,
* so no need to deal with it here.
*/
- xlog_cil_push_foreground(log, sequence);
+restart:
+ xlog_cil_push_now(log, sequence);
/*
* See if we can find a previous sequence still committing.
@@ -823,9 +825,15 @@ xlog_cil_force_lsn(
* before allowing the force of push_seq to go ahead. Hence block
* on commits for those as well.
*/
-restart:
spin_lock(&cil->xc_push_lock);
list_for_each_entry(ctx, &cil->xc_committing, committing) {
+ /*
+ * Avoid getting stuck in this loop because we were woken by the
+ * shutdown, but then went back to sleep once already in the
+ * shutdown state.
+ */
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto out_shutdown;
if (ctx->sequence > sequence)
continue;
if (!ctx->commit_lsn) {
@@ -841,8 +849,39 @@ restart:
/* found it! */
commit_lsn = ctx->commit_lsn;
}
+
+ /*
+ * The call to xlog_cil_push_now() executes the push in the background.
+ * Hence by the time we have got here our sequence may not have been
+ * pushed yet. This is true if the current sequence still matches the
+ * push sequence after the above wait loop and the CIL still contains
+ * dirty objects.
+ *
+ * When the push occurs, it will empty the CIL and atomically increment
+ * the current sequence past the push sequence and move it into the
+ * committing list. Of course, if the CIL is clean at the time of the
+ * push, it won't have pushed the CIL at all, so in that case we should
+ * try the push for this sequence again from the start just in case.
+ */
+ if (sequence == cil->xc_current_sequence &&
+ !list_empty(&cil->xc_cil)) {
+ spin_unlock(&cil->xc_push_lock);
+ goto restart;
+ }
+
spin_unlock(&cil->xc_push_lock);
return commit_lsn;
+
+ /*
+ * We detected a shutdown in progress. We need to trigger the log force
+ * to pass through its iclog state machine error handling, even though
+ * we are already in a shutdown state. Hence we can't return
+ * NULLCOMMITLSN here as that has special meaning to log forces (i.e.
+ * LSN is already stable), so we return a zero LSN instead.
+ */
+out_shutdown:
+ spin_unlock(&cil->xc_push_lock);
+ return 0;
}
/*
@@ -887,12 +926,12 @@ xlog_cil_init(
cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
if (!cil)
- return ENOMEM;
+ return -ENOMEM;
ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
if (!ctx) {
kmem_free(cil);
- return ENOMEM;
+ return -ENOMEM;
}
INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 9bc403a9e54f..db7cbdeb2b42 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -405,6 +405,8 @@ struct xlog {
struct xlog_grant_head l_reserve_head;
struct xlog_grant_head l_write_head;
+ struct xfs_kobj l_kobj;
+
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
char *l_iclog_bak[XLOG_MAX_ICLOGS];
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b6b669df40f3..1fd5787add99 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -179,7 +179,7 @@ xlog_bread_noalign(
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
blk_no = round_down(blk_no, log->l_sectBBsize);
@@ -193,7 +193,10 @@ xlog_bread_noalign(
bp->b_io_length = nbblks;
bp->b_error = 0;
- xfsbdstrat(log->l_mp, bp);
+ if (XFS_FORCED_SHUTDOWN(log->l_mp))
+ return -EIO;
+
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error)
xfs_buf_ioerror_alert(bp, __func__);
@@ -265,7 +268,7 @@ xlog_bwrite(
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
blk_no = round_down(blk_no, log->l_sectBBsize);
@@ -327,14 +330,14 @@ xlog_header_check_recover(
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_recover(1)",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
xfs_warn(mp,
"dirty log entry has mismatched uuid - can't recover");
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_recover(2)",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -361,7 +364,7 @@ xlog_header_check_mount(
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_mount",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -459,7 +462,7 @@ xlog_find_verify_cycle(
while (!(bp = xlog_get_bp(log, bufblks))) {
bufblks >>= 1;
if (bufblks < log->l_sectBBsize)
- return ENOMEM;
+ return -ENOMEM;
}
for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
@@ -521,7 +524,7 @@ xlog_find_verify_log_record(
if (!(bp = xlog_get_bp(log, num_blks))) {
if (!(bp = xlog_get_bp(log, 1)))
- return ENOMEM;
+ return -ENOMEM;
smallmem = 1;
} else {
error = xlog_bread(log, start_blk, num_blks, bp, &offset);
@@ -536,7 +539,7 @@ xlog_find_verify_log_record(
xfs_warn(log->l_mp,
"Log inconsistent (didn't find previous header)");
ASSERT(0);
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out;
}
@@ -561,7 +564,7 @@ xlog_find_verify_log_record(
* will be called again for the end of the physical log.
*/
if (i == -1) {
- error = -1;
+ error = 1;
goto out;
}
@@ -625,7 +628,12 @@ xlog_find_head(
int error, log_bbnum = log->l_logBBsize;
/* Is the end of the log device zeroed? */
- if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
+ error = xlog_find_zeroed(log, &first_blk);
+ if (error < 0) {
+ xfs_warn(log->l_mp, "empty log check failed");
+ return error;
+ }
+ if (error == 1) {
*return_head_blk = first_blk;
/* Is the whole lot zeroed? */
@@ -638,15 +646,12 @@ xlog_find_head(
}
return 0;
- } else if (error) {
- xfs_warn(log->l_mp, "empty log check failed");
- return error;
}
first_blk = 0; /* get cycle # of 1st block */
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
@@ -815,29 +820,29 @@ validate_head:
start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
/* start ptr at last block ptr before head_blk */
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &head_blk, 0)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
+ error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
+ if (error == 1)
+ error = -EIO;
+ if (error)
goto bp_err;
} else {
start_blk = 0;
ASSERT(head_blk <= INT_MAX);
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &head_blk, 0)) == -1) {
+ error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
+ if (error < 0)
+ goto bp_err;
+ if (error == 1) {
/* We hit the beginning of the log during our search */
start_blk = log_bbnum - (num_scan_bblks - head_blk);
new_blk = log_bbnum;
ASSERT(start_blk <= INT_MAX &&
(xfs_daddr_t) log_bbnum-start_blk >= 0);
ASSERT(head_blk <= INT_MAX);
- if ((error = xlog_find_verify_log_record(log,
- start_blk, &new_blk,
- (int)head_blk)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
+ error = xlog_find_verify_log_record(log, start_blk,
+ &new_blk, (int)head_blk);
+ if (error == 1)
+ error = -EIO;
+ if (error)
goto bp_err;
if (new_blk != log_bbnum)
head_blk = new_blk;
@@ -908,7 +913,7 @@ xlog_find_tail(
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
if (*head_blk == 0) { /* special case */
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
@@ -958,7 +963,7 @@ xlog_find_tail(
xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
xlog_put_bp(bp);
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* find blk_no of tail of log */
@@ -1089,8 +1094,8 @@ done:
*
* Return:
* 0 => the log is completely written to
- * -1 => use *blk_no as the first block of the log
- * >0 => error has occurred
+ * 1 => use *blk_no as the first block of the log
+ * <0 => error has occurred
*/
STATIC int
xlog_find_zeroed(
@@ -1109,7 +1114,7 @@ xlog_find_zeroed(
/* check totally zeroed log */
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
goto bp_err;
@@ -1118,7 +1123,7 @@ xlog_find_zeroed(
if (first_cycle == 0) { /* completely zeroed log */
*blk_no = 0;
xlog_put_bp(bp);
- return -1;
+ return 1;
}
/* check partially zeroed log */
@@ -1138,7 +1143,7 @@ xlog_find_zeroed(
*/
xfs_warn(log->l_mp,
"Log inconsistent or not a log (last==0, first!=1)");
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto bp_err;
}
@@ -1176,19 +1181,18 @@ xlog_find_zeroed(
* Potentially backup over partial log record write. We don't need
* to search the end of the log because we know it is zero.
*/
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &last_blk, 0)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
- goto bp_err;
+ error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0);
+ if (error == 1)
+ error = -EIO;
+ if (error)
+ goto bp_err;
*blk_no = last_blk;
bp_err:
xlog_put_bp(bp);
if (error)
return error;
- return -1;
+ return 1;
}
/*
@@ -1248,7 +1252,7 @@ xlog_write_log_records(
while (!(bp = xlog_get_bp(log, bufblks))) {
bufblks >>= 1;
if (bufblks < sectbb)
- return ENOMEM;
+ return -ENOMEM;
}
/* We may need to do a read at the start to fill in part of
@@ -1351,7 +1355,7 @@ xlog_clear_stale_blocks(
if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
tail_distance = tail_block + (log->l_logBBsize - head_block);
} else {
@@ -1363,7 +1367,7 @@ xlog_clear_stale_blocks(
if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
tail_distance = tail_block - head_block;
}
@@ -1548,7 +1552,7 @@ xlog_recover_add_to_trans(
xfs_warn(log->l_mp, "%s: bad header magic number",
__func__);
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
if (len == sizeof(xfs_trans_header_t))
xlog_recover_add_item(&trans->r_itemq);
@@ -1578,7 +1582,7 @@ xlog_recover_add_to_trans(
in_f->ilf_size);
ASSERT(0);
kmem_free(ptr);
- return XFS_ERROR(EIO);
+ return -EIO;
}
item->ri_total = in_f->ilf_size;
@@ -1651,6 +1655,7 @@ xlog_recover_reorder_trans(
int pass)
{
xlog_recover_item_t *item, *n;
+ int error = 0;
LIST_HEAD(sort_list);
LIST_HEAD(cancel_list);
LIST_HEAD(buffer_list);
@@ -1692,9 +1697,17 @@ xlog_recover_reorder_trans(
"%s: unrecognized type of log operation",
__func__);
ASSERT(0);
- return XFS_ERROR(EIO);
+ /*
+ * return the remaining items back to the transaction
+ * item list so they can be freed in caller.
+ */
+ if (!list_empty(&sort_list))
+ list_splice_init(&sort_list, &trans->r_itemq);
+ error = -EIO;
+ goto out;
}
}
+out:
ASSERT(list_empty(&sort_list));
if (!list_empty(&buffer_list))
list_splice(&buffer_list, &trans->r_itemq);
@@ -1704,7 +1717,7 @@ xlog_recover_reorder_trans(
list_splice_tail(&inode_buffer_list, &trans->r_itemq);
if (!list_empty(&cancel_list))
list_splice_tail(&cancel_list, &trans->r_itemq);
- return 0;
+ return error;
}
/*
@@ -1931,7 +1944,7 @@ xlog_recover_do_inode_buffer(
item, bp);
XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
@@ -2113,6 +2126,17 @@ xlog_recover_validate_buf_type(
__uint16_t magic16;
__uint16_t magicda;
+ /*
+ * We can only do post recovery validation on items on CRC enabled
+ * filesystems as we need to know when the buffer was written to be able
+ * to determine if we should have replayed the item. If we replay old
+ * metadata over a newer buffer, then it will enter a temporarily
+ * inconsistent state resulting in verification failures. Hence for now
+ * just avoid the verification stage for non-crc filesystems
+ */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
+
magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
magicda = be16_to_cpu(info->magic);
@@ -2126,7 +2150,9 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_allocbt_buf_ops;
break;
case XFS_IBT_CRC_MAGIC:
+ case XFS_FIBT_CRC_MAGIC:
case XFS_IBT_MAGIC:
+ case XFS_FIBT_MAGIC:
bp->b_ops = &xfs_inobt_buf_ops;
break;
case XFS_BMAP_CRC_MAGIC:
@@ -2148,8 +2174,6 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_agf_buf_ops;
break;
case XFS_BLFT_AGFL_BUF:
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- break;
if (magic32 != XFS_AGFL_MAGIC) {
xfs_warn(mp, "Bad AGFL block magic!");
ASSERT(0);
@@ -2182,10 +2206,6 @@ xlog_recover_validate_buf_type(
#endif
break;
case XFS_BLFT_DINO_BUF:
- /*
- * we get here with inode allocation buffers, not buffers that
- * track unlinked list changes.
- */
if (magic16 != XFS_DINODE_MAGIC) {
xfs_warn(mp, "Bad INODE block magic!");
ASSERT(0);
@@ -2265,8 +2285,6 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_attr3_leaf_buf_ops;
break;
case XFS_BLFT_ATTR_RMT_BUF:
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- break;
if (magic32 != XFS_ATTR3_RMT_MAGIC) {
xfs_warn(mp, "Bad attr remote magic!");
ASSERT(0);
@@ -2373,16 +2391,7 @@ xlog_recover_do_reg_buffer(
/* Shouldn't be any more regions */
ASSERT(i == item->ri_total);
- /*
- * We can only do post recovery validation on items on CRC enabled
- * fielsystems as we need to know when the buffer was written to be able
- * to determine if we should have replayed the item. If we replay old
- * metadata over a newer buffer, then it will enter a temporarily
- * inconsistent state resulting in verification failures. Hence for now
- * just avoid the verification stage for non-crc filesystems
- */
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xlog_recover_validate_buf_type(mp, bp, buf_f);
+ xlog_recover_validate_buf_type(mp, bp, buf_f);
}
/*
@@ -2390,8 +2399,11 @@ xlog_recover_do_reg_buffer(
* Simple algorithm: if we have found a QUOTAOFF log item of the same type
* (ie. USR or GRP), then just toss this buffer away; don't recover it.
* Else, treat it as a regular buffer and do recovery.
+ *
+ * Return false if the buffer was tossed and true if we recovered the buffer to
+ * indicate to the caller if the buffer needs writing.
*/
-STATIC void
+STATIC bool
xlog_recover_do_dquot_buffer(
struct xfs_mount *mp,
struct xlog *log,
@@ -2406,9 +2418,8 @@ xlog_recover_do_dquot_buffer(
/*
* Filesystems are required to send in quota flags at mount time.
*/
- if (mp->m_qflags == 0) {
- return;
- }
+ if (!mp->m_qflags)
+ return false;
type = 0;
if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
@@ -2421,9 +2432,10 @@ xlog_recover_do_dquot_buffer(
* This type of quotas was turned off, so ignore this buffer
*/
if (log->l_quotaoffs_flag & type)
- return;
+ return false;
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
+ return true;
}
/*
@@ -2482,7 +2494,7 @@ xlog_recover_buffer_pass2(
bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
buf_flags, NULL);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
error = bp->b_error;
if (error) {
xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
@@ -2490,23 +2502,44 @@ xlog_recover_buffer_pass2(
}
/*
- * recover the buffer only if we get an LSN from it and it's less than
+ * Recover the buffer only if we get an LSN from it and it's less than
* the lsn of the transaction we are replaying.
+ *
+ * Note that we have to be extremely careful of readahead here.
+ * Readahead does not attach verifiers to the buffers, so if we don't
+ * actually do any replay after readahead because the LSN we found
+ * in the buffer is more recent than the current transaction, then we
+ * need to attach the verifier directly. Failure to do so means that
+ * future recovery actions (e.g. EFI and unlinked list recovery) can
+ * operate on the buffers without the verifier attached. This
+ * can lead to blocks on disk having the correct content but a stale
+ * CRC.
+ *
+ * It is safe to assume these clean buffers are currently up to date.
+ * If the buffer is dirtied by a later transaction being replayed, then
+ * the verifier will be reset to match whatever recovery turns that
+ * buffer into.
*/
lsn = xlog_recover_get_buf_lsn(mp, bp);
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0)
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ xlog_recover_validate_buf_type(mp, bp, buf_f);
goto out_release;
+ }
if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
+ if (error)
+ goto out_release;
} else if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
- xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ bool dirty;
+
+ dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ if (!dirty)
+ goto out_release;
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
}
- if (error)
- goto out_release;
/*
* Perform delayed write on the buffer. Asynchronous writes will be
@@ -2514,19 +2547,19 @@ xlog_recover_buffer_pass2(
*
* Also make sure that only inode buffers with good sizes stay in
* the buffer cache. The kernel moves inodes in buffers of 1 block
- * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode
+ * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode
* buffers in the log can be a different size if the log was generated
* by an older kernel using unclustered inode buffers or a newer kernel
* running with a different inode cluster size. Regardless, if the
- * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE)
- * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep
+ * inode buffer size isn't MAX(blocksize, mp->m_inode_cluster_size)
+ * for *our* value of mp->m_inode_cluster_size, then we need to keep
* the buffer out of the buffer cache so that the buffer won't
* overlap with future reads of those inodes.
*/
if (XFS_DINODE_MAGIC ==
be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
(BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize,
- (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
+ (__uint32_t)log->l_mp->m_inode_cluster_size))) {
xfs_buf_stale(bp);
error = xfs_bwrite(bp);
} else {
@@ -2584,7 +2617,7 @@ xfs_recover_inode_owner_change(
ip = xfs_inode_alloc(mp, in_f->ilf_ino);
if (!ip)
- return ENOMEM;
+ return -ENOMEM;
/* instantiate the inode */
xfs_dinode_from_disk(&ip->i_d, dip);
@@ -2662,7 +2695,7 @@ xlog_recover_inode_pass2(
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
&xfs_inode_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error;
}
error = bp->b_error;
@@ -2683,7 +2716,7 @@ xlog_recover_inode_pass2(
__func__, dip, bp, in_f->ilf_ino);
XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
XFS_ERRLEVEL_LOW, mp);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
dicp = item->ri_buf[1].i_addr;
@@ -2693,7 +2726,7 @@ xlog_recover_inode_pass2(
__func__, item, in_f->ilf_ino);
XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
XFS_ERRLEVEL_LOW, mp);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
@@ -2750,7 +2783,7 @@ xlog_recover_inode_pass2(
"%s: Bad regular inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
__func__, item, dip, bp, in_f->ilf_ino);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
} else if (unlikely(S_ISDIR(dicp->di_mode))) {
@@ -2763,7 +2796,7 @@ xlog_recover_inode_pass2(
"%s: Bad dir inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
__func__, item, dip, bp, in_f->ilf_ino);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
}
@@ -2776,7 +2809,7 @@ xlog_recover_inode_pass2(
__func__, item, dip, bp, in_f->ilf_ino,
dicp->di_nextents + dicp->di_anextents,
dicp->di_nblocks);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
@@ -2786,7 +2819,7 @@ xlog_recover_inode_pass2(
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
"dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
isize = xfs_icdinode_size(dicp->di_version);
@@ -2796,7 +2829,7 @@ xlog_recover_inode_pass2(
xfs_alert(mp,
"%s: Bad inode log record length %d, rec ptr 0x%p",
__func__, item->ri_buf[1].i_len, item);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
@@ -2884,7 +2917,7 @@ xlog_recover_inode_pass2(
default:
xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
ASSERT(0);
- error = EIO;
+ error = -EIO;
goto out_release;
}
}
@@ -2905,7 +2938,7 @@ out_release:
error:
if (need_free)
kmem_free(in_f);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -2932,7 +2965,7 @@ xlog_recover_quotaoff_pass1(
if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
log->l_quotaoffs_flag |= XFS_DQ_GROUP;
- return (0);
+ return 0;
}
/*
@@ -2957,17 +2990,17 @@ xlog_recover_dquot_pass2(
* Filesystems are required to send in quota flags at mount time.
*/
if (mp->m_qflags == 0)
- return (0);
+ return 0;
recddq = item->ri_buf[1].i_addr;
if (recddq == NULL) {
xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
- return XFS_ERROR(EIO);
+ return -EIO;
}
if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
item->ri_buf[1].i_len, __func__);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
@@ -2976,7 +3009,7 @@ xlog_recover_dquot_pass2(
type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
ASSERT(type);
if (log->l_quotaoffs_flag & type)
- return (0);
+ return 0;
/*
* At this point we know that quota was _not_ turned off.
@@ -2993,12 +3026,19 @@ xlog_recover_dquot_pass2(
error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
"xlog_recover_dquot_pass2 (log copy)");
if (error)
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(dq_f->qlf_len == 1);
+ /*
+ * At this point we are assuming that the dquots have been allocated
+ * and hence the buffer has valid dquots stamped in it. It should,
+ * therefore, pass verifier validation. If the dquot is bad, then
+ * we'll return an error here, so we don't need to specifically check
+ * the dquot in the buffer after the verifier has run.
+ */
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
- NULL);
+ &xfs_dquot_buf_ops);
if (error)
return error;
@@ -3006,18 +3046,6 @@ xlog_recover_dquot_pass2(
ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
/*
- * At least the magic num portion should be on disk because this
- * was among a chunk of dquots created earlier, and we did some
- * minimal initialization then.
- */
- error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
- "xlog_recover_dquot_pass2");
- if (error) {
- xfs_buf_relse(bp);
- return XFS_ERROR(EIO);
- }
-
- /*
* If the dquot has an LSN in it, recover the dquot only if it's less
* than the lsn of the transaction we are replaying.
*/
@@ -3133,7 +3161,7 @@ xlog_recover_efd_pass2(
}
lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
- xfs_trans_ail_cursor_done(ailp, &cur);
+ xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
return 0;
@@ -3164,47 +3192,47 @@ xlog_recover_do_icreate_pass2(
icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
if (icl->icl_type != XFS_LI_ICREATE) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
- return EINVAL;
+ return -EINVAL;
}
if (icl->icl_size != 1) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
- return EINVAL;
+ return -EINVAL;
}
agno = be32_to_cpu(icl->icl_ag);
if (agno >= mp->m_sb.sb_agcount) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
- return EINVAL;
+ return -EINVAL;
}
agbno = be32_to_cpu(icl->icl_agbno);
if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
- return EINVAL;
+ return -EINVAL;
}
isize = be32_to_cpu(icl->icl_isize);
if (isize != mp->m_sb.sb_inodesize) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
- return EINVAL;
+ return -EINVAL;
}
count = be32_to_cpu(icl->icl_count);
if (!count) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
- return EINVAL;
+ return -EINVAL;
}
length = be32_to_cpu(icl->icl_length);
if (!length || length >= mp->m_sb.sb_agblocks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
- return EINVAL;
+ return -EINVAL;
}
/* existing allocation is fixed value */
- ASSERT(count == XFS_IALLOC_INODES(mp));
- ASSERT(length == XFS_IALLOC_BLOCKS(mp));
- if (count != XFS_IALLOC_INODES(mp) ||
- length != XFS_IALLOC_BLOCKS(mp)) {
+ ASSERT(count == mp->m_ialloc_inos);
+ ASSERT(length == mp->m_ialloc_blks);
+ if (count != mp->m_ialloc_inos ||
+ length != mp->m_ialloc_blks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
- return EINVAL;
+ return -EINVAL;
}
/*
@@ -3375,7 +3403,7 @@ xlog_recover_commit_pass1(
xfs_warn(log->l_mp, "%s: invalid item type (%d)",
__func__, ITEM_TYPE(item));
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3411,7 +3439,7 @@ xlog_recover_commit_pass2(
xfs_warn(log->l_mp, "%s: invalid item type (%d)",
__func__, ITEM_TYPE(item));
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3508,8 +3536,7 @@ out:
STATIC int
xlog_recover_unmount_trans(
- struct xlog *log,
- struct xlog_recover *trans)
+ struct xlog *log)
{
/* Do nothing now */
xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
@@ -3547,7 +3574,7 @@ xlog_recover_process_data(
/* check the log format matches our own - else we can't recover */
if (xlog_header_check_recover(log->l_mp, rhead))
- return (XFS_ERROR(EIO));
+ return -EIO;
while ((dp < lp) && num_logops) {
ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
@@ -3558,7 +3585,7 @@ xlog_recover_process_data(
xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
__func__, ohead->oh_clientid);
ASSERT(0);
- return (XFS_ERROR(EIO));
+ return -EIO;
}
tid = be32_to_cpu(ohead->oh_tid);
hash = XLOG_RHASH(tid);
@@ -3572,7 +3599,7 @@ xlog_recover_process_data(
xfs_warn(log->l_mp, "%s: bad length 0x%x",
__func__, be32_to_cpu(ohead->oh_len));
WARN_ON(1);
- return (XFS_ERROR(EIO));
+ return -EIO;
}
flags = ohead->oh_flags & ~XLOG_END_TRANS;
if (flags & XLOG_WAS_CONT_TRANS)
@@ -3583,7 +3610,7 @@ xlog_recover_process_data(
trans, pass);
break;
case XLOG_UNMOUNT_TRANS:
- error = xlog_recover_unmount_trans(log, trans);
+ error = xlog_recover_unmount_trans(log);
break;
case XLOG_WAS_CONT_TRANS:
error = xlog_recover_add_to_cont_trans(log,
@@ -3594,7 +3621,7 @@ xlog_recover_process_data(
xfs_warn(log->l_mp, "%s: bad transaction",
__func__);
ASSERT(0);
- error = XFS_ERROR(EIO);
+ error = -EIO;
break;
case 0:
case XLOG_CONTINUE_TRANS:
@@ -3605,11 +3632,13 @@ xlog_recover_process_data(
xfs_warn(log->l_mp, "%s: bad flag 0x%x",
__func__, flags);
ASSERT(0);
- error = XFS_ERROR(EIO);
+ error = -EIO;
break;
}
- if (error)
+ if (error) {
+ xlog_recover_free_trans(trans);
return error;
+ }
}
dp += be32_to_cpu(ohead->oh_len);
num_logops--;
@@ -3654,7 +3683,7 @@ xlog_recover_process_efi(
*/
set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
xfs_efi_release(efip, efip->efi_format.efi_nextents);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3743,7 +3772,7 @@ xlog_recover_process_efis(
lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
out:
- xfs_trans_ail_cursor_done(ailp, &cur);
+ xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
return error;
}
@@ -3954,7 +3983,7 @@ xlog_unpack_data_crc(
* CRC protection by punting an error back up the stack.
*/
if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
return 0;
@@ -4003,14 +4032,14 @@ xlog_valid_rec_header(
if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely(
(!rhead->h_version ||
(be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
__func__, be32_to_cpu(rhead->h_version));
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* LR body must have data or it wouldn't have been written */
@@ -4018,12 +4047,12 @@ xlog_valid_rec_header(
if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -4066,7 +4095,7 @@ xlog_do_recovery_pass(
*/
hbp = xlog_get_bp(log, 1);
if (!hbp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, tail_blk, 1, hbp, &offset);
if (error)
@@ -4095,11 +4124,11 @@ xlog_do_recovery_pass(
}
if (!hbp)
- return ENOMEM;
+ return -ENOMEM;
dbp = xlog_get_bp(log, BTOBB(h_size));
if (!dbp) {
xlog_put_bp(hbp);
- return ENOMEM;
+ return -ENOMEM;
}
memset(rhash, 0, sizeof(rhash));
@@ -4373,7 +4402,7 @@ xlog_do_recover(
* If IO errors happened during recovery, bail out.
*/
if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
- return (EIO);
+ return -EIO;
}
/*
@@ -4397,7 +4426,13 @@ xlog_do_recover(
XFS_BUF_READ(bp);
XFS_BUF_UNASYNC(bp);
bp->b_ops = &xfs_sb_buf_ops;
- xfsbdstrat(log->l_mp, bp);
+
+ if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
+ xfs_buf_relse(bp);
+ return -EIO;
+ }
+
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp, __func__);
@@ -4471,7 +4506,7 @@ xlog_recover(
"Please recover the log on a kernel that supports the unknown features.",
(log->l_mp->m_sb.sb_features_log_incompat &
XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
- return EINVAL;
+ return -EINVAL;
}
xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 02df7b408a26..fbf0384a466f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -42,6 +42,7 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_dinode.h"
+#include "xfs_sysfs.h"
#ifdef HAVE_PERCPU_SB
@@ -60,6 +61,8 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;
+extern struct kset *xfs_kset;
+
/*
* See if the UUID is unique among mounted XFS filesystems.
* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -76,7 +79,7 @@ xfs_uuid_mount(
if (uuid_is_nil(uuid)) {
xfs_warn(mp, "Filesystem has nil UUID - can't mount");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mutex_lock(&xfs_uuid_table_mutex);
@@ -104,7 +107,7 @@ xfs_uuid_mount(
out_duplicate:
mutex_unlock(&xfs_uuid_table_mutex);
xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
STATIC void
@@ -173,13 +176,9 @@ xfs_sb_validate_fsb_count(
ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
ASSERT(sbp->sb_blocklog >= BBSHIFT);
-#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
+ /* Limited by ULONG_MAX of page cache index */
if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
- return EFBIG;
-#else /* Limited by UINT_MAX of sectors */
- if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
- return EFBIG;
-#endif
+ return -EFBIG;
return 0;
}
@@ -250,9 +249,9 @@ xfs_initialize_perag(
mp->m_flags &= ~XFS_MOUNT_32BITINODES;
if (mp->m_flags & XFS_MOUNT_32BITINODES)
- index = xfs_set_inode32(mp);
+ index = xfs_set_inode32(mp, agcount);
else
- index = xfs_set_inode64(mp);
+ index = xfs_set_inode64(mp, agcount);
if (maxagi)
*maxagi = index;
@@ -282,39 +281,59 @@ xfs_readsb(
struct xfs_sb *sbp = &mp->m_sb;
int error;
int loud = !(flags & XFS_MFSI_QUIET);
+ const struct xfs_buf_ops *buf_ops;
ASSERT(mp->m_sb_bp == NULL);
ASSERT(mp->m_ddev_targp != NULL);
/*
+ * For the initial read, we must guess at the sector
+ * size based on the block device. It's enough to
+ * get the sb_sectsize out of the superblock and
+ * then reread with the proper length.
+ * We don't verify it yet, because it may not be complete.
+ */
+ sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
+ buf_ops = NULL;
+
+ /*
* Allocate a (locked) buffer to hold the superblock.
* This will be kept around at all times to optimize
* access to the superblock.
*/
- sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
-
reread:
bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
- BTOBB(sector_size), 0,
- loud ? &xfs_sb_buf_ops
- : &xfs_sb_quiet_buf_ops);
+ BTOBB(sector_size), 0, buf_ops);
if (!bp) {
if (loud)
xfs_warn(mp, "SB buffer read failed");
- return EIO;
+ return -EIO;
}
if (bp->b_error) {
error = bp->b_error;
if (loud)
xfs_warn(mp, "SB validate failed with error %d.", error);
+ /* bad CRC means corrupted metadata */
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
goto release_buf;
}
/*
* Initialize the mount structure from the superblock.
*/
- xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
- xfs_sb_quota_from_disk(&mp->m_sb);
+ xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
+
+ /*
+ * If we haven't validated the superblock, do so now before we try
+ * to check the sector size and reread the superblock appropriately.
+ */
+ if (sbp->sb_magicnum != XFS_SB_MAGIC) {
+ if (loud)
+ xfs_warn(mp, "Invalid superblock magic number");
+ error = -EINVAL;
+ goto release_buf;
+ }
/*
* We must be able to do sector-sized and sector-aligned IO.
@@ -323,17 +342,18 @@ reread:
if (loud)
xfs_warn(mp, "device supports %u byte sectors (not %u)",
sector_size, sbp->sb_sectsize);
- error = ENOSYS;
+ error = -ENOSYS;
goto release_buf;
}
- /*
- * If device sector size is smaller than the superblock size,
- * re-read the superblock so the buffer is correctly sized.
- */
- if (sector_size < sbp->sb_sectsize) {
+ if (buf_ops == NULL) {
+ /*
+ * Re-read the superblock so the buffer is correctly sized,
+ * and properly verified.
+ */
xfs_buf_relse(bp);
sector_size = sbp->sb_sectsize;
+ buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
goto reread;
}
@@ -370,7 +390,7 @@ xfs_update_alignment(xfs_mount_t *mp)
xfs_warn(mp,
"alignment check failed: sunit/swidth vs. blocksize(%d)",
sbp->sb_blocksize);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else {
/*
* Convert the stripe unit and width to FSBs.
@@ -380,14 +400,14 @@ xfs_update_alignment(xfs_mount_t *mp)
xfs_warn(mp,
"alignment check failed: sunit/swidth vs. agsize(%d)",
sbp->sb_agblocks);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else if (mp->m_dalign) {
mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
xfs_warn(mp,
"alignment check failed: sunit(%d) less than bsize(%d)",
mp->m_dalign, sbp->sb_blocksize);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -407,7 +427,7 @@ xfs_update_alignment(xfs_mount_t *mp)
} else {
xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
xfs_sb_version_hasdalign(&mp->m_sb)) {
@@ -534,14 +554,14 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
xfs_warn(mp, "filesystem size mismatch detected");
- return XFS_ERROR(EFBIG);
+ return -EFBIG;
}
bp = xfs_buf_read_uncached(mp->m_ddev_targp,
d - XFS_FSS_TO_BB(mp, 1),
XFS_FSS_TO_BB(mp, 1), 0, NULL);
if (!bp) {
xfs_warn(mp, "last sector read failed");
- return EIO;
+ return -EIO;
}
xfs_buf_relse(bp);
@@ -549,14 +569,14 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
xfs_warn(mp, "log size mismatch detected");
- return XFS_ERROR(EFBIG);
+ return -EFBIG;
}
bp = xfs_buf_read_uncached(mp->m_logdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
XFS_FSB_TO_BB(mp, 1), 0, NULL);
if (!bp) {
xfs_warn(mp, "log device read failed");
- return EIO;
+ return -EIO;
}
xfs_buf_relse(bp);
}
@@ -686,6 +706,12 @@ xfs_mountfs(
mp->m_update_flags |= XFS_SB_VERSIONNUM;
}
+ /* always use v2 inodes by default now */
+ if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
+ mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
+ mp->m_update_flags |= XFS_SB_VERSIONNUM;
+ }
+
/*
* Check if sb_agblocks is aligned at stripe boundary
* If sb_agblocks is NOT aligned turn off m_dalign since
@@ -703,10 +729,15 @@ xfs_mountfs(
xfs_set_maxicount(mp);
- error = xfs_uuid_mount(mp);
+ mp->m_kobj.kobject.kset = xfs_kset;
+ error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
if (error)
goto out;
+ error = xfs_uuid_mount(mp);
+ if (error)
+ goto out_remove_sysfs;
+
/*
* Set the minimum read and write sizes
*/
@@ -732,8 +763,6 @@ xfs_mountfs(
new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
mp->m_inode_cluster_size = new_size;
- xfs_info(mp, "Using inode cluster size of %d bytes",
- mp->m_inode_cluster_size);
}
/*
@@ -765,12 +794,11 @@ xfs_mountfs(
mp->m_dmevmask = 0; /* not persistent; set after each mount */
- xfs_dir_mount(mp);
-
- /*
- * Initialize the attribute manager's entries.
- */
- mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
+ error = xfs_da_mount(mp);
+ if (error) {
+ xfs_warn(mp, "Failed dir/attr init: %d", error);
+ goto out_remove_uuid;
+ }
/*
* Initialize the precomputed transaction reservations values.
@@ -785,13 +813,13 @@ xfs_mountfs(
error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
if (error) {
xfs_warn(mp, "Failed per-ag init: %d", error);
- goto out_remove_uuid;
+ goto out_free_dir;
}
if (!sbp->sb_logblocks) {
xfs_warn(mp, "no log defined");
XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto out_free_perag;
}
@@ -830,7 +858,7 @@ xfs_mountfs(
!mp->m_sb.sb_inprogress) {
error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
if (error)
- goto out_fail_wait;
+ goto out_log_dealloc;
}
/*
@@ -851,7 +879,7 @@ xfs_mountfs(
xfs_iunlock(rip, XFS_ILOCK_EXCL);
XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto out_rele_rip;
}
mp->m_rootip = rip; /* save it */
@@ -902,7 +930,7 @@ xfs_mountfs(
xfs_notice(mp, "resetting quota flags");
error = xfs_mount_reset_sbqflags(mp);
if (error)
- return error;
+ goto out_rtunmount;
}
}
@@ -960,8 +988,12 @@ xfs_mountfs(
xfs_wait_buftarg(mp->m_ddev_targp);
out_free_perag:
xfs_free_perag(mp);
+ out_free_dir:
+ xfs_da_unmount(mp);
out_remove_uuid:
xfs_uuid_unmount(mp);
+ out_remove_sysfs:
+ xfs_sysfs_del(&mp->m_kobj);
out:
return error;
}
@@ -1037,12 +1069,15 @@ xfs_unmountfs(
"Freespace may not be correct on next mount.");
xfs_log_unmount(mp);
+ xfs_da_unmount(mp);
xfs_uuid_unmount(mp);
#if defined(DEBUG)
xfs_errortag_clearall(mp, 0);
#endif
xfs_free_perag(mp);
+
+ xfs_sysfs_del(&mp->m_kobj);
}
int
@@ -1124,7 +1159,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_icount = lcounter;
return 0;
@@ -1133,7 +1168,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_ifree = lcounter;
return 0;
@@ -1163,7 +1198,7 @@ xfs_mod_incore_sb_unlocked(
* blocks if were allowed to.
*/
if (!rsvd)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
lcounter = (long long)mp->m_resblks_avail + delta;
if (lcounter >= 0) {
@@ -1174,7 +1209,7 @@ xfs_mod_incore_sb_unlocked(
"Filesystem \"%s\": reserve blocks depleted! "
"Consider increasing reserve pool size.",
mp->m_fsname);
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
@@ -1183,7 +1218,7 @@ xfs_mod_incore_sb_unlocked(
lcounter = (long long)mp->m_sb.sb_frextents;
lcounter += delta;
if (lcounter < 0) {
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
mp->m_sb.sb_frextents = lcounter;
return 0;
@@ -1192,7 +1227,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_dblocks = lcounter;
return 0;
@@ -1201,7 +1236,7 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_agcount = scounter;
return 0;
@@ -1210,7 +1245,7 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_imax_pct = scounter;
return 0;
@@ -1219,7 +1254,7 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rextsize = scounter;
return 0;
@@ -1228,7 +1263,7 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rbmblocks = scounter;
return 0;
@@ -1237,7 +1272,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rblocks = lcounter;
return 0;
@@ -1246,7 +1281,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rextents = lcounter;
return 0;
@@ -1255,13 +1290,13 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rextslog = scounter;
return 0;
default:
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -1424,7 +1459,7 @@ xfs_dev_is_read_only(
(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
xfs_notice(mp, "%s required on read-only device.", message);
xfs_notice(mp, "write access unavailable, cannot proceed.");
- return EROFS;
+ return -EROFS;
}
return 0;
}
@@ -1967,7 +2002,7 @@ slow_path:
* (e.g. lots of space just got freed). After that
* we are done.
*/
- if (ret != ENOSPC)
+ if (ret != -ENOSPC)
xfs_icsb_balance_counter(mp, field, 0);
xfs_icsb_unlock(mp);
return ret;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a466c5e5826e..b0447c86e7e2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -27,6 +27,7 @@ struct xfs_nameops;
struct xfs_ail;
struct xfs_quotainfo;
struct xfs_dir_ops;
+struct xfs_da_geometry;
#ifdef HAVE_PERCPU_SB
@@ -96,6 +97,8 @@ typedef struct xfs_mount {
uint m_readio_blocks; /* min read size blocks */
uint m_writeio_log; /* min write size log bytes */
uint m_writeio_blocks; /* min write size blocks */
+ struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
+ struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
struct xlog *m_log; /* log specific stuff */
int m_logbufs; /* number of log buffers */
int m_logbsize; /* size of each log buffer */
@@ -131,8 +134,6 @@ typedef struct xfs_mount {
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_dmevmask; /* DMI events for this FS */
__uint64_t m_flags; /* global mount flags */
- uint m_dir_node_ents; /* #entries in a dir danode */
- uint m_attr_node_ents; /* #entries in attr danode */
int m_ialloc_inos; /* inodes in inode allocation */
int m_ialloc_blks; /* blocks in inode allocation */
int m_inoalign_mask;/* mask sb_inoalignmt if used */
@@ -145,17 +146,10 @@ typedef struct xfs_mount {
int m_dalign; /* stripe unit */
int m_swidth; /* stripe width */
int m_sinoalign; /* stripe unit inode alignment */
- int m_attr_magicpct;/* 37% of the blocksize */
- int m_dir_magicpct; /* 37% of the dir blocksize */
__uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
- int m_dirblksize; /* directory block sz--bytes */
- int m_dirblkfsbs; /* directory block sz--fsbs */
- xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
- xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */
- xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */
uint m_chsize; /* size of next field */
atomic_t m_active_trans; /* number trans frozen */
#ifdef HAVE_PERCPU_SB
@@ -172,6 +166,7 @@ typedef struct xfs_mount {
on the next remount,rw */
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
+ struct xfs_kobj m_kobj;
struct workqueue_struct *m_data_workqueue;
struct workqueue_struct *m_unwritten_workqueue;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 4aff56395732..1eb6f3df698c 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -100,14 +100,20 @@
* likely result in a loop in one of the lists. That's a sure-fire recipe for
* an infinite loop in the code.
*/
-typedef struct xfs_mru_cache_elem
-{
- struct list_head list_node;
- unsigned long key;
- void *value;
-} xfs_mru_cache_elem_t;
+struct xfs_mru_cache {
+ struct radix_tree_root store; /* Core storage data structure. */
+ struct list_head *lists; /* Array of lists, one per grp. */
+ struct list_head reap_list; /* Elements overdue for reaping. */
+ spinlock_t lock; /* Lock to protect this struct. */
+ unsigned int grp_count; /* Number of discrete groups. */
+ unsigned int grp_time; /* Time period spanned by grps. */
+ unsigned int lru_grp; /* Group containing time zero. */
+ unsigned long time_zero; /* Time first element was added. */
+ xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
+ struct delayed_work work; /* Workqueue data for reaping. */
+ unsigned int queued; /* work has been queued */
+};
-static kmem_zone_t *xfs_mru_elem_zone;
static struct workqueue_struct *xfs_mru_reap_wq;
/*
@@ -129,12 +135,12 @@ static struct workqueue_struct *xfs_mru_reap_wq;
*/
STATIC unsigned long
_xfs_mru_cache_migrate(
- xfs_mru_cache_t *mru,
- unsigned long now)
+ struct xfs_mru_cache *mru,
+ unsigned long now)
{
- unsigned int grp;
- unsigned int migrated = 0;
- struct list_head *lru_list;
+ unsigned int grp;
+ unsigned int migrated = 0;
+ struct list_head *lru_list;
/* Nothing to do if the data store is empty. */
if (!mru->time_zero)
@@ -193,11 +199,11 @@ _xfs_mru_cache_migrate(
*/
STATIC void
_xfs_mru_cache_list_insert(
- xfs_mru_cache_t *mru,
- xfs_mru_cache_elem_t *elem)
+ struct xfs_mru_cache *mru,
+ struct xfs_mru_cache_elem *elem)
{
- unsigned int grp = 0;
- unsigned long now = jiffies;
+ unsigned int grp = 0;
+ unsigned long now = jiffies;
/*
* If the data store is empty, initialise time zero, leave grp set to
@@ -231,10 +237,10 @@ _xfs_mru_cache_list_insert(
*/
STATIC void
_xfs_mru_cache_clear_reap_list(
- xfs_mru_cache_t *mru) __releases(mru->lock) __acquires(mru->lock)
-
+ struct xfs_mru_cache *mru)
+ __releases(mru->lock) __acquires(mru->lock)
{
- xfs_mru_cache_elem_t *elem, *next;
+ struct xfs_mru_cache_elem *elem, *next;
struct list_head tmp;
INIT_LIST_HEAD(&tmp);
@@ -252,15 +258,8 @@ _xfs_mru_cache_clear_reap_list(
spin_unlock(&mru->lock);
list_for_each_entry_safe(elem, next, &tmp, list_node) {
-
- /* Remove the element from the reap list. */
list_del_init(&elem->list_node);
-
- /* Call the client's free function with the key and value pointer. */
- mru->free_func(elem->key, elem->value);
-
- /* Free the element structure. */
- kmem_zone_free(xfs_mru_elem_zone, elem);
+ mru->free_func(elem);
}
spin_lock(&mru->lock);
@@ -277,7 +276,8 @@ STATIC void
_xfs_mru_cache_reap(
struct work_struct *work)
{
- xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work);
+ struct xfs_mru_cache *mru =
+ container_of(work, struct xfs_mru_cache, work.work);
unsigned long now, next;
ASSERT(mru && mru->lists);
@@ -304,28 +304,16 @@ _xfs_mru_cache_reap(
int
xfs_mru_cache_init(void)
{
- xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
- "xfs_mru_cache_elem");
- if (!xfs_mru_elem_zone)
- goto out;
-
xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
if (!xfs_mru_reap_wq)
- goto out_destroy_mru_elem_zone;
-
+ return -ENOMEM;
return 0;
-
- out_destroy_mru_elem_zone:
- kmem_zone_destroy(xfs_mru_elem_zone);
- out:
- return -ENOMEM;
}
void
xfs_mru_cache_uninit(void)
{
destroy_workqueue(xfs_mru_reap_wq);
- kmem_zone_destroy(xfs_mru_elem_zone);
}
/*
@@ -336,33 +324,33 @@ xfs_mru_cache_uninit(void)
*/
int
xfs_mru_cache_create(
- xfs_mru_cache_t **mrup,
+ struct xfs_mru_cache **mrup,
unsigned int lifetime_ms,
unsigned int grp_count,
xfs_mru_cache_free_func_t free_func)
{
- xfs_mru_cache_t *mru = NULL;
- int err = 0, grp;
- unsigned int grp_time;
+ struct xfs_mru_cache *mru = NULL;
+ int err = 0, grp;
+ unsigned int grp_time;
if (mrup)
*mrup = NULL;
if (!mrup || !grp_count || !lifetime_ms || !free_func)
- return EINVAL;
+ return -EINVAL;
if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count))
- return EINVAL;
+ return -EINVAL;
if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP)))
- return ENOMEM;
+ return -ENOMEM;
/* An extra list is needed to avoid reaping up to a grp_time early. */
mru->grp_count = grp_count + 1;
mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
if (!mru->lists) {
- err = ENOMEM;
+ err = -ENOMEM;
goto exit;
}
@@ -400,7 +388,7 @@ exit:
*/
static void
xfs_mru_cache_flush(
- xfs_mru_cache_t *mru)
+ struct xfs_mru_cache *mru)
{
if (!mru || !mru->lists)
return;
@@ -420,7 +408,7 @@ xfs_mru_cache_flush(
void
xfs_mru_cache_destroy(
- xfs_mru_cache_t *mru)
+ struct xfs_mru_cache *mru)
{
if (!mru || !mru->lists)
return;
@@ -438,38 +426,30 @@ xfs_mru_cache_destroy(
*/
int
xfs_mru_cache_insert(
- xfs_mru_cache_t *mru,
- unsigned long key,
- void *value)
+ struct xfs_mru_cache *mru,
+ unsigned long key,
+ struct xfs_mru_cache_elem *elem)
{
- xfs_mru_cache_elem_t *elem;
+ int error;
ASSERT(mru && mru->lists);
if (!mru || !mru->lists)
- return EINVAL;
+ return -EINVAL;
- elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP);
- if (!elem)
- return ENOMEM;
-
- if (radix_tree_preload(GFP_KERNEL)) {
- kmem_zone_free(xfs_mru_elem_zone, elem);
- return ENOMEM;
- }
+ if (radix_tree_preload(GFP_KERNEL))
+ return -ENOMEM;
INIT_LIST_HEAD(&elem->list_node);
elem->key = key;
- elem->value = value;
spin_lock(&mru->lock);
-
- radix_tree_insert(&mru->store, key, elem);
+ error = radix_tree_insert(&mru->store, key, elem);
radix_tree_preload_end();
- _xfs_mru_cache_list_insert(mru, elem);
-
+ if (!error)
+ _xfs_mru_cache_list_insert(mru, elem);
spin_unlock(&mru->lock);
- return 0;
+ return error;
}
/*
@@ -478,13 +458,12 @@ xfs_mru_cache_insert(
* the client data pointer for the removed element is returned, otherwise this
* function will return a NULL pointer.
*/
-void *
+struct xfs_mru_cache_elem *
xfs_mru_cache_remove(
- xfs_mru_cache_t *mru,
- unsigned long key)
+ struct xfs_mru_cache *mru,
+ unsigned long key)
{
- xfs_mru_cache_elem_t *elem;
- void *value = NULL;
+ struct xfs_mru_cache_elem *elem;
ASSERT(mru && mru->lists);
if (!mru || !mru->lists)
@@ -492,17 +471,11 @@ xfs_mru_cache_remove(
spin_lock(&mru->lock);
elem = radix_tree_delete(&mru->store, key);
- if (elem) {
- value = elem->value;
+ if (elem)
list_del(&elem->list_node);
- }
-
spin_unlock(&mru->lock);
- if (elem)
- kmem_zone_free(xfs_mru_elem_zone, elem);
-
- return value;
+ return elem;
}
/*
@@ -511,13 +484,14 @@ xfs_mru_cache_remove(
*/
void
xfs_mru_cache_delete(
- xfs_mru_cache_t *mru,
- unsigned long key)
+ struct xfs_mru_cache *mru,
+ unsigned long key)
{
- void *value = xfs_mru_cache_remove(mru, key);
+ struct xfs_mru_cache_elem *elem;
- if (value)
- mru->free_func(key, value);
+ elem = xfs_mru_cache_remove(mru, key);
+ if (elem)
+ mru->free_func(elem);
}
/*
@@ -540,12 +514,12 @@ xfs_mru_cache_delete(
* status, we need to help it get it right by annotating the path that does
* not release the lock.
*/
-void *
+struct xfs_mru_cache_elem *
xfs_mru_cache_lookup(
- xfs_mru_cache_t *mru,
- unsigned long key)
+ struct xfs_mru_cache *mru,
+ unsigned long key)
{
- xfs_mru_cache_elem_t *elem;
+ struct xfs_mru_cache_elem *elem;
ASSERT(mru && mru->lists);
if (!mru || !mru->lists)
@@ -560,7 +534,7 @@ xfs_mru_cache_lookup(
} else
spin_unlock(&mru->lock);
- return elem ? elem->value : NULL;
+ return elem;
}
/*
@@ -570,7 +544,8 @@ xfs_mru_cache_lookup(
*/
void
xfs_mru_cache_done(
- xfs_mru_cache_t *mru) __releases(mru->lock)
+ struct xfs_mru_cache *mru)
+ __releases(mru->lock)
{
spin_unlock(&mru->lock);
}
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 36dd3ec8b4eb..fb5245ba5ff7 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -18,24 +18,15 @@
#ifndef __XFS_MRU_CACHE_H__
#define __XFS_MRU_CACHE_H__
+struct xfs_mru_cache;
-/* Function pointer type for callback to free a client's data pointer. */
-typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*);
+struct xfs_mru_cache_elem {
+ struct list_head list_node;
+ unsigned long key;
+};
-typedef struct xfs_mru_cache
-{
- struct radix_tree_root store; /* Core storage data structure. */
- struct list_head *lists; /* Array of lists, one per grp. */
- struct list_head reap_list; /* Elements overdue for reaping. */
- spinlock_t lock; /* Lock to protect this struct. */
- unsigned int grp_count; /* Number of discrete groups. */
- unsigned int grp_time; /* Time period spanned by grps. */
- unsigned int lru_grp; /* Group containing time zero. */
- unsigned long time_zero; /* Time first element was added. */
- xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
- struct delayed_work work; /* Workqueue data for reaping. */
- unsigned int queued; /* work has been queued */
-} xfs_mru_cache_t;
+/* Function pointer type for callback to free a client's data pointer. */
+typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem);
int xfs_mru_cache_init(void);
void xfs_mru_cache_uninit(void);
@@ -44,10 +35,12 @@ int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
xfs_mru_cache_free_func_t free_func);
void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
- void *value);
-void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
+ struct xfs_mru_cache_elem *elem);
+struct xfs_mru_cache_elem *
+xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
-void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
+struct xfs_mru_cache_elem *
+xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
void xfs_mru_cache_done(struct xfs_mru_cache *mru);
#endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996cfec6..10232102b4a6 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -98,18 +98,18 @@ restart:
next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
error = execute(batch[i], data);
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
skipped++;
continue;
}
- if (error && last_error != EFSCORRUPTED)
+ if (error && last_error != -EFSCORRUPTED)
last_error = error;
}
mutex_unlock(&qi->qi_tree_lock);
/* bail out if the filesystem is corrupted. */
- if (last_error == EFSCORRUPTED) {
+ if (last_error == -EFSCORRUPTED) {
skipped = 0;
break;
}
@@ -134,28 +134,11 @@ xfs_qm_dqpurge(
{
struct xfs_mount *mp = dqp->q_mount;
struct xfs_quotainfo *qi = mp->m_quotainfo;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
xfs_dqlock(dqp);
if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
xfs_dqunlock(dqp);
- return EAGAIN;
- }
-
- /*
- * If this quota has a hint attached, prepare for releasing it now.
- */
- gdqp = dqp->q_gdquot;
- if (gdqp) {
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
-
- pdqp = dqp->q_pdquot;
- if (pdqp) {
- xfs_dqlock(pdqp);
- dqp->q_pdquot = NULL;
+ return -EAGAIN;
}
dqp->dq_flags |= XFS_DQ_FREEING;
@@ -206,11 +189,6 @@ xfs_qm_dqpurge(
XFS_STATS_DEC(xs_qm_dquot_unused);
xfs_qm_dqdestroy(dqp);
-
- if (gdqp)
- xfs_qm_dqput(gdqp);
- if (pdqp)
- xfs_qm_dqput(pdqp);
return 0;
}
@@ -243,100 +221,6 @@ xfs_qm_unmount(
}
}
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo. This is also responsible for
- * running a quotacheck as necessary. We are guaranteed that the superblock
- * is consistently read in at this point.
- *
- * If we fail here, the mount will continue with quota turned off. We don't
- * need to inidicate success or failure at all.
- */
-void
-xfs_qm_mount_quotas(
- xfs_mount_t *mp)
-{
- int error = 0;
- uint sbf;
-
- /*
- * If quotas on realtime volumes is not supported, we disable
- * quotas immediately.
- */
- if (mp->m_sb.sb_rextents) {
- xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
- mp->m_qflags = 0;
- goto write_changes;
- }
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * Allocate the quotainfo structure inside the mount struct, and
- * create quotainode(s), and change/rev superblock if necessary.
- */
- error = xfs_qm_init_quotainfo(mp);
- if (error) {
- /*
- * We must turn off quotas.
- */
- ASSERT(mp->m_quotainfo == NULL);
- mp->m_qflags = 0;
- goto write_changes;
- }
- /*
- * If any of the quotas are not consistent, do a quotacheck.
- */
- if (XFS_QM_NEED_QUOTACHECK(mp)) {
- error = xfs_qm_quotacheck(mp);
- if (error) {
- /* Quotacheck failed and disabled quotas. */
- return;
- }
- }
- /*
- * If one type of quotas is off, then it will lose its
- * quotachecked status, since we won't be doing accounting for
- * that type anymore.
- */
- if (!XFS_IS_UQUOTA_ON(mp))
- mp->m_qflags &= ~XFS_UQUOTA_CHKD;
- if (!XFS_IS_GQUOTA_ON(mp))
- mp->m_qflags &= ~XFS_GQUOTA_CHKD;
- if (!XFS_IS_PQUOTA_ON(mp))
- mp->m_qflags &= ~XFS_PQUOTA_CHKD;
-
- write_changes:
- /*
- * We actually don't have to acquire the m_sb_lock at all.
- * This can only be called from mount, and that's single threaded. XXX
- */
- spin_lock(&mp->m_sb_lock);
- sbf = mp->m_sb.sb_qflags;
- mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
- spin_unlock(&mp->m_sb_lock);
-
- if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
- if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
- /*
- * We could only have been turning quotas off.
- * We aren't in very good shape actually because
- * the incore structures are convinced that quotas are
- * off, but the on disk superblock doesn't know that !
- */
- ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
- xfs_alert(mp, "%s: Superblock update failed!",
- __func__);
- }
- }
-
- if (error) {
- xfs_warn(mp, "Failed to initialize disk quotas.");
- return;
- }
-}
-
/*
* Called from the vfsops layer.
*/
@@ -380,7 +264,6 @@ xfs_qm_dqattach_one(
xfs_dqid_t id,
uint type,
uint doalloc,
- xfs_dquot_t *udqhint, /* hint */
xfs_dquot_t **IO_idqpp)
{
xfs_dquot_t *dqp;
@@ -390,9 +273,9 @@ xfs_qm_dqattach_one(
error = 0;
/*
- * See if we already have it in the inode itself. IO_idqpp is
- * &i_udquot or &i_gdquot. This made the code look weird, but
- * made the logic a lot simpler.
+ * See if we already have it in the inode itself. IO_idqpp is &i_udquot
+ * or &i_gdquot. This made the code look weird, but made the logic a lot
+ * simpler.
*/
dqp = *IO_idqpp;
if (dqp) {
@@ -401,49 +284,10 @@ xfs_qm_dqattach_one(
}
/*
- * udqhint is the i_udquot field in inode, and is non-NULL only
- * when the type arg is group/project. Its purpose is to save a
- * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
- * the user dquot.
- */
- if (udqhint) {
- ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
- xfs_dqlock(udqhint);
-
- /*
- * No need to take dqlock to look at the id.
- *
- * The ID can't change until it gets reclaimed, and it won't
- * be reclaimed as long as we have a ref from inode and we
- * hold the ilock.
- */
- if (type == XFS_DQ_GROUP)
- dqp = udqhint->q_gdquot;
- else
- dqp = udqhint->q_pdquot;
- if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
- ASSERT(*IO_idqpp == NULL);
-
- *IO_idqpp = xfs_qm_dqhold(dqp);
- xfs_dqunlock(udqhint);
- return 0;
- }
-
- /*
- * We can't hold a dquot lock when we call the dqget code.
- * We'll deadlock in no time, because of (not conforming to)
- * lock ordering - the inodelock comes before any dquot lock,
- * and we may drop and reacquire the ilock in xfs_qm_dqget().
- */
- xfs_dqunlock(udqhint);
- }
-
- /*
- * Find the dquot from somewhere. This bumps the
- * reference count of dquot and returns it locked.
- * This can return ENOENT if dquot didn't exist on
- * disk and we didn't ask it to allocate;
- * ESRCH if quotas got turned off suddenly.
+ * Find the dquot from somewhere. This bumps the reference count of
+ * dquot and returns it locked. This can return ENOENT if dquot didn't
+ * exist on disk and we didn't ask it to allocate; ESRCH if quotas got
+ * turned off suddenly.
*/
error = xfs_qm_dqget(ip->i_mount, ip, id, type,
doalloc | XFS_QMOPT_DOWARN, &dqp);
@@ -461,48 +305,6 @@ xfs_qm_dqattach_one(
return 0;
}
-
-/*
- * Given a udquot and group/project type, attach the group/project
- * dquot pointer to the udquot as a hint for future lookups.
- */
-STATIC void
-xfs_qm_dqattach_hint(
- struct xfs_inode *ip,
- int type)
-{
- struct xfs_dquot **dqhintp;
- struct xfs_dquot *dqp;
- struct xfs_dquot *udq = ip->i_udquot;
-
- ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
-
- xfs_dqlock(udq);
-
- if (type == XFS_DQ_GROUP) {
- dqp = ip->i_gdquot;
- dqhintp = &udq->q_gdquot;
- } else {
- dqp = ip->i_pdquot;
- dqhintp = &udq->q_pdquot;
- }
-
- if (*dqhintp) {
- struct xfs_dquot *tmp;
-
- if (*dqhintp == dqp)
- goto done;
-
- tmp = *dqhintp;
- *dqhintp = NULL;
- xfs_qm_dqrele(tmp);
- }
-
- *dqhintp = xfs_qm_dqhold(dqp);
-done:
- xfs_dqunlock(udq);
-}
-
static bool
xfs_qm_need_dqattach(
struct xfs_inode *ip)
@@ -533,7 +335,6 @@ xfs_qm_dqattach_locked(
uint flags)
{
xfs_mount_t *mp = ip->i_mount;
- uint nquotas = 0;
int error = 0;
if (!xfs_qm_need_dqattach(ip))
@@ -541,77 +342,39 @@ xfs_qm_dqattach_locked(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (XFS_IS_UQUOTA_ON(mp)) {
+ if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
flags & XFS_QMOPT_DQALLOC,
- NULL, &ip->i_udquot);
+ &ip->i_udquot);
if (error)
goto done;
- nquotas++;
+ ASSERT(ip->i_udquot);
}
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (XFS_IS_GQUOTA_ON(mp)) {
+ if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
flags & XFS_QMOPT_DQALLOC,
- ip->i_udquot, &ip->i_gdquot);
- /*
- * Don't worry about the udquot that we may have
- * attached above. It'll get detached, if not already.
- */
+ &ip->i_gdquot);
if (error)
goto done;
- nquotas++;
+ ASSERT(ip->i_gdquot);
}
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (XFS_IS_PQUOTA_ON(mp)) {
+ if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
flags & XFS_QMOPT_DQALLOC,
- ip->i_udquot, &ip->i_pdquot);
- /*
- * Don't worry about the udquot that we may have
- * attached above. It'll get detached, if not already.
- */
+ &ip->i_pdquot);
if (error)
goto done;
- nquotas++;
+ ASSERT(ip->i_pdquot);
}
+done:
/*
- * Attach this group/project quota to the user quota as a hint.
- * This WON'T, in general, result in a thrash.
+ * Don't worry about the dquots that we may have attached before any
+ * error - they'll get detached later if it has not already been done.
*/
- if (nquotas > 1 && ip->i_udquot) {
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp));
- ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp));
-
- /*
- * We do not have i_udquot locked at this point, but this check
- * is OK since we don't depend on the i_gdquot to be accurate
- * 100% all the time. It is just a hint, and this will
- * succeed in general.
- */
- if (ip->i_udquot->q_gdquot != ip->i_gdquot)
- xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP);
-
- if (ip->i_udquot->q_pdquot != ip->i_pdquot)
- xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ);
- }
-
- done:
-#ifdef DEBUG
- if (!error) {
- if (XFS_IS_UQUOTA_ON(mp))
- ASSERT(ip->i_udquot);
- if (XFS_IS_GQUOTA_ON(mp))
- ASSERT(ip->i_gdquot);
- if (XFS_IS_PQUOTA_ON(mp))
- ASSERT(ip->i_pdquot);
- }
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
return error;
}
@@ -814,22 +577,17 @@ xfs_qm_init_quotainfo(
qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
- if ((error = list_lru_init(&qinf->qi_lru))) {
- kmem_free(qinf);
- mp->m_quotainfo = NULL;
- return error;
- }
+ error = list_lru_init(&qinf->qi_lru);
+ if (error)
+ goto out_free_qinf;
/*
* See if quotainodes are setup, and if not, allocate them,
* and change the superblock accordingly.
*/
- if ((error = xfs_qm_init_quotainos(mp))) {
- list_lru_destroy(&qinf->qi_lru);
- kmem_free(qinf);
- mp->m_quotainfo = NULL;
- return error;
- }
+ error = xfs_qm_init_quotainos(mp);
+ if (error)
+ goto out_free_lru;
INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
@@ -841,8 +599,7 @@ xfs_qm_init_quotainfo(
/* Precalc some constants */
qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
- qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp,
- qinf->qi_dqchunklen);
+ qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
@@ -889,7 +646,7 @@ xfs_qm_init_quotainfo(
qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-
+
xfs_qm_dqdestroy(dqp);
} else {
qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -906,6 +663,13 @@ xfs_qm_init_quotainfo(
qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
register_shrinker(&qinf->qi_shrinker);
return 0;
+
+out_free_lru:
+ list_lru_destroy(&qinf->qi_lru);
+out_free_qinf:
+ kmem_free(qinf);
+ mp->m_quotainfo = NULL;
+ return error;
}
@@ -1137,7 +901,7 @@ xfs_qm_dqiter_bufs(
* will leave a trace in the log indicating corruption has
* been detected.
*/
- if (error == EFSCORRUPTED) {
+ if (error == -EFSCORRUPTED) {
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, bno),
mp->m_quotainfo->qi_dqchunklen, 0, &bp,
@@ -1147,6 +911,12 @@ xfs_qm_dqiter_bufs(
if (error)
break;
+ /*
+ * A corrupt buffer might not have a verifier attached, so
+ * make sure we have the correct one attached before writeback
+ * occurs.
+ */
+ bp->b_ops = &xfs_dquot_buf_ops;
xfs_qm_reset_dqcounts(mp, bp, firstid, type);
xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
@@ -1193,16 +963,18 @@ xfs_qm_dqiterate(
lblkno = 0;
maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
do {
+ uint lock_mode;
+
nmaps = XFS_DQITER_MAP_SIZE;
/*
* We aren't changing the inode itself. Just changing
* some of its data. No new blocks are added here, and
* the inode is never added to the transaction.
*/
- xfs_ilock(qip, XFS_ILOCK_SHARED);
+ lock_mode = xfs_ilock_data_map_shared(qip);
error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
map, &nmaps, 0);
- xfs_iunlock(qip, XFS_ILOCK_SHARED);
+ xfs_iunlock(qip, lock_mode);
if (error)
break;
@@ -1230,7 +1002,7 @@ xfs_qm_dqiterate(
xfs_buf_readahead(mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, rablkno),
mp->m_quotainfo->qi_dqchunklen,
- NULL);
+ &xfs_dquot_buf_ops);
rablkno++;
}
}
@@ -1278,8 +1050,8 @@ xfs_qm_quotacheck_dqadjust(
/*
* Shouldn't be able to turn off quotas here.
*/
- ASSERT(error != ESRCH);
- ASSERT(error != ENOENT);
+ ASSERT(error != -ESRCH);
+ ASSERT(error != -ENOENT);
return error;
}
@@ -1366,7 +1138,7 @@ xfs_qm_dqusage_adjust(
*/
if (xfs_is_quota_inode(&mp->m_sb, ino)) {
*res = BULKSTAT_RV_NOTHING;
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
@@ -1470,7 +1242,7 @@ out_unlock:
* Walk thru all the filesystem inodes and construct a consistent view
* of the disk quota world. If the quotacheck fails, disable quotas.
*/
-int
+STATIC int
xfs_qm_quotacheck(
xfs_mount_t *mp)
{
@@ -1603,7 +1375,100 @@ xfs_qm_quotacheck(
}
} else
xfs_notice(mp, "Quotacheck: Done.");
- return (error);
+ return error;
+}
+
+/*
+ * This is called from xfs_mountfs to start quotas and initialize all
+ * necessary data structures like quotainfo. This is also responsible for
+ * running a quotacheck as necessary. We are guaranteed that the superblock
+ * is consistently read in at this point.
+ *
+ * If we fail here, the mount will continue with quota turned off. We don't
+ * need to indicate success or failure at all.
+ */
+void
+xfs_qm_mount_quotas(
+ struct xfs_mount *mp)
+{
+ int error = 0;
+ uint sbf;
+
+ /*
+ * If quotas on realtime volumes are not supported, we disable
+ * quotas immediately.
+ */
+ if (mp->m_sb.sb_rextents) {
+ xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
+ mp->m_qflags = 0;
+ goto write_changes;
+ }
+
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+ /*
+ * Allocate the quotainfo structure inside the mount struct, and
+ * create quotainode(s), and change/rev superblock if necessary.
+ */
+ error = xfs_qm_init_quotainfo(mp);
+ if (error) {
+ /*
+ * We must turn off quotas.
+ */
+ ASSERT(mp->m_quotainfo == NULL);
+ mp->m_qflags = 0;
+ goto write_changes;
+ }
+ /*
+ * If any of the quotas are not consistent, do a quotacheck.
+ */
+ if (XFS_QM_NEED_QUOTACHECK(mp)) {
+ error = xfs_qm_quotacheck(mp);
+ if (error) {
+ /* Quotacheck failed and disabled quotas. */
+ return;
+ }
+ }
+ /*
+ * If one type of quotas is off, then it will lose its
+ * quotachecked status, since we won't be doing accounting for
+ * that type anymore.
+ */
+ if (!XFS_IS_UQUOTA_ON(mp))
+ mp->m_qflags &= ~XFS_UQUOTA_CHKD;
+ if (!XFS_IS_GQUOTA_ON(mp))
+ mp->m_qflags &= ~XFS_GQUOTA_CHKD;
+ if (!XFS_IS_PQUOTA_ON(mp))
+ mp->m_qflags &= ~XFS_PQUOTA_CHKD;
+
+ write_changes:
+ /*
+ * We actually don't have to acquire the m_sb_lock at all.
+ * This can only be called from mount, and that's single threaded. XXX
+ */
+ spin_lock(&mp->m_sb_lock);
+ sbf = mp->m_sb.sb_qflags;
+ mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
+ spin_unlock(&mp->m_sb_lock);
+
+ if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
+ if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
+ /*
+ * We could only have been turning quotas off.
+ * We aren't in very good shape actually because
+ * the incore structures are convinced that quotas are
+ * off, but the on disk superblock doesn't know that !
+ */
+ ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+ xfs_alert(mp, "%s: Superblock update failed!",
+ __func__);
+ }
+ }
+
+ if (error) {
+ xfs_warn(mp, "Failed to initialize disk quotas.");
+ return;
+ }
}
/*
@@ -1633,7 +1498,7 @@ xfs_qm_init_quotainos(
error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
0, 0, &uip);
if (error)
- return XFS_ERROR(error);
+ return error;
}
if (XFS_IS_GQUOTA_ON(mp) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
@@ -1703,7 +1568,7 @@ error_rele:
IRELE(gip);
if (pip)
IRELE(pip);
- return XFS_ERROR(error);
+ return error;
}
STATIC void
@@ -1819,7 +1684,7 @@ xfs_qm_vop_dqalloc(
XFS_QMOPT_DOWARN,
&uq);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
return error;
}
/*
@@ -1846,7 +1711,7 @@ xfs_qm_vop_dqalloc(
XFS_QMOPT_DOWARN,
&gq);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
goto error_rele;
}
xfs_dqunlock(gq);
@@ -1866,7 +1731,7 @@ xfs_qm_vop_dqalloc(
XFS_QMOPT_DOWARN,
&pq);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
goto error_rele;
}
xfs_dqunlock(pq);
@@ -2035,7 +1900,7 @@ xfs_qm_vop_chown_reserve(
-((xfs_qcnt_t)delblks), 0, blkflags);
}
- return (0);
+ return 0;
}
int
@@ -2082,24 +1947,21 @@ xfs_qm_vop_create_dqattach(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
- if (udqp) {
+ if (udqp && XFS_IS_UQUOTA_ON(mp)) {
ASSERT(ip->i_udquot == NULL);
- ASSERT(XFS_IS_UQUOTA_ON(mp));
ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
ip->i_udquot = xfs_qm_dqhold(udqp);
xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
- if (gdqp) {
+ if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
ASSERT(ip->i_gdquot == NULL);
- ASSERT(XFS_IS_GQUOTA_ON(mp));
ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
ip->i_gdquot = xfs_qm_dqhold(gdqp);
xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
}
- if (pdqp) {
+ if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
ASSERT(ip->i_pdquot == NULL);
- ASSERT(XFS_IS_PQUOTA_ON(mp));
ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));
ip->i_pdquot = xfs_qm_dqhold(pdqp);
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index a788b66a5cb1..3a07a937e232 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -20,13 +20,29 @@
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
-#include "xfs_quota_priv.h"
struct xfs_inode;
extern struct kmem_zone *xfs_qm_dqtrxzone;
/*
+ * Number of bmaps that we ask from bmapi when doing a quotacheck.
+ * We make this restriction to keep the memory usage to a minimum.
+ */
+#define XFS_DQITER_MAP_SIZE 10
+
+#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
+ !dqp->q_core.d_blk_hardlimit && \
+ !dqp->q_core.d_blk_softlimit && \
+ !dqp->q_core.d_rtb_hardlimit && \
+ !dqp->q_core.d_rtb_softlimit && \
+ !dqp->q_core.d_ino_hardlimit && \
+ !dqp->q_core.d_ino_softlimit && \
+ !dqp->q_core.d_bcount && \
+ !dqp->q_core.d_rtbcount && \
+ !dqp->q_core.d_icount)
+
+/*
* This defines the unit of allocation of dquots.
* Currently, it is just one file system block, and a 4K blk contains 30
* (136 * 30 = 4080) dquots. It's probably not worth trying to make
@@ -141,7 +157,6 @@ struct xfs_dquot_acct {
#define XFS_QM_RTBWARNLIMIT 5
extern void xfs_qm_destroy_quotainfo(struct xfs_mount *);
-extern int xfs_qm_quotacheck(struct xfs_mount *);
extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t);
/* dquot stuff */
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index e9be63abd8d2..2c61e61b0205 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -117,7 +117,7 @@ xfs_qm_newmount(
(uquotaondisk ? " usrquota" : ""),
(gquotaondisk ? " grpquota" : ""),
(pquotaondisk ? " prjquota" : ""));
- return XFS_ERROR(EPERM);
+ return -EPERM;
}
if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 437c9198031a..80f2d77d929a 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -64,10 +64,10 @@ xfs_qm_scall_quotaoff(
/*
* No file system can have quotas enabled on disk but not in core.
* Note that quota utilities (like quotaoff) _expect_
- * errno == EEXIST here.
+ * errno == EEXIST here (i.e. this function must return -EEXIST).
*/
if ((mp->m_qflags & flags) == 0)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
error = 0;
flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
@@ -94,7 +94,7 @@ xfs_qm_scall_quotaoff(
/* XXX what to do if error ? Revert back to old vals incore ? */
error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
- return (error);
+ return error;
}
dqtype = 0;
@@ -198,7 +198,7 @@ xfs_qm_scall_quotaoff(
if (mp->m_qflags == 0) {
mutex_unlock(&q->qi_quotaofflock);
xfs_qm_destroy_quotainfo(mp);
- return (0);
+ return 0;
}
/*
@@ -278,22 +278,29 @@ xfs_qm_scall_trunc_qfiles(
xfs_mount_t *mp,
uint flags)
{
- int error = 0, error2 = 0;
+ int error = -EINVAL;
- if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
+ if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 ||
+ (flags & ~XFS_DQ_ALLTYPES)) {
xfs_debug(mp, "%s: flags=%x m_qflags=%x",
__func__, flags, mp->m_qflags);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
- if (flags & XFS_DQ_USER)
+ if (flags & XFS_DQ_USER) {
error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
- if (flags & XFS_DQ_GROUP)
- error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
+ if (error)
+ return error;
+ }
+ if (flags & XFS_DQ_GROUP) {
+ error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
+ if (error)
+ return error;
+ }
if (flags & XFS_DQ_PROJ)
- error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino);
+ error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino);
- return error ? error : error2;
+ return error;
}
/*
@@ -321,7 +328,7 @@ xfs_qm_scall_quotaon(
if (flags == 0) {
xfs_debug(mp, "%s: zero flags, m_qflags=%x",
__func__, mp->m_qflags);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/* No fs can turn on quotas with a delayed effect */
@@ -344,13 +351,13 @@ xfs_qm_scall_quotaon(
xfs_debug(mp,
"%s: Can't enforce without acct, flags=%x sbflags=%x",
__func__, flags, mp->m_sb.sb_qflags);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
* If everything's up to-date incore, then don't waste time.
*/
if ((mp->m_qflags & flags) == flags)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
/*
* Change sb_qflags on disk but not incore mp->qflags
@@ -365,11 +372,11 @@ xfs_qm_scall_quotaon(
* There's nothing to change if it's the same.
*/
if ((qf & flags) == flags && sbflags == 0)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
sbflags |= XFS_SB_QFLAGS;
if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
- return (error);
+ return error;
/*
* If we aren't trying to switch on quota enforcement, we are done.
*/
@@ -380,10 +387,10 @@ xfs_qm_scall_quotaon(
((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
(mp->m_qflags & XFS_GQUOTA_ACCT)) ||
(flags & XFS_ALL_QUOTA_ENFD) == 0)
- return (0);
+ return 0;
if (! XFS_IS_QUOTA_RUNNING(mp))
- return XFS_ERROR(ESRCH);
+ return -ESRCH;
/*
* Switch on quota enforcement in core.
@@ -392,7 +399,7 @@ xfs_qm_scall_quotaon(
mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
- return (0);
+ return 0;
}
@@ -419,7 +426,7 @@ xfs_qm_scall_getqstat(
if (!xfs_sb_version_hasquota(&mp->m_sb)) {
out->qs_uquota.qfs_ino = NULLFSINO;
out->qs_gquota.qfs_ino = NULLFSINO;
- return (0);
+ return 0;
}
out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
@@ -507,7 +514,7 @@ xfs_qm_scall_getqstatv(
out->qs_uquota.qfs_ino = NULLFSINO;
out->qs_gquota.qfs_ino = NULLFSINO;
out->qs_pquota.qfs_ino = NULLFSINO;
- return (0);
+ return 0;
}
out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
@@ -588,7 +595,7 @@ xfs_qm_scall_setqlim(
xfs_qcnt_t hard, soft;
if (newlim->d_fieldmask & ~XFS_DQ_MASK)
- return EINVAL;
+ return -EINVAL;
if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
return 0;
@@ -608,7 +615,7 @@ xfs_qm_scall_setqlim(
*/
error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
goto out_unlock;
}
xfs_dqunlock(dqp);
@@ -751,7 +758,7 @@ xfs_qm_log_quotaoff_end(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
- return (error);
+ return error;
}
qoffi = xfs_trans_get_qoff_item(tp, startqoff,
@@ -765,7 +772,7 @@ xfs_qm_log_quotaoff_end(
*/
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
- return (error);
+ return error;
}
@@ -815,7 +822,7 @@ error0:
spin_unlock(&mp->m_sb_lock);
}
*qoffstartp = qoffi;
- return (error);
+ return error;
}
@@ -843,7 +850,7 @@ xfs_qm_scall_getquota(
* our utility programs are concerned.
*/
if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
- error = XFS_ERROR(ENOENT);
+ error = -ENOENT;
goto out_put;
}
@@ -946,14 +953,13 @@ xfs_qm_export_flags(
uflags |= FS_QUOTA_GDQ_ENFD;
if (flags & XFS_PQUOTA_ENFD)
uflags |= FS_QUOTA_PDQ_ENFD;
- return (uflags);
+ return uflags;
}
STATIC int
xfs_dqrele_inode(
struct xfs_inode *ip,
- struct xfs_perag *pag,
int flags,
void *args)
{
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
deleted file mode 100644
index 6d86219d93da..000000000000
--- a/fs/xfs/xfs_quota_priv.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_QUOTA_PRIV_H__
-#define __XFS_QUOTA_PRIV_H__
-
-/*
- * Number of bmaps that we ask from bmapi when doing a quotacheck.
- * We make this restriction to keep the memory usage to a minimum.
- */
-#define XFS_DQITER_MAP_SIZE 10
-
-#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
- !dqp->q_core.d_blk_hardlimit && \
- !dqp->q_core.d_blk_softlimit && \
- !dqp->q_core.d_rtb_hardlimit && \
- !dqp->q_core.d_rtb_softlimit && \
- !dqp->q_core.d_ino_hardlimit && \
- !dqp->q_core.d_ino_softlimit && \
- !dqp->q_core.d_bcount && \
- !dqp->q_core.d_rtbcount && \
- !dqp->q_core.d_icount)
-
-#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
- (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
- (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-
-#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index af33cafe69b6..b238027df987 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -51,7 +51,7 @@ xfs_fs_get_xstate(
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- return -xfs_qm_scall_getqstat(mp, fqs);
+ return xfs_qm_scall_getqstat(mp, fqs);
}
STATIC int
@@ -63,7 +63,7 @@ xfs_fs_get_xstatev(
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- return -xfs_qm_scall_getqstatv(mp, fqs);
+ return xfs_qm_scall_getqstatv(mp, fqs);
}
STATIC int
@@ -95,21 +95,41 @@ xfs_fs_set_xstate(
switch (op) {
case Q_XQUOTAON:
- return -xfs_qm_scall_quotaon(mp, flags);
+ return xfs_qm_scall_quotaon(mp, flags);
case Q_XQUOTAOFF:
if (!XFS_IS_QUOTA_ON(mp))
return -EINVAL;
- return -xfs_qm_scall_quotaoff(mp, flags);
- case Q_XQUOTARM:
- if (XFS_IS_QUOTA_ON(mp))
- return -EINVAL;
- return -xfs_qm_scall_trunc_qfiles(mp, flags);
+ return xfs_qm_scall_quotaoff(mp, flags);
}
return -EINVAL;
}
+/*
+ * Quotactl ->rm_xquota handler.
+ *
+ * Translates the FS_*_QUOTA bits in @uflags into the matching XFS_DQ_* bits
+ * and passes them to xfs_qm_scall_trunc_qfiles(), whose result is returned
+ * unchanged.
+ *
+ * Returns -EROFS when the superblock has MS_RDONLY set, and -EINVAL while
+ * XFS_IS_QUOTA_ON(mp) is true.
+ */
STATIC int
+xfs_fs_rm_xquota(
+ struct super_block *sb,
+ unsigned int uflags)
+{
+ struct xfs_mount *mp = XFS_M(sb);
+ unsigned int flags = 0;
+
+ if (sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ if (XFS_IS_QUOTA_ON(mp))
+ return -EINVAL;
+
+ /* Unrecognised bits in uflags are ignored; flags may stay 0. */
+ if (uflags & FS_USER_QUOTA)
+ flags |= XFS_DQ_USER;
+ if (uflags & FS_GROUP_QUOTA)
+ flags |= XFS_DQ_GROUP;
+ if (uflags & FS_PROJ_QUOTA)
+ flags |= XFS_DQ_PROJ;
+
+ return xfs_qm_scall_trunc_qfiles(mp, flags);
+}
+
+STATIC int
xfs_fs_get_dqblk(
struct super_block *sb,
struct kqid qid,
@@ -122,7 +142,7 @@ xfs_fs_get_dqblk(
if (!XFS_IS_QUOTA_ON(mp))
return -ESRCH;
- return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
+ return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
xfs_quota_type(qid.type), fdq);
}
@@ -141,7 +161,7 @@ xfs_fs_set_dqblk(
if (!XFS_IS_QUOTA_ON(mp))
return -ESRCH;
- return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
+ return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
xfs_quota_type(qid.type), fdq);
}
@@ -149,6 +169,7 @@ const struct quotactl_ops xfs_quotactl_operations = {
.get_xstatev = xfs_fs_get_xstatev,
.get_xstate = xfs_fs_get_xstate,
.set_xstate = xfs_fs_set_xstate,
+ .rm_xquota = xfs_fs_rm_xquota,
.get_dqblk = xfs_fs_get_dqblk,
.set_dqblk = xfs_fs_set_dqblk,
};
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a6a76b2b6a85..909e143b87ae 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -842,7 +842,7 @@ xfs_growfs_rt_alloc(
/*
* Reserve space & log for one extent added to the file.
*/
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
resblks, 0);
if (error)
goto error_cancel;
@@ -863,7 +863,7 @@ xfs_growfs_rt_alloc(
XFS_BMAPI_METADATA, &firstblock,
resblks, &map, &nmap, &flist);
if (!error && nmap < 1)
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
if (error)
goto error_cancel;
/*
@@ -903,7 +903,7 @@ xfs_growfs_rt_alloc(
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
mp->m_bsize, 0);
if (bp == NULL) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
error_cancel:
xfs_trans_cancel(tp, cancelflags);
goto error;
@@ -944,9 +944,9 @@ xfs_growfs_rt(
xfs_buf_t *bp; /* temporary buffer */
int error; /* error return value */
xfs_mount_t *nmp; /* new (fake) mount structure */
- xfs_drfsbno_t nrblocks; /* new number of realtime blocks */
+ xfs_rfsblock_t nrblocks; /* new number of realtime blocks */
xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */
- xfs_drtbno_t nrextents; /* new number of realtime extents */
+ xfs_rtblock_t nrextents; /* new number of realtime extents */
uint8_t nrextslog; /* new log2 of sb_rextents */
xfs_extlen_t nrsumblocks; /* new number of summary blocks */
uint nrsumlevels; /* new rt summary levels */
@@ -962,11 +962,11 @@ xfs_growfs_rt(
* Initial error checking.
*/
if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
+ return -EPERM;
if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
(nrblocks = in->newblocks) <= sbp->sb_rblocks ||
(sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
return error;
/*
@@ -976,7 +976,7 @@ xfs_growfs_rt(
XFS_FSB_TO_BB(mp, nrblocks - 1),
XFS_FSB_TO_BB(mp, 1), 0, NULL);
if (!bp)
- return EIO;
+ return -EIO;
if (bp->b_error) {
error = bp->b_error;
xfs_buf_relse(bp);
@@ -1001,7 +1001,7 @@ xfs_growfs_rt(
* since we'll log basically the whole summary file at once.
*/
if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Get the old block counts for bitmap and summary inodes.
* These can't change since other growfs callers are locked out.
@@ -1208,7 +1208,7 @@ xfs_rtallocate_extent(
len, &sumbp, &sb, prod, &r);
break;
default:
- error = EIO;
+ error = -EIO;
ASSERT(0);
}
if (error)
@@ -1247,7 +1247,7 @@ xfs_rtmount_init(
if (mp->m_rtdev_targp == NULL) {
xfs_warn(mp,
"Filesystem has a realtime volume, use rtdev=device option");
- return XFS_ERROR(ENODEV);
+ return -ENODEV;
}
mp->m_rsumlevels = sbp->sb_rextslog + 1;
mp->m_rsumsize =
@@ -1263,7 +1263,7 @@ xfs_rtmount_init(
xfs_warn(mp, "realtime mount -- %llu != %llu",
(unsigned long long) XFS_BB_TO_FSB(mp, d),
(unsigned long long) mp->m_sb.sb_rblocks);
- return XFS_ERROR(EFBIG);
+ return -EFBIG;
}
bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
@@ -1272,7 +1272,7 @@ xfs_rtmount_init(
xfs_warn(mp, "realtime device size check failed");
if (bp)
xfs_buf_relse(bp);
- return EIO;
+ return -EIO;
}
xfs_buf_relse(bp);
return 0;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 752b63d10300..c642795324af 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -132,7 +132,7 @@ xfs_rtmount_init(
return 0;
xfs_warn(mp, "Not built with CONFIG_XFS_RT");
- return ENOSYS;
+ return -ENOSYS;
}
-# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
+/*
+ * Stub: succeeds when sb_rblocks is 0, otherwise fails with a negative
+ * errno, matching the -ENOSYS returned by the xfs_rtmount_init() stub.
+ * NOTE(review): expands to (mp), not (m); callers need "mp" in scope.
+ */
+# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (-ENOSYS))
# define xfs_rtunmount_inodes(m)
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index ce372b7d5644..f2240383d4bb 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
{ "abtc2", XFSSTAT_END_ABTC_V2 },
{ "bmbt2", XFSSTAT_END_BMBT_V2 },
{ "ibt2", XFSSTAT_END_IBT_V2 },
+ { "fibt2", XFSSTAT_END_FIBT_V2 },
/* we print both series of quota information together */
{ "qm", XFSSTAT_END_QM },
};
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index c03ad38ceaeb..c8f238b8299a 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -183,7 +183,23 @@ struct xfsstats {
__uint32_t xs_ibt_2_alloc;
__uint32_t xs_ibt_2_free;
__uint32_t xs_ibt_2_moves;
-#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6)
+#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15)
+ __uint32_t xs_fibt_2_lookup;
+ __uint32_t xs_fibt_2_compare;
+ __uint32_t xs_fibt_2_insrec;
+ __uint32_t xs_fibt_2_delrec;
+ __uint32_t xs_fibt_2_newroot;
+ __uint32_t xs_fibt_2_killroot;
+ __uint32_t xs_fibt_2_increment;
+ __uint32_t xs_fibt_2_decrement;
+ __uint32_t xs_fibt_2_lshift;
+ __uint32_t xs_fibt_2_rshift;
+ __uint32_t xs_fibt_2_split;
+ __uint32_t xs_fibt_2_join;
+ __uint32_t xs_fibt_2_alloc;
+ __uint32_t xs_fibt_2_free;
+ __uint32_t xs_fibt_2_moves;
+#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6)
__uint32_t xs_qm_dqreclaims;
__uint32_t xs_qm_dqreclaim_misses;
__uint32_t xs_qm_dquot_dups;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f317488263dd..b194652033cd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -61,6 +61,7 @@
static const struct super_operations xfs_super_operations;
static kmem_zone_t *xfs_ioend_zone;
mempool_t *xfs_ioend_pool;
+struct kset *xfs_kset;
#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
@@ -185,7 +186,7 @@ xfs_parseargs(
*/
mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
if (!mp->m_fsname)
- return ENOMEM;
+ return -ENOMEM;
mp->m_fsname_len = strlen(mp->m_fsname) + 1;
/*
@@ -204,9 +205,6 @@ xfs_parseargs(
*/
mp->m_flags |= XFS_MOUNT_BARRIER;
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-#if !XFS_BIG_INUMS
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-#endif
/*
* These can be overridden by the mount option parsing.
@@ -227,57 +225,57 @@ xfs_parseargs(
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (kstrtoint(value, 10, &mp->m_logbufs))
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
if (!mp->m_logname)
- return ENOMEM;
+ return -ENOMEM;
} else if (!strcmp(this_char, MNTOPT_MTPT)) {
xfs_warn(mp, "%s option not allowed on this system",
this_char);
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
if (!mp->m_rtname)
- return ENOMEM;
+ return -ENOMEM;
} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (kstrtoint(value, 10, &iosize))
- return EINVAL;
+ return -EINVAL;
iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (suffix_kstrtoint(value, 10, &iosize))
- return EINVAL;
+ return -EINVAL;
iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_GRPID) ||
!strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -297,27 +295,22 @@ xfs_parseargs(
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (kstrtoint(value, 10, &dsunit))
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (kstrtoint(value, 10, &dswidth))
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-#if !XFS_BIG_INUMS
- xfs_warn(mp, "%s option not allowed on this system",
- this_char);
- return EINVAL;
-#endif
} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
mp->m_flags |= XFS_MOUNT_NOUUID;
} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
@@ -390,7 +383,7 @@ xfs_parseargs(
"irixsgid is now a sysctl(2) variable, option is deprecated.");
} else {
xfs_warn(mp, "unknown mount option [%s].", this_char);
- return EINVAL;
+ return -EINVAL;
}
}
@@ -400,32 +393,32 @@ xfs_parseargs(
if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
!(mp->m_flags & XFS_MOUNT_RDONLY)) {
xfs_warn(mp, "no-recovery mounts must be read-only.");
- return EINVAL;
+ return -EINVAL;
}
if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
xfs_warn(mp,
"sunit and swidth options incompatible with the noalign option");
- return EINVAL;
+ return -EINVAL;
}
#ifndef CONFIG_XFS_QUOTA
if (XFS_IS_QUOTA_RUNNING(mp)) {
xfs_warn(mp, "quota support not available in this kernel.");
- return EINVAL;
+ return -EINVAL;
}
#endif
if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
xfs_warn(mp, "sunit and swidth must be specified together");
- return EINVAL;
+ return -EINVAL;
}
if (dsunit && (dswidth % dsunit != 0)) {
xfs_warn(mp,
"stripe width (%d) must be a multiple of the stripe unit (%d)",
dswidth, dsunit);
- return EINVAL;
+ return -EINVAL;
}
done:
@@ -446,7 +439,7 @@ done:
mp->m_logbufs > XLOG_MAX_ICLOGS)) {
xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
if (mp->m_logbsize != -1 &&
mp->m_logbsize != 0 &&
@@ -456,7 +449,7 @@ done:
xfs_warn(mp,
"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
mp->m_logbsize);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
if (iosizelog) {
@@ -465,7 +458,7 @@ done:
xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
iosizelog, XFS_MIN_IO_LOG,
XFS_MAX_IO_LOG);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
@@ -597,15 +590,20 @@ xfs_max_file_offset(
return (((__uint64_t)pagefactor) << bitshift) - 1;
}
+/*
+ * xfs_set_inode32() and xfs_set_inode64() are passed an agcount
+ * because in the growfs case, mp->m_sb.sb_agcount is not updated
+ * yet to the potentially higher ag count.
+ */
xfs_agnumber_t
-xfs_set_inode32(struct xfs_mount *mp)
+xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
{
xfs_agnumber_t index = 0;
xfs_agnumber_t maxagi = 0;
xfs_sb_t *sbp = &mp->m_sb;
xfs_agnumber_t max_metadata;
- xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
- xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
+ xfs_agino_t agino;
+ xfs_ino_t ino;
xfs_perag_t *pag;
/* Calculate how much should be reserved for inodes to meet
@@ -620,10 +618,12 @@ xfs_set_inode32(struct xfs_mount *mp)
do_div(icount, sbp->sb_agblocks);
max_metadata = icount;
} else {
- max_metadata = sbp->sb_agcount;
+ max_metadata = agcount;
}
- for (index = 0; index < sbp->sb_agcount; index++) {
+ agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+
+ for (index = 0; index < agcount; index++) {
ino = XFS_AGINO_TO_INO(mp, index, agino);
if (ino > XFS_MAXINUMBER_32) {
@@ -648,11 +648,11 @@ xfs_set_inode32(struct xfs_mount *mp)
}
xfs_agnumber_t
-xfs_set_inode64(struct xfs_mount *mp)
+xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount)
{
xfs_agnumber_t index = 0;
- for (index = 0; index < mp->m_sb.sb_agcount; index++) {
+ for (index = 0; index < agcount; index++) {
struct xfs_perag *pag;
pag = xfs_perag_get(mp, index);
@@ -686,7 +686,7 @@ xfs_blkdev_get(
xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
}
- return -error;
+ return error;
}
STATIC void
@@ -756,7 +756,7 @@ xfs_open_devices(
if (rtdev == ddev || rtdev == logdev) {
xfs_warn(mp,
"Cannot mount filesystem with identical rtdev and ddev/logdev.");
- error = EINVAL;
+ error = -EINVAL;
goto out_close_rtdev;
}
}
@@ -764,21 +764,19 @@ xfs_open_devices(
/*
* Setup xfs_mount buffer target pointers
*/
- error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
+ error = -ENOMEM;
+ mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
- mp->m_fsname);
+ mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
if (!mp->m_rtdev_targp)
goto out_free_ddev_targ;
}
if (logdev && logdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
- mp->m_fsname);
+ mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
if (!mp->m_logdev_targp)
goto out_free_rtdev_targ;
} else {
@@ -811,8 +809,7 @@ xfs_setup_devices(
{
int error;
- error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
- mp->m_sb.sb_sectsize);
+ error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
if (error)
return error;
@@ -822,14 +819,12 @@ xfs_setup_devices(
if (xfs_sb_version_hassector(&mp->m_sb))
log_sector_size = mp->m_sb.sb_logsectsize;
error = xfs_setsize_buftarg(mp->m_logdev_targp,
- mp->m_sb.sb_blocksize,
log_sector_size);
if (error)
return error;
}
if (mp->m_rtdev_targp) {
error = xfs_setsize_buftarg(mp->m_rtdev_targp,
- mp->m_sb.sb_blocksize,
mp->m_sb.sb_sectsize);
if (error)
return error;
@@ -913,7 +908,7 @@ xfs_flush_inodes(
struct super_block *sb = mp->m_super;
if (down_read_trylock(&sb->s_umount)) {
- sync_inodes_sb(sb, jiffies);
+ sync_inodes_sb(sb);
up_read(&sb->s_umount);
}
}
@@ -996,7 +991,7 @@ xfs_fs_evict_inode(
trace_xfs_evict_inode(ip);
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
XFS_STATS_INC(vn_rele);
XFS_STATS_INC(vn_remove);
@@ -1193,10 +1188,12 @@ xfs_fs_remount(
char *options)
{
struct xfs_mount *mp = XFS_M(sb);
+ xfs_sb_t *sbp = &mp->m_sb;
substring_t args[MAX_OPT_ARGS];
char *p;
int error;
+ sync_filesystem(sb);
while ((p = strsep(&options, ",")) != NULL) {
int token;
@@ -1212,10 +1209,10 @@ xfs_fs_remount(
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
case Opt_inode64:
- mp->m_maxagi = xfs_set_inode64(mp);
+ mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount);
break;
case Opt_inode32:
- mp->m_maxagi = xfs_set_inode32(mp);
+ mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount);
break;
default:
/*
@@ -1299,7 +1296,7 @@ xfs_fs_freeze(
xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
- return -xfs_fs_log_dummy(mp);
+ return xfs_fs_log_dummy(mp);
}
STATIC int
@@ -1318,7 +1315,7 @@ xfs_fs_show_options(
struct seq_file *m,
struct dentry *root)
{
- return -xfs_showargs(XFS_M(root->d_sb), m);
+ return xfs_showargs(XFS_M(root->d_sb), m);
}
/*
@@ -1340,14 +1337,14 @@ xfs_finish_flags(
mp->m_logbsize < mp->m_sb.sb_logsunit) {
xfs_warn(mp,
"logbuf size must be greater than or equal to log stripe size");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
} else {
/* Fail a mount if the logbuf is larger than 32K */
if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
xfs_warn(mp,
"logbuf size for version 1 logs must be 16K or 32K");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -1359,7 +1356,7 @@ xfs_finish_flags(
xfs_warn(mp,
"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
MNTOPT_NOATTR2, MNTOPT_ATTR2);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
@@ -1376,7 +1373,7 @@ xfs_finish_flags(
if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
xfs_warn(mp,
"cannot mount a read-only filesystem as read-write");
- return XFS_ERROR(EROFS);
+ return -EROFS;
}
if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
@@ -1384,7 +1381,7 @@ xfs_finish_flags(
!xfs_sb_version_has_pquotino(&mp->m_sb)) {
xfs_warn(mp,
"Super block does not support project and group quota together");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
return 0;
@@ -1398,7 +1395,7 @@ xfs_fs_fill_super(
{
struct inode *root;
struct xfs_mount *mp = NULL;
- int flags = 0, error = ENOMEM;
+ int flags = 0, error = -ENOMEM;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
@@ -1478,12 +1475,12 @@ xfs_fs_fill_super(
root = igrab(VFS_I(mp->m_rootip));
if (!root) {
- error = ENOENT;
+ error = -ENOENT;
goto out_unmount;
}
sb->s_root = d_make_root(root);
if (!sb->s_root) {
- error = ENOMEM;
+ error = -ENOMEM;
goto out_unmount;
}
@@ -1503,7 +1500,7 @@ out_destroy_workqueues:
xfs_free_fsname(mp);
kfree(mp);
out:
- return -error;
+ return error;
out_unmount:
xfs_filestream_unmount(mp);
@@ -1753,13 +1750,9 @@ init_xfs_fs(void)
if (error)
goto out_destroy_wq;
- error = xfs_filestream_init();
- if (error)
- goto out_mru_cache_uninit;
-
error = xfs_buf_init();
if (error)
- goto out_filestream_uninit;
+ goto out_mru_cache_uninit;
error = xfs_init_procfs();
if (error)
@@ -1769,9 +1762,15 @@ init_xfs_fs(void)
if (error)
goto out_cleanup_procfs;
+	/* Create the "xfs" kset under fs_kobj; unwind on failure. */
+	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
+	if (!xfs_kset) {
+		error = -ENOMEM;
+		goto out_sysctl_unregister;
+	}
+
error = xfs_qm_init();
if (error)
- goto out_sysctl_unregister;
+ goto out_kset_unregister;
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1780,14 +1779,14 @@ init_xfs_fs(void)
out_qm_exit:
xfs_qm_exit();
+ out_kset_unregister:
+ kset_unregister(xfs_kset);
out_sysctl_unregister:
xfs_sysctl_unregister();
out_cleanup_procfs:
xfs_cleanup_procfs();
out_buf_terminate:
xfs_buf_terminate();
- out_filestream_uninit:
- xfs_filestream_uninit();
out_mru_cache_uninit:
xfs_mru_cache_uninit();
out_destroy_wq:
@@ -1803,10 +1802,10 @@ exit_xfs_fs(void)
{
xfs_qm_exit();
unregister_filesystem(&xfs_fs_type);
+ kset_unregister(xfs_kset);
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
- xfs_filestream_uninit();
xfs_mru_cache_uninit();
xfs_destroy_workqueues();
xfs_destroy_zones();
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index bbe3d15a7904..2b830c2f322e 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -44,16 +44,6 @@ extern void xfs_qm_exit(void);
# define XFS_REALTIME_STRING
#endif
-#if XFS_BIG_BLKNOS
-# if XFS_BIG_INUMS
-# define XFS_BIGFS_STRING "large block/inode numbers, "
-# else
-# define XFS_BIGFS_STRING "large block numbers, "
-# endif
-#else
-# define XFS_BIGFS_STRING
-#endif
-
#ifdef DEBUG
# define XFS_DBG_STRING "debug"
#else
@@ -64,7 +54,6 @@ extern void xfs_qm_exit(void);
#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
XFS_SECURITY_STRING \
XFS_REALTIME_STRING \
- XFS_BIGFS_STRING \
XFS_DBG_STRING /* DBG must be last */
struct xfs_inode;
@@ -76,8 +65,8 @@ extern __uint64_t xfs_max_file_offset(unsigned int);
extern void xfs_flush_inodes(struct xfs_mount *mp);
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
-extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
+extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *, xfs_agnumber_t agcount);
+extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *, xfs_agnumber_t agcount);
extern const struct export_operations xfs_export_operations;
extern const struct xattr_handler *xfs_xattr_handlers[];
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 14e58f2c96bd..6a944a2cd36f 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -27,6 +27,7 @@
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
@@ -75,11 +76,15 @@ xfs_readlink_bmap(
bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
&xfs_symlink_buf_ops);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
error = bp->b_error;
if (error) {
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp);
+
+ /* bad CRC means corrupted metadata */
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
goto out;
}
byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -88,9 +93,9 @@ xfs_readlink_bmap(
cur_chunk = bp->b_addr;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
- if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset,
+ if (!xfs_symlink_hdr_ok(ip->i_ino, offset,
byte_cnt, bp)) {
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
xfs_alert(mp,
"symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
offset, byte_cnt, ip->i_ino);
@@ -130,7 +135,7 @@ xfs_readlink(
trace_xfs_readlink(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -143,7 +148,7 @@ xfs_readlink(
__func__, (unsigned long long) ip->i_ino,
(long long) pathlen);
ASSERT(0);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto out;
}
@@ -198,20 +203,17 @@ xfs_symlink(
trace_xfs_symlink(dp, link_name);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
/*
* Check component lengths of the target path name.
*/
pathlen = strlen(target_path);
if (pathlen >= MAXPATHLEN) /* total string too long */
- return XFS_ERROR(ENAMETOOLONG);
+ return -ENAMETOOLONG;
udqp = gdqp = NULL;
- if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
- prid = xfs_get_projid(dp);
- else
- prid = XFS_PROJID_DEFAULT;
+ prid = xfs_get_initial_prid(dp);
/*
* Make sure that we have allocated dquot(s) on disk.
@@ -236,7 +238,7 @@ xfs_symlink(
fs_blocks = xfs_symlink_blocks(mp, pathlen);
resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0);
- if (error == ENOSPC && fs_blocks == 0) {
+ if (error == -ENOSPC && fs_blocks == 0) {
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
}
@@ -252,7 +254,7 @@ xfs_symlink(
* Check whether the directory allows new symlinks or not.
*/
if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
- error = XFS_ERROR(EPERM);
+ error = -EPERM;
goto error_return;
}
@@ -282,7 +284,7 @@ xfs_symlink(
error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
prid, resblks > 0, &ip, NULL);
if (error) {
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto error_return;
goto error1;
}
@@ -346,7 +348,7 @@ xfs_symlink(
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
BTOBB(byte_cnt), 0);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error2;
}
bp->b_ops = &xfs_symlink_buf_ops;
@@ -487,7 +489,7 @@ xfs_inactive_symlink_rmt(
XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error_bmap_cancel;
}
xfs_trans_binval(tp, bp);
@@ -560,7 +562,7 @@ xfs_inactive_symlink(
trace_xfs_inactive_symlink(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -578,7 +580,7 @@ xfs_inactive_symlink(
__func__, (unsigned long long)ip->i_ino, pathlen);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
ASSERT(0);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (ip->i_df.if_flags & XFS_IFINLINE) {
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
new file mode 100644
index 000000000000..9835139ce1ec
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xfs.h"
+#include "xfs_sysfs.h"
+#include "xfs_log_format.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+
+/*
+ * An XFS sysfs attribute: a generic struct attribute plus show/store
+ * callbacks. Either callback may be NULL; the dispatchers in this file
+ * check before calling. @data is the opaque object pointer handed in by
+ * the dispatching sysfs_ops.
+ */
+struct xfs_sysfs_attr {
+ struct attribute attr;
+ ssize_t (*show)(char *buf, void *data);
+ ssize_t (*store)(const char *buf, size_t count, void *data);
+};
+
+/*
+ * Map an embedded struct attribute back to the struct xfs_sysfs_attr that
+ * contains it. @attr must be the "attr" member of an xfs_sysfs_attr.
+ */
+static inline struct xfs_sysfs_attr *
+to_attr(struct attribute *attr)
+{
+ return container_of(attr, struct xfs_sysfs_attr, attr);
+}
+
+/*
+ * Define a static struct xfs_sysfs_attr named xfs_sysfs_attr_<name>,
+ * initialised through __ATTR_RW()/__ATTR_RO().
+ * NOTE(review): presumably these bind to <name>_show (and <name>_store for
+ * RW), as the *_show functions in this file are named; confirm against the
+ * __ATTR_* definitions.
+ */
+#define XFS_SYSFS_ATTR_RW(name) \
+ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
+#define XFS_SYSFS_ATTR_RO(name) \
+ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
+
+/* Address of the struct attribute embedded in xfs_sysfs_attr_<name>. */
+#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
+
+/*
+ * xfs_mount kobject. This currently has no attributes and thus no need for show
+ * and store helpers. The mp kobject serves as the per-mount parent object that
+ * is identified by the fsname under sysfs.
+ *
+ * The only callback set is ->release, which is xfs_sysfs_release().
+ */
+
+struct kobj_type xfs_mp_ktype = {
+ .release = xfs_sysfs_release,
+};
+
+/* xlog */
+
+/*
+ * Show handler for the "log_head_lsn" attribute.
+ *
+ * @data is the struct xlog. Prints l_curr_cycle and l_curr_block as
+ * "cycle:block\n" into @buf (bounded by PAGE_SIZE) and returns the
+ * snprintf() result.
+ */
+STATIC ssize_t
+log_head_lsn_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int block;
+
+ /* Read both fields inside one l_icloglock critical section. */
+ spin_lock(&log->l_icloglock);
+ cycle = log->l_curr_cycle;
+ block = log->l_curr_block;
+ spin_unlock(&log->l_icloglock);
+
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+}
+XFS_SYSFS_ATTR_RO(log_head_lsn);
+
+/*
+ * Show handler for the "log_tail_lsn" attribute.
+ *
+ * @data is the struct xlog. xlog_crack_atomic_lsn() fills in cycle and
+ * block from l_tail_lsn; they are printed as "cycle:block\n" into @buf
+ * (bounded by PAGE_SIZE). Returns the snprintf() result.
+ */
+STATIC ssize_t
+log_tail_lsn_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int block;
+
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+}
+XFS_SYSFS_ATTR_RO(log_tail_lsn);
+
+/*
+ * Show handler for the "reserve_grant_head" attribute.
+ *
+ * @data is the struct xlog. xlog_crack_grant_head() fills in cycle and
+ * bytes from l_reserve_head.grant; they are printed as "cycle:bytes\n"
+ * into @buf (bounded by PAGE_SIZE). Returns the snprintf() result.
+ */
+STATIC ssize_t
+reserve_grant_head_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int bytes;
+
+ xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+}
+XFS_SYSFS_ATTR_RO(reserve_grant_head);
+
+/*
+ * Show handler for the "write_grant_head" attribute.
+ *
+ * @data is the struct xlog. xlog_crack_grant_head() fills in cycle and
+ * bytes from l_write_head.grant; they are printed as "cycle:bytes\n"
+ * into @buf (bounded by PAGE_SIZE). Returns the snprintf() result.
+ */
+STATIC ssize_t
+write_grant_head_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int bytes;
+
+ xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+}
+XFS_SYSFS_ATTR_RO(write_grant_head);
+
+/*
+ * NULL-terminated list of the xlog attributes defined above; installed as
+ * the default_attrs of xfs_log_ktype.
+ */
+static struct attribute *xfs_log_attrs[] = {
+ ATTR_LIST(log_head_lsn),
+ ATTR_LIST(log_tail_lsn),
+ ATTR_LIST(reserve_grant_head),
+ ATTR_LIST(write_grant_head),
+ NULL,
+};
+
+/*
+ * Map a struct kobject to its struct xlog: first to the enclosing
+ * struct xfs_kobj via to_kobj(), then to the xlog whose l_kobj member
+ * that is.
+ */
+static inline struct xlog *
+to_xlog(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+ return container_of(kobj, struct xlog, l_kobj);
+}
+
+/*
+ * sysfs ->show dispatcher for xlog attributes.
+ *
+ * Resolves @kobject to its xlog and @attr to its xfs_sysfs_attr, then calls
+ * the attribute's show callback with the xlog as the data pointer.
+ * Returns the callback's result, or 0 when the attribute has no show
+ * callback.
+ */
+STATIC ssize_t
+xfs_log_show(
+ struct kobject *kobject,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xlog *log = to_xlog(kobject);
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->show ? xfs_attr->show(buf, log) : 0;
+}
+
+/*
+ * sysfs ->store dispatcher for xlog attributes.
+ *
+ * Resolves @kobject to its xlog and @attr to its xfs_sysfs_attr, then calls
+ * the attribute's store callback with @buf, @count and the xlog as the data
+ * pointer. Returns the callback's result, or 0 when the attribute has no
+ * store callback.
+ */
+STATIC ssize_t
+xfs_log_store(
+ struct kobject *kobject,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xlog *log = to_xlog(kobject);
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0;
+}
+
+/*
+ * sysfs_ops for the xlog kobject: routes reads and writes to xfs_log_show()
+ * and xfs_log_store(). The table is never modified after initialisation and
+ * struct kobj_type refers to it through a const pointer, so it is const.
+ */
+static const struct sysfs_ops xfs_log_ops = {
+	.show = xfs_log_show,
+	.store = xfs_log_store,
+};
+
+/*
+ * kobj_type for the xlog kobject: released through xfs_sysfs_release(),
+ * dispatched through xfs_log_ops, and populated with the attributes listed
+ * in xfs_log_attrs.
+ */
+struct kobj_type xfs_log_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_log_ops,
+ .default_attrs = xfs_log_attrs,
+};
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
new file mode 100644
index 000000000000..54a2091183c0
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __XFS_SYSFS_H__
+#define __XFS_SYSFS_H__
+
+extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
+extern struct kobj_type xfs_log_ktype; /* xlog */
+
+/* Map an embedded struct kobject back to its containing struct xfs_kobj. */
+static inline struct xfs_kobj *to_kobj(struct kobject *kobject)
+{
+	return container_of(kobject, struct xfs_kobj, kobject);
+}
+
+/* kobject release callback: signal the completion xfs_sysfs_del() waits on. */
+static inline void
+xfs_sysfs_release(struct kobject *kobject)
+{
+	complete(&to_kobj(kobject)->complete);
+}
+
+/* Init kobj->complete, then add kobj as @name; parent_kobj may be NULL. */
+static inline int xfs_sysfs_init(
+	struct xfs_kobj		*kobj,
+	struct kobj_type	*ktype,
+	struct xfs_kobj		*parent_kobj,
+	const char		*name)
+{
+	init_completion(&kobj->complete);
+	return kobject_init_and_add(&kobj->kobject, ktype,
+			parent_kobj ? &parent_kobj->kobject : NULL, "%s", name);
+}
+
+/* kobject_del() + kobject_put(), then block until ->complete is signalled. */
+static inline void
+xfs_sysfs_del(struct xfs_kobj *kobj)
+{
+	kobject_del(&kobj->kobject);
+	kobject_put(&kobj->kobject);
+	wait_for_completion(&kobj->complete);
+}
+
+#endif /* __XFS_SYSFS_H__ */
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index dee3279c095e..1e85bcd0e418 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -46,6 +46,7 @@
#include "xfs_log_recover.h"
#include "xfs_inode_item.h"
#include "xfs_bmap_btree.h"
+#include "xfs_filestream.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 425dfa45b9a0..152f82782630 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -538,6 +538,64 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered);
+DECLARE_EVENT_CLASS(xfs_filestream_class, /* shared layout for events taking (ip, agno) */
+ TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
+ TP_ARGS(ip, agno),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_agnumber_t, agno)
+ __field(int, streams)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->agno = agno;
+ __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); /* value returned for @agno on ip's mount */
+ ),
+ TP_printk("dev %d:%d ino 0x%llx agno %u streams %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->agno,
+ __entry->streams)
+)
+#define DEFINE_FILESTREAM_EVENT(name) /* one xfs_filestream_class event called @name */ \
+DEFINE_EVENT(xfs_filestream_class, name, \
+ TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \
+ TP_ARGS(ip, agno))
+DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
+DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
+DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
+
+TRACE_EVENT(xfs_filestream_pick, /* same fields as xfs_filestream_class plus free and nscan */
+ TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno,
+ xfs_extlen_t free, int nscan),
+ TP_ARGS(ip, agno, free, nscan),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_agnumber_t, agno)
+ __field(int, streams)
+ __field(xfs_extlen_t, free)
+ __field(int, nscan)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->agno = agno;
+ __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); /* value returned for @agno on ip's mount */
+ __entry->free = free;
+ __entry->nscan = nscan;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d", /* NOTE(review): free is xfs_extlen_t but uses %d - confirm signedness */
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->agno,
+ __entry->streams,
+ __entry->free,
+ __entry->nscan)
+);
+
DECLARE_EVENT_CLASS(xfs_lock_class,
TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
unsigned long caller_ip),
@@ -603,6 +661,8 @@ DEFINE_INODE_EVENT(xfs_readlink);
DEFINE_INODE_EVENT(xfs_inactive_symlink);
DEFINE_INODE_EVENT(xfs_alloc_file_space);
DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_zero_file_space);
+DEFINE_INODE_EVENT(xfs_collapse_file_space);
DEFINE_INODE_EVENT(xfs_readdir);
#ifdef CONFIG_XFS_POSIX_ACL
DEFINE_INODE_EVENT(xfs_get_acl);
@@ -1058,7 +1118,6 @@ DEFINE_RW_EVENT(xfs_file_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
DECLARE_EVENT_CLASS(xfs_page_class,
TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c812c5c060de..30e8e3410955 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -190,7 +190,7 @@ xfs_trans_reserve(
-((int64_t)blocks), rsvd);
if (error != 0) {
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
- return (XFS_ERROR(ENOSPC));
+ return -ENOSPC;
}
tp->t_blk_res += blocks;
}
@@ -241,7 +241,7 @@ xfs_trans_reserve(
error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
-((int64_t)rtextents), rsvd);
if (error) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto undo_log;
}
tp->t_rtx_res += rtextents;
@@ -827,7 +827,7 @@ xfs_trans_committed_bulk(
xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
spin_lock(&ailp->xa_lock);
- xfs_trans_ail_cursor_done(ailp, &cur);
+ xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
}
@@ -874,7 +874,7 @@ xfs_trans_commit(
goto out_unreserve;
if (XFS_FORCED_SHUTDOWN(mp)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out_unreserve;
}
@@ -887,12 +887,7 @@ xfs_trans_commit(
xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp);
- error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
- if (error == ENOMEM) {
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
- error = XFS_ERROR(EIO);
- goto out_unreserve;
- }
+ xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free(tp);
@@ -902,10 +897,7 @@ xfs_trans_commit(
* log out now and wait for it.
*/
if (sync) {
- if (!error) {
- error = _xfs_log_force_lsn(mp, commit_lsn,
- XFS_LOG_SYNC, NULL);
- }
+ error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
XFS_STATS_INC(xs_trans_sync);
} else {
XFS_STATS_INC(xs_trans_async);
@@ -925,7 +917,7 @@ out_unreserve:
if (tp->t_ticket) {
commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
if (commit_lsn == -1 && !error)
- error = XFS_ERROR(EIO);
+ error = -EIO;
}
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
@@ -1032,7 +1024,7 @@ xfs_trans_roll(
*/
error = xfs_trans_commit(trans, 0);
if (error)
- return (error);
+ return error;
trans = *tpp;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9b96d35e483d..b5bc1ab3c4da 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -64,7 +64,7 @@ typedef struct xfs_log_item {
struct xfs_item_ops {
void (*iop_size)(xfs_log_item_t *, int *, int *);
- void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
+ void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *);
void (*iop_pin)(xfs_log_item_t *);
void (*iop_unpin)(xfs_log_item_t *, int remove);
uint (*iop_push)(struct xfs_log_item *, struct list_head *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index a7287354e535..859482f53b5a 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -173,7 +173,6 @@ xfs_trans_ail_cursor_next(
*/
void
xfs_trans_ail_cursor_done(
- struct xfs_ail *ailp,
struct xfs_ail_cursor *cur)
{
cur->item = NULL;
@@ -368,7 +367,7 @@ xfsaild_push(
* If the AIL is empty or our push has reached the end we are
* done now.
*/
- xfs_trans_ail_cursor_done(ailp, &cur);
+ xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
goto out_done;
}
@@ -453,7 +452,7 @@ xfsaild_push(
break;
lsn = lip->li_lsn;
}
- xfs_trans_ail_cursor_done(ailp, &cur);
+ xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))
@@ -763,7 +762,7 @@ xfs_trans_ail_init(
ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
if (!ailp)
- return ENOMEM;
+ return -ENOMEM;
ailp->xa_mount = mp;
INIT_LIST_HEAD(&ailp->xa_ail);
@@ -782,7 +781,7 @@ xfs_trans_ail_init(
out_free_ailp:
kmem_free(ailp);
- return ENOMEM;
+ return -ENOMEM;
}
void
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c035d11b7734..96c898e7ac9a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -166,7 +166,7 @@ xfs_trans_get_buf_map(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
trace_xfs_trans_get_buf_recur(bip);
- return (bp);
+ return bp;
}
bp = xfs_buf_get_map(target, map, nmaps, flags);
@@ -178,7 +178,7 @@ xfs_trans_get_buf_map(
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_get_buf(bp->b_fspriv);
- return (bp);
+ return bp;
}
/*
@@ -201,9 +201,8 @@ xfs_trans_getsb(xfs_trans_t *tp,
* Default to just trying to lock the superblock buffer
* if tp is NULL.
*/
- if (tp == NULL) {
- return (xfs_getsb(mp, flags));
- }
+ if (tp == NULL)
+ return xfs_getsb(mp, flags);
/*
* If the superblock buffer already has this transaction
@@ -218,7 +217,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
trace_xfs_trans_getsb_recur(bip);
- return (bp);
+ return bp;
}
bp = xfs_getsb(mp, flags);
@@ -227,7 +226,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_getsb(bp->b_fspriv);
- return (bp);
+ return bp;
}
#ifdef DEBUG
@@ -267,7 +266,7 @@ xfs_trans_read_buf_map(
bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
if (!bp)
return (flags & XBF_TRYLOCK) ?
- EAGAIN : XFS_ERROR(ENOMEM);
+ -EAGAIN : -ENOMEM;
if (bp->b_error) {
error = bp->b_error;
@@ -275,6 +274,10 @@ xfs_trans_read_buf_map(
XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp);
xfs_buf_relse(bp);
+
+ /* bad CRC means corrupted metadata */
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
return error;
}
#ifdef DEBUG
@@ -283,7 +286,7 @@ xfs_trans_read_buf_map(
if (((xfs_req_num++) % xfs_error_mod) == 0) {
xfs_buf_relse(bp);
xfs_debug(mp, "Returning error!");
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
}
@@ -314,7 +317,18 @@ xfs_trans_read_buf_map(
ASSERT(bp->b_iodone == NULL);
XFS_BUF_READ(bp);
bp->b_ops = ops;
- xfsbdstrat(tp->t_mountp, bp);
+
+ /*
+ * XXX(hch): clean up the error handling here to be less
+ * of a mess..
+ */
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ trace_xfs_bdstrat_shut(bp, _RET_IP_);
+ xfs_bioerror_relse(bp);
+ } else {
+ xfs_buf_iorequest(bp);
+ }
+
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp, __func__);
@@ -327,6 +341,9 @@ xfs_trans_read_buf_map(
if (tp->t_flags & XFS_TRANS_DIRTY)
xfs_force_shutdown(tp->t_mountp,
SHUTDOWN_META_IO_ERROR);
+ /* bad CRC means corrupted metadata */
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
return error;
}
}
@@ -337,7 +354,7 @@ xfs_trans_read_buf_map(
if (XFS_FORCED_SHUTDOWN(mp)) {
trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
*bpp = NULL;
- return XFS_ERROR(EIO);
+ return -EIO;
}
@@ -354,7 +371,7 @@ xfs_trans_read_buf_map(
if (bp == NULL) {
*bpp = NULL;
return (flags & XBF_TRYLOCK) ?
- 0 : XFS_ERROR(ENOMEM);
+ 0 : -ENOMEM;
}
if (bp->b_error) {
error = bp->b_error;
@@ -364,6 +381,10 @@ xfs_trans_read_buf_map(
if (tp->t_flags & XFS_TRANS_DIRTY)
xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
xfs_buf_relse(bp);
+
+ /* bad CRC means corrupted metadata */
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
return error;
}
#ifdef DEBUG
@@ -374,7 +395,7 @@ xfs_trans_read_buf_map(
SHUTDOWN_META_IO_ERROR);
xfs_buf_relse(bp);
xfs_debug(mp, "Returning trans error!");
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
}
@@ -392,7 +413,7 @@ shutdown_abort:
trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
xfs_buf_relse(bp);
*bpp = NULL;
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index cd2a10e15d3a..846e061c2e98 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -295,8 +295,8 @@ xfs_trans_mod_dquot(
/*
* Given an array of dqtrx structures, lock all the dquots associated and join
* them to the transaction, provided they have been modified. We know that the
- * highest number of dquots of one type - usr, grp OR prj - involved in a
- * transaction is 2 so we don't need to make this very generic.
+ * highest number of dquots of one type - usr, grp and prj - involved in a
+ * transaction is 3 so we don't need to make this very generic.
*/
STATIC void
xfs_trans_dqlockedjoin(
@@ -722,8 +722,8 @@ xfs_trans_dqresv(
error_return:
xfs_dqunlock(dqp);
if (flags & XFS_QMOPT_ENOSPC)
- return ENOSPC;
- return EDQUOT;
+ return -ENOSPC;
+ return -EDQUOT;
}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 12e86af9d9b9..bd1281862ad7 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -133,8 +133,7 @@ struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
xfs_lsn_t lsn);
struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
-void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
- struct xfs_ail_cursor *cur);
+void xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur);
#if BITS_PER_LONG != 64
static inline void
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 82bbc34d54a3..b79dc66b2ecd 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -38,43 +38,18 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
-/*
- * These types are 64 bits on disk but are either 32 or 64 bits in memory.
- * Disk based types:
- */
-typedef __uint64_t xfs_dfsbno_t; /* blockno in filesystem (agno|agbno) */
-typedef __uint64_t xfs_drfsbno_t; /* blockno in filesystem (raw) */
-typedef __uint64_t xfs_drtbno_t; /* extent (block) in realtime area */
-typedef __uint64_t xfs_dfiloff_t; /* block number in a file */
-typedef __uint64_t xfs_dfilblks_t; /* number of blocks in a file */
-
-/*
- * Memory based types are conditional.
- */
-#if XFS_BIG_BLKNOS
typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
-typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
-#else
-typedef __uint32_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
-typedef __uint32_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
-typedef __uint32_t xfs_rtblock_t; /* extent (block) in realtime area */
-typedef __int32_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
-#endif
typedef __uint64_t xfs_fileoff_t; /* block number in a file */
-typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */
+typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
+typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
/*
* Null values for the types.
*/
-#define NULLDFSBNO ((xfs_dfsbno_t)-1)
-#define NULLDRFSBNO ((xfs_drfsbno_t)-1)
-#define NULLDRTBNO ((xfs_drtbno_t)-1)
-#define NULLDFILOFF ((xfs_dfiloff_t)-1)
-
#define NULLFSBLOCK ((xfs_fsblock_t)-1)
#define NULLRFSBLOCK ((xfs_rfsblock_t)-1)
#define NULLRTBLOCK ((xfs_rtblock_t)-1)
@@ -134,7 +109,7 @@ typedef enum {
typedef enum {
XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
- XFS_BTNUM_MAX
+ XFS_BTNUM_FINOi, XFS_BTNUM_MAX
} xfs_btnum_t;
struct xfs_name {
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
deleted file mode 100644
index 3e8e797c6d11..000000000000
--- a/fs/xfs/xfs_vnode.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-#include "xfs_fs.h"
-
-struct file;
-struct xfs_inode;
-struct attrlist_cursor_kern;
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISDIRECT 0x00004 /* bypass page cache */
-#define IO_INVIS 0x00020 /* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
- { IO_ISDIRECT, "DIRECT" }, \
- { IO_INVIS, "INVIS"}
-
-/*
- * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
- */
-#define FI_NONE 0 /* none */
-#define FI_REMAPF 1 /* Do a remapf prior to the operation */
-#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation.
- Prevent VM access to the pages until
- the operation completes. */
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
-#define VN_CACHED(vp) (vp->i_mapping->nrpages)
-#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \
- PAGECACHE_TAG_DIRTY)
-
-
-#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 9d479073ba41..93455b998041 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -49,7 +49,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name,
value = NULL;
}
- error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
+ error = xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
if (error)
return error;
return asize;
@@ -71,8 +71,8 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
xflags |= ATTR_REPLACE;
if (!value)
- return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
- return -xfs_attr_set(ip, (unsigned char *)name,
+ return xfs_attr_remove(ip, (unsigned char *)name, xflags);
+ return xfs_attr_set(ip, (unsigned char *)name,
(void *)value, size, xflags);
}
@@ -102,8 +102,8 @@ const struct xattr_handler *xfs_xattr_handlers[] = {
&xfs_xattr_trusted_handler,
&xfs_xattr_security_handler,
#ifdef CONFIG_XFS_POSIX_ACL
- &xfs_xattr_acl_access_handler,
- &xfs_xattr_acl_default_handler,
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
#endif
NULL
};