diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/xfs/Makefile | 71 | ||||
-rw-r--r-- | fs/xfs/kmem.c | 21 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.h (renamed from fs/xfs/xfs_ag.h) | 42 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c (renamed from fs/xfs/xfs_alloc.c) | 78 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.h (renamed from fs/xfs/xfs_alloc.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc_btree.c (renamed from fs/xfs/xfs_alloc_btree.c) | 19 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc_btree.h (renamed from fs/xfs/xfs_alloc_btree.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.c (renamed from fs/xfs/xfs_attr.c) | 444 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c (renamed from fs/xfs/xfs_attr_leaf.c) | 298 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.h (renamed from fs/xfs/xfs_attr_leaf.h) | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_remote.c (renamed from fs/xfs/xfs_attr_remote.c) | 101 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_remote.h (renamed from fs/xfs/xfs_attr_remote.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_sf.h (renamed from fs/xfs/xfs_attr_sf.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bit.h (renamed from fs/xfs/xfs_bit.h) | 7 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c (renamed from fs/xfs/xfs_bmap.c) | 506 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h (renamed from fs/xfs/xfs_bmap.h) | 23 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap_btree.c (renamed from fs/xfs/xfs_bmap_btree.c) | 120 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap_btree.h (renamed from fs/xfs/xfs_bmap_btree.h) | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.c (renamed from fs/xfs/xfs_btree.c) | 198 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.h (renamed from fs/xfs/xfs_btree.h) | 7 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_cksum.h (renamed from fs/xfs/xfs_cksum.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.c (renamed from fs/xfs/xfs_da_btree.c) | 241 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.h (renamed from fs/xfs/xfs_da_btree.h) | 28 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_format.c (renamed from fs/xfs/xfs_da_format.c) | 36 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_format.h (renamed from fs/xfs/xfs_da_format.h) | 154 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dinode.h (renamed from fs/xfs/xfs_dinode.h) | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2.c (renamed from fs/xfs/xfs_dir2.c) | 470 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2.h (renamed from fs/xfs/xfs_dir2.h) | 30 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_block.c (renamed from fs/xfs/xfs_dir2_block.c) | 130 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_data.c (renamed from fs/xfs/xfs_dir2_data.c) | 111 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_leaf.c (renamed from fs/xfs/xfs_dir2_leaf.c) | 241 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_node.c (renamed from fs/xfs/xfs_dir2_node.c) | 271 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_priv.h (renamed from fs/xfs/xfs_dir2_priv.h) | 142 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_sf.c (renamed from fs/xfs/xfs_dir2_sf.c) | 168 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dquot_buf.c (renamed from fs/xfs/xfs_dquot_buf.c) | 20 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_format.h (renamed from fs/xfs/xfs_format.h) | 30 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c (renamed from fs/xfs/xfs_ialloc.c) | 819 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.h (renamed from fs/xfs/xfs_ialloc.h) | 23 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.c (renamed from fs/xfs/xfs_ialloc_btree.c) | 87 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.h (renamed from fs/xfs/xfs_ialloc_btree.h) | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_buf.c (renamed from fs/xfs/xfs_inode_buf.c) | 34 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_buf.h (renamed from fs/xfs/xfs_inode_buf.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_fork.c (renamed from fs/xfs/xfs_inode_fork.c) | 56 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_fork.h (renamed from fs/xfs/xfs_inode_fork.h) | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inum.h (renamed from fs/xfs/xfs_inum.h) | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_log_format.h (renamed from fs/xfs/xfs_log_format.h) | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_log_recover.h (renamed from fs/xfs/xfs_log_recover.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_log_rlimit.c (renamed from fs/xfs/xfs_log_rlimit.c) | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_quota_defs.h (renamed from fs/xfs/xfs_quota_defs.h) | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rtbitmap.c (renamed from fs/xfs/xfs_rtbitmap.c) | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.c (renamed from fs/xfs/xfs_sb.c) | 116 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.h (renamed from fs/xfs/xfs_sb.h) | 245 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_shared.h (renamed from fs/xfs/xfs_shared.h) | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_symlink_remote.c (renamed from fs/xfs/xfs_symlink_remote.c) | 21 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_resv.c (renamed from fs/xfs/xfs_trans_resv.c) | 146 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_resv.h (renamed from fs/xfs/xfs_trans_resv.h) | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_space.h (renamed from fs/xfs/xfs_trans_space.h) | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_acl.c | 161 | ||||
-rw-r--r-- | fs/xfs/xfs_acl.h | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 290 | ||||
-rw-r--r-- | fs/xfs/xfs_attr_inactive.c | 22 | ||||
-rw-r--r-- | fs/xfs/xfs_attr_list.c | 48 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 389 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.h | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 161 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 54 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 152 | ||||
-rw-r--r-- | fs/xfs/xfs_dir2_readdir.c | 161 | ||||
-rw-r--r-- | fs/xfs/xfs_discard.c | 18 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot.c | 107 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot.h | 17 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot_item.c | 67 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot_item.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_error.c | 50 | ||||
-rw-r--r-- | fs/xfs/xfs_error.h | 14 | ||||
-rw-r--r-- | fs/xfs/xfs_export.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_extfree_item.c | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 276 | ||||
-rw-r--r-- | fs/xfs/xfs_filestream.c | 684 | ||||
-rw-r--r-- | fs/xfs/xfs_filestream.h | 34 | ||||
-rw-r--r-- | fs/xfs/xfs_fs.h | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_fsops.c | 89 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 160 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.h | 19 | ||||
-rw-r--r-- | fs/xfs/xfs_icreate_item.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 447 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 378 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.h | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 306 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl32.c | 110 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 69 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 291 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.c | 589 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.h | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_linux.h | 29 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 143 | ||||
-rw-r--r-- | fs/xfs/xfs_log.h | 57 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 195 | ||||
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 335 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 159 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_mru_cache.c | 163 | ||||
-rw-r--r-- | fs/xfs/xfs_mru_cache.h | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 452 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.h | 19 | ||||
-rw-r--r-- | fs/xfs/xfs_qm_bhv.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_qm_syscalls.c | 66 | ||||
-rw-r--r-- | fs/xfs/xfs_quota_priv.h | 42 | ||||
-rw-r--r-- | fs/xfs/xfs_quotaops.c | 41 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 26 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_stats.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_stats.h | 18 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 155 | ||||
-rw-r--r-- | fs/xfs/xfs_super.h | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_symlink.c | 38 | ||||
-rw-r--r-- | fs/xfs/xfs_sysfs.c | 165 | ||||
-rw-r--r-- | fs/xfs/xfs_sysfs.h | 59 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 61 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 24 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_ail.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_buf.c | 49 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_dquot.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_priv.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_types.h | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_vnode.h | 55 | ||||
-rw-r--r-- | fs/xfs/xfs_xattr.c | 10 |
133 files changed, 7101 insertions, 6284 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 399e8cec6e60..5d47b4df61ea 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,6 +1,7 @@ config XFS_FS tristate "XFS filesystem support" depends on BLOCK + depends on (64BIT || LBDAF) select EXPORTFS select LIBCRC32C help diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index c21f43506661..d61799949580 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -17,6 +17,7 @@ # ccflags-y += -I$(src) # needed for trace events +ccflags-y += -I$(src)/libxfs ccflags-$(CONFIG_XFS_DEBUG) += -g @@ -25,6 +26,39 @@ obj-$(CONFIG_XFS_FS) += xfs.o # this one should be compiled first, as the tracing macros can easily blow up xfs-y += xfs_trace.o +# build the libxfs code first +xfs-y += $(addprefix libxfs/, \ + xfs_alloc.o \ + xfs_alloc_btree.o \ + xfs_attr.o \ + xfs_attr_leaf.o \ + xfs_attr_remote.o \ + xfs_bmap.o \ + xfs_bmap_btree.o \ + xfs_btree.o \ + xfs_da_btree.o \ + xfs_da_format.o \ + xfs_dir2.o \ + xfs_dir2_block.o \ + xfs_dir2_data.o \ + xfs_dir2_leaf.o \ + xfs_dir2_node.o \ + xfs_dir2_sf.o \ + xfs_dquot_buf.o \ + xfs_ialloc.o \ + xfs_ialloc_btree.o \ + xfs_inode_fork.o \ + xfs_inode_buf.o \ + xfs_log_rlimit.o \ + xfs_sb.o \ + xfs_symlink_remote.o \ + xfs_trans_resv.o \ + ) +# xfs_rtbitmap is shared with libxfs +xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \ + xfs_rtbitmap.o \ + ) + # highlevel code xfs-y += xfs_aops.o \ xfs_attr_inactive.o \ @@ -45,53 +79,27 @@ xfs-y += xfs_aops.o \ xfs_ioctl.o \ xfs_iomap.o \ xfs_iops.o \ + xfs_inode.o \ xfs_itable.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ xfs_super.o \ xfs_symlink.o \ + xfs_sysfs.o \ xfs_trans.o \ xfs_xattr.o \ kmem.o \ uuid.o -# code shared with libxfs -xfs-y += xfs_alloc.o \ - xfs_alloc_btree.o \ - xfs_attr.o \ - xfs_attr_leaf.o \ - xfs_attr_remote.o \ - xfs_bmap.o \ - xfs_bmap_btree.o \ - xfs_btree.o \ - xfs_da_btree.o \ - xfs_da_format.o \ - xfs_dir2.o \ - xfs_dir2_block.o \ - xfs_dir2_data.o \ - xfs_dir2_leaf.o \ - xfs_dir2_node.o \ - xfs_dir2_sf.o \ - xfs_dquot_buf.o \ - xfs_ialloc.o \ - xfs_ialloc_btree.o \ - xfs_icreate_item.o \ - xfs_inode.o \ - xfs_inode_fork.o \ - xfs_inode_buf.o \ - xfs_log_recover.o \ - xfs_log_rlimit.o \ - xfs_sb.o \ - xfs_symlink_remote.o \ - xfs_trans_resv.o - # low-level transaction/log code xfs-y += xfs_log.o \ xfs_log_cil.o \ xfs_buf_item.o \ xfs_extfree_item.o \ + xfs_icreate_item.o \ xfs_inode_item.o \ + xfs_log_recover.o \ xfs_trans_ail.o \ xfs_trans_buf.o \ xfs_trans_extfree.o \ @@ -107,8 +115,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs_quotaops.o # xfs_rtbitmap is shared with libxfs -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \ - xfs_rtbitmap.o +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += xfs_stats.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 66a36befc5c0..844e288b9576 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) void * kmem_zalloc_large(size_t size, xfs_km_flags_t flags) { + unsigned noio_flag = 0; void *ptr; + gfp_t lflags; ptr = kmem_zalloc(size, flags | KM_MAYFAIL); if (ptr) return ptr; - return vzalloc(size); + + /* + * __vmalloc() will allocate data pages and auxillary structures (e.g. + * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context + * here. Hence we need to tell memory reclaim that we are in such a + * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering + * the filesystem here and potentially deadlocking. + */ + if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) + noio_flag = memalloc_noio_save(); + + lflags = kmem_flags_convert(flags); + ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + + if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) + memalloc_noio_restore(noio_flag); + + return ptr; } void diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 3fc109819c34..6e247a99f5db 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -89,6 +89,8 @@ typedef struct xfs_agf { /* structure must be padded to 64 bit alignment */ } xfs_agf_t; +#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) + #define XFS_AGF_MAGICNUM 0x00000001 #define XFS_AGF_VERSIONNUM 0x00000002 #define XFS_AGF_SEQNO 0x00000004 @@ -158,28 +160,38 @@ typedef struct xfs_agi { * still being referenced. */ __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; - + /* + * This marks the end of logging region 1 and start of logging region 2. + */ uuid_t agi_uuid; /* uuid of filesystem */ __be32 agi_crc; /* crc of agi sector */ __be32 agi_pad32; __be64 agi_lsn; /* last write sequence */ + __be32 agi_free_root; /* root of the free inode btree */ + __be32 agi_free_level;/* levels in free inode btree */ + /* structure must be padded to 64 bit alignment */ } xfs_agi_t; -#define XFS_AGI_MAGICNUM 0x00000001 -#define XFS_AGI_VERSIONNUM 0x00000002 -#define XFS_AGI_SEQNO 0x00000004 -#define XFS_AGI_LENGTH 0x00000008 -#define XFS_AGI_COUNT 0x00000010 -#define XFS_AGI_ROOT 0x00000020 -#define XFS_AGI_LEVEL 0x00000040 -#define XFS_AGI_FREECOUNT 0x00000080 -#define XFS_AGI_NEWINO 0x00000100 -#define XFS_AGI_DIRINO 0x00000200 -#define XFS_AGI_UNLINKED 0x00000400 -#define XFS_AGI_NUM_BITS 11 -#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) +#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) + +#define XFS_AGI_MAGICNUM (1 << 0) +#define XFS_AGI_VERSIONNUM (1 << 1) +#define XFS_AGI_SEQNO (1 << 2) +#define XFS_AGI_LENGTH (1 << 3) +#define XFS_AGI_COUNT (1 << 4) +#define XFS_AGI_ROOT (1 << 5) +#define XFS_AGI_LEVEL (1 << 6) +#define XFS_AGI_FREECOUNT (1 << 7) +#define XFS_AGI_NEWINO (1 << 8) +#define XFS_AGI_DIRINO (1 << 9) +#define XFS_AGI_UNLINKED (1 << 10) +#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */ +#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) +#define XFS_AGI_FREE_ROOT (1 << 11) +#define XFS_AGI_FREE_LEVEL (1 << 12) +#define XFS_AGI_NUM_BITS_R2 13 /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) @@ -222,6 +234,8 @@ typedef struct xfs_agfl { __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ } xfs_agfl_t; +#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) + /* * tags for inode radix tree */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 9eab2dfdcbb5..4bffffe038a1 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -257,16 +257,14 @@ xfs_alloc_fix_len( k = rlen % args->prod; if (k == args->mod) return; - if (k > args->mod) { - if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen) - return; - } else { - if ((int)(rlen = rlen - args->prod - (args->mod - k)) < - (int)args->minlen) - return; - } - ASSERT(rlen >= args->minlen); - ASSERT(rlen <= args->maxlen); + if (k > args->mod) + rlen = rlen - (k - args->mod); + else + rlen = rlen - args->prod + (args->mod - k); + if ((int)rlen < (int)args->minlen) + return; + ASSERT(rlen >= args->minlen && rlen <= args->maxlen); + ASSERT(rlen % args->prod == args->mod); args->len = rlen; } @@ -474,7 +472,6 @@ xfs_agfl_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - int agfl_ok = 1; /* * There is no verification of non-crc AGFLs because mkfs does not @@ -485,15 +482,13 @@ xfs_agfl_read_verify( if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agfl, agfl_crc)); + if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_agfl_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); - agfl_ok = agfl_ok && xfs_agfl_verify(bp); - - if (!agfl_ok) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -508,16 +503,15 @@ xfs_agfl_write_verify( return; if (!xfs_agfl_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } if (bip) XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agfl, agfl_crc)); + xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); } const struct xfs_buf_ops xfs_agfl_buf_ops = { @@ -545,7 +539,6 @@ xfs_alloc_read_agfl( XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); xfs_buf_set_ref(bp, XFS_AGFL_REF); *bpp = bp; return 0; @@ -566,7 +559,7 @@ xfs_alloc_update_counters( xfs_trans_agblocks_delta(tp, len); if (unlikely(be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))) - return EFSCORRUPTED; + return -EFSCORRUPTED; xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); return 0; @@ -2238,19 +2231,17 @@ xfs_agf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - int agf_ok = 1; - - if (xfs_sb_version_hascrc(&mp->m_sb)) - agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agf, agf_crc)); - agf_ok = agf_ok && xfs_agf_verify(mp, bp); - - if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, - XFS_RANDOM_ALLOC_READ_AGF))) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, + XFS_ERRTAG_ALLOC_READ_AGF, + XFS_RANDOM_ALLOC_READ_AGF)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -2261,8 +2252,8 @@ xfs_agf_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; if (!xfs_agf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -2272,8 +2263,7 @@ xfs_agf_write_verify( if (bip) XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agf, agf_crc)); + xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); } const struct xfs_buf_ops xfs_agf_buf_ops = { @@ -2611,11 +2601,11 @@ xfs_free_extent( */ args.agno = XFS_FSB_TO_AGNO(args.mp, bno); if (args.agno >= args.mp->m_sb.sb_agcount) - return EFSCORRUPTED; + return -EFSCORRUPTED; args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); if (args.agbno >= args.mp->m_sb.sb_agblocks) - return EFSCORRUPTED; + return -EFSCORRUPTED; args.pag = xfs_perag_get(args.mp, args.agno); ASSERT(args.pag); @@ -2627,7 +2617,7 @@ xfs_free_extent( /* validate the extent size is legal now we have the agf locked */ if (args.agbno + len > be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto error0; } diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index feacb061bab7..feacb061bab7 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 13085429e523..e0e83e24d3ef 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -70,7 +70,6 @@ xfs_allocbt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int length, int *stat) { int error; @@ -355,12 +354,14 @@ static void xfs_allocbt_read_verify( struct xfs_buf *bp) { - if (!(xfs_btree_sblock_verify_crc(bp) && - xfs_allocbt_verify(bp))) { + if (!xfs_btree_sblock_verify_crc(bp)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_allocbt_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) { trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); } } @@ -370,9 +371,9 @@ xfs_allocbt_write_verify( { if (!xfs_allocbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 45e189e7e81c..45e189e7e81c 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index b86127072ac3..353fb425faef 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -77,17 +77,27 @@ STATIC int xfs_attr_refillstate(xfs_da_state_t *state); STATIC int -xfs_attr_name_to_xname( - struct xfs_name *xname, - const unsigned char *aname) +xfs_attr_args_init( + struct xfs_da_args *args, + struct xfs_inode *dp, + const unsigned char *name, + int flags) { - if (!aname) - return EINVAL; - xname->name = aname; - xname->len = strlen((char *)aname); - if (xname->len >= MAXNAMELEN) - return EFAULT; /* match IRIX behaviour */ + if (!name) + return -EINVAL; + + memset(args, 0, sizeof(*args)); + args->geo = dp->i_mount->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + args->dp = dp; + args->flags = flags; + args->name = name; + args->namelen = strlen((const char *)name); + if (args->namelen >= MAXNAMELEN) + return -EFAULT; /* match IRIX behaviour */ + + args->hashval = xfs_da_hashname(args->name, args->namelen); return 0; } @@ -106,78 +116,46 @@ xfs_inode_hasattr( * Overall external interface routines. *========================================================================*/ -STATIC int -xfs_attr_get_int( +int +xfs_attr_get( struct xfs_inode *ip, - struct xfs_name *name, + const unsigned char *name, unsigned char *value, int *valuelenp, int flags) { - xfs_da_args_t args; - int error; + struct xfs_da_args args; + uint lock_mode; + int error; + + XFS_STATS_INC(xs_attr_get); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; if (!xfs_inode_hasattr(ip)) - return ENOATTR; + return -ENOATTR; + + error = xfs_attr_args_init(&args, ip, name, flags); + if (error) + return error; - /* - * Fill in the arg structure for this request. - */ - memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; args.value = value; args.valuelen = *valuelenp; - args.flags = flags; - args.hashval = xfs_da_hashname(args.name, args.namelen); - args.dp = ip; - args.whichfork = XFS_ATTR_FORK; - /* - * Decide on what work routines to call based on the inode size. - */ - if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + lock_mode = xfs_ilock_attr_map_shared(ip); + if (!xfs_inode_hasattr(ip)) + error = -ENOATTR; + else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) error = xfs_attr_shortform_getvalue(&args); - } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) { + else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) error = xfs_attr_leaf_get(&args); - } else { + else error = xfs_attr_node_get(&args); - } + xfs_iunlock(ip, lock_mode); - /* - * Return the number of bytes in the value to the caller. - */ *valuelenp = args.valuelen; - - if (error == EEXIST) - error = 0; - return(error); -} - -int -xfs_attr_get( - xfs_inode_t *ip, - const unsigned char *name, - unsigned char *value, - int *valuelenp, - int flags) -{ - int error; - struct xfs_name xname; - - XFS_STATS_INC(xs_attr_get); - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return(EIO); - - error = xfs_attr_name_to_xname(&xname, name); - if (error) - return error; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return(error); + return error == -EEXIST ? 0 : error; } /* @@ -185,12 +163,10 @@ xfs_attr_get( */ STATIC int xfs_attr_calc_size( - struct xfs_inode *ip, - int namelen, - int valuelen, + struct xfs_da_args *args, int *local) { - struct xfs_mount *mp = ip->i_mount; + struct xfs_mount *mp = args->dp->i_mount; int size; int nblks; @@ -198,12 +174,10 @@ xfs_attr_calc_size( * Determine space new attribute will use, and if it would be * "local" or "remote" (note: local != inline). */ - size = xfs_attr_leaf_newentsize(namelen, valuelen, - mp->m_sb.sb_blocksize, local); - + size = xfs_attr_leaf_newentsize(args, local); nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); if (*local) { - if (size > (mp->m_sb.sb_blocksize >> 1)) { + if (size > (args->geo->blksize / 2)) { /* Double split possible */ nblks *= 2; } @@ -212,7 +186,7 @@ xfs_attr_calc_size( * Out of line attribute, cannot double split, but * make room for the attribute value itself. */ - uint dblocks = XFS_B_TO_FSB(mp, valuelen); + uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen); nblks += dblocks; nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); } @@ -220,26 +194,38 @@ xfs_attr_calc_size( return nblks; } -STATIC int -xfs_attr_set_int( - struct xfs_inode *dp, - struct xfs_name *name, - unsigned char *value, - int valuelen, - int flags) +int +xfs_attr_set( + struct xfs_inode *dp, + const unsigned char *name, + unsigned char *value, + int valuelen, + int flags) { - xfs_da_args_t args; - xfs_fsblock_t firstblock; - xfs_bmap_free_t flist; - int error, err2, committed; struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args; + struct xfs_bmap_free flist; struct xfs_trans_res tres; + xfs_fsblock_t firstblock; int rsvd = (flags & ATTR_ROOT) != 0; - int local; + int error, err2, committed, local; + + XFS_STATS_INC(xs_attr_set); + + if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + return -EIO; + + error = xfs_attr_args_init(&args, dp, name, flags); + if (error) + return error; + + args.value = value; + args.valuelen = valuelen; + args.firstblock = &firstblock; + args.flist = &flist; + args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; + args.total = xfs_attr_calc_size(&args, &local); - /* - * Attach the dquots to the inode. - */ error = xfs_qm_dqattach(dp, 0); if (error) return error; @@ -250,32 +236,14 @@ xfs_attr_set_int( */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(xfs_attr_sf_hdr_t) + - XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen); + XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); - if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd))) - return(error); + error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); + if (error) + return error; } /* - * Fill in the arg structure for this request. - */ - memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; - args.value = value; - args.valuelen = valuelen; - args.flags = flags; - args.hashval = xfs_da_hashname(args.name, args.namelen); - args.dp = dp; - args.firstblock = &firstblock; - args.flist = &flist; - args.whichfork = XFS_ATTR_FORK; - args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - - /* Size is now blocks for attribute data */ - args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local); - - /* * Start our first transaction of the day. * * All future transactions during this code must be "chained" off @@ -302,7 +270,7 @@ xfs_attr_set_int( error = xfs_trans_reserve(args.trans, &tres, args.total, 0); if (error) { xfs_trans_cancel(args.trans, 0); - return(error); + return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -312,7 +280,7 @@ xfs_attr_set_int( if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); - return (error); + return error; } xfs_trans_ijoin(args.trans, dp, 0); @@ -321,9 +289,9 @@ xfs_attr_set_int( * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ - if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || - ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) && - (dp->i_d.di_anextents == 0))) { + if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || + (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && + dp->i_d.di_anextents == 0)) { /* * Build initial attribute list (if required). @@ -336,7 +304,7 @@ xfs_attr_set_int( * the inode. */ error = xfs_attr_shortform_addname(&args); - if (error != ENOSPC) { + if (error != -ENOSPC) { /* * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). @@ -348,9 +316,8 @@ xfs_attr_set_int( * the transaction goes to disk before returning * to the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); - } if (!error && (flags & ATTR_KERNOTIME) == 0) { xfs_trans_ichgtime(args.trans, dp, @@ -360,7 +327,7 @@ xfs_attr_set_int( XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error == 0 ? err2 : error); + return error ? error : err2; } /* @@ -398,22 +365,19 @@ xfs_attr_set_int( } - if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) error = xfs_attr_leaf_addname(&args); - } else { + else error = xfs_attr_node_addname(&args); - } - if (error) { + if (error) goto out; - } /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); - } if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); @@ -425,65 +389,47 @@ xfs_attr_set_int( error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; out: - if (args.trans) + if (args.trans) { xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + } xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; } +/* + * Generic handler routine to remove a name from an attribute list. + * Transitions attribute list from Btree to shortform as necessary. + */ int -xfs_attr_set( - xfs_inode_t *dp, - const unsigned char *name, - unsigned char *value, - int valuelen, - int flags) +xfs_attr_remove( + struct xfs_inode *dp, + const unsigned char *name, + int flags) { - int error; - struct xfs_name xname; + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args; + struct xfs_bmap_free flist; + xfs_fsblock_t firstblock; + int error; - XFS_STATS_INC(xs_attr_set); + XFS_STATS_INC(xs_attr_remove); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return (EIO); + return -EIO; + + if (!xfs_inode_hasattr(dp)) + return -ENOATTR; - error = xfs_attr_name_to_xname(&xname, name); + error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; - return xfs_attr_set_int(dp, &xname, value, valuelen, flags); -} - -/* - * Generic handler routine to remove a name from an attribute list. - * Transitions attribute list from Btree to shortform as necessary. - */ -STATIC int -xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) -{ - xfs_da_args_t args; - xfs_fsblock_t firstblock; - xfs_bmap_free_t flist; - int error; - xfs_mount_t *mp = dp->i_mount; - - /* - * Fill in the arg structure for this request. - */ - memset((char *)&args, 0, sizeof(args)); - args.name = name->name; - args.namelen = name->len; - args.flags = flags; - args.hashval = xfs_da_hashname(args.name, args.namelen); - args.dp = dp; args.firstblock = &firstblock; args.flist = &flist; - args.total = 0; - args.whichfork = XFS_ATTR_FORK; /* * we have no control over the attribute names that userspace passes us @@ -492,9 +438,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) */ args.op_flags = XFS_DA_OP_OKNOENT; - /* - * Attach the dquots to the inode. - */ error = xfs_qm_dqattach(dp, 0); if (error) return error; @@ -523,7 +466,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) XFS_ATTRRM_SPACE_RES(mp), 0); if (error) { xfs_trans_cancel(args.trans, 0); - return(error); + return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -533,35 +476,26 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) */ xfs_trans_ijoin(args.trans, dp, 0); - /* - * Decide on what work routines to call based on the inode size. - */ if (!xfs_inode_hasattr(dp)) { - error = XFS_ERROR(ENOATTR); - goto out; - } - if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + error = -ENOATTR; + } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); error = xfs_attr_shortform_remove(&args); - if (error) { - goto out; - } } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { error = xfs_attr_leaf_removename(&args); } else { error = xfs_attr_node_removename(&args); } - if (error) { + + if (error) goto out; - } /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); - } if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); @@ -573,45 +507,17 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; out: - if (args.trans) + if (args.trans) { xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); -} - -int -xfs_attr_remove( - xfs_inode_t *dp, - const unsigned char *name, - int flags) -{ - int error; - struct xfs_name xname; - - XFS_STATS_INC(xs_attr_remove); - - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return (EIO); - - error = xfs_attr_name_to_xname(&xname, name); - if (error) - return error; - - xfs_ilock(dp, XFS_ILOCK_SHARED); - if (!xfs_inode_hasattr(dp)) { - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return XFS_ERROR(ENOATTR); } - xfs_iunlock(dp, XFS_ILOCK_SHARED); - - return xfs_attr_remove_int(dp, &xname, flags); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + return error; } - /*======================================================================== * External routines when attribute list is inside the inode *========================================================================*/ @@ -628,28 +534,28 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) trace_xfs_attr_sf_addname(args); retval = xfs_attr_shortform_lookup(args); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { - return(retval); - } else if (retval == EEXIST) { + if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { + return retval; + } else if (retval == -EEXIST) { if (args->flags & ATTR_CREATE) - return(retval); + return retval; retval = xfs_attr_shortform_remove(args); ASSERT(retval == 0); } if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX) - return(XFS_ERROR(ENOSPC)); + return -ENOSPC; newsize = XFS_ATTR_SF_TOTSIZE(args->dp); newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize); if (!forkoff) - return(XFS_ERROR(ENOSPC)); + return -ENOSPC; xfs_attr_shortform_add(args, forkoff); - return(0); + return 0; } @@ -686,10 +592,10 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * the given flags produce an error or call for an atomic rename. */ retval = xfs_attr3_leaf_lookup_int(bp, args); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { + if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { xfs_trans_brelse(args->trans, bp); return retval; - } else if (retval == EEXIST) { + } else if (retval == -EEXIST) { if (args->flags & ATTR_CREATE) { /* pure create op */ xfs_trans_brelse(args->trans, bp); return retval; @@ -697,11 +603,22 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) trace_xfs_attr_leaf_replace(args); + /* save the attribute state for later removal*/ args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; args->rmtblkno2 = args->rmtblkno; args->rmtblkcnt2 = args->rmtblkcnt; + args->rmtvaluelen2 = args->rmtvaluelen; + + /* + * clear the remote attr state now that it is saved so that the + * values reflect the state of the attribute we are about to + * add, not the attribute we just found and will remove later. + */ + args->rmtblkno = 0; + args->rmtblkcnt = 0; + args->rmtvaluelen = 0; } /* @@ -709,7 +626,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * if required. */ retval = xfs_attr3_leaf_add(bp, args); - if (retval == ENOSPC) { + if (retval == -ENOSPC) { /* * Promote the attribute list to the Btree format, then * Commit that transaction so that the node_addname() call @@ -725,7 +642,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); - return(error); + return error; } /* @@ -741,13 +658,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ error = xfs_trans_roll(&args->trans, dp); if (error) - return (error); + return error; /* * Fob the whole rest of the problem off on the Btree code. */ error = xfs_attr_node_addname(args); - return(error); + return error; } /* @@ -756,7 +673,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ error = xfs_trans_roll(&args->trans, dp); if (error) - return (error); + return error; /* * If there was an out-of-line value, allocate the blocks we @@ -767,7 +684,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) if (args->rmtblkno > 0) { error = xfs_attr_rmtval_set(args); if (error) - return(error); + return error; } /* @@ -783,7 +700,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ error = xfs_attr3_leaf_flipflags(args); if (error) - return(error); + return error; /* * Dismantle the "old" attribute/value pair by removing @@ -793,10 +710,11 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) args->blkno = args->blkno2; args->rmtblkno = args->rmtblkno2; args->rmtblkcnt = args->rmtblkcnt2; + args->rmtvaluelen = args->rmtvaluelen2; if (args->rmtblkno) { error = xfs_attr_rmtval_remove(args); if (error) - return(error); + return error; } /* @@ -826,7 +744,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); - return(error); + return error; } /* @@ -877,7 +795,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) return error; error = xfs_attr3_leaf_lookup_int(bp, args); - if (error == ENOATTR) { + if (error == -ENOATTR) { xfs_trans_brelse(args->trans, bp); return error; } @@ -932,7 +850,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args) return error; error = xfs_attr3_leaf_lookup_int(bp, args); - if (error != EEXIST) { + if (error != -EEXIST) { xfs_trans_brelse(args->trans, bp); return error; } @@ -945,7 +863,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args) } /*======================================================================== - * External routines when attribute list size > XFS_LBSIZE(mp). + * External routines when attribute list size > geo->blksize *========================================================================*/ /* @@ -978,8 +896,6 @@ restart: state = xfs_da_state_alloc(); state->args = args; state->mp = mp; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_attr_node_ents; /* * Search to see if name already exists, and get back a pointer @@ -990,25 +906,34 @@ restart: goto out; blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { + if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { goto out; - } else if (retval == EEXIST) { + } else if (retval == -EEXIST) { if (args->flags & ATTR_CREATE) goto out; trace_xfs_attr_node_replace(args); + /* save the attribute state for later removal*/ args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; args->rmtblkno2 = args->rmtblkno; args->rmtblkcnt2 = args->rmtblkcnt; + args->rmtvaluelen2 = args->rmtvaluelen; + + /* + * clear the remote attr state now that it is saved so that the + * values reflect the state of the attribute we are about to + * add, not the attribute we just found and will remove later. + */ args->rmtblkno = 0; args->rmtblkcnt = 0; + args->rmtvaluelen = 0; } retval = xfs_attr3_leaf_add(blk->bp, state->args); - if (retval == ENOSPC) { + if (retval == -ENOSPC) { if (state->path.active == 1) { /* * Its really a single leaf node, but it had @@ -1106,7 +1031,7 @@ restart: if (args->rmtblkno > 0) { error = xfs_attr_rmtval_set(args); if (error) - return(error); + return error; } /* @@ -1132,10 +1057,11 @@ restart: args->blkno = args->blkno2; args->rmtblkno = args->rmtblkno2; args->rmtblkcnt = args->rmtblkcnt2; + args->rmtvaluelen = args->rmtvaluelen2; if (args->rmtblkno) { error = xfs_attr_rmtval_remove(args); if (error) - return(error); + return error; } /* @@ -1147,8 +1073,6 @@ restart: state = xfs_da_state_alloc(); state->args = args; state->mp = mp; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_attr_node_ents; state->inleaf = 0; error = xfs_da3_node_lookup_int(state, &retval); if (error) @@ -1210,8 +1134,8 @@ out: if (state) xfs_da_state_free(state); if (error) - return(error); - return(retval); + return error; + return retval; } /* @@ -1239,14 +1163,12 @@ xfs_attr_node_removename(xfs_da_args_t *args) state = xfs_da_state_alloc(); state->args = args; state->mp = dp->i_mount; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_attr_node_ents; /* * Search to see if name exists, and get back a pointer to it. */ error = xfs_da3_node_lookup_int(state, &retval); - if (error || (retval != EEXIST)) { + if (error || (retval != -EEXIST)) { if (error == 0) error = retval; goto out; @@ -1375,7 +1297,7 @@ xfs_attr_node_removename(xfs_da_args_t *args) out: xfs_da_state_free(state); - return(error); + return error; } /* @@ -1423,7 +1345,7 @@ xfs_attr_fillstate(xfs_da_state_t *state) } } - return(0); + return 0; } /* @@ -1454,7 +1376,7 @@ xfs_attr_refillstate(xfs_da_state_t *state) blk->blkno, blk->disk_blkno, &blk->bp, XFS_ATTR_FORK); if (error) - return(error); + return error; } else { blk->bp = NULL; } @@ -1473,13 +1395,13 @@ xfs_attr_refillstate(xfs_da_state_t *state) blk->blkno, blk->disk_blkno, &blk->bp, XFS_ATTR_FORK); if (error) - return(error); + return error; } else { blk->bp = NULL; } } - return(0); + return 0; } /* @@ -1502,8 +1424,6 @@ xfs_attr_node_get(xfs_da_args_t *args) state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_sb.sb_blocksize; - state->node_ents = state->mp->m_attr_node_ents; /* * Search to see if name exists, and get back a pointer to it. @@ -1511,7 +1431,7 @@ xfs_attr_node_get(xfs_da_args_t *args) error = xfs_da3_node_lookup_int(state, &retval); if (error) { retval = error; - } else if (retval == EEXIST) { + } else if (retval == -EEXIST) { blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->bp != NULL); ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); @@ -1535,5 +1455,5 @@ xfs_attr_node_get(xfs_da_args_t *args) } xfs_da_state_free(state); - return(retval); + return retval; } diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 7b126f46a2f9..b1f73dbbf3d8 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -80,11 +80,12 @@ STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state, /* * Utility routines. */ -STATIC void xfs_attr3_leaf_moveents(struct xfs_attr_leafblock *src_leaf, +STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, + struct xfs_attr_leafblock *src_leaf, struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start, struct xfs_attr_leafblock *dst_leaf, struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start, - int move_count, struct xfs_mount *mp); + int move_count); STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); void @@ -213,8 +214,8 @@ xfs_attr3_leaf_write_verify( struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; if (!xfs_attr3_leaf_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -224,7 +225,7 @@ xfs_attr3_leaf_write_verify( if (bip) hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); } /* @@ -239,13 +240,14 @@ xfs_attr3_leaf_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_ATTR3_LEAF_CRC_OFF)) || - !xfs_attr3_leaf_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_attr3_leaf_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { @@ -545,7 +547,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) break; } if (i == end) - return(XFS_ERROR(ENOATTR)); + return -ENOATTR; /* * Fix up the attribute fork data, covering the hole @@ -580,7 +582,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) xfs_sbversion_add_attr2(mp, args->trans); - return(0); + return 0; } /* @@ -609,9 +611,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) continue; if (!xfs_attr_namesp_match(args->flags, sfe->flags)) continue; - return(XFS_ERROR(EEXIST)); + return -EEXIST; } - return(XFS_ERROR(ENOATTR)); + return -ENOATTR; } /* @@ -638,18 +640,18 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) continue; if (args->flags & ATTR_KERNOVAL) { args->valuelen = sfe->valuelen; - return(XFS_ERROR(EEXIST)); + return -EEXIST; } if (args->valuelen < sfe->valuelen) { args->valuelen = sfe->valuelen; - return(XFS_ERROR(ERANGE)); + return -ERANGE; } args->valuelen = sfe->valuelen; memcpy(args->value, &sfe->nameval[args->namelen], args->valuelen); - return(XFS_ERROR(EEXIST)); + return -EEXIST; } - return(XFS_ERROR(ENOATTR)); + return -ENOATTR; } /* @@ -689,7 +691,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) * If we hit an IO error middle of the transaction inside * grow_inode(), we may have inconsistent data. Bail out. */ - if (error == EIO) + if (error == -EIO) goto out; xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ @@ -710,6 +712,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) memset((char *)&nargs, 0, sizeof(nargs)); nargs.dp = dp; + nargs.geo = args->geo; nargs.firstblock = args->firstblock; nargs.flist = args->flist; nargs.total = args->total; @@ -727,9 +730,9 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe->namelen); nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ - ASSERT(error == ENOATTR); + ASSERT(error == -ENOATTR); error = xfs_attr3_leaf_add(bp, &nargs); - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); if (error) goto out; sfe = XFS_ATTR_SF_NEXTENTRY(sfe); @@ -738,7 +741,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) out: kmem_free(tmpbuffer); - return(error); + return error; } /* @@ -766,12 +769,12 @@ xfs_attr_shortform_allfit( if (entry->flags & XFS_ATTR_INCOMPLETE) continue; /* don't copy partial entries */ if (!(entry->flags & XFS_ATTR_LOCAL)) - return(0); + return 0; name_loc = xfs_attr3_leaf_name_local(leaf, i); if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) - return(0); + return 0; if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) - return(0); + return 0; bytes += sizeof(struct xfs_attr_sf_entry) - 1 + name_loc->namelen + be16_to_cpu(name_loc->valuelen); @@ -804,18 +807,18 @@ xfs_attr3_leaf_to_shortform( trace_xfs_attr_leaf_to_sf(args); - tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); + tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); if (!tmpbuffer) - return ENOMEM; + return -ENOMEM; - memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount)); + memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); leaf = (xfs_attr_leafblock_t *)tmpbuffer; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); entry = xfs_attr3_leaf_entryp(leaf); /* XXX (dgc): buffer is about to be marked stale - why zero it? */ - memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount)); + memset(bp->b_addr, 0, args->geo->blksize); /* * Clean out the prior contents of the attribute list. @@ -837,6 +840,7 @@ xfs_attr3_leaf_to_shortform( * Copy the attributes */ memset((char *)&nargs, 0, sizeof(nargs)); + nargs.geo = args->geo; nargs.dp = dp; nargs.firstblock = args->firstblock; nargs.flist = args->flist; @@ -903,12 +907,12 @@ xfs_attr3_leaf_to_node( /* copy leaf to new buffer, update identifiers */ xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF); bp2->b_ops = bp1->b_ops; - memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(mp)); + memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize); if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_blkinfo *hdr3 = bp2->b_addr; hdr3->blkno = cpu_to_be64(bp2->b_bn); } - xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1); /* * Set up the new root node. @@ -929,7 +933,7 @@ xfs_attr3_leaf_to_node( btree[0].before = cpu_to_be32(blkno); icnodehdr.count = 1; dp->d_ops->node_hdr_to_disk(node, &icnodehdr); - xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1); error = 0; out: return error; @@ -965,10 +969,10 @@ xfs_attr3_leaf_create( bp->b_ops = &xfs_attr3_leaf_buf_ops; xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF); leaf = bp->b_addr; - memset(leaf, 0, XFS_LBSIZE(mp)); + memset(leaf, 0, args->geo->blksize); memset(&ichdr, 0, sizeof(ichdr)); - ichdr.firstused = XFS_LBSIZE(mp); + ichdr.firstused = args->geo->blksize; if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_blkinfo *hdr3 = bp->b_addr; @@ -987,7 +991,7 @@ xfs_attr3_leaf_create( ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base; xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); - xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1); *bpp = bp; return 0; @@ -1013,10 +1017,10 @@ xfs_attr3_leaf_split( ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC); error = xfs_da_grow_inode(state->args, &blkno); if (error) - return(error); + return error; error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp); if (error) - return(error); + return error; newblk->blkno = blkno; newblk->magic = XFS_ATTR_LEAF_MAGIC; @@ -1027,7 +1031,7 @@ xfs_attr3_leaf_split( xfs_attr3_leaf_rebalance(state, oldblk, newblk); error = xfs_da3_blk_link(state, oldblk, newblk); if (error) - return(error); + return error; /* * Save info on "old" attribute for "atomic rename" ops, leaf_add() @@ -1049,7 +1053,7 @@ xfs_attr3_leaf_split( */ oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); - return(error); + return error; } /* @@ -1073,8 +1077,7 @@ xfs_attr3_leaf_add( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); ASSERT(args->index >= 0 && args->index <= ichdr.count); - entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen, - args->trans->t_mountp->m_sb.sb_blocksize, NULL); + entsize = xfs_attr_leaf_newentsize(args, NULL); /* * Search through freemap for first-fit on new name length. @@ -1105,7 +1108,7 @@ xfs_attr3_leaf_add( * no good and we should just give up. */ if (!ichdr.holes && sum < entsize) - return XFS_ERROR(ENOSPC); + return -ENOSPC; /* * Compact the entries to coalesce free space. @@ -1118,7 +1121,7 @@ xfs_attr3_leaf_add( * free region, in freemap[0]. If it is not big enough, give up. */ if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { - tmp = ENOSPC; + tmp = -ENOSPC; goto out_log_hdr; } @@ -1173,17 +1176,14 @@ xfs_attr3_leaf_add_work( * Allocate space for the new string (at the end of the run). */ mp = args->trans->t_mountp; - ASSERT(ichdr->freemap[mapindex].base < XFS_LBSIZE(mp)); + ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize); ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0); ASSERT(ichdr->freemap[mapindex].size >= - xfs_attr_leaf_newentsize(args->namelen, args->valuelen, - mp->m_sb.sb_blocksize, NULL)); - ASSERT(ichdr->freemap[mapindex].size < XFS_LBSIZE(mp)); + xfs_attr_leaf_newentsize(args, NULL)); + ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize); ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0); - ichdr->freemap[mapindex].size -= - xfs_attr_leaf_newentsize(args->namelen, args->valuelen, - mp->m_sb.sb_blocksize, &tmp); + ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp); entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base + ichdr->freemap[mapindex].size); @@ -1228,6 +1228,7 @@ xfs_attr3_leaf_add_work( name_rmt->valueblk = 0; args->rmtblkno = 1; args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); + args->rmtvaluelen = args->valuelen; } xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), @@ -1267,14 +1268,13 @@ xfs_attr3_leaf_compact( struct xfs_attr_leafblock *leaf_dst; struct xfs_attr3_icleaf_hdr ichdr_src; struct xfs_trans *trans = args->trans; - struct xfs_mount *mp = trans->t_mountp; char *tmpbuffer; trace_xfs_attr_leaf_compact(args); - tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); - memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); - memset(bp->b_addr, 0, XFS_LBSIZE(mp)); + tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); + memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); + memset(bp->b_addr, 0, args->geo->blksize); leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; leaf_dst = bp->b_addr; @@ -1287,7 +1287,7 @@ xfs_attr3_leaf_compact( /* Initialise the incore headers */ ichdr_src = *ichdr_dst; /* struct copy */ - ichdr_dst->firstused = XFS_LBSIZE(mp); + ichdr_dst->firstused = args->geo->blksize; ichdr_dst->usedbytes = 0; ichdr_dst->count = 0; ichdr_dst->holes = 0; @@ -1302,13 +1302,13 @@ xfs_attr3_leaf_compact( * Copy all entry's in the same (sorted) order, * but allocate name/value pairs packed and in sequence. */ - xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0, - ichdr_src.count, mp); + xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0, + leaf_dst, ichdr_dst, 0, ichdr_src.count); /* * this logs the entire buffer, but the caller must write the header * back to the buffer when it is finished modifying it. */ - xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); + xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1); kmem_free(tmpbuffer); } @@ -1459,8 +1459,8 @@ xfs_attr3_leaf_rebalance( /* * Move high entries from leaf1 to low end of leaf2. */ - xfs_attr3_leaf_moveents(leaf1, &ichdr1, ichdr1.count - count, - leaf2, &ichdr2, 0, count, state->mp); + xfs_attr3_leaf_moveents(args, leaf1, &ichdr1, + ichdr1.count - count, leaf2, &ichdr2, 0, count); } else if (count > ichdr1.count) { /* @@ -1488,14 +1488,14 @@ xfs_attr3_leaf_rebalance( /* * Move low entries from leaf2 to high end of leaf1. */ - xfs_attr3_leaf_moveents(leaf2, &ichdr2, 0, leaf1, &ichdr1, - ichdr1.count, count, state->mp); + xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1, + ichdr1.count, count); } xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1); xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2); - xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); - xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); + xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1); + xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1); /* * Copy out last hashval in each block for B-tree code. @@ -1590,11 +1590,9 @@ xfs_attr3_leaf_figure_balance( max = ichdr1->count + ichdr2->count; half = (max + 1) * sizeof(*entry); half += ichdr1->usedbytes + ichdr2->usedbytes + - xfs_attr_leaf_newentsize(state->args->namelen, - state->args->valuelen, - state->blocksize, NULL); + xfs_attr_leaf_newentsize(state->args, NULL); half /= 2; - lastdelta = state->blocksize; + lastdelta = state->args->geo->blksize; entry = xfs_attr3_leaf_entryp(leaf1); for (count = index = 0; count < max; entry++, index++, count++) { @@ -1604,10 +1602,7 @@ xfs_attr3_leaf_figure_balance( */ if (count == blk1->index) { tmp = totallen + sizeof(*entry) + - xfs_attr_leaf_newentsize( - state->args->namelen, - state->args->valuelen, - state->blocksize, NULL); + xfs_attr_leaf_newentsize(state->args, NULL); if (XFS_ATTR_ABS(half - tmp) > lastdelta) break; lastdelta = XFS_ATTR_ABS(half - tmp); @@ -1643,10 +1638,7 @@ xfs_attr3_leaf_figure_balance( totallen -= count * sizeof(*entry); if (foundit) { totallen -= sizeof(*entry) + - xfs_attr_leaf_newentsize( - state->args->namelen, - state->args->valuelen, - state->blocksize, NULL); + xfs_attr_leaf_newentsize(state->args, NULL); } *countarg = count; @@ -1698,9 +1690,9 @@ xfs_attr3_leaf_toosmall( bytes = xfs_attr3_leaf_hdr_size(leaf) + ichdr.count * sizeof(xfs_attr_leaf_entry_t) + ichdr.usedbytes; - if (bytes > (state->blocksize >> 1)) { + if (bytes > (state->args->geo->blksize >> 1)) { *action = 0; /* blk over 50%, don't try to join */ - return(0); + return 0; } /* @@ -1719,7 +1711,7 @@ xfs_attr3_leaf_toosmall( error = xfs_da3_path_shift(state, &state->altpath, forward, 0, &retval); if (error) - return(error); + return error; if (retval) { *action = 0; } else { @@ -1748,11 +1740,12 @@ xfs_attr3_leaf_toosmall( error = xfs_attr3_leaf_read(state->args->trans, state->args->dp, blkno, -1, &bp); if (error) - return(error); + return error; xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); - bytes = state->blocksize - (state->blocksize >> 2) - + bytes = state->args->geo->blksize - + (state->args->geo->blksize >> 2) - ichdr.usedbytes - ichdr2.usedbytes - ((ichdr.count + ichdr2.count) * sizeof(xfs_attr_leaf_entry_t)) - @@ -1764,7 +1757,7 @@ xfs_attr3_leaf_toosmall( } if (i >= 2) { *action = 0; - return(0); + return 0; } /* @@ -1780,13 +1773,13 @@ xfs_attr3_leaf_toosmall( 0, &retval); } if (error) - return(error); + return error; if (retval) { *action = 0; } else { *action = 1; } - return(0); + return 0; } /* @@ -1803,7 +1796,6 @@ xfs_attr3_leaf_remove( struct xfs_attr_leafblock *leaf; struct xfs_attr3_icleaf_hdr ichdr; struct xfs_attr_leaf_entry *entry; - struct xfs_mount *mp = args->trans->t_mountp; int before; int after; int smallest; @@ -1817,7 +1809,7 @@ xfs_attr3_leaf_remove( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - ASSERT(ichdr.count > 0 && ichdr.count < XFS_LBSIZE(mp) / 8); + ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); ASSERT(args->index >= 0 && args->index < ichdr.count); ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + xfs_attr3_leaf_hdr_size(leaf)); @@ -1825,7 +1817,7 @@ xfs_attr3_leaf_remove( entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); - ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); + ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); /* * Scan through free region table: @@ -1840,8 +1832,8 @@ xfs_attr3_leaf_remove( smallest = XFS_ATTR_LEAF_MAPSIZE - 1; entsize = xfs_attr_leaf_entsize(leaf, args->index); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - ASSERT(ichdr.freemap[i].base < XFS_LBSIZE(mp)); - ASSERT(ichdr.freemap[i].size < XFS_LBSIZE(mp)); + ASSERT(ichdr.freemap[i].base < args->geo->blksize); + ASSERT(ichdr.freemap[i].size < args->geo->blksize); if (ichdr.freemap[i].base == tablesize) { ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t); ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t); @@ -1918,11 +1910,11 @@ xfs_attr3_leaf_remove( * removing the name. */ if (smallest) { - tmp = XFS_LBSIZE(mp); + tmp = args->geo->blksize; entry = xfs_attr3_leaf_entryp(leaf); for (i = ichdr.count - 1; i >= 0; entry++, i--) { ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); - ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); + ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); if (be16_to_cpu(entry->nameidx) < tmp) tmp = be16_to_cpu(entry->nameidx); @@ -1945,7 +1937,7 @@ xfs_attr3_leaf_remove( tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + ichdr.count * sizeof(xfs_attr_leaf_entry_t); - return tmp < mp->m_attr_magicpct; /* leaf is < 37% full */ + return tmp < args->geo->magicpct; /* leaf is < 37% full */ } /* @@ -1962,7 +1954,6 @@ xfs_attr3_leaf_unbalance( struct xfs_attr3_icleaf_hdr drophdr; struct xfs_attr3_icleaf_hdr savehdr; struct xfs_attr_leaf_entry *entry; - struct xfs_mount *mp = state->mp; trace_xfs_attr_leaf_unbalance(state->args); @@ -1989,13 +1980,15 @@ xfs_attr3_leaf_unbalance( */ if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, drop_blk->bp, &drophdr)) { - xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, save_leaf, &savehdr, 0, - drophdr.count, mp); + drophdr.count); } else { - xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, save_leaf, &savehdr, - savehdr.count, drophdr.count, mp); + savehdr.count, drophdr.count); } } else { /* @@ -2005,7 +1998,7 @@ xfs_attr3_leaf_unbalance( struct xfs_attr_leafblock *tmp_leaf; struct xfs_attr3_icleaf_hdr tmphdr; - tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP); + tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP); /* * Copy the header into the temp leaf so that all the stuff @@ -2018,35 +2011,39 @@ xfs_attr3_leaf_unbalance( tmphdr.magic = savehdr.magic; tmphdr.forw = savehdr.forw; tmphdr.back = savehdr.back; - tmphdr.firstused = state->blocksize; + tmphdr.firstused = state->args->geo->blksize; /* write the header to the temp buffer to initialise it */ xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr); if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, drop_blk->bp, &drophdr)) { - xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, tmp_leaf, &tmphdr, 0, - drophdr.count, mp); - xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0, + drophdr.count); + xfs_attr3_leaf_moveents(state->args, + save_leaf, &savehdr, 0, tmp_leaf, &tmphdr, tmphdr.count, - savehdr.count, mp); + savehdr.count); } else { - xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0, + xfs_attr3_leaf_moveents(state->args, + save_leaf, &savehdr, 0, tmp_leaf, &tmphdr, 0, - savehdr.count, mp); - xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, + savehdr.count); + xfs_attr3_leaf_moveents(state->args, + drop_leaf, &drophdr, 0, tmp_leaf, &tmphdr, tmphdr.count, - drophdr.count, mp); + drophdr.count); } - memcpy(save_leaf, tmp_leaf, state->blocksize); + memcpy(save_leaf, tmp_leaf, state->args->geo->blksize); savehdr = tmphdr; /* struct copy */ kmem_free(tmp_leaf); } xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr); xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, - state->blocksize - 1); + state->args->geo->blksize - 1); /* * Copy out last hashval in each block for B-tree code. @@ -2092,7 +2089,7 @@ xfs_attr3_leaf_lookup_int( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); - ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8); + ASSERT(ichdr.count < args->geo->blksize / 8); /* * Binary search. (note: small blocks will skip this loop) @@ -2126,7 +2123,7 @@ xfs_attr3_leaf_lookup_int( } if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) { args->index = probe; - return XFS_ERROR(ENOATTR); + return -ENOATTR; } /* @@ -2155,7 +2152,7 @@ xfs_attr3_leaf_lookup_int( if (!xfs_attr_namesp_match(args->flags, entry->flags)) continue; args->index = probe; - return XFS_ERROR(EEXIST); + return -EEXIST; } else { name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); if (name_rmt->namelen != args->namelen) @@ -2166,16 +2163,16 @@ xfs_attr3_leaf_lookup_int( if (!xfs_attr_namesp_match(args->flags, entry->flags)) continue; args->index = probe; - args->valuelen = be32_to_cpu(name_rmt->valuelen); + args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); args->rmtblkno = be32_to_cpu(name_rmt->valueblk); args->rmtblkcnt = xfs_attr3_rmt_blocks( args->dp->i_mount, - args->valuelen); - return XFS_ERROR(EEXIST); + args->rmtvaluelen); + return -EEXIST; } } args->index = probe; - return XFS_ERROR(ENOATTR); + return -ENOATTR; } /* @@ -2196,7 +2193,7 @@ xfs_attr3_leaf_getvalue( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); - ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8); + ASSERT(ichdr.count < args->geo->blksize / 8); ASSERT(args->index < ichdr.count); entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; @@ -2211,7 +2208,7 @@ xfs_attr3_leaf_getvalue( } if (args->valuelen < valuelen) { args->valuelen = valuelen; - return XFS_ERROR(ERANGE); + return -ERANGE; } args->valuelen = valuelen; memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); @@ -2219,19 +2216,19 @@ xfs_attr3_leaf_getvalue( name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); ASSERT(name_rmt->namelen == args->namelen); ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); - valuelen = be32_to_cpu(name_rmt->valuelen); + args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); args->rmtblkno = be32_to_cpu(name_rmt->valueblk); args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, - valuelen); + args->rmtvaluelen); if (args->flags & ATTR_KERNOVAL) { - args->valuelen = valuelen; + args->valuelen = args->rmtvaluelen; return 0; } - if (args->valuelen < valuelen) { - args->valuelen = valuelen; - return XFS_ERROR(ERANGE); + if (args->valuelen < args->rmtvaluelen) { + args->valuelen = args->rmtvaluelen; + return -ERANGE; } - args->valuelen = valuelen; + args->valuelen = args->rmtvaluelen; } return 0; } @@ -2247,14 +2244,14 @@ xfs_attr3_leaf_getvalue( /*ARGSUSED*/ STATIC void xfs_attr3_leaf_moveents( + struct xfs_da_args *args, struct xfs_attr_leafblock *leaf_s, struct xfs_attr3_icleaf_hdr *ichdr_s, int start_s, struct xfs_attr_leafblock *leaf_d, struct xfs_attr3_icleaf_hdr *ichdr_d, int start_d, - int count, - struct xfs_mount *mp) + int count) { struct xfs_attr_leaf_entry *entry_s; struct xfs_attr_leaf_entry *entry_d; @@ -2274,10 +2271,10 @@ xfs_attr3_leaf_moveents( ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC || ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); ASSERT(ichdr_s->magic == ichdr_d->magic); - ASSERT(ichdr_s->count > 0 && ichdr_s->count < XFS_LBSIZE(mp) / 8); + ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) + xfs_attr3_leaf_hdr_size(leaf_s)); - ASSERT(ichdr_d->count < XFS_LBSIZE(mp) / 8); + ASSERT(ichdr_d->count < args->geo->blksize / 8); ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) + xfs_attr3_leaf_hdr_size(leaf_d)); @@ -2329,11 +2326,11 @@ xfs_attr3_leaf_moveents( entry_d->nameidx = cpu_to_be16(ichdr_d->firstused); entry_d->flags = entry_s->flags; ASSERT(be16_to_cpu(entry_d->nameidx) + tmp - <= XFS_LBSIZE(mp)); + <= args->geo->blksize); memmove(xfs_attr3_leaf_name(leaf_d, desti), xfs_attr3_leaf_name(leaf_s, start_s + i), tmp); ASSERT(be16_to_cpu(entry_s->nameidx) + tmp - <= XFS_LBSIZE(mp)); + <= args->geo->blksize); memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp); ichdr_s->usedbytes -= tmp; ichdr_d->usedbytes += tmp; @@ -2354,7 +2351,7 @@ xfs_attr3_leaf_moveents( tmp = count * sizeof(xfs_attr_leaf_entry_t); entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; ASSERT(((char *)entry_s + tmp) <= - ((char *)leaf_s + XFS_LBSIZE(mp))); + ((char *)leaf_s + args->geo->blksize)); memset(entry_s, 0, tmp); } else { /* @@ -2369,7 +2366,7 @@ xfs_attr3_leaf_moveents( tmp = count * sizeof(xfs_attr_leaf_entry_t); entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count]; ASSERT(((char *)entry_s + tmp) <= - ((char *)leaf_s + XFS_LBSIZE(mp))); + ((char *)leaf_s + args->geo->blksize)); memset(entry_s, 0, tmp); } @@ -2437,22 +2434,21 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) * a "local" or a "remote" attribute. */ int -xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local) +xfs_attr_leaf_newentsize( + struct xfs_da_args *args, + int *local) { - int size; + int size; - size = xfs_attr_leaf_entsize_local(namelen, valuelen); - if (size < xfs_attr_leaf_entsize_local_max(blocksize)) { - if (local) { + size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen); + if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) { + if (local) *local = 1; - } - } else { - size = xfs_attr_leaf_entsize_remote(namelen); - if (local) { - *local = 0; - } + return size; } - return size; + if (local) + *local = 0; + return xfs_attr_leaf_entsize_remote(args->namelen); } @@ -2485,7 +2481,7 @@ xfs_attr3_leaf_clearflag( */ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); if (error) - return(error); + return error; leaf = bp->b_addr; entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; @@ -2518,7 +2514,7 @@ xfs_attr3_leaf_clearflag( ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); - name_rmt->valuelen = cpu_to_be32(args->valuelen); + name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } @@ -2552,7 +2548,7 @@ xfs_attr3_leaf_setflag( */ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); if (error) - return(error); + return error; leaf = bp->b_addr; #ifdef DEBUG @@ -2676,7 +2672,7 @@ xfs_attr3_leaf_flipflags( ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); - name_rmt->valuelen = cpu_to_be32(args->valuelen); + name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); xfs_trans_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); } diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 3ec5ec0b8678..e2929da7c3ba 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -96,8 +96,7 @@ int xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count); int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp, struct xfs_buf *leaf2_bp); -int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, - int *local); +int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local); int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 739e0a52deda..7510ab8058a4 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -68,7 +68,6 @@ xfs_attr3_rmt_blocks( */ static bool xfs_attr3_rmt_hdr_ok( - struct xfs_mount *mp, void *ptr, xfs_ino_t ino, uint32_t offset, @@ -110,7 +109,7 @@ xfs_attr3_rmt_verify( if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) return false; if (be32_to_cpu(rmt->rm_offset) + - be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) + be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) return false; if (rmt->rm_owner == 0) return false; @@ -125,8 +124,8 @@ xfs_attr3_rmt_read_verify( struct xfs_mount *mp = bp->b_target->bt_mount; char *ptr; int len; - bool corrupt = false; xfs_daddr_t bno; + int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -135,27 +134,25 @@ xfs_attr3_rmt_read_verify( ptr = bp->b_addr; bno = bp->b_bn; len = BBTOB(bp->b_length); - ASSERT(len >= XFS_LBSIZE(mp)); + ASSERT(len >= blksize); while (len > 0) { - if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), - XFS_ATTR3_RMT_CRC_OFF)) { - corrupt = true; + if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { + xfs_buf_ioerror(bp, -EFSBADCRC); break; } - if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { - corrupt = true; + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { + xfs_buf_ioerror(bp, -EFSCORRUPTED); break; } - len -= XFS_LBSIZE(mp); - ptr += XFS_LBSIZE(mp); - bno += mp->m_bsize; + len -= blksize; + ptr += blksize; + bno += BTOBB(blksize); } - if (corrupt) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } else + if (bp->b_error) + xfs_verifier_error(bp); + else ASSERT(len == 0); } @@ -168,6 +165,7 @@ xfs_attr3_rmt_write_verify( char *ptr; int len; xfs_daddr_t bno; + int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -176,13 +174,12 @@ xfs_attr3_rmt_write_verify( ptr = bp->b_addr; bno = bp->b_bn; len = BBTOB(bp->b_length); - ASSERT(len >= XFS_LBSIZE(mp)); + ASSERT(len >= blksize); while (len > 0) { - if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { - XFS_CORRUPTION_ERROR(__func__, - XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } if (bip) { @@ -191,11 +188,11 @@ xfs_attr3_rmt_write_verify( rmt = (struct xfs_attr3_rmt_hdr *)ptr; rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); } - xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF); + xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); - len -= XFS_LBSIZE(mp); - ptr += XFS_LBSIZE(mp); - bno += mp->m_bsize; + len -= blksize; + ptr += blksize; + bno += BTOBB(blksize); } ASSERT(len == 0); } @@ -244,22 +241,23 @@ xfs_attr_rmtval_copyout( char *src = bp->b_addr; xfs_daddr_t bno = bp->b_bn; int len = BBTOB(bp->b_length); + int blksize = mp->m_attr_geo->blksize; - ASSERT(len >= XFS_LBSIZE(mp)); + ASSERT(len >= blksize); while (len > 0 && *valuelen > 0) { int hdr_size = 0; - int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp)); + int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); byte_cnt = min(*valuelen, byte_cnt); if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset, + if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, byte_cnt, bno)) { xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", bno, *offset, byte_cnt, ino); - return EFSCORRUPTED; + return -EFSCORRUPTED; } hdr_size = sizeof(struct xfs_attr3_rmt_hdr); } @@ -267,9 +265,9 @@ xfs_attr_rmtval_copyout( memcpy(*dst, src + hdr_size, byte_cnt); /* roll buffer forwards */ - len -= XFS_LBSIZE(mp); - src += XFS_LBSIZE(mp); - bno += mp->m_bsize; + len -= blksize; + src += blksize; + bno += BTOBB(blksize); /* roll attribute data forwards */ *valuelen -= byte_cnt; @@ -291,12 +289,13 @@ xfs_attr_rmtval_copyin( char *dst = bp->b_addr; xfs_daddr_t bno = bp->b_bn; int len = BBTOB(bp->b_length); + int blksize = mp->m_attr_geo->blksize; - ASSERT(len >= XFS_LBSIZE(mp)); + ASSERT(len >= blksize); while (len > 0 && *valuelen > 0) { int hdr_size; - int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp)); + int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); byte_cnt = min(*valuelen, byte_cnt); hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, @@ -308,17 +307,17 @@ xfs_attr_rmtval_copyin( * If this is the last block, zero the remainder of it. * Check that we are actually the last block, too. */ - if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) { + if (byte_cnt + hdr_size < blksize) { ASSERT(*valuelen - byte_cnt == 0); - ASSERT(len == XFS_LBSIZE(mp)); + ASSERT(len == blksize); memset(dst + hdr_size + byte_cnt, 0, - XFS_LBSIZE(mp) - hdr_size - byte_cnt); + blksize - hdr_size - byte_cnt); } /* roll buffer forwards */ - len -= XFS_LBSIZE(mp); - dst += XFS_LBSIZE(mp); - bno += mp->m_bsize; + len -= blksize; + dst += blksize; + bno += BTOBB(blksize); /* roll attribute data forwards */ *valuelen -= byte_cnt; @@ -340,7 +339,7 @@ xfs_attr_rmtval_get( struct xfs_buf *bp; xfs_dablk_t lblkno = args->rmtblkno; __uint8_t *dst = args->value; - int valuelen = args->valuelen; + int valuelen; int nmap; int error; int blkcnt = args->rmtblkcnt; @@ -350,7 +349,9 @@ xfs_attr_rmtval_get( trace_xfs_attr_rmtval_get(args); ASSERT(!(args->flags & ATTR_KERNOVAL)); + ASSERT(args->rmtvaluelen == args->valuelen); + valuelen = args->rmtvaluelen; while (valuelen > 0) { nmap = ATTR_RMTVALUE_MAPSIZE; error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, @@ -418,7 +419,7 @@ xfs_attr_rmtval_set( * attributes have headers, we can't just do a straight byte to FSB * conversion and have to take the header space into account. */ - blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); + blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, XFS_ATTR_FORK); if (error) @@ -451,7 +452,7 @@ xfs_attr_rmtval_set( ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); - return(error); + return error; } /* @@ -472,7 +473,7 @@ xfs_attr_rmtval_set( */ error = xfs_trans_roll(&args->trans, dp); if (error) - return (error); + return error; } /* @@ -483,7 +484,7 @@ xfs_attr_rmtval_set( */ lblkno = args->rmtblkno; blkcnt = args->rmtblkcnt; - valuelen = args->valuelen; + valuelen = args->rmtvaluelen; while (valuelen > 0) { struct xfs_buf *bp; xfs_daddr_t dblkno; @@ -497,7 +498,7 @@ xfs_attr_rmtval_set( blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) - return(error); + return error; ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); @@ -507,7 +508,7 @@ xfs_attr_rmtval_set( bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); if (!bp) - return ENOMEM; + return -ENOMEM; bp->b_ops = &xfs_attr3_rmt_buf_ops; xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, @@ -562,7 +563,7 @@ xfs_attr_rmtval_remove( error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) - return(error); + return error; ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); @@ -621,7 +622,7 @@ xfs_attr_rmtval_remove( */ error = xfs_trans_roll(&args->trans, args->dp); if (error) - return (error); + return error; } - return(0); + return 0; } diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h index 5a9acfa156d7..5a9acfa156d7 100644 --- a/fs/xfs/xfs_attr_remote.h +++ b/fs/xfs/libxfs/xfs_attr_remote.h diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index 919756e3ba53..919756e3ba53 100644 --- a/fs/xfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h index f1e3c907044d..e1649c0d3e02 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/libxfs/xfs_bit.h @@ -66,8 +66,11 @@ static inline int xfs_lowbit64(__uint64_t v) n = ffs(w); } else { /* upper bits */ w = (__uint32_t)(v >> 32); - if (w && (n = ffs(w))) - n += 32; + if (w) { + n = ffs(w); + if (n) + n += 32; + } } return n - 1; } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 3ef11b22e750..86df952d3e24 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -94,7 +94,7 @@ xfs_bmap_compute_maxlevels( maxleafents = MAXAEXTNUM; sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); } - maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0); + maxrootrecs = xfs_bmdr_maxrecs(sz, 0); minleafrecs = mp->m_bmap_dmnr[0]; minnoderecs = mp->m_bmap_dmnr[1]; maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; @@ -233,7 +233,6 @@ xfs_default_attroffset( */ STATIC void xfs_bmap_forkoff_reset( - xfs_mount_t *mp, xfs_inode_t *ip, int whichfork) { @@ -393,7 +392,7 @@ xfs_bmap_check_leaf_extents( pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); + ASSERT(bno != NULLFSBLOCK); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); @@ -905,7 +904,7 @@ xfs_bmap_local_to_extents_empty( ASSERT(ifp->if_bytes == 0); ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); - xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); + xfs_bmap_forkoff_reset(ip, whichfork); ifp->if_flags &= ~XFS_IFINLINE; ifp->if_flags |= XFS_IFEXTENTS; XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); @@ -1034,7 +1033,7 @@ xfs_bmap_add_attrfork_btree( goto error0; if (stat == 0) { xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - return XFS_ERROR(ENOSPC); + return -ENOSPC; } *firstblock = cur->bc_private.b.firstblock; cur->bc_private.b.allocated = 0; @@ -1099,10 +1098,11 @@ xfs_bmap_add_attrfork_local( if (S_ISDIR(ip->i_d.di_mode)) { memset(&dargs, 0, sizeof(dargs)); + dargs.geo = ip->i_mount->m_dir_geo; dargs.dp = ip; dargs.firstblock = firstblock; dargs.flist = flist; - dargs.total = ip->i_mount->m_dirblkfsbs; + dargs.total = dargs.geo->fsbcount; dargs.whichfork = XFS_DATA_FORK; dargs.trans = tp; return xfs_dir2_sf_to_block(&dargs); @@ -1115,7 +1115,7 @@ xfs_bmap_add_attrfork_local( /* should only be called for types that support local format data */ ASSERT(0); - return EFSCORRUPTED; + return -EFSCORRUPTED; } /* @@ -1192,7 +1192,7 @@ xfs_bmap_add_attrfork( break; default: ASSERT(0); - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto trans_cancel; } @@ -1299,7 +1299,7 @@ xfs_bmap_read_extents( ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); + ASSERT(bno != NULLFSBLOCK); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); /* @@ -1399,7 +1399,7 @@ xfs_bmap_read_extents( return 0; error0: xfs_trans_brelse(tp, bp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } @@ -1429,11 +1429,7 @@ xfs_bmap_search_multi_extents( gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; gotp->br_state = XFS_EXT_INVALID; -#if XFS_BIG_BLKNOS gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; -#else - gotp->br_startblock = 0xffffa5a5; -#endif prevp->br_startoff = NULLFILEOFF; ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); @@ -1576,7 +1572,7 @@ xfs_bmap_last_before( if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EIO); + return -EIO; if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { *last_block = 0; return 0; @@ -1635,7 +1631,7 @@ xfs_bmap_last_extent( * blocks at the end of the file which do not start at the previous data block, * we will try to align the new blocks at stripe unit boundaries. * - * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be + * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be * at, or past the EOF. */ STATIC int @@ -1650,9 +1646,14 @@ xfs_bmap_isaeof( bma->aeof = 0; error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, &is_empty); - if (error || is_empty) + if (error) return error; + if (is_empty) { + bma->aeof = 1; + return 0; + } + /* * Check if we are allocation or past the last extent, or at least into * the last delayed allocated extent. @@ -1670,7 +1671,6 @@ xfs_bmap_isaeof( */ int xfs_bmap_last_offset( - struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *last_block, int whichfork) @@ -1686,7 +1686,7 @@ xfs_bmap_last_offset( if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - return XFS_ERROR(EIO); + return -EIO; error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); if (error || is_empty) @@ -3319,7 +3319,7 @@ xfs_bmap_extsize_align( if (orig_off < align_off || orig_end > align_off + align_alen || align_alen - temp < orig_alen) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * Try to fix it by moving the start up. */ @@ -3344,7 +3344,7 @@ xfs_bmap_extsize_align( * Result doesn't cover the request, fail it. */ if (orig_off < align_off || orig_end > align_off + align_alen) - return XFS_ERROR(EINVAL); + return -EINVAL; } else { ASSERT(orig_off >= align_off); ASSERT(orig_end <= align_off + align_alen); @@ -3512,6 +3512,67 @@ xfs_bmap_adjacent( #undef ISVALID } +static int +xfs_bmap_longest_free_extent( + struct xfs_trans *tp, + xfs_agnumber_t ag, + xfs_extlen_t *blen, + int *notinit) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; + xfs_extlen_t longest; + int error = 0; + + pag = xfs_perag_get(mp, ag); + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); + if (error) + goto out; + + if (!pag->pagf_init) { + *notinit = 1; + goto out; + } + } + + longest = xfs_alloc_longest_free_extent(mp, pag); + if (*blen < longest) + *blen = longest; + +out: + xfs_perag_put(pag); + return error; +} + +static void +xfs_bmap_select_minlen( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *blen, + int notinit) +{ + if (notinit || *blen < ap->minlen) { + /* + * Since we did a BUF_TRYLOCK above, it is possible that + * there is space for this request. + */ + args->minlen = ap->minlen; + } else if (*blen < args->maxlen) { + /* + * If the best seen length is less than the request length, + * use the best as the minimum. + */ + args->minlen = *blen; + } else { + /* + * Otherwise we've seen an extent as big as maxlen, use that + * as the minimum. + */ + args->minlen = args->maxlen; + } +} + STATIC int xfs_bmap_btalloc_nullfb( struct xfs_bmalloca *ap, @@ -3519,111 +3580,74 @@ xfs_bmap_btalloc_nullfb( xfs_extlen_t *blen) { struct xfs_mount *mp = ap->ip->i_mount; - struct xfs_perag *pag; xfs_agnumber_t ag, startag; int notinit = 0; int error; - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) - args->type = XFS_ALLOCTYPE_NEAR_BNO; - else - args->type = XFS_ALLOCTYPE_START_BNO; + args->type = XFS_ALLOCTYPE_START_BNO; args->total = ap->total; - /* - * Search for an allocation group with a single extent large enough - * for the request. If one isn't found, then adjust the minimum - * allocation size to the largest space found. - */ startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); if (startag == NULLAGNUMBER) startag = ag = 0; - pag = xfs_perag_get(mp, ag); while (*blen < args->maxlen) { - if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, args->tp, ag, - XFS_ALLOC_FLAG_TRYLOCK); - if (error) { - xfs_perag_put(pag); - return error; - } - } - - /* - * See xfs_alloc_fix_freelist... - */ - if (pag->pagf_init) { - xfs_extlen_t longest; - longest = xfs_alloc_longest_free_extent(mp, pag); - if (*blen < longest) - *blen = longest; - } else - notinit = 1; - - if (xfs_inode_is_filestream(ap->ip)) { - if (*blen >= args->maxlen) - break; - - if (ap->userdata) { - /* - * If startag is an invalid AG, we've - * come here once before and - * xfs_filestream_new_ag picked the - * best currently available. - * - * Don't continue looping, since we - * could loop forever. - */ - if (startag == NULLAGNUMBER) - break; - - error = xfs_filestream_new_ag(ap, &ag); - xfs_perag_put(pag); - if (error) - return error; + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, + ¬init); + if (error) + return error; - /* loop again to set 'blen'*/ - startag = NULLAGNUMBER; - pag = xfs_perag_get(mp, ag); - continue; - } - } if (++ag == mp->m_sb.sb_agcount) ag = 0; if (ag == startag) break; - xfs_perag_put(pag); - pag = xfs_perag_get(mp, ag); } - xfs_perag_put(pag); - /* - * Since the above loop did a BUF_TRYLOCK, it is - * possible that there is space for this request. - */ - if (notinit || *blen < ap->minlen) - args->minlen = ap->minlen; - /* - * If the best seen length is less than the request - * length, use the best as the minimum. - */ - else if (*blen < args->maxlen) - args->minlen = *blen; - /* - * Otherwise we've seen an extent as big as maxlen, - * use that as the minimum. - */ - else - args->minlen = args->maxlen; + xfs_bmap_select_minlen(ap, args, blen, notinit); + return 0; +} + +STATIC int +xfs_bmap_btalloc_filestreams( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *blen) +{ + struct xfs_mount *mp = ap->ip->i_mount; + xfs_agnumber_t ag; + int notinit = 0; + int error; + + args->type = XFS_ALLOCTYPE_NEAR_BNO; + args->total = ap->total; + + ag = XFS_FSB_TO_AGNO(mp, args->fsbno); + if (ag == NULLAGNUMBER) + ag = 0; + + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, ¬init); + if (error) + return error; + + if (*blen < args->maxlen) { + error = xfs_filestream_new_ag(ap, &ag); + if (error) + return error; + + error = xfs_bmap_longest_free_extent(args->tp, ag, blen, + ¬init); + if (error) + return error; + + } + + xfs_bmap_select_minlen(ap, args, blen, notinit); /* - * set the failure fallback case to look in the selected - * AG as the stream may have moved. + * Set the failure fallback case to look in the selected AG as stream + * may have moved. */ - if (xfs_inode_is_filestream(ap->ip)) - ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); - + ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); return 0; } @@ -3643,10 +3667,19 @@ xfs_bmap_btalloc( int isaligned; int tryagain; int error; + int stripe_align; ASSERT(ap->length); mp = ap->ip->i_mount; + + /* stripe alignment for allocation is determined by mount parameters */ + stripe_align = 0; + if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) + stripe_align = mp->m_swidth; + else if (mp->m_dalign) + stripe_align = mp->m_dalign; + align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; if (unlikely(align)) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, @@ -3655,6 +3688,8 @@ xfs_bmap_btalloc( ASSERT(!error); ASSERT(ap->length); } + + nullfb = *ap->firstblock == NULLFSBLOCK; fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); if (nullfb) { @@ -3692,7 +3727,15 @@ xfs_bmap_btalloc( args.firstblock = *ap->firstblock; blen = 0; if (nullfb) { - error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); + /* + * Search for an allocation group with a single extent large + * enough for the request. If one isn't found, then adjust + * the minimum allocation size to the largest space found. + */ + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) + error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); + else + error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); if (error) return error; } else if (ap->flist->xbf_low) { @@ -3730,7 +3773,7 @@ xfs_bmap_btalloc( */ if (!ap->flist->xbf_low && ap->aeof) { if (!ap->offset) { - args.alignment = mp->m_dalign; + args.alignment = stripe_align; atype = args.type; isaligned = 1; /* @@ -3755,13 +3798,13 @@ xfs_bmap_btalloc( * of minlen+alignment+slop doesn't go up * between the calls. */ - if (blen > mp->m_dalign && blen <= args.maxlen) - nextminlen = blen - mp->m_dalign; + if (blen > stripe_align && blen <= args.maxlen) + nextminlen = blen - stripe_align; else nextminlen = args.minlen; - if (nextminlen + mp->m_dalign > args.minlen + 1) + if (nextminlen + stripe_align > args.minlen + 1) args.minalignslop = - nextminlen + mp->m_dalign - + nextminlen + stripe_align - args.minlen - 1; else args.minalignslop = 0; @@ -3783,7 +3826,7 @@ xfs_bmap_btalloc( */ args.type = atype; args.fsbno = ap->blkno; - args.alignment = mp->m_dalign; + args.alignment = stripe_align; args.minlen = nextminlen; args.minalignslop = 0; isaligned = 1; @@ -3997,17 +4040,18 @@ xfs_bmapi_read( ASSERT(*nmap >= 1); ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| XFS_BMAPI_IGSTATE))); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; XFS_STATS_INC(xs_blk_mapr); @@ -4191,17 +4235,18 @@ xfs_bmapi_delay( ASSERT(*nmap >= 1); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; XFS_STATS_INC(xs_blk_mapw); @@ -4249,8 +4294,8 @@ xfs_bmapi_delay( } -int -__xfs_bmapi_allocate( +static int +xfs_bmapi_allocate( struct xfs_bmalloca *bma) { struct xfs_mount *mp = bma->ip->i_mount; @@ -4420,7 +4465,7 @@ xfs_bmapi_convert_unwritten( * so generate another request. */ if (mval->br_blockcount < len) - return EAGAIN; + return -EAGAIN; return 0; } @@ -4484,17 +4529,18 @@ xfs_bmapi_write( ASSERT(tp != NULL); ASSERT(len > 0); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; ifp = XFS_IFORK_PTR(ip, whichfork); @@ -4528,9 +4574,6 @@ xfs_bmapi_write( bma.flist = flist; bma.firstblock = firstblock; - if (flags & XFS_BMAPI_STACK_SWITCH) - bma.stack_switch = 1; - while (bno < end && n < *nmap) { inhole = eof || bma.got.br_startoff > bno; wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); @@ -4573,7 +4616,7 @@ xfs_bmapi_write( /* Execute unwritten extent conversion if necessary */ error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); - if (error == EAGAIN) + if (error == -EAGAIN) continue; if (error) goto error0; @@ -4875,7 +4918,7 @@ xfs_bmap_del_extent( goto done; cur->bc_rec.b = new; error = xfs_btree_insert(cur, &i); - if (error && error != ENOSPC) + if (error && error != -ENOSPC) goto done; /* * If get no-space back from btree insert, @@ -4883,7 +4926,7 @@ xfs_bmap_del_extent( * block reservation. * Fix up our state and return the error. */ - if (error == ENOSPC) { + if (error == -ENOSPC) { /* * Reset the cursor, don't trust * it after any insert operation. @@ -4911,7 +4954,7 @@ xfs_bmap_del_extent( xfs_bmbt_set_blockcount(ep, got.br_blockcount); flags = 0; - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto done; } XFS_WANT_CORRUPTED_GOTO(i == 1, done); @@ -5029,12 +5072,13 @@ xfs_bunmapi( XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } mp = ip->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(len > 0); ASSERT(nexts >= 0); @@ -5277,7 +5321,7 @@ xfs_bunmapi( del.br_startoff > got.br_startoff && del.br_startoff + del.br_blockcount < got.br_startoff + got.br_blockcount) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto error0; } error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, @@ -5358,3 +5402,203 @@ error0: } return error; } + +/* + * Shift extent records to the left to cover a hole. + * + * The maximum number of extents to be shifted in a single operation + * is @num_exts, and @current_ext keeps track of the current extent + * index we have shifted. @offset_shift_fsb is the length by which each + * extent is shifted. If there is no hole to shift the extents + * into, this will be considered invalid operation and we abort immediately. + */ +int +xfs_bmap_shift_extents( + struct xfs_trans *tp, + struct xfs_inode *ip, + int *done, + xfs_fileoff_t start_fsb, + xfs_fileoff_t offset_shift_fsb, + xfs_extnum_t *current_ext, + xfs_fsblock_t *firstblock, + struct xfs_bmap_free *flist, + int num_exts) +{ + struct xfs_btree_cur *cur = NULL; + struct xfs_bmbt_rec_host *gotp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp; + xfs_extnum_t nexts = 0; + xfs_fileoff_t startoff; + int error = 0; + int i; + int whichfork = XFS_DATA_FORK; + int logflags = 0; + xfs_filblks_t blockcount = 0; + int total_extents; + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmap_shift_extents", + XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + ASSERT(current_ext != NULL); + + ifp = XFS_IFORK_PTR(ip, whichfork); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + /* Read in all the extents */ + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + + /* + * If *current_ext is 0, we would need to lookup the extent + * from where we would start shifting and store it in gotp. + */ + if (!*current_ext) { + gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); + /* + * gotp can be null in 2 cases: 1) if there are no extents + * or 2) start_fsb lies in a hole beyond which there are + * no extents. Either way, we are done. + */ + if (!gotp) { + *done = 1; + return 0; + } + } + + if (ifp->if_flags & XFS_IFBROOT) { + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + cur->bc_private.b.firstblock = *firstblock; + cur->bc_private.b.flist = flist; + cur->bc_private.b.flags = 0; + } + + /* + * There may be delalloc extents in the data fork before the range we + * are collapsing out, so we cannot + * use the count of real extents here. Instead we have to calculate it + * from the incore fork. + */ + total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + while (nexts++ < num_exts && *current_ext < total_extents) { + + gotp = xfs_iext_get_ext(ifp, *current_ext); + xfs_bmbt_get_all(gotp, &got); + startoff = got.br_startoff - offset_shift_fsb; + + /* + * Before shifting extent into hole, make sure that the hole + * is large enough to accomodate the shift. + */ + if (*current_ext) { + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, + *current_ext - 1), &left); + + if (startoff < left.br_startoff + left.br_blockcount) + error = -EINVAL; + } else if (offset_shift_fsb > got.br_startoff) { + /* + * When first extent is shifted, offset_shift_fsb + * should be less than the stating offset of + * the first extent. + */ + error = -EINVAL; + } + + if (error) + goto del_cursor; + + if (cur) { + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } + + /* Check if we can merge 2 adjacent extents */ + if (*current_ext && + left.br_startoff + left.br_blockcount == startoff && + left.br_startblock + left.br_blockcount == + got.br_startblock && + left.br_state == got.br_state && + left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { + blockcount = left.br_blockcount + + got.br_blockcount; + xfs_iext_remove(ip, *current_ext, 1, 0); + logflags |= XFS_ILOG_CORE; + if (cur) { + error = xfs_btree_delete(cur, &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } else { + logflags |= XFS_ILOG_DEXT; + } + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + gotp = xfs_iext_get_ext(ifp, --*current_ext); + xfs_bmbt_get_all(gotp, &got); + + /* Make cursor point to the extent we will update */ + if (cur) { + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + &i); + if (error) + goto del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } + + xfs_bmbt_set_blockcount(gotp, blockcount); + got.br_blockcount = blockcount; + } else { + /* We have to update the startoff */ + xfs_bmbt_set_startoff(gotp, startoff); + got.br_startoff = startoff; + } + + logflags |= XFS_ILOG_CORE; + if (cur) { + error = xfs_bmbt_update(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount, + got.br_state); + if (error) + goto del_cursor; + } else { + logflags |= XFS_ILOG_DEXT; + } + + (*current_ext)++; + total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + } + + /* Check if we are done */ + if (*current_ext == total_extents) + *done = 1; + +del_cursor: + if (cur) + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + return error; +} diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 33b41f351225..b879ca56a64c 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -77,7 +77,6 @@ typedef struct xfs_bmap_free * from written to unwritten, otherwise convert from unwritten to written. */ #define XFS_BMAPI_CONVERT 0x040 -#define XFS_BMAPI_STACK_SWITCH 0x080 #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ @@ -86,8 +85,7 @@ typedef struct xfs_bmap_free { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ - { XFS_BMAPI_CONVERT, "CONVERT" }, \ - { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" } + { XFS_BMAPI_CONVERT, "CONVERT" } static inline int xfs_bmapi_aflag(int w) @@ -127,6 +125,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) { BMAP_RIGHT_FILLING, "RF" }, \ { BMAP_ATTRFORK, "ATTR" } + +/* + * This macro is used to determine how many extents will be shifted + * in one write transaction. We could require two splits, + * an extent move on the first and an extent merge on the second, + * So it is proper that one extent is shifted inside write transaction + * at a time. + */ +#define XFS_BMAP_MAX_SHIFT_EXTENTS 1 + #ifdef DEBUG void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, int whichfork, unsigned long caller_ip); @@ -146,8 +154,8 @@ int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *last_block, int whichfork); -int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip, - xfs_fileoff_t *unused, int whichfork); +int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused, + int whichfork); int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork); int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); @@ -169,5 +177,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); +int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, + int *done, xfs_fileoff_t start_fsb, + xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, + xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, + int num_exts); #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 706bc3f777cb..fba753308f31 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -84,7 +84,7 @@ xfs_bmdr_to_bmbt( rblock->bb_level = dblock->bb_level; ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; - dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); + dmxr = xfs_bmdr_maxrecs(dblocklen, 0); fkp = XFS_BMDR_KEY_ADDR(dblock, 1); tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); @@ -111,23 +111,8 @@ __xfs_bmbt_get_all( ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); s->br_startoff = ((xfs_fileoff_t)l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; -#if XFS_BIG_BLKNOS s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | (((xfs_fsblock_t)l1) >> 21); -#else -#ifdef DEBUG - { - xfs_dfsbno_t b; - - b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) | - (((xfs_dfsbno_t)l1) >> 21); - ASSERT((b >> 32) == 0 || isnulldstartblock(b)); - s->br_startblock = (xfs_fsblock_t)b; - } -#else /* !DEBUG */ - s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21); -#endif /* DEBUG */ -#endif /* XFS_BIG_BLKNOS */ s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); /* This is xfs_extent_state() in-line */ if (ext_flag) { @@ -163,21 +148,8 @@ xfs_fsblock_t xfs_bmbt_get_startblock( xfs_bmbt_rec_host_t *r) { -#if XFS_BIG_BLKNOS return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | (((xfs_fsblock_t)r->l1) >> 21); -#else -#ifdef DEBUG - xfs_dfsbno_t b; - - b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) | - (((xfs_dfsbno_t)r->l1) >> 21); - ASSERT((b >> 32) == 0 || isnulldstartblock(b)); - return (xfs_fsblock_t)b; -#else /* !DEBUG */ - return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21); -#endif /* DEBUG */ -#endif /* XFS_BIG_BLKNOS */ } /* @@ -241,7 +213,6 @@ xfs_bmbt_set_allf( ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); -#if XFS_BIG_BLKNOS ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | @@ -250,23 +221,6 @@ xfs_bmbt_set_allf( r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(startblock)) { - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - (xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } else { - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9); - r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } -#endif /* XFS_BIG_BLKNOS */ } /* @@ -298,8 +252,6 @@ xfs_bmbt_disk_set_allf( ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); - -#if XFS_BIG_BLKNOS ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); r->l0 = cpu_to_be64( @@ -310,26 +262,6 @@ xfs_bmbt_disk_set_allf( ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(startblock)) { - r->l0 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); - r->l1 = cpu_to_be64(xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); - } else { - r->l0 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9)); - r->l1 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); - } -#endif /* XFS_BIG_BLKNOS */ } /* @@ -365,24 +297,11 @@ xfs_bmbt_set_startblock( xfs_bmbt_rec_host_t *r, xfs_fsblock_t v) { -#if XFS_BIG_BLKNOS ASSERT((v & xfs_mask64hi(12)) == 0); r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) | (xfs_bmbt_rec_base_t)(v >> 43); r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) | (xfs_bmbt_rec_base_t)(v << 21); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(v)) { - r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)v << 21) | - (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } else { - r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = ((xfs_bmbt_rec_base_t)v << 21) | - (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } -#endif /* XFS_BIG_BLKNOS */ } /* @@ -438,12 +357,12 @@ xfs_bmbt_to_bmdr( cpu_to_be64(XFS_BUF_DADDR_NULL)); } else ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); - ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); - ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); + ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)); + ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)); ASSERT(rblock->bb_level != 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; - dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); + dmxr = xfs_bmdr_maxrecs(dblocklen, 0); fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); tkp = XFS_BMDR_KEY_ADDR(dblock, 1); fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); @@ -519,7 +438,6 @@ xfs_bmbt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int length, int *stat) { xfs_alloc_arg_t args; /* block allocation args */ @@ -555,7 +473,7 @@ xfs_bmbt_alloc_block( args.minlen = args.maxlen = args.prod = 1; args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto error0; } error = xfs_alloc_vextent(&args); @@ -672,8 +590,7 @@ xfs_bmbt_get_dmaxrecs( { if (level != cur->bc_nlevels - 1) return cur->bc_mp->m_bmap_dmxr[level != 0]; - return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize, - level == 0); + return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0); } STATIC void @@ -765,11 +682,11 @@ xfs_bmbt_verify( /* sibling pointer verification */ if (!block->bb_u.l.bb_leftsib || - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) && + (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) return false; if (!block->bb_u.l.bb_rightsib || - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) && + (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) return false; @@ -780,12 +697,14 @@ static void xfs_bmbt_read_verify( struct xfs_buf *bp) { - if (!(xfs_btree_lblock_verify_crc(bp) && - xfs_bmbt_verify(bp))) { + if (!xfs_btree_lblock_verify_crc(bp)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_bmbt_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) { trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); } } @@ -794,11 +713,9 @@ xfs_bmbt_write_verify( struct xfs_buf *bp) { if (!xfs_bmbt_verify(bp)) { - xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn); trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } xfs_btree_lblock_calc_crc(bp); @@ -914,7 +831,6 @@ xfs_bmbt_maxrecs( */ int xfs_bmdr_maxrecs( - struct xfs_mount *mp, int blocklen, int leaf) { @@ -962,7 +878,7 @@ xfs_bmbt_change_owner( cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); if (!cur) - return ENOMEM; + return -ENOMEM; error = xfs_btree_change_owner(cur, new_owner, buffer_list); xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 6e42e1e50b89..819a8a4dee95 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -130,7 +130,7 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, xfs_bmdr_block_t *, int); extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); -extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf); +extern int xfs_bmdr_maxrecs(int blocklen, int leaf); extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 9adaae4f3e2f..8fe6a93ff473 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -33,6 +33,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_alloc.h" /* * Cursor allocation zone. @@ -43,9 +44,10 @@ kmem_zone_t *xfs_btree_cur_zone; * Btree magic numbers. */ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { - { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, + { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, + XFS_FIBT_MAGIC }, { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, - XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } + XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } }; #define xfs_btree_magic(cur) \ xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] @@ -76,11 +78,11 @@ xfs_btree_check_lblock( be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && block->bb_u.l.bb_leftsib && - (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || + (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) && block->bb_u.l.bb_rightsib && - (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || + (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))); @@ -90,7 +92,7 @@ xfs_btree_check_lblock( if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -138,7 +140,7 @@ xfs_btree_check_sblock( if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -165,12 +167,12 @@ xfs_btree_check_block( int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lptr( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_dfsbno_t bno, /* btree block disk address */ + xfs_fsblock_t bno, /* btree block disk address */ int level) /* btree block level */ { XFS_WANT_CORRUPTED_RETURN( level > 0 && - bno != NULLDFSBNO && + bno != NULLFSBLOCK && XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); return 0; } @@ -234,8 +236,7 @@ xfs_btree_lblock_calc_crc( return; if (bip) block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_BTREE_LBLOCK_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); } bool @@ -243,8 +244,8 @@ xfs_btree_lblock_verify_crc( struct xfs_buf *bp) { if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) - return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_BTREE_LBLOCK_CRC_OFF); + return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); + return true; } @@ -267,8 +268,7 @@ xfs_btree_sblock_calc_crc( return; if (bip) block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_BTREE_SBLOCK_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); } bool @@ -276,8 +276,8 @@ xfs_btree_sblock_verify_crc( struct xfs_buf *bp) { if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) - return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_BTREE_SBLOCK_CRC_OFF); + return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); + return true; } @@ -554,14 +554,11 @@ xfs_btree_get_bufl( xfs_fsblock_t fsbno, /* file system block number */ uint lock) /* lock flags for get_buf */ { - xfs_buf_t *bp; /* buffer pointer (return value) */ xfs_daddr_t d; /* real disk block address */ ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(!xfs_buf_geterror(bp)); - return bp; + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); } /* @@ -576,15 +573,12 @@ xfs_btree_get_bufs( xfs_agblock_t agbno, /* allocation group block number */ uint lock) /* lock flags for get_buf */ { - xfs_buf_t *bp; /* buffer pointer (return value) */ xfs_daddr_t d; /* real disk block address */ ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(!xfs_buf_geterror(bp)); - return bp; + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); } /* @@ -601,7 +595,7 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); + return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); else return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); } @@ -724,7 +718,6 @@ xfs_btree_read_bufl( mp->m_bsize, lock, &bp, ops); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); if (bp) xfs_buf_set_ref(bp, refval); *bpp = bp; @@ -778,16 +771,16 @@ xfs_btree_readahead_lblock( struct xfs_btree_block *block) { int rval = 0; - xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); - xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); + xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); + xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); - if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { xfs_btree_reada_bufl(cur->bc_mp, left, 1, cur->bc_ops->buf_ops); rval++; } - if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { xfs_btree_reada_bufl(cur->bc_mp, right, 1, cur->bc_ops->buf_ops); rval++; @@ -859,7 +852,7 @@ xfs_btree_ptr_to_daddr( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); + ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK)); return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); } else { @@ -907,9 +900,9 @@ xfs_btree_setbuf( b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) + if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) + if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } else { if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) @@ -925,7 +918,7 @@ xfs_btree_ptr_is_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return ptr->l == cpu_to_be64(NULLDFSBNO); + return ptr->l == cpu_to_be64(NULLFSBLOCK); else return ptr->s == cpu_to_be32(NULLAGBLOCK); } @@ -936,7 +929,7 @@ xfs_btree_set_ptr_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - ptr->l = cpu_to_be64(NULLDFSBNO); + ptr->l = cpu_to_be64(NULLFSBLOCK); else ptr->s = cpu_to_be32(NULLAGBLOCK); } @@ -1004,8 +997,8 @@ xfs_btree_init_block_int( buf->bb_numrecs = cpu_to_be16(numrecs); if (flags & XFS_BTREE_LONG_PTRS) { - buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); - buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); + buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); + buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); if (flags & XFS_BTREE_CRC_BLOCKS) { buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); buf->bb_u.l.bb_owner = cpu_to_be64(owner); @@ -1117,6 +1110,7 @@ xfs_btree_set_refs( xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); break; case XFS_BTNUM_INO: + case XFS_BTNUM_FINO: xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); break; case XFS_BTNUM_BMAP: @@ -1146,7 +1140,7 @@ xfs_btree_get_buf_block( mp->m_bsize, flags); if (!*bpp) - return ENOMEM; + return -ENOMEM; (*bpp)->b_ops = cur->bc_ops->buf_ops; *block = XFS_BUF_TO_BLOCK(*bpp); @@ -1161,7 +1155,6 @@ STATIC int xfs_btree_read_buf_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, - int level, int flags, struct xfs_btree_block **block, struct xfs_buf **bpp) @@ -1180,7 +1173,6 @@ xfs_btree_read_buf_block( if (error) return error; - ASSERT(!xfs_buf_geterror(*bpp)); xfs_btree_set_refs(cur, *bpp); *block = XFS_BUF_TO_BLOCK(*bpp); return 0; @@ -1506,7 +1498,7 @@ xfs_btree_increment( if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) goto out0; ASSERT(0); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto error0; } ASSERT(lev < cur->bc_nlevels); @@ -1519,8 +1511,8 @@ xfs_btree_increment( union xfs_btree_ptr *ptrp; ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); - error = xfs_btree_read_buf_block(cur, ptrp, --lev, - 0, &block, &bp); + --lev; + error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; @@ -1605,7 +1597,7 @@ xfs_btree_decrement( if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) goto out0; ASSERT(0); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto error0; } ASSERT(lev < cur->bc_nlevels); @@ -1618,8 +1610,8 @@ xfs_btree_decrement( union xfs_btree_ptr *ptrp; ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); - error = xfs_btree_read_buf_block(cur, ptrp, --lev, - 0, &block, &bp); + --lev; + error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); @@ -1669,7 +1661,7 @@ xfs_btree_lookup_get_block( return 0; } - error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp); + error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); if (error) return error; @@ -2020,7 +2012,7 @@ xfs_btree_lshift( goto out0; /* Set up the left neighbor as "left". */ - error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; @@ -2204,7 +2196,7 @@ xfs_btree_rshift( goto out0; /* Set up the right neighbor as "right". */ - error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; @@ -2332,7 +2324,7 @@ error1: * record (to be inserted into parent). */ STATIC int /* error */ -xfs_btree_split( +__xfs_btree_split( struct xfs_btree_cur *cur, int level, union xfs_btree_ptr *ptrp, @@ -2374,7 +2366,7 @@ xfs_btree_split( xfs_btree_buf_to_ptr(cur, lbp, &lptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); if (error) goto error0; if (*stat == 0) @@ -2472,7 +2464,7 @@ xfs_btree_split( * point back to right instead of to left. */ if (!xfs_btree_ptr_is_null(cur, &rrptr)) { - error = xfs_btree_read_buf_block(cur, &rrptr, level, + error = xfs_btree_read_buf_block(cur, &rrptr, 0, &rrblock, &rrbp); if (error) goto error0; @@ -2512,6 +2504,85 @@ error0: return error; } +struct xfs_btree_split_args { + struct xfs_btree_cur *cur; + int level; + union xfs_btree_ptr *ptrp; + union xfs_btree_key *key; + struct xfs_btree_cur **curp; + int *stat; /* success/failure */ + int result; + bool kswapd; /* allocation in kswapd context */ + struct completion *done; + struct work_struct work; +}; + +/* + * Stack switching interfaces for allocation + */ +static void +xfs_btree_split_worker( + struct work_struct *work) +{ + struct xfs_btree_split_args *args = container_of(work, + struct xfs_btree_split_args, work); + unsigned long pflags; + unsigned long new_pflags = PF_FSTRANS; + + /* + * we are in a transaction context here, but may also be doing work + * in kswapd context, and hence we may need to inherit that state + * temporarily to ensure that we don't block waiting for memory reclaim + * in any way. + */ + if (args->kswapd) + new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + + current_set_flags_nested(&pflags, new_pflags); + + args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, + args->key, args->curp, args->stat); + complete(args->done); + + current_restore_flags_nested(&pflags, new_pflags); +} + +/* + * BMBT split requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. For the other + * btree types, just call directly to avoid the context switch overhead here. + */ +STATIC int /* error */ +xfs_btree_split( + struct xfs_btree_cur *cur, + int level, + union xfs_btree_ptr *ptrp, + union xfs_btree_key *key, + struct xfs_btree_cur **curp, + int *stat) /* success/failure */ +{ + struct xfs_btree_split_args args; + DECLARE_COMPLETION_ONSTACK(done); + + if (cur->bc_btnum != XFS_BTNUM_BMAP) + return __xfs_btree_split(cur, level, ptrp, key, curp, stat); + + args.cur = cur; + args.level = level; + args.ptrp = ptrp; + args.key = key; + args.curp = curp; + args.stat = stat; + args.done = &done; + args.kswapd = current_is_kswapd(); + INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); + queue_work(xfs_alloc_wq, &args.work); + wait_for_completion(&done); + destroy_work_on_stack(&args.work); + return args.result; +} + + /* * Copy the old inode root contents into a real block and make the * broot point to it. @@ -2547,7 +2618,7 @@ xfs_btree_new_iroot( pp = xfs_btree_ptr_addr(cur, 1, block); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); if (error) goto error0; if (*stat == 0) { @@ -2651,7 +2722,7 @@ xfs_btree_new_root( cur->bc_ops->init_ptr_from_cur(cur, &rptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); if (error) goto error0; if (*stat == 0) @@ -2686,8 +2757,7 @@ xfs_btree_new_root( lbp = bp; xfs_btree_buf_to_ptr(cur, lbp, &lptr); left = block; - error = xfs_btree_read_buf_block(cur, &rptr, - cur->bc_nlevels - 1, 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; bp = rbp; @@ -2698,8 +2768,7 @@ xfs_btree_new_root( xfs_btree_buf_to_ptr(cur, rbp, &rptr); right = block; xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); - error = xfs_btree_read_buf_block(cur, &lptr, - cur->bc_nlevels - 1, 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; bp = lbp; @@ -3651,8 +3720,7 @@ xfs_btree_delrec( rptr = cptr; right = block; rbp = bp; - error = xfs_btree_read_buf_block(cur, &lptr, level, - 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; @@ -3669,8 +3737,7 @@ xfs_btree_delrec( lptr = cptr; left = block; lbp = bp; - error = xfs_btree_read_buf_block(cur, &rptr, level, - 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; @@ -3742,8 +3809,7 @@ xfs_btree_delrec( /* If there is a right sibling, point it to the remaining block. */ xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); if (!xfs_btree_ptr_is_null(cur, &cptr)) { - error = xfs_btree_read_buf_block(cur, &cptr, level, - 0, &rrblock, &rrbp); + error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); if (error) goto error0; xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); @@ -3952,7 +4018,7 @@ xfs_btree_block_change_owner( /* now read rh sibling block for next iteration */ xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); if (xfs_btree_ptr_is_null(cur, &rptr)) - return ENOENT; + return -ENOENT; return xfs_btree_lookup_get_block(cur, level, &rptr, &block); } @@ -3995,7 +4061,7 @@ xfs_btree_change_owner( buffer_list); } while (!error); - if (error != ENOENT) + if (error != -ENOENT) return error; } diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 91e34f21bace..8f18bab73ea5 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -62,6 +62,7 @@ union xfs_btree_rec { #define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) +#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) /* * For logging record fields. @@ -92,6 +93,7 @@ do { \ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) @@ -105,6 +107,7 @@ do { \ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) @@ -129,7 +132,7 @@ struct xfs_btree_ops { int (*alloc_block)(struct xfs_btree_cur *cur, union xfs_btree_ptr *start_bno, union xfs_btree_ptr *new_bno, - int length, int *stat); + int *stat); int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp); /* update last record information */ @@ -255,7 +258,7 @@ xfs_btree_check_block( int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lptr( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_dfsbno_t ptr, /* btree block disk address */ + xfs_fsblock_t ptr, /* btree block disk address */ int level); /* btree block level */ /* diff --git a/fs/xfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h index fad1676ad8cd..fad1676ad8cd 100644 --- a/fs/xfs/xfs_cksum.h +++ b/fs/xfs/libxfs/xfs_cksum.h diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 796272a2e129..2c42ae28d027 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -167,8 +167,8 @@ xfs_da3_node_verify( * we don't know if the node is for and attribute or directory tree, * so only fail if the count is outside both bounds */ - if (ichdr.count > mp->m_dir_node_ents && - ichdr.count > mp->m_attr_node_ents) + if (ichdr.count > mp->m_dir_geo->node_ents && + ichdr.count > mp->m_attr_geo->node_ents) return false; /* XXX: hash order check? */ @@ -185,8 +185,8 @@ xfs_da3_node_write_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (!xfs_da3_node_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -196,7 +196,7 @@ xfs_da3_node_write_verify( if (bip) hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); } /* @@ -209,18 +209,20 @@ static void xfs_da3_node_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_da_blkinfo *info = bp->b_addr; switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: - if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DA3_NODE_CRC_OFF)) + if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { + xfs_buf_ioerror(bp, -EFSBADCRC); break; + } /* fall through */ case XFS_DA_NODE_MAGIC: - if (!xfs_da3_node_verify(bp)) + if (!xfs_da3_node_verify(bp)) { + xfs_buf_ioerror(bp, -EFSCORRUPTED); break; + } return; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: @@ -237,8 +239,7 @@ xfs_da3_node_read_verify( } /* corrupt block */ - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); } const struct xfs_buf_ops xfs_da3_node_buf_ops = { @@ -314,7 +315,7 @@ xfs_da3_node_create( error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); if (error) - return(error); + return error; bp->b_ops = &xfs_da3_node_buf_ops; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); node = bp->b_addr; @@ -336,7 +337,7 @@ xfs_da3_node_create( XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); *bpp = bp; - return(0); + return 0; } /* @@ -384,8 +385,8 @@ xfs_da3_split( switch (oldblk->magic) { case XFS_ATTR_LEAF_MAGIC: error = xfs_attr3_leaf_split(state, oldblk, newblk); - if ((error != 0) && (error != ENOSPC)) { - return(error); /* GROT: attr is inconsistent */ + if ((error != 0) && (error != -ENOSPC)) { + return error; /* GROT: attr is inconsistent */ } if (!error) { addblk = newblk; @@ -407,7 +408,7 @@ xfs_da3_split( &state->extrablk); } if (error) - return(error); /* GROT: attr inconsistent */ + return error; /* GROT: attr inconsistent */ addblk = newblk; break; case XFS_DIR2_LEAFN_MAGIC: @@ -421,7 +422,7 @@ xfs_da3_split( max - i, &action); addblk->bp = NULL; if (error) - return(error); /* GROT: dir is inconsistent */ + return error; /* GROT: dir is inconsistent */ /* * Record the newly split block for the next time thru? */ @@ -438,7 +439,7 @@ xfs_da3_split( xfs_da3_fixhashpath(state, &state->path); } if (!addblk) - return(0); + return 0; /* * Split the root node. @@ -448,7 +449,7 @@ xfs_da3_split( error = xfs_da3_root_split(state, oldblk, addblk); if (error) { addblk->bp = NULL; - return(error); /* GROT: dir is inconsistent */ + return error; /* GROT: dir is inconsistent */ } /* @@ -491,7 +492,7 @@ xfs_da3_split( sizeof(node->hdr.info))); } addblk->bp = NULL; - return(0); + return 0; } /* @@ -597,7 +598,7 @@ xfs_da3_root_split( * Set up the new root node. */ error = xfs_da3_node_create(args, - (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0, + (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0, level + 1, &bp, args->whichfork); if (error) return error; @@ -615,10 +616,10 @@ xfs_da3_root_split( #ifdef DEBUG if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { - ASSERT(blk1->blkno >= mp->m_dirleafblk && - blk1->blkno < mp->m_dirfreeblk); - ASSERT(blk2->blkno >= mp->m_dirleafblk && - blk2->blkno < mp->m_dirfreeblk); + ASSERT(blk1->blkno >= args->geo->leafblk && + blk1->blkno < args->geo->freeblk); + ASSERT(blk2->blkno >= args->geo->leafblk && + blk2->blkno < args->geo->freeblk); } #endif @@ -662,25 +663,25 @@ xfs_da3_node_split( /* * Do we have to split the node? */ - if (nodehdr.count + newcount > state->node_ents) { + if (nodehdr.count + newcount > state->args->geo->node_ents) { /* * Allocate a new node, add to the doubly linked chain of * nodes, then move some of our excess entries into it. */ error = xfs_da_grow_inode(state->args, &blkno); if (error) - return(error); /* GROT: dir is inconsistent */ + return error; /* GROT: dir is inconsistent */ error = xfs_da3_node_create(state->args, blkno, treelevel, &newblk->bp, state->args->whichfork); if (error) - return(error); /* GROT: dir is inconsistent */ + return error; /* GROT: dir is inconsistent */ newblk->blkno = blkno; newblk->magic = XFS_DA_NODE_MAGIC; xfs_da3_node_rebalance(state, oldblk, newblk); error = xfs_da3_blk_link(state, oldblk, newblk); if (error) - return(error); + return error; *result = 1; } else { *result = 0; @@ -720,7 +721,7 @@ xfs_da3_node_split( } } - return(0); + return 0; } /* @@ -893,8 +894,8 @@ xfs_da3_node_add( ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); ASSERT(newblk->blkno != 0); if (state->args->whichfork == XFS_DATA_FORK) - ASSERT(newblk->blkno >= state->mp->m_dirleafblk && - newblk->blkno < state->mp->m_dirfreeblk); + ASSERT(newblk->blkno >= state->args->geo->leafblk && + newblk->blkno < state->args->geo->freeblk); /* * We may need to make some room before we insert the new node. @@ -962,9 +963,9 @@ xfs_da3_join( case XFS_ATTR_LEAF_MAGIC: error = xfs_attr3_leaf_toosmall(state, &action); if (error) - return(error); + return error; if (action == 0) - return(0); + return 0; xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); break; case XFS_DIR2_LEAFN_MAGIC: @@ -984,7 +985,7 @@ xfs_da3_join( xfs_da3_fixhashpath(state, &state->path); error = xfs_da3_node_toosmall(state, &action); if (error) - return(error); + return error; if (action == 0) return 0; xfs_da3_node_unbalance(state, drop_blk, save_blk); @@ -994,12 +995,12 @@ xfs_da3_join( error = xfs_da3_blk_unlink(state, drop_blk, save_blk); xfs_da_state_kill_altpath(state); if (error) - return(error); + return error; error = xfs_da_shrink_inode(state->args, drop_blk->blkno, drop_blk->bp); drop_blk->bp = NULL; if (error) - return(error); + return error; } /* * We joined all the way to the top. If it turns out that @@ -1009,7 +1010,7 @@ xfs_da3_join( xfs_da3_node_remove(state, drop_blk); xfs_da3_fixhashpath(state, &state->path); error = xfs_da3_root_join(state, &state->path.blk[0]); - return(error); + return error; } #ifdef DEBUG @@ -1088,16 +1089,17 @@ xfs_da3_root_join( * that could occur. For dir3 blocks we also need to update the block * number in the buffer header. */ - memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); + memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize); root_blk->bp->b_ops = bp->b_ops; xfs_trans_buf_copy_type(root_blk->bp, bp); if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) { struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr; da3->blkno = cpu_to_be64(root_blk->bp->b_bn); } - xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); + xfs_trans_log_buf(args->trans, root_blk->bp, 0, + args->geo->blksize - 1); error = xfs_da_shrink_inode(args, child, bp); - return(error); + return error; } /* @@ -1138,9 +1140,9 @@ xfs_da3_node_toosmall( info = blk->bp->b_addr; node = (xfs_da_intnode_t *)info; dp->d_ops->node_hdr_from_disk(&nodehdr, node); - if (nodehdr.count > (state->node_ents >> 1)) { + if (nodehdr.count > (state->args->geo->node_ents >> 1)) { *action = 0; /* blk over 50%, don't try to join */ - return(0); /* blk over 50%, don't try to join */ + return 0; /* blk over 50%, don't try to join */ } /* @@ -1159,13 +1161,13 @@ xfs_da3_node_toosmall( error = xfs_da3_path_shift(state, &state->altpath, forward, 0, &retval); if (error) - return(error); + return error; if (retval) { *action = 0; } else { *action = 2; } - return(0); + return 0; } /* @@ -1175,8 +1177,8 @@ xfs_da3_node_toosmall( * We prefer coalescing with the lower numbered sibling so as * to shrink a directory over time. */ - count = state->node_ents; - count -= state->node_ents >> 2; + count = state->args->geo->node_ents; + count -= state->args->geo->node_ents >> 2; count -= nodehdr.count; /* start with smaller blk num */ @@ -1192,7 +1194,7 @@ xfs_da3_node_toosmall( error = xfs_da3_node_read(state->args->trans, dp, blkno, -1, &bp, state->args->whichfork); if (error) - return(error); + return error; node = bp->b_addr; dp->d_ops->node_hdr_from_disk(&thdr, node); @@ -1295,7 +1297,7 @@ xfs_da3_fixhashpath( node = blk->bp->b_addr; dp->d_ops->node_hdr_from_disk(&nodehdr, node); btree = dp->d_ops->node_tree_p(node); - if (be32_to_cpu(btree->hashval) == lasthash) + if (be32_to_cpu(btree[blk->index].hashval) == lasthash) break; blk->hashval = lasthash; btree[blk->index].hashval = cpu_to_be32(lasthash); @@ -1471,7 +1473,7 @@ xfs_da3_node_lookup_int( * Descend thru the B-tree searching each level for the right * node to use, until the right hashval is found. */ - blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0; + blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0; for (blk = &state->path.blk[0], state->path.active = 1; state->path.active <= XFS_DA_NODE_MAXDEPTH; blk++, state->path.active++) { @@ -1484,7 +1486,7 @@ xfs_da3_node_lookup_int( if (error) { blk->blkno = 0; state->path.active--; - return(error); + return error; } curr = blk->bp->b_addr; blk->magic = be16_to_cpu(curr->magic); @@ -1577,25 +1579,25 @@ xfs_da3_node_lookup_int( args->blkno = blk->blkno; } else { ASSERT(0); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } - if (((retval == ENOENT) || (retval == ENOATTR)) && + if (((retval == -ENOENT) || (retval == -ENOATTR)) && (blk->hashval == args->hashval)) { error = xfs_da3_path_shift(state, &state->path, 1, 1, &retval); if (error) - return(error); + return error; if (retval == 0) { continue; } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { /* path_shift() gives ENOENT */ - retval = XFS_ERROR(ENOATTR); + retval = -ENOATTR; } } break; } *result = retval; - return(0); + return 0; } /*======================================================================== @@ -1690,7 +1692,7 @@ xfs_da3_blk_link( be32_to_cpu(old_info->back), -1, &bp, args->whichfork); if (error) - return(error); + return error; ASSERT(bp != NULL); tmp_info = bp->b_addr; ASSERT(tmp_info->magic == old_info->magic); @@ -1711,7 +1713,7 @@ xfs_da3_blk_link( be32_to_cpu(old_info->forw), -1, &bp, args->whichfork); if (error) - return(error); + return error; ASSERT(bp != NULL); tmp_info = bp->b_addr; ASSERT(tmp_info->magic == old_info->magic); @@ -1724,7 +1726,7 @@ xfs_da3_blk_link( xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); - return(0); + return 0; } /* @@ -1770,7 +1772,7 @@ xfs_da3_blk_unlink( be32_to_cpu(drop_info->back), -1, &bp, args->whichfork); if (error) - return(error); + return error; ASSERT(bp != NULL); tmp_info = bp->b_addr; ASSERT(tmp_info->magic == save_info->magic); @@ -1787,7 +1789,7 @@ xfs_da3_blk_unlink( be32_to_cpu(drop_info->forw), -1, &bp, args->whichfork); if (error) - return(error); + return error; ASSERT(bp != NULL); tmp_info = bp->b_addr; ASSERT(tmp_info->magic == save_info->magic); @@ -1799,7 +1801,7 @@ xfs_da3_blk_unlink( } xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); - return(0); + return 0; } /* @@ -1857,9 +1859,9 @@ xfs_da3_path_shift( } } if (level < 0) { - *result = XFS_ERROR(ENOENT); /* we're out of our tree */ + *result = -ENOENT; /* we're out of our tree */ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - return(0); + return 0; } /* @@ -1881,7 +1883,7 @@ xfs_da3_path_shift( error = xfs_da3_node_read(args->trans, dp, blkno, -1, &blk->bp, args->whichfork); if (error) - return(error); + return error; info = blk->bp->b_addr; ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || @@ -2002,7 +2004,7 @@ xfs_da_grow_inode_int( struct xfs_trans *tp = args->trans; struct xfs_inode *dp = args->dp; int w = args->whichfork; - xfs_drfsbno_t nblks = dp->i_d.di_nblocks; + xfs_rfsblock_t nblks = dp->i_d.di_nblocks; struct xfs_bmbt_irec map, *mapp; int nmap, error, got, i, mapi; @@ -2066,7 +2068,7 @@ xfs_da_grow_inode_int( if (got != count || mapp[0].br_startoff != *bno || mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != *bno + count) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto out_free_map; } @@ -2089,20 +2091,12 @@ xfs_da_grow_inode( xfs_dablk_t *new_blkno) { xfs_fileoff_t bno; - int count; int error; trace_xfs_da_grow_inode(args); - if (args->whichfork == XFS_DATA_FORK) { - bno = args->dp->i_mount->m_dirleafblk; - count = args->dp->i_mount->m_dirblkfsbs; - } else { - bno = 0; - count = 1; - } - - error = xfs_da_grow_inode_int(args, &bno, count); + bno = args->geo->leafblk; + error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount); if (!error) *new_blkno = (xfs_dablk_t)bno; return error; @@ -2157,27 +2151,27 @@ xfs_da3_swap_lastblock( w = args->whichfork; ASSERT(w == XFS_DATA_FORK); mp = dp->i_mount; - lastoff = mp->m_dirfreeblk; + lastoff = args->geo->freeblk; error = xfs_bmap_last_before(tp, dp, &lastoff, w); if (error) return error; if (unlikely(lastoff == 0)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } /* * Read the last block in the btree space. */ - last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; + last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); if (error) return error; /* * Copy the last block into the dead buffer and log it. */ - memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize); - xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); + memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); + xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); dead_info = dead_buf->b_addr; /* * Get values from the moved block. @@ -2215,7 +2209,7 @@ xfs_da3_swap_lastblock( sib_info->magic != dead_info->magic)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } sib_info->forw = cpu_to_be32(dead_blkno); @@ -2237,7 +2231,7 @@ xfs_da3_swap_lastblock( sib_info->magic != dead_info->magic)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } sib_info->back = cpu_to_be32(dead_blkno); @@ -2246,7 +2240,7 @@ xfs_da3_swap_lastblock( sizeof(sib_info->back))); sib_buf = NULL; } - par_blkno = mp->m_dirleafblk; + par_blkno = args->geo->leafblk; level = -1; /* * Walk down the tree looking for the parent of the moved block. @@ -2260,7 +2254,7 @@ xfs_da3_swap_lastblock( if (level >= 0 && level != par_hdr.level + 1) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } level = par_hdr.level; @@ -2273,7 +2267,7 @@ xfs_da3_swap_lastblock( if (entno == par_hdr.count) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } par_blkno = be32_to_cpu(btree[entno].before); @@ -2300,7 +2294,7 @@ xfs_da3_swap_lastblock( if (unlikely(par_blkno == 0)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); @@ -2311,7 +2305,7 @@ xfs_da3_swap_lastblock( if (par_hdr.level != level) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } btree = dp->d_ops->node_tree_p(par_node); @@ -2356,10 +2350,7 @@ xfs_da_shrink_inode( w = args->whichfork; tp = args->trans; mp = dp->i_mount; - if (w == XFS_DATA_FORK) - count = mp->m_dirblkfsbs; - else - count = 1; + count = args->geo->fsbcount; for (;;) { /* * Remove extents. If we get ENOSPC for a dir we have to move @@ -2368,7 +2359,7 @@ xfs_da_shrink_inode( error = xfs_bunmapi(tp, dp, dead_blkno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, &done); - if (error == ENOSPC) { + if (error == -ENOSPC) { if (w != XFS_DATA_FORK) break; error = xfs_da3_swap_lastblock(args, &dead_blkno, @@ -2436,7 +2427,7 @@ xfs_buf_map_from_irec( map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP | KM_NOFS); if (!map) - return ENOMEM; + return -ENOMEM; *mapp = map; } @@ -2461,7 +2452,6 @@ xfs_buf_map_from_irec( */ static int xfs_dabuf_map( - struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, @@ -2479,7 +2469,10 @@ xfs_dabuf_map( ASSERT(map && *map); ASSERT(*nmaps == 1); - nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1; + if (whichfork == XFS_DATA_FORK) + nfsb = mp->m_dir_geo->fsbcount; + else + nfsb = mp->m_attr_geo->fsbcount; /* * Caller doesn't have a mapping. -2 means don't complain @@ -2507,8 +2500,8 @@ xfs_dabuf_map( } if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { - error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED); - if (unlikely(error == EFSCORRUPTED)) { + error = mappedbno == -2 ? -1 : -EFSCORRUPTED; + if (unlikely(error == -EFSCORRUPTED)) { if (xfs_error_level >= XFS_ERRLEVEL_LOW) { int i; xfs_alert(mp, "%s: bno %lld dir: inode %lld", @@ -2557,7 +2550,7 @@ xfs_da_get_buf( *bpp = NULL; mapp = ↦ nmap = 1; - error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, &mapp, &nmap); if (error) { /* mapping a hole is not an error, but we don't continue */ @@ -2568,7 +2561,7 @@ xfs_da_get_buf( bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, mapp, nmap, 0); - error = bp ? bp->b_error : XFS_ERROR(EIO); + error = bp ? bp->b_error : -EIO; if (error) { xfs_trans_brelse(trans, bp); goto out_free; @@ -2605,7 +2598,7 @@ xfs_da_read_buf( *bpp = NULL; mapp = ↦ nmap = 1; - error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, &mapp, &nmap); if (error) { /* mapping a hole is not an error, but we don't continue */ @@ -2624,47 +2617,6 @@ xfs_da_read_buf( xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); else xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); - - /* - * This verification code will be moved to a CRC verification callback - * function so just leave it here unchanged until then. - */ - { - xfs_dir2_data_hdr_t *hdr = bp->b_addr; - xfs_dir2_free_t *free = bp->b_addr; - xfs_da_blkinfo_t *info = bp->b_addr; - uint magic, magic1; - struct xfs_mount *mp = dp->i_mount; - - magic = be16_to_cpu(info->magic); - magic1 = be32_to_cpu(hdr->magic); - if (unlikely( - XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && - (magic != XFS_DA3_NODE_MAGIC) && - (magic != XFS_ATTR_LEAF_MAGIC) && - (magic != XFS_ATTR3_LEAF_MAGIC) && - (magic != XFS_DIR2_LEAF1_MAGIC) && - (magic != XFS_DIR3_LEAF1_MAGIC) && - (magic != XFS_DIR2_LEAFN_MAGIC) && - (magic != XFS_DIR3_LEAFN_MAGIC) && - (magic1 != XFS_DIR2_BLOCK_MAGIC) && - (magic1 != XFS_DIR3_BLOCK_MAGIC) && - (magic1 != XFS_DIR2_DATA_MAGIC) && - (magic1 != XFS_DIR3_DATA_MAGIC) && - (free->hdr.magic != - cpu_to_be32(XFS_DIR2_FREE_MAGIC)) && - (free->hdr.magic != - cpu_to_be32(XFS_DIR3_FREE_MAGIC)), - mp, XFS_ERRTAG_DA_READ_BUF, - XFS_RANDOM_DA_READ_BUF))) { - trace_xfs_da_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", - XFS_ERRLEVEL_LOW, mp, info); - error = XFS_ERROR(EFSCORRUPTED); - xfs_trans_brelse(trans, bp); - goto out_free; - } - } *bpp = bp; out_free: if (mapp != &map) @@ -2678,7 +2630,6 @@ out_free: */ xfs_daddr_t xfs_da_reada_buf( - struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, @@ -2692,7 +2643,7 @@ xfs_da_reada_buf( mapp = ↦ nmap = 1; - error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, + error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, &mapp, &nmap); if (error) { /* mapping a hole is not an error, but we don't continue */ diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 6e95ea79f5d7..6e153e399a77 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -25,6 +25,23 @@ struct xfs_trans; struct zone; struct xfs_dir_ops; +/* + * Directory/attribute geometry information. There will be one of these for each + * data fork type, and it will be passed around via the xfs_da_args. Global + * structures will be attached to the xfs_mount. + */ +struct xfs_da_geometry { + int blksize; /* da block size in bytes */ + int fsbcount; /* da block size in filesystem blocks */ + uint8_t fsblog; /* log2 of _filesystem_ block size */ + uint8_t blklog; /* log2 of da block size */ + uint node_ents; /* # of entries in a danode */ + int magicpct; /* 37% of block size in bytes */ + xfs_dablk_t datablk; /* blockno of dir data v2 */ + xfs_dablk_t leafblk; /* blockno of leaf data v2 */ + xfs_dablk_t freeblk; /* blockno of free data v2 */ +}; + /*======================================================================== * Btree searching and modification structure definitions. *========================================================================*/ @@ -42,6 +59,7 @@ enum xfs_dacmp { * Structure to ease passing around component names. */ typedef struct xfs_da_args { + struct xfs_da_geometry *geo; /* da block geometry */ const __uint8_t *name; /* string (maybe not NULL terminated) */ int namelen; /* length of string (maybe no NULL) */ __uint8_t filetype; /* filetype of inode for directories */ @@ -60,10 +78,12 @@ typedef struct xfs_da_args { int index; /* index of attr of interest in blk */ xfs_dablk_t rmtblkno; /* remote attr value starting blkno */ int rmtblkcnt; /* remote attr value block count */ + int rmtvaluelen; /* remote attr value length in bytes */ xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */ int index2; /* index of 2nd attr in blk */ xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ int rmtblkcnt2; /* remote attr value block count */ + int rmtvaluelen2; /* remote attr value length in bytes */ int op_flags; /* operation flags */ enum xfs_dacmp cmpresult; /* name compare result for lookups */ } xfs_da_args_t; @@ -108,8 +128,6 @@ typedef struct xfs_da_state_path { typedef struct xfs_da_state { xfs_da_args_t *args; /* filename arguments */ struct xfs_mount *mp; /* filesystem mount point */ - unsigned int blocksize; /* logical block size */ - unsigned int node_ents; /* how many entries in danode */ xfs_da_state_path_t path; /* search/split paths */ xfs_da_state_path_t altpath; /* alternate path for join */ unsigned char inleaf; /* insert into 1->lf, 0->splf */ @@ -183,9 +201,9 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp, int whichfork, const struct xfs_buf_ops *ops); -xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mapped_bno, - int whichfork, const struct xfs_buf_ops *ops); +xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, + xfs_daddr_t mapped_bno, int whichfork, + const struct xfs_buf_ops *ops); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, struct xfs_buf *dead_buf); diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index e6c83e1fbc8a..c9aee52a37e2 100644 --- a/fs/xfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c @@ -26,8 +26,10 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" +#include "xfs_dir2_priv.h" /* * Shortform directory ops @@ -425,9 +427,9 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) * Directory Leaf block operations */ static int -xfs_dir2_max_leaf_ents(struct xfs_mount *mp) +xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo) { - return (mp->m_dirblksize - sizeof(struct xfs_dir2_leaf_hdr)) / + return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) / (uint)sizeof(struct xfs_dir2_leaf_entry); } @@ -438,9 +440,9 @@ xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) } static int -xfs_dir3_max_leaf_ents(struct xfs_mount *mp) +xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo) { - return (mp->m_dirblksize - sizeof(struct xfs_dir3_leaf_hdr)) / + return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) / (uint)sizeof(struct xfs_dir2_leaf_entry); } @@ -591,9 +593,9 @@ xfs_da3_node_hdr_to_disk( * Directory free space block operations */ static int -xfs_dir2_free_max_bests(struct xfs_mount *mp) +xfs_dir2_free_max_bests(struct xfs_da_geometry *geo) { - return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / + return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) / sizeof(xfs_dir2_data_off_t); } @@ -607,24 +609,25 @@ xfs_dir2_free_bests_p(struct xfs_dir2_free *free) * Convert data space db to the corresponding free db. */ static xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) { - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); + return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + + (db / xfs_dir2_free_max_bests(geo)); } /* * Convert data space db to the corresponding index in a free db. */ static int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) { - return db % xfs_dir2_free_max_bests(mp); + return db % xfs_dir2_free_max_bests(geo); } static int -xfs_dir3_free_max_bests(struct xfs_mount *mp) +xfs_dir3_free_max_bests(struct xfs_da_geometry *geo) { - return (mp->m_dirblksize - sizeof(struct xfs_dir3_free_hdr)) / + return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) / sizeof(xfs_dir2_data_off_t); } @@ -638,18 +641,19 @@ xfs_dir3_free_bests_p(struct xfs_dir2_free *free) * Convert data space db to the corresponding free db. */ static xfs_dir2_db_t -xfs_dir3_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) { - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp); + return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + + (db / xfs_dir3_free_max_bests(geo)); } /* * Convert data space db to the corresponding index in a free db. */ static int -xfs_dir3_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) { - return db % xfs_dir3_free_max_bests(mp); + return db % xfs_dir3_free_max_bests(geo); } static void diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index a19d3f8f639c..0a49b0286372 100644 --- a/fs/xfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -19,10 +19,6 @@ #ifndef __XFS_DA_FORMAT_H__ #define __XFS_DA_FORMAT_H__ -/*======================================================================== - * Directory Structure when greater than XFS_LBSIZE(mp) bytes. - *========================================================================*/ - /* * This structure is common to both leaf nodes and non-leaf nodes in the Btree. * @@ -122,8 +118,6 @@ struct xfs_da3_icnode_hdr { __uint16_t level; }; -#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize - /* * Directory version 2. * @@ -330,8 +324,6 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) #define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) #define XFS_DIR2_DATA_SPACE 0 #define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_DATA_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) /* * Describe a free area in the data block. @@ -456,8 +448,6 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) */ #define XFS_DIR2_LEAF_SPACE 1 #define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_LEAF_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET) /* * Leaf block header. @@ -514,17 +504,6 @@ struct xfs_dir3_leaf { #define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc) /* - * Get address of the bestcount field in the single-leaf block. - */ -static inline struct xfs_dir2_leaf_tail * -xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) -{ - return (struct xfs_dir2_leaf_tail *) - ((char *)lp + mp->m_dirblksize - - sizeof(struct xfs_dir2_leaf_tail)); -} - -/* * Get address of the bests array in the single-leaf block. */ static inline __be16 * @@ -534,123 +513,6 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp) } /* - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr. It had better be aligned. - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_db_t) - (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return (xfs_dir2_data_aoff_t)(by & - ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ - return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return ((xfs_dir2_off_t)db << - (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o; -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) -{ - return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, - xfs_dir2_data_aoff_t o) -{ - return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) -{ - return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) -{ - return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); -} - -/* * Free space block defintions for the node format. */ @@ -659,8 +521,6 @@ xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) */ #define XFS_DIR2_FREE_SPACE 2 #define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) -#define XFS_DIR2_FREE_FIRSTDB(mp) \ - xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET) typedef struct xfs_dir2_free_hdr { __be32 magic; /* XFS_DIR2_FREE_MAGIC */ @@ -736,16 +596,6 @@ typedef struct xfs_dir2_block_tail { } xfs_dir2_block_tail_t; /* - * Pointer to the leaf header embedded in a data block (1-block format) - */ -static inline struct xfs_dir2_block_tail * -xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir2_block_tail *) - ((char *)hdr + mp->m_dirblksize)) - 1; -} - -/* * Pointer to the leaf entries embedded in a data block (1-block format) */ static inline struct xfs_dir2_leaf_entry * @@ -764,10 +614,6 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) * of an attribute name may not be unique, we may have duplicate keys. The * internal links in the Btree are logical block offsets into the file. * - *======================================================================== - * Attribute structure when equal to XFS_LBSIZE(mp) bytes. - *======================================================================== - * * Struct leaf_entry's are packed from the top. Name/values grow from the * bottom but are not packed. The freemap contains run-length-encoded entries * for the free bytes after the leaf_entry's, but only the N largest such, diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/libxfs/xfs_dinode.h index e5869b50dc41..623bbe8fd921 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/libxfs/xfs_dinode.h @@ -89,6 +89,8 @@ typedef struct xfs_dinode { /* structure must be padded to 64 bit alignment */ } xfs_dinode_t; +#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) + #define DI_MAX_FLUSH 0xffff /* diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index ce16ef02997a..6cef22152fd6 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -85,38 +85,74 @@ static struct xfs_nameops xfs_ascii_ci_nameops = { .compname = xfs_ascii_ci_compname, }; -void -xfs_dir_mount( - xfs_mount_t *mp) +int +xfs_da_mount( + struct xfs_mount *mp) { - int nodehdr_size; + struct xfs_da_geometry *dageo; + int nodehdr_size; - ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb)); + ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= XFS_MAX_BLOCKSIZE); mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); - mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); - mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog; - mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp)); - mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp)); - mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp)); - nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; - mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) / + mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), + KM_SLEEP | KM_MAYFAIL); + mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), + KM_SLEEP | KM_MAYFAIL); + if (!mp->m_dir_geo || !mp->m_attr_geo) { + kmem_free(mp->m_dir_geo); + kmem_free(mp->m_attr_geo); + return -ENOMEM; + } + + /* set up directory geometry */ + dageo = mp->m_dir_geo; + dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; + dageo->fsblog = mp->m_sb.sb_blocklog; + dageo->blksize = 1 << dageo->blklog; + dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; + + /* + * Now we've set up the block conversion variables, we can calculate the + * segment block constants using the geometry structure. + */ + dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET); + dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET); + dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); + dageo->node_ents = (dageo->blksize - nodehdr_size) / (uint)sizeof(xfs_da_node_entry_t); - mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) / + dageo->magicpct = (dageo->blksize * 37) / 100; + + /* set up attribute geometry - single fsb only */ + dageo = mp->m_attr_geo; + dageo->blklog = mp->m_sb.sb_blocklog; + dageo->fsblog = mp->m_sb.sb_blocklog; + dageo->blksize = 1 << dageo->blklog; + dageo->fsbcount = 1; + dageo->node_ents = (dageo->blksize - nodehdr_size) / (uint)sizeof(xfs_da_node_entry_t); + dageo->magicpct = (dageo->blksize * 37) / 100; - mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100; if (xfs_sb_version_hasasciici(&mp->m_sb)) mp->m_dirnameops = &xfs_ascii_ci_nameops; else mp->m_dirnameops = &xfs_default_nameops; + return 0; +} + +void +xfs_da_unmount( + struct xfs_mount *mp) +{ + kmem_free(mp->m_dir_geo); + kmem_free(mp->m_attr_geo); } /* @@ -166,7 +202,7 @@ xfs_dir_ino_validate( xfs_warn(mp, "Invalid inode number 0x%Lx", (unsigned long long) ino); XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -180,16 +216,24 @@ xfs_dir_init( xfs_inode_t *dp, xfs_inode_t *pdp) { - xfs_da_args_t args; + struct xfs_da_args *args; int error; - memset((char *)&args, 0, sizeof(args)); - args.dp = dp; - args.trans = tp; ASSERT(S_ISDIR(dp->i_d.di_mode)); - if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) + error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino); + if (error) return error; - return xfs_dir2_sf_create(&args, pdp->i_ino); + + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return -ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->dp = dp; + args->trans = tp; + error = xfs_dir2_sf_create(args, pdp->i_ino); + kmem_free(args); + return error; } /* @@ -205,41 +249,57 @@ xfs_dir_createname( xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); - if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) + rval = xfs_dir_ino_validate(tp->t_mountp, inum); + if (rval) return rval; XFS_STATS_INC(xs_dir_create); - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.inumber = inum; - args.dp = dp; - args.firstblock = first; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_addname(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_addname(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_addname(&args); + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return -ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = inum; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_addname(args); + goto out_free; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_addname(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_addname(args); else - rval = xfs_dir2_node_addname(&args); + rval = xfs_dir2_node_addname(args); + +out_free: + kmem_free(args); return rval; } @@ -254,18 +314,18 @@ xfs_dir_cilookup_result( int len) { if (args->cmpresult == XFS_CMP_DIFFERENT) - return ENOENT; + return -ENOENT; if (args->cmpresult != XFS_CMP_CASE || !(args->op_flags & XFS_DA_OP_CILOOKUP)) - return EEXIST; + return -EEXIST; args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); if (!args->value) - return ENOMEM; + return -ENOMEM; memcpy(args->value, name, len); args->valuelen = len; - return EEXIST; + return -EEXIST; } /* @@ -282,46 +342,67 @@ xfs_dir_lookup( xfs_ino_t *inum, /* out: inode number */ struct xfs_name *ci_name) /* out: actual name if CI match */ { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_lookup); - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.dp = dp; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - args.op_flags = XFS_DA_OP_OKNOENT; + /* + * We need to use KM_NOFS here so that lockdep will not throw false + * positive deadlock warnings on a non-transactional lookup path. It is + * safe to recurse into inode recalim in that case, but lockdep can't + * easily be taught about it. Hence KM_NOFS avoids having to add more + * lockdep Doing this avoids having to add a bunch of lockdep class + * annotations into the reclaim path for the ilock. + */ + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->dp = dp; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_OKNOENT; if (ci_name) - args.op_flags |= XFS_DA_OP_CILOOKUP; + args->op_flags |= XFS_DA_OP_CILOOKUP; - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_lookup(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_lookup(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_lookup(&args); + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_lookup(args); + goto out_check_rval; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_lookup(args); + goto out_check_rval; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_lookup(args); else - rval = xfs_dir2_node_lookup(&args); - if (rval == EEXIST) + rval = xfs_dir2_node_lookup(args); + +out_check_rval: + if (rval == -EEXIST) rval = 0; if (!rval) { - *inum = args.inumber; + *inum = args->inumber; if (ci_name) { - ci_name->name = args.value; - ci_name->len = args.valuelen; + ci_name->name = args->value; + ci_name->len = args->valuelen; } } +out_free: + kmem_free(args); return rval; } @@ -338,38 +419,52 @@ xfs_dir_removename( xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_remove); - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.inumber = ino; - args.dp = dp; - args.firstblock = first; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_removename(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_removename(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_removename(&args); + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return -ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = ino; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_removename(args); + goto out_free; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_removename(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_removename(args); else - rval = xfs_dir2_node_removename(&args); + rval = xfs_dir2_node_removename(args); +out_free: + kmem_free(args); return rval; } @@ -386,40 +481,55 @@ xfs_dir_replace( xfs_bmap_free_t *flist, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); - if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) + rval = xfs_dir_ino_validate(tp->t_mountp, inum); + if (rval) return rval; - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.inumber = inum; - args.dp = dp; - args.firstblock = first; - args.flist = flist; - args.total = total; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_replace(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_replace(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_replace(&args); + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return -ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->inumber = inum; + args->dp = dp; + args->firstblock = first; + args->flist = flist; + args->total = total; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_replace(args); + goto out_free; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_replace(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_replace(args); else - rval = xfs_dir2_node_replace(&args); + rval = xfs_dir2_node_replace(args); +out_free: + kmem_free(args); return rval; } @@ -434,7 +544,7 @@ xfs_dir_canenter( struct xfs_name *name, /* name of entry to add */ uint resblks) { - xfs_da_args_t args; + struct xfs_da_args *args; int rval; int v; /* type-checking value */ @@ -443,29 +553,43 @@ xfs_dir_canenter( ASSERT(S_ISDIR(dp->i_d.di_mode)); - memset(&args, 0, sizeof(xfs_da_args_t)); - args.name = name->name; - args.namelen = name->len; - args.filetype = name->type; - args.hashval = dp->i_mount->m_dirnameops->hashname(name); - args.dp = dp; - args.whichfork = XFS_DATA_FORK; - args.trans = tp; - args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | + args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); + if (!args) + return -ENOMEM; + + args->geo = dp->i_mount->m_dir_geo; + args->name = name->name; + args->namelen = name->len; + args->filetype = name->type; + args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->dp = dp; + args->whichfork = XFS_DATA_FORK; + args->trans = tp; + args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_addname(&args); - else if ((rval = xfs_dir2_isblock(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_block_addname(&args); - else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) - return rval; - else if (v) - rval = xfs_dir2_leaf_addname(&args); + if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + rval = xfs_dir2_sf_addname(args); + goto out_free; + } + + rval = xfs_dir2_isblock(args, &v); + if (rval) + goto out_free; + if (v) { + rval = xfs_dir2_block_addname(args); + goto out_free; + } + + rval = xfs_dir2_isleaf(args, &v); + if (rval) + goto out_free; + if (v) + rval = xfs_dir2_leaf_addname(args); else - rval = xfs_dir2_node_addname(&args); + rval = xfs_dir2_node_addname(args); +out_free: + kmem_free(args); return rval; } @@ -497,13 +621,13 @@ xfs_dir2_grow_inode( * Set lowest possible block in the space requested. */ bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); - count = mp->m_dirblkfsbs; + count = args->geo->fsbcount; error = xfs_da_grow_inode_int(args, &bno, count); if (error) return error; - *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); + *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno); /* * Update file's size if this is the data space and it grew. @@ -525,19 +649,16 @@ xfs_dir2_grow_inode( */ int xfs_dir2_isblock( - xfs_trans_t *tp, - xfs_inode_t *dp, - int *vp) /* out: 1 is block, 0 is not block */ + struct xfs_da_args *args, + int *vp) /* out: 1 is block, 0 is not block */ { - xfs_fileoff_t last; /* last file offset */ - xfs_mount_t *mp; - int rval; + xfs_fileoff_t last; /* last file offset */ + int rval; - mp = dp->i_mount; - if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) + if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; - rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize; - ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize); + rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; + ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); *vp = rval; return 0; } @@ -547,18 +668,15 @@ xfs_dir2_isblock( */ int xfs_dir2_isleaf( - xfs_trans_t *tp, - xfs_inode_t *dp, - int *vp) /* out: 1 is leaf, 0 is not leaf */ + struct xfs_da_args *args, + int *vp) /* out: 1 is block, 0 is not block */ { - xfs_fileoff_t last; /* last file offset */ - xfs_mount_t *mp; - int rval; + xfs_fileoff_t last; /* last file offset */ + int rval; - mp = dp->i_mount; - if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) + if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; - *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog); + *vp = last == args->geo->leafblk + args->geo->fsbcount; return 0; } @@ -586,11 +704,11 @@ xfs_dir2_shrink_inode( dp = args->dp; mp = dp->i_mount; tp = args->trans; - da = xfs_dir2_db_to_da(mp, db); + da = xfs_dir2_db_to_da(args->geo, db); /* * Unmap the fsblock(s). */ - if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, + if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, &done))) { /* @@ -617,12 +735,12 @@ xfs_dir2_shrink_inode( /* * If it's not a data block, we're done. */ - if (db >= XFS_DIR2_LEAF_FIRSTDB(mp)) + if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)) return 0; /* * If the block isn't the last one in the directory, we're done. */ - if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0)) + if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0)) return 0; bno = da; if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { @@ -631,7 +749,7 @@ xfs_dir2_shrink_inode( */ return error; } - if (db == mp->m_dirdatablk) + if (db == args->geo->datablk) ASSERT(bno == 0); else ASSERT(bno > 0); diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index cec70e0781ab..c8e86b0b5e99 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -80,7 +80,7 @@ struct xfs_dir_ops { struct xfs_dir3_icleaf_hdr *from); void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to, struct xfs_dir2_leaf *from); - int (*leaf_max_ents)(struct xfs_mount *mp); + int (*leaf_max_ents)(struct xfs_da_geometry *geo); struct xfs_dir2_leaf_entry * (*leaf_ents_p)(struct xfs_dir2_leaf *lp); @@ -97,10 +97,12 @@ struct xfs_dir_ops { struct xfs_dir3_icfree_hdr *from); void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to, struct xfs_dir2_free *from); - int (*free_max_bests)(struct xfs_mount *mp); + int (*free_max_bests)(struct xfs_da_geometry *geo); __be16 * (*free_bests_p)(struct xfs_dir2_free *free); - xfs_dir2_db_t (*db_to_fdb)(struct xfs_mount *mp, xfs_dir2_db_t db); - int (*db_to_fdindex)(struct xfs_mount *mp, xfs_dir2_db_t db); + xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo, + xfs_dir2_db_t db); + int (*db_to_fdindex)(struct xfs_da_geometry *geo, + xfs_dir2_db_t db); }; extern const struct xfs_dir_ops * @@ -112,7 +114,9 @@ extern const struct xfs_dir_ops * * Generic directory interface routines */ extern void xfs_dir_startup(void); -extern void xfs_dir_mount(struct xfs_mount *mp); +extern int xfs_da_mount(struct xfs_mount *mp); +extern void xfs_da_unmount(struct xfs_mount *mp); + extern int xfs_dir_isempty(struct xfs_inode *dp); extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_inode *pdp); @@ -142,23 +146,23 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); /* * Interface routines used by userspace utilities */ -extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); -extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); +extern int xfs_dir2_isblock(struct xfs_da_args *args, int *r); +extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r); extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_buf *bp); extern void xfs_dir2_data_freescan(struct xfs_inode *dp, struct xfs_dir2_data_hdr *hdr, int *loghead); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_log_entry(struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_entry *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_log_header(struct xfs_da_args *args, struct xfs_buf *bp); -extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp, - struct xfs_dir2_data_unused *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_log_unused(struct xfs_da_args *args, + struct xfs_buf *bp, struct xfs_dir2_data_unused *dup); +extern void xfs_dir2_data_make_free(struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_use_free(struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 90cdbf4b5f19..9628ceccfa02 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -89,13 +89,14 @@ xfs_dir3_block_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DIR3_DATA_CRC_OFF)) || - !xfs_dir3_block_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_dir3_block_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -107,8 +108,8 @@ xfs_dir3_block_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_block_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -118,7 +119,7 @@ xfs_dir3_block_write_verify( if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); } const struct xfs_buf_ops xfs_dir3_block_buf_ops = { @@ -135,7 +136,7 @@ xfs_dir3_block_read( struct xfs_mount *mp = dp->i_mount; int err; - err = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp, + err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, XFS_DATA_FORK, &xfs_dir3_block_buf_ops); if (!err && tp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); @@ -280,8 +281,7 @@ out: */ static void xfs_dir2_block_compact( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_block_tail *btp, @@ -314,18 +314,17 @@ xfs_dir2_block_compact( *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); *lfloghigh -= be32_to_cpu(btp->stale) - 1; be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); - xfs_dir2_data_make_free(tp, dp, bp, + xfs_dir2_data_make_free(args, bp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), needlog, &needscan); - blp += be32_to_cpu(btp->stale) - 1; btp->stale = cpu_to_be32(1); /* * If we now need to rebuild the bestfree map, do so. * This needs to happen before the next call to use_free. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, needlog); + xfs_dir2_data_freescan(args->dp, hdr, needlog); } /* @@ -377,7 +376,7 @@ xfs_dir2_block_addname( * Set up pointers to parts of the block. */ hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* @@ -393,7 +392,7 @@ xfs_dir2_block_addname( if (args->op_flags & XFS_DA_OP_JUSTCHECK) { xfs_trans_brelse(tp, bp); if (!dup) - return XFS_ERROR(ENOSPC); + return -ENOSPC; return 0; } @@ -403,7 +402,7 @@ xfs_dir2_block_addname( if (!dup) { /* Don't have a space reservation: return no-space. */ if (args->total == 0) - return XFS_ERROR(ENOSPC); + return -ENOSPC; /* * Convert to the next larger format. * Then add the new entry in that format. @@ -420,7 +419,7 @@ xfs_dir2_block_addname( * If need to compact the leaf entries, do it now. */ if (compact) { - xfs_dir2_block_compact(tp, dp, bp, hdr, btp, blp, &needlog, + xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog, &lfloghigh, &lfloglow); /* recalculate blp post-compaction */ blp = xfs_dir2_block_leaf_p(btp); @@ -455,7 +454,7 @@ xfs_dir2_block_addname( /* * Mark the space needed for the new leaf entry, now in use. */ - xfs_dir2_data_use_free(tp, dp, bp, enddup, + xfs_dir2_data_use_free(args, bp, enddup, (xfs_dir2_data_aoff_t) ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - sizeof(*blp)), @@ -536,13 +535,13 @@ xfs_dir2_block_addname( * Fill in the leaf entry. */ blp[mid].hashval = cpu_to_be32(args->hashval); - blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, + blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( (char *)dep - (char *)hdr)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* * Mark space for the data entry used. */ - xfs_dir2_data_use_free(tp, dp, bp, dup, + xfs_dir2_data_use_free(args, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* @@ -560,9 +559,9 @@ xfs_dir2_block_addname( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, bp); + xfs_dir2_data_log_header(args, bp); xfs_dir2_block_log_tail(tp, bp); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); xfs_dir3_data_check(dp, bp); return 0; } @@ -581,7 +580,7 @@ xfs_dir2_block_log_leaf( xfs_dir2_leaf_entry_t *blp; xfs_dir2_block_tail_t *btp; - btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); + btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); blp = xfs_dir2_block_leaf_p(btp); xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); @@ -598,7 +597,7 @@ xfs_dir2_block_log_tail( xfs_dir2_data_hdr_t *hdr = bp->b_addr; xfs_dir2_block_tail_t *btp; - btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); + btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), (uint)((char *)(btp + 1) - (char *)hdr - 1)); } @@ -633,13 +632,14 @@ xfs_dir2_block_lookup( mp = dp->i_mount; hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Get the offset from the leaf entry, to point to the data. */ dep = (xfs_dir2_data_entry_t *)((char *)hdr + - xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); /* * Fill in inode number, CI name if appropriate, release the block. */ @@ -647,7 +647,7 @@ xfs_dir2_block_lookup( args->filetype = dp->d_ops->data_get_ftype(dep); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(args->trans, bp); - return XFS_ERROR(error); + return error; } /* @@ -685,7 +685,7 @@ xfs_dir2_block_lookup_int( hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Loop doing a binary search for our hash value. @@ -703,7 +703,7 @@ xfs_dir2_block_lookup_int( if (low > high) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); xfs_trans_brelse(tp, bp); - return XFS_ERROR(ENOENT); + return -ENOENT; } } /* @@ -723,7 +723,7 @@ xfs_dir2_block_lookup_int( * Get pointer to the entry from the leaf. */ dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); + ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr)); /* * Compare name and if it's an exact match, return the index * and buffer. If it's the first case-insensitive match, store @@ -751,7 +751,7 @@ xfs_dir2_block_lookup_int( * No match, release the buffer and return ENOENT. */ xfs_trans_brelse(tp, bp); - return XFS_ERROR(ENOENT); + return -ENOENT; } /* @@ -790,18 +790,19 @@ xfs_dir2_block_removename( tp = args->trans; mp = dp->i_mount; hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry using the leaf entry. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); /* * Mark the data entry's space free. */ needlog = needscan = 0; - xfs_dir2_data_make_free(tp, dp, bp, + xfs_dir2_data_make_free(args, bp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* @@ -820,7 +821,7 @@ xfs_dir2_block_removename( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, bp); + xfs_dir2_data_log_header(args, bp); xfs_dir3_data_check(dp, bp); /* * See if the size as a shortform is good enough. @@ -865,20 +866,21 @@ xfs_dir2_block_replace( dp = args->dp; mp = dp->i_mount; hdr = bp->b_addr; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* * Point to the data entry we need to change. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(blp[ent].address))); ASSERT(be64_to_cpu(dep->inumber) != args->inumber); /* * Change the inode number to the new value. */ dep->inumber = cpu_to_be64(args->inumber); dp->d_ops->data_put_ftype(dep, args->filetype); - xfs_dir2_data_log_entry(args->trans, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); xfs_dir3_data_check(dp, bp); return 0; } @@ -938,7 +940,7 @@ xfs_dir2_leaf_to_block( leaf = lbp->b_addr; dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ents = dp->d_ops->leaf_ents_p(leaf); - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || leafhdr.magic == XFS_DIR3_LEAF1_MAGIC); @@ -948,13 +950,13 @@ xfs_dir2_leaf_to_block( * been left behind during no-space-reservation operations. * These will show up in the leaf bests table. */ - while (dp->i_d.di_size > mp->m_dirblksize) { + while (dp->i_d.di_size > args->geo->blksize) { int hdrsz; hdrsz = dp->d_ops->data_entry_offset; bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == - mp->m_dirblksize - hdrsz) { + args->geo->blksize - hdrsz) { if ((error = xfs_dir2_leaf_trim_data(args, lbp, (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) @@ -966,7 +968,7 @@ xfs_dir2_leaf_to_block( * Read the data block if we don't already have it, give up if it fails. */ if (!dbp) { - error = xfs_dir3_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp); + error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp); if (error) return error; } @@ -982,7 +984,7 @@ xfs_dir2_leaf_to_block( /* * Look at the last data entry. */ - tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1; + tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1; dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); /* * If it's not free or is too short we can't do it. @@ -1001,12 +1003,12 @@ xfs_dir2_leaf_to_block( /* * Use up the space at the end of the block (blp/btp). */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, mp->m_dirblksize - size, size, + xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size, &needlog, &needscan); /* * Initialize the block tail. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale); btp->stale = 0; xfs_dir2_block_log_tail(tp, dbp); @@ -1027,11 +1029,11 @@ xfs_dir2_leaf_to_block( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); /* * Pitch the old leaf block. */ - error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp); + error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp); if (error) return error; @@ -1089,7 +1091,7 @@ xfs_dir2_sf_to_block( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - return XFS_ERROR(EIO); + return -EIO; } oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; @@ -1140,13 +1142,13 @@ xfs_dir2_sf_to_block( */ dup = dp->d_ops->data_unused_p(hdr); needlog = needscan = 0; - xfs_dir2_data_use_free(tp, dp, bp, dup, mp->m_dirblksize - i, i, &needlog, - &needscan); + xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, + i, &needlog, &needscan); ASSERT(needscan == 0); /* * Fill in the tail. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ btp->stale = 0; blp = xfs_dir2_block_leaf_p(btp); @@ -1154,7 +1156,7 @@ xfs_dir2_sf_to_block( /* * Remove the freespace, we'll manage it. */ - xfs_dir2_data_use_free(tp, dp, bp, dup, + xfs_dir2_data_use_free(args, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), be16_to_cpu(dup->length), &needlog, &needscan); /* @@ -1167,9 +1169,9 @@ xfs_dir2_sf_to_block( dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); - blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, + blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( (char *)dep - (char *)hdr)); /* * Create entry for .. @@ -1181,9 +1183,9 @@ xfs_dir2_sf_to_block( dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); - blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, + blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( (char *)dep - (char *)hdr)); offset = dp->d_ops->data_first_offset; /* @@ -1215,7 +1217,7 @@ xfs_dir2_sf_to_block( dup->length = cpu_to_be16(newoffset - offset); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( ((char *)dup - (char *)hdr)); - xfs_dir2_data_log_unused(tp, bp, dup); + xfs_dir2_data_log_unused(args, bp, dup); xfs_dir2_data_freeinsert(hdr, dp->d_ops->data_bestfree_p(hdr), dup, &dummy); @@ -1232,12 +1234,12 @@ xfs_dir2_sf_to_block( memcpy(dep->name, sfep->name, dep->namelen); tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(args, bp, dep); name.name = sfep->name; name.len = sfep->namelen; blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> hashname(&name)); - blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, + blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( (char *)dep - (char *)hdr)); offset = (int)((char *)(tagp + 1) - (char *)hdr); if (++i == sfp->count) diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 70acff4ee173..fdd803fecb8e 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -63,8 +63,10 @@ __xfs_dir3_data_check( int stale; /* count of stale leaves */ struct xfs_name name; const struct xfs_dir_ops *ops; + struct xfs_da_geometry *geo; mp = bp->b_target->bt_mount; + geo = mp->m_dir_geo; /* * We can be passed a null dp here from a verifier, so we need to go the @@ -78,7 +80,7 @@ __xfs_dir3_data_check( switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; @@ -94,11 +96,11 @@ __xfs_dir3_data_check( break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - endp = (char *)hdr + mp->m_dirblksize; + endp = (char *)hdr + geo->blksize; break; default: XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; + return -EFSCORRUPTED; } /* @@ -172,9 +174,9 @@ __xfs_dir3_data_check( lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); + addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, + (xfs_dir2_data_aoff_t) + ((char *)dep - (char *)hdr)); name.name = dep->name; name.len = dep->namelen; hash = mp->m_dirnameops->hashname(&name); @@ -241,7 +243,6 @@ static void xfs_dir3_data_reada_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_dir2_data_hdr *hdr = bp->b_addr; switch (hdr->magic) { @@ -255,8 +256,8 @@ xfs_dir3_data_reada_verify( xfs_dir3_data_verify(bp); return; default: - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); break; } } @@ -267,13 +268,14 @@ xfs_dir3_data_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DIR3_DATA_CRC_OFF)) || - !xfs_dir3_data_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_dir3_data_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -285,8 +287,8 @@ xfs_dir3_data_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_data_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -296,7 +298,7 @@ xfs_dir3_data_write_verify( if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); } const struct xfs_buf_ops xfs_dir3_data_buf_ops = { @@ -329,12 +331,11 @@ xfs_dir3_data_read( int xfs_dir3_data_readahead( - struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno) { - return xfs_da_reada_buf(tp, dp, bno, mapped_bno, + return xfs_da_reada_buf(dp, bno, mapped_bno, XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); } @@ -510,6 +511,7 @@ xfs_dir2_data_freescan( struct xfs_dir2_data_free *bf; char *endp; /* end of block's data */ char *p; /* current entry pointer */ + struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || @@ -528,10 +530,10 @@ xfs_dir2_data_freescan( p = (char *)dp->d_ops->data_entry_p(hdr); if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(dp->i_mount, hdr); + btp = xfs_dir2_block_tail_p(geo, hdr); endp = (char *)xfs_dir2_block_leaf_p(btp); } else - endp = (char *)hdr + dp->i_mount->m_dirblksize; + endp = (char *)hdr + geo->blksize; /* * Loop over the block's entries. */ @@ -585,8 +587,8 @@ xfs_dir3_data_init( /* * Get the buffer set up for the block. */ - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp, - XFS_DATA_FORK); + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), + -1, &bp, XFS_DATA_FORK); if (error) return error; bp->b_ops = &xfs_dir3_data_buf_ops; @@ -621,15 +623,15 @@ xfs_dir3_data_init( dup = dp->d_ops->data_unused_p(hdr); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - t = mp->m_dirblksize - (uint)dp->d_ops->data_entry_offset; + t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset; bf[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); /* * Log it and return it. */ - xfs_dir2_data_log_header(tp, dp, bp); - xfs_dir2_data_log_unused(tp, bp, dup); + xfs_dir2_data_log_header(args, bp); + xfs_dir2_data_log_unused(args, bp, dup); *bpp = bp; return 0; } @@ -639,8 +641,7 @@ xfs_dir3_data_init( */ void xfs_dir2_data_log_entry( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { @@ -651,8 +652,8 @@ xfs_dir2_data_log_entry( hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), - (uint)((char *)(dp->d_ops->data_entry_tag_p(dep) + 1) - + xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), + (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) - (char *)hdr - 1)); } @@ -661,8 +662,7 @@ xfs_dir2_data_log_entry( */ void xfs_dir2_data_log_header( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp) { #ifdef DEBUG @@ -674,7 +674,8 @@ xfs_dir2_data_log_header( hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); #endif - xfs_trans_log_buf(tp, bp, 0, dp->d_ops->data_entry_offset - 1); + xfs_trans_log_buf(args->trans, bp, 0, + args->dp->d_ops->data_entry_offset - 1); } /* @@ -682,7 +683,7 @@ xfs_dir2_data_log_header( */ void xfs_dir2_data_log_unused( - struct xfs_trans *tp, + struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup) /* data unused pointer */ { @@ -696,13 +697,13 @@ xfs_dir2_data_log_unused( /* * Log the first part of the unused entry. */ - xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), + xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr), (uint)((char *)&dup->length + sizeof(dup->length) - 1 - (char *)hdr)); /* * Log the end (tag) of the unused entry. */ - xfs_trans_log_buf(tp, bp, + xfs_trans_log_buf(args->trans, bp, (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + sizeof(xfs_dir2_data_off_t) - 1)); @@ -714,8 +715,7 @@ xfs_dir2_data_log_unused( */ void xfs_dir2_data_make_free( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, /* starting byte offset */ xfs_dir2_data_aoff_t len, /* length in bytes */ @@ -725,14 +725,12 @@ xfs_dir2_data_make_free( xfs_dir2_data_hdr_t *hdr; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ char *endptr; /* end of data area */ - xfs_mount_t *mp; /* filesystem mount point */ int needscan; /* need to regen bestfree */ xfs_dir2_data_unused_t *newdup; /* new unused entry */ xfs_dir2_data_unused_t *postdup; /* unused entry after us */ xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ struct xfs_dir2_data_free *bf; - mp = tp->t_mountp; hdr = bp->b_addr; /* @@ -740,20 +738,20 @@ xfs_dir2_data_make_free( */ if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - endptr = (char *)hdr + mp->m_dirblksize; + endptr = (char *)hdr + args->geo->blksize; else { xfs_dir2_block_tail_t *btp; /* block tail */ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > dp->d_ops->data_entry_offset) { + if (offset > args->dp->d_ops->data_entry_offset) { __be16 *tagp; /* tag just before us */ tagp = (__be16 *)((char *)hdr + offset) - 1; @@ -779,7 +777,7 @@ xfs_dir2_data_make_free( * Previous and following entries are both free, * merge everything into a single free entry. */ - bf = dp->d_ops->data_bestfree_p(hdr); + bf = args->dp->d_ops->data_bestfree_p(hdr); if (prevdup && postdup) { xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ @@ -801,7 +799,7 @@ xfs_dir2_data_make_free( be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, prevdup); + xfs_dir2_data_log_unused(args, bp, prevdup); if (!needscan) { /* * Has to be the case that entries 0 and 1 are @@ -836,7 +834,7 @@ xfs_dir2_data_make_free( be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, prevdup); + xfs_dir2_data_log_unused(args, bp, prevdup); /* * If the previous entry was in the table, the new entry * is longer, so it will be in the table too. Remove @@ -864,7 +862,7 @@ xfs_dir2_data_make_free( newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); /* * If the following entry was in the table, the new entry * is longer, so it will be in the table too. Remove @@ -891,7 +889,7 @@ xfs_dir2_data_make_free( newdup->length = cpu_to_be16(len); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); } *needscanp = needscan; @@ -902,8 +900,7 @@ xfs_dir2_data_make_free( */ void xfs_dir2_data_use_free( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup, /* unused entry */ xfs_dir2_data_aoff_t offset, /* starting offset to use */ @@ -934,7 +931,7 @@ xfs_dir2_data_use_free( * Look up the entry in the bestfree table. */ oldlen = be16_to_cpu(dup->length); - bf = dp->d_ops->data_bestfree_p(hdr); + bf = args->dp->d_ops->data_bestfree_p(hdr); dfp = xfs_dir2_data_freefind(hdr, bf, dup); ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); /* @@ -966,7 +963,7 @@ xfs_dir2_data_use_free( newdup->length = cpu_to_be16(oldlen - len); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); /* * If it was in the table, remove it and add the new one. */ @@ -994,7 +991,7 @@ xfs_dir2_data_use_free( newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); /* * If it was in the table, remove it and add the new one. */ @@ -1022,13 +1019,13 @@ xfs_dir2_data_use_free( newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup); + xfs_dir2_data_log_unused(args, bp, newdup); newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); *xfs_dir2_data_unused_tag_p(newdup2) = cpu_to_be16((char *)newdup2 - (char *)hdr); - xfs_dir2_data_log_unused(tp, bp, newdup2); + xfs_dir2_data_log_unused(args, bp, newdup2); /* * If the old entry was in the table, we need to scan * if the 3rd entry was valid, since these entries diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index ae47ec6e16c4..a19174eb3cb2 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -41,9 +41,10 @@ */ static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, int *indexp, struct xfs_buf **dbpp); -static void xfs_dir3_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp, - int first, int last); -static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); +static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args, + struct xfs_buf *bp, int first, int last); +static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, + struct xfs_buf *bp); /* * Check the internal consistency of a leaf1 block. @@ -92,6 +93,7 @@ xfs_dir3_leaf_check_int( int i; const struct xfs_dir_ops *ops; struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_da_geometry *geo = mp->m_dir_geo; /* * we can be passed a null dp here from a verifier, so we need to go the @@ -105,14 +107,14 @@ xfs_dir3_leaf_check_int( } ents = ops->leaf_ents_p(leaf); - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(geo, leaf); /* * XXX (dgc): This value is not restrictive enough. * Should factor in the size of the bests table as well. * We can deduce a value for that from di_size. */ - if (hdr->count > ops->leaf_max_ents(mp)) + if (hdr->count > ops->leaf_max_ents(geo)) return false; /* Leaves and bests don't overlap in leaf format. */ @@ -179,13 +181,14 @@ __read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DIR3_LEAF_CRC_OFF)) || - !xfs_dir3_leaf_verify(bp, magic)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_dir3_leaf_verify(bp, magic)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -198,8 +201,8 @@ __write_verify( struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_leaf_verify(bp, magic)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -209,7 +212,7 @@ __write_verify( if (bip) hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); } static void @@ -322,7 +325,7 @@ xfs_dir3_leaf_init( if (type == XFS_DIR2_LEAF1_MAGIC) { struct xfs_dir2_leaf_tail *ltp; - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf); ltp->bestcount = 0; bp->b_ops = &xfs_dir3_leaf1_buf_ops; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF); @@ -346,18 +349,18 @@ xfs_dir3_leaf_get_buf( int error; ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); - ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) && - bno < XFS_DIR2_FREE_FIRSTDB(mp)); + ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) && + bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp, - XFS_DATA_FORK); + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno), + -1, &bp, XFS_DATA_FORK); if (error) return error; xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic); - xfs_dir3_leaf_log_header(tp, dp, bp); + xfs_dir3_leaf_log_header(args, bp); if (magic == XFS_DIR2_LEAF1_MAGIC) - xfs_dir3_leaf_log_tail(tp, bp); + xfs_dir3_leaf_log_tail(args, bp); *bpp = bp; return 0; } @@ -402,8 +405,8 @@ xfs_dir2_block_to_leaf( if ((error = xfs_da_grow_inode(args, &blkno))) { return error; } - ldb = xfs_dir2_da_to_db(mp, blkno); - ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp)); + ldb = xfs_dir2_da_to_db(args->geo, blkno); + ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)); /* * Initialize the leaf block, get a buffer for it. */ @@ -414,7 +417,7 @@ xfs_dir2_block_to_leaf( leaf = lbp->b_addr; hdr = dbp->b_addr; xfs_dir3_data_check(dp, dbp); - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); bf = dp->d_ops->data_bestfree_p(hdr); ents = dp->d_ops->leaf_ents_p(leaf); @@ -426,23 +429,23 @@ xfs_dir2_block_to_leaf( leafhdr.count = be32_to_cpu(btp->count); leafhdr.stale = be32_to_cpu(btp->stale); dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_log_header(args, lbp); /* * Could compact these but I think we always do the conversion * after squeezing out stale entries. */ memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, dp, lbp, 0, leafhdr.count - 1); + xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1); needscan = 0; needlog = 1; /* * Make the space formerly occupied by the leaf entries and block * tail be free. */ - xfs_dir2_data_make_free(tp, dp, dbp, + xfs_dir2_data_make_free(args, dbp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), - (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - + (xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize - (char *)blp), &needlog, &needscan); /* @@ -460,7 +463,7 @@ xfs_dir2_block_to_leaf( /* * Set up leaf tail and bests table. */ - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ltp->bestcount = cpu_to_be32(1); bestsp = xfs_dir2_leaf_bests_p(ltp); bestsp[0] = bf[0].length; @@ -468,10 +471,10 @@ xfs_dir2_block_to_leaf( * Log the data header and leaf bests table. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); xfs_dir3_leaf_check(dp, lbp); xfs_dir3_data_check(dp, dbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, 0); + xfs_dir3_leaf_log_bests(args, lbp, 0, 0); return 0; } @@ -640,7 +643,7 @@ xfs_dir2_leaf_addname( tp = args->trans; mp = dp->i_mount; - error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); + error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); if (error) return error; @@ -652,7 +655,7 @@ xfs_dir2_leaf_addname( */ index = xfs_dir2_leaf_search_hash(args, lbp); leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ents = dp->d_ops->leaf_ents_p(leaf); dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); @@ -669,7 +672,7 @@ xfs_dir2_leaf_addname( index++, lep++) { if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) continue; - i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); ASSERT(i < be32_to_cpu(ltp->bestcount)); ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); if (be16_to_cpu(bestsp[i]) >= length) { @@ -728,7 +731,7 @@ xfs_dir2_leaf_addname( if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) { xfs_trans_brelse(tp, lbp); - return XFS_ERROR(ENOSPC); + return -ENOSPC; } /* * Convert to node form. @@ -752,7 +755,7 @@ xfs_dir2_leaf_addname( */ if (args->op_flags & XFS_DA_OP_JUSTCHECK) { xfs_trans_brelse(tp, lbp); - return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; + return use_block == -1 ? -ENOSPC : 0; } /* * If no allocations are allowed, return now before we've @@ -760,7 +763,7 @@ xfs_dir2_leaf_addname( */ if (args->total == 0 && use_block == -1) { xfs_trans_brelse(tp, lbp); - return XFS_ERROR(ENOSPC); + return -ENOSPC; } /* * Need to compact the leaf entries, removing stale ones. @@ -809,14 +812,15 @@ xfs_dir2_leaf_addname( memmove(&bestsp[0], &bestsp[1], be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); be32_add_cpu(<p->bestcount, 1); - xfs_dir3_leaf_log_tail(tp, lbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, + be32_to_cpu(ltp->bestcount) - 1); } /* * If we're filling in a previously empty block just log it. */ else - xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block); + xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); hdr = dbp->b_addr; bf = dp->d_ops->data_bestfree_p(hdr); bestsp[use_block] = bf[0].length; @@ -827,8 +831,8 @@ xfs_dir2_leaf_addname( * Just read that one in. */ error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(mp, use_block), - -1, &dbp); + xfs_dir2_db_to_da(args->geo, use_block), + -1, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; @@ -847,7 +851,7 @@ xfs_dir2_leaf_addname( /* * Mark the initial part of our freespace in use for the new entry. */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, + xfs_dir2_data_use_free(args, dbp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* @@ -869,8 +873,8 @@ xfs_dir2_leaf_addname( * Need to log the data block's header. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); - xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir2_data_log_header(args, dbp); + xfs_dir2_data_log_entry(args, dbp, dep); /* * If the bests table needs to be changed, do it. * Log the change unless we've already done that. @@ -878,7 +882,7 @@ xfs_dir2_leaf_addname( if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) { bestsp[use_block] = bf[0].length; if (!grown) - xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block); + xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); } lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, @@ -888,14 +892,15 @@ xfs_dir2_leaf_addname( * Fill in the new leaf entry. */ lep->hashval = cpu_to_be32(args->hashval); - lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block, + lep->address = cpu_to_be32( + xfs_dir2_db_off_to_dataptr(args->geo, use_block, be16_to_cpu(*tagp))); /* * Log the leaf fields and give up the buffers. */ dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); - xfs_dir3_leaf_log_ents(tp, dp, lbp, lfloglow, lfloghigh); + xfs_dir3_leaf_log_header(args, lbp); + xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh); xfs_dir3_leaf_check(dp, lbp); xfs_dir3_data_check(dp, dbp); return 0; @@ -947,9 +952,9 @@ xfs_dir3_leaf_compact( leafhdr->stale = 0; dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); - xfs_dir3_leaf_log_header(args->trans, dp, bp); + xfs_dir3_leaf_log_header(args, bp); if (loglow != -1) - xfs_dir3_leaf_log_ents(args->trans, dp, bp, loglow, to - 1); + xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1); } /* @@ -1051,7 +1056,7 @@ xfs_dir3_leaf_compact_x1( */ static void xfs_dir3_leaf_log_bests( - xfs_trans_t *tp, /* transaction pointer */ + struct xfs_da_args *args, struct xfs_buf *bp, /* leaf buffer */ int first, /* first entry to log */ int last) /* last entry to log */ @@ -1064,10 +1069,11 @@ xfs_dir3_leaf_log_bests( ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)); - ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); firstb = xfs_dir2_leaf_bests_p(ltp) + first; lastb = xfs_dir2_leaf_bests_p(ltp) + last; - xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)firstb - (char *)leaf), (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); } @@ -1076,8 +1082,7 @@ xfs_dir3_leaf_log_bests( */ void xfs_dir3_leaf_log_ents( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, int first, int last) @@ -1092,10 +1097,11 @@ xfs_dir3_leaf_log_ents( leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - ents = dp->d_ops->leaf_ents_p(leaf); + ents = args->dp->d_ops->leaf_ents_p(leaf); firstlep = &ents[first]; lastlep = &ents[last]; - xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)firstlep - (char *)leaf), (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); } @@ -1104,8 +1110,7 @@ xfs_dir3_leaf_log_ents( */ void xfs_dir3_leaf_log_header( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; @@ -1115,8 +1120,9 @@ xfs_dir3_leaf_log_header( leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), - dp->d_ops->leaf_hdr_size - 1); + xfs_trans_log_buf(args->trans, bp, + (uint)((char *)&leaf->hdr - (char *)leaf), + args->dp->d_ops->leaf_hdr_size - 1); } /* @@ -1124,21 +1130,20 @@ xfs_dir3_leaf_log_header( */ STATIC void xfs_dir3_leaf_log_tail( - struct xfs_trans *tp, + struct xfs_da_args *args, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ - struct xfs_mount *mp = tp->t_mountp; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - ltp = xfs_dir2_leaf_tail_p(mp, leaf); - xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), - (uint)(mp->m_dirblksize - 1)); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf), + (uint)(args->geo->blksize - 1)); } /* @@ -1184,7 +1189,7 @@ xfs_dir2_leaf_lookup( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); /* * Return the found inode number & CI name if appropriate */ @@ -1193,7 +1198,7 @@ xfs_dir2_leaf_lookup( error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(tp, dbp); xfs_trans_brelse(tp, lbp); - return XFS_ERROR(error); + return error; } /* @@ -1230,7 +1235,7 @@ xfs_dir2_leaf_lookup_int( tp = args->trans; mp = dp->i_mount; - error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); + error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); if (error) return error; @@ -1259,7 +1264,8 @@ xfs_dir2_leaf_lookup_int( /* * Get the new data block number. */ - newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); /* * If it's not the same as the old data block number, * need to pitch the old one and read the new one. @@ -1268,8 +1274,8 @@ xfs_dir2_leaf_lookup_int( if (dbp) xfs_trans_brelse(tp, dbp); error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(mp, newdb), - -1, &dbp); + xfs_dir2_db_to_da(args->geo, newdb), + -1, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; @@ -1280,7 +1286,8 @@ xfs_dir2_leaf_lookup_int( * Point to the data entry. */ dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); /* * Compare name and if it's an exact match, return the index * and buffer. If it's the first case-insensitive match, store @@ -1309,8 +1316,8 @@ xfs_dir2_leaf_lookup_int( if (cidb != curdb) { xfs_trans_brelse(tp, dbp); error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(mp, cidb), - -1, &dbp); + xfs_dir2_db_to_da(args->geo, cidb), + -1, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; @@ -1320,13 +1327,13 @@ xfs_dir2_leaf_lookup_int( return 0; } /* - * No match found, return ENOENT. + * No match found, return -ENOENT. */ ASSERT(cidb == -1); if (dbp) xfs_trans_brelse(tp, dbp); xfs_trans_brelse(tp, lbp); - return XFS_ERROR(ENOENT); + return -ENOENT; } /* @@ -1379,18 +1386,18 @@ xfs_dir2_leaf_removename( * Point to the leaf entry, use that to point to the data entry. */ lep = &ents[index]; - db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); needscan = needlog = 0; oldbest = be16_to_cpu(bf[0].length); - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); ASSERT(be16_to_cpu(bestsp[db]) == oldbest); /* * Mark the former data entry unused. */ - xfs_dir2_data_make_free(tp, dp, dbp, + xfs_dir2_data_make_free(args, dbp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* @@ -1398,10 +1405,10 @@ xfs_dir2_leaf_removename( */ leafhdr.stale++; dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_log_header(args, lbp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(tp, dp, lbp, index, index); + xfs_dir3_leaf_log_ents(args, lbp, index, index); /* * Scan the freespace in the data block again if necessary, @@ -1410,22 +1417,22 @@ xfs_dir2_leaf_removename( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); /* * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. */ if (be16_to_cpu(bf[0].length) != oldbest) { bestsp[db] = bf[0].length; - xfs_dir3_leaf_log_bests(tp, lbp, db, db); + xfs_dir3_leaf_log_bests(args, lbp, db, db); } xfs_dir3_data_check(dp, dbp); /* * If the data block is now empty then get rid of the data block. */ if (be16_to_cpu(bf[0].length) == - mp->m_dirblksize - dp->d_ops->data_entry_offset) { - ASSERT(db != mp->m_dirdatablk); + args->geo->blksize - dp->d_ops->data_entry_offset) { + ASSERT(db != args->geo->datablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* * Nope, can't get rid of it because it caused @@ -1433,7 +1440,7 @@ xfs_dir2_leaf_removename( * Just go on, returning success, leaving the * empty block in place. */ - if (error == ENOSPC && args->total == 0) + if (error == -ENOSPC && args->total == 0) error = 0; xfs_dir3_leaf_check(dp, lbp); return error; @@ -1458,15 +1465,16 @@ xfs_dir2_leaf_removename( memmove(&bestsp[db - i], bestsp, (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); be32_add_cpu(<p->bestcount, -(db - i)); - xfs_dir3_leaf_log_tail(tp, lbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, + be32_to_cpu(ltp->bestcount) - 1); } else bestsp[db] = cpu_to_be16(NULLDATAOFF); } /* * If the data block was not the first one, drop it. */ - else if (db != mp->m_dirdatablk) + else if (db != args->geo->datablk) dbp = NULL; xfs_dir3_leaf_check(dp, lbp); @@ -1514,7 +1522,7 @@ xfs_dir2_leaf_replace( */ dep = (xfs_dir2_data_entry_t *) ((char *)dbp->b_addr + - xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); ASSERT(args->inumber != be64_to_cpu(dep->inumber)); /* * Put the new inode number in, log it. @@ -1522,7 +1530,7 @@ xfs_dir2_leaf_replace( dep->inumber = cpu_to_be64(args->inumber); dp->d_ops->data_put_ftype(dep, args->filetype); tp = args->trans; - xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir2_data_log_entry(args, dbp, dep); xfs_dir3_leaf_check(dp, lbp); xfs_trans_brelse(tp, lbp); return 0; @@ -1608,12 +1616,13 @@ xfs_dir2_leaf_trim_data( /* * Read the offending data block. We need its buffer. */ - error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp); + error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db), + -1, &dbp); if (error) return error; leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); #ifdef DEBUG { @@ -1623,7 +1632,7 @@ xfs_dir2_leaf_trim_data( ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); ASSERT(be16_to_cpu(bf[0].length) == - mp->m_dirblksize - dp->d_ops->data_entry_offset); + args->geo->blksize - dp->d_ops->data_entry_offset); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); } #endif @@ -1632,7 +1641,7 @@ xfs_dir2_leaf_trim_data( * Get rid of the data block. */ if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); xfs_trans_brelse(tp, dbp); return error; } @@ -1642,8 +1651,8 @@ xfs_dir2_leaf_trim_data( bestsp = xfs_dir2_leaf_bests_p(ltp); be32_add_cpu(<p->bestcount, -1); memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); - xfs_dir3_leaf_log_tail(tp, lbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); return 0; } @@ -1707,22 +1716,22 @@ xfs_dir2_node_to_leaf( /* * Get the last offset in the file. */ - if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) { + if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) { return error; } - fo -= mp->m_dirblkfsbs; + fo -= args->geo->fsbcount; /* * If there are freespace blocks other than the first one, * take this opportunity to remove trailing empty freespace blocks * that may have been left behind during no-space-reservation * operations. */ - while (fo > mp->m_dirfreeblk) { + while (fo > args->geo->freeblk) { if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) { return error; } if (rval) - fo -= mp->m_dirblkfsbs; + fo -= args->geo->fsbcount; else return 0; } @@ -1735,7 +1744,7 @@ xfs_dir2_node_to_leaf( /* * If it's not the single leaf block, give up. */ - if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize) + if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize) return 0; lbp = state->path.blk[0].bp; leaf = lbp->b_addr; @@ -1747,7 +1756,7 @@ xfs_dir2_node_to_leaf( /* * Read the freespace block. */ - error = xfs_dir2_free_read(tp, dp, mp->m_dirfreeblk, &fbp); + error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp); if (error) return error; free = fbp->b_addr; @@ -1759,7 +1768,7 @@ xfs_dir2_node_to_leaf( * Now see if the leafn and free data will fit in a leaf1. * If not, release the buffer and give up. */ - if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > mp->m_dirblksize) { + if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) { xfs_trans_brelse(tp, fbp); return 0; } @@ -1779,7 +1788,7 @@ xfs_dir2_node_to_leaf( /* * Set up the leaf tail from the freespace block. */ - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ltp->bestcount = cpu_to_be32(freehdr.nvalid); /* @@ -1789,22 +1798,24 @@ xfs_dir2_node_to_leaf( freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); - xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); - xfs_dir3_leaf_log_tail(tp, lbp); + xfs_dir3_leaf_log_header(args, lbp); + xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); + xfs_dir3_leaf_log_tail(args, lbp); xfs_dir3_leaf_check(dp, lbp); /* * Get rid of the freespace block. */ - error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp); + error = xfs_dir2_shrink_inode(args, + xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET), + fbp); if (error) { /* * This can't fail here because it can only happen when * punching out the middle of an extent, and this is an * isolated block. */ - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); return error; } fbp = NULL; diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 56369d4509d5..2ae6ac2c11ae 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -115,13 +115,14 @@ xfs_dir3_free_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DIR3_FREE_CRC_OFF)) || - !xfs_dir3_free_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_dir3_free_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -133,8 +134,8 @@ xfs_dir3_free_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_free_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -144,7 +145,7 @@ xfs_dir3_free_write_verify( if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); } const struct xfs_buf_ops xfs_dir3_free_buf_ops = { @@ -194,17 +195,18 @@ xfs_dir2_free_try_read( static int xfs_dir3_free_get_buf( - struct xfs_trans *tp, - struct xfs_inode *dp, + xfs_da_args_t *args, xfs_dir2_db_t fbno, struct xfs_buf **bpp) { + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; struct xfs_buf *bp; int error; struct xfs_dir3_icfree_hdr hdr; - error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fbno), + error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno), -1, &bp, XFS_DATA_FORK); if (error) return error; @@ -239,8 +241,7 @@ xfs_dir3_free_get_buf( */ STATIC void xfs_dir2_free_log_bests( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp, int first, /* first entry to log */ int last) /* last entry to log */ @@ -249,10 +250,10 @@ xfs_dir2_free_log_bests( __be16 *bests; free = bp->b_addr; - bests = dp->d_ops->free_bests_p(free); + bests = args->dp->d_ops->free_bests_p(free); ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); - xfs_trans_log_buf(tp, bp, + xfs_trans_log_buf(args->trans, bp, (uint)((char *)&bests[first] - (char *)free), (uint)((char *)&bests[last] - (char *)free + sizeof(bests[0]) - 1)); @@ -263,8 +264,7 @@ xfs_dir2_free_log_bests( */ static void xfs_dir2_free_log_header( - struct xfs_trans *tp, - struct xfs_inode *dp, + struct xfs_da_args *args, struct xfs_buf *bp) { #ifdef DEBUG @@ -274,7 +274,8 @@ xfs_dir2_free_log_header( ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); #endif - xfs_trans_log_buf(tp, bp, 0, dp->d_ops->free_hdr_size - 1); + xfs_trans_log_buf(args->trans, bp, 0, + args->dp->d_ops->free_hdr_size - 1); } /* @@ -314,20 +315,20 @@ xfs_dir2_leaf_to_node( if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) { return error; } - ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp)); + ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); /* * Get the buffer for the new freespace block. */ - error = xfs_dir3_free_get_buf(tp, dp, fdb, &fbp); + error = xfs_dir3_free_get_buf(args, fdb, &fbp); if (error) return error; free = fbp->b_addr; dp->d_ops->free_hdr_from_disk(&freehdr, free); leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(mp, leaf); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ASSERT(be32_to_cpu(ltp->bestcount) <= - (uint)dp->i_d.di_size / mp->m_dirblksize); + (uint)dp->i_d.di_size / args->geo->blksize); /* * Copy freespace entries from the leaf block to the new block. @@ -348,8 +349,8 @@ xfs_dir2_leaf_to_node( freehdr.nvalid = be32_to_cpu(ltp->bestcount); dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_bests(tp, dp, fbp, 0, freehdr.nvalid - 1); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1); + xfs_dir2_free_log_header(args, fbp); /* * Converting the leaf to a leafnode is just a matter of changing the @@ -363,7 +364,7 @@ xfs_dir2_leaf_to_node( leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); lbp->b_ops = &xfs_dir3_leafn_buf_ops; xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_log_header(args, lbp); xfs_dir3_leaf_check(dp, lbp); return 0; } @@ -405,7 +406,7 @@ xfs_dir2_leafn_add( * into other peoples memory */ if (index < 0) - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; /* * If there are already the maximum number of leaf entries in @@ -414,9 +415,9 @@ xfs_dir2_leafn_add( * a compact. */ - if (leafhdr.count == dp->d_ops->leaf_max_ents(mp)) { + if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) { if (!leafhdr.stale) - return XFS_ERROR(ENOSPC); + return -ENOSPC; compact = leafhdr.stale > 1; } else compact = 0; @@ -449,12 +450,12 @@ xfs_dir2_leafn_add( highstale, &lfloglow, &lfloghigh); lep->hashval = cpu_to_be32(args->hashval); - lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, + lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo, args->blkno, args->index)); dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, bp); - xfs_dir3_leaf_log_ents(tp, dp, bp, lfloglow, lfloghigh); + xfs_dir3_leaf_log_header(args, bp); + xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh); xfs_dir3_leaf_check(dp, bp); return 0; } @@ -470,7 +471,8 @@ xfs_dir2_free_hdr_check( dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); - ASSERT((hdr.firstdb % dp->d_ops->free_max_bests(dp->i_mount)) == 0); + ASSERT((hdr.firstdb % + dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0); ASSERT(hdr.firstdb <= db); ASSERT(db < hdr.firstdb + hdr.nvalid); } @@ -575,7 +577,8 @@ xfs_dir2_leafn_lookup_for_addname( /* * Pull the data block number from the entry. */ - newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); /* * For addname, we're looking for a place to put the new entry. * We want to use a data block with an entry of equal @@ -592,7 +595,7 @@ xfs_dir2_leafn_lookup_for_addname( * Convert the data block to the free block * holding its freespace information. */ - newfdb = dp->d_ops->db_to_fdb(mp, newdb); + newfdb = dp->d_ops->db_to_fdb(args->geo, newdb); /* * If it's not the one we have in hand, read it in. */ @@ -604,7 +607,8 @@ xfs_dir2_leafn_lookup_for_addname( xfs_trans_brelse(tp, curbp); error = xfs_dir2_free_read(tp, dp, - xfs_dir2_db_to_da(mp, newfdb), + xfs_dir2_db_to_da(args->geo, + newfdb), &curbp); if (error) return error; @@ -615,7 +619,7 @@ xfs_dir2_leafn_lookup_for_addname( /* * Get the index for our entry. */ - fi = dp->d_ops->db_to_fdindex(mp, curdb); + fi = dp->d_ops->db_to_fdindex(args->geo, curdb); /* * If it has room, return it. */ @@ -625,7 +629,7 @@ xfs_dir2_leafn_lookup_for_addname( XFS_ERRLEVEL_LOW, mp); if (curfdb != newfdb) xfs_trans_brelse(tp, curbp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } curfdb = newfdb; if (be16_to_cpu(bests[fi]) >= length) @@ -656,7 +660,7 @@ out: * Return the index, that will be the insertion point. */ *indexp = index; - return XFS_ERROR(ENOENT); + return -ENOENT; } /* @@ -720,7 +724,8 @@ xfs_dir2_leafn_lookup_for_entry( /* * Pull the data block number from the entry. */ - newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + newdb = xfs_dir2_dataptr_to_db(args->geo, + be32_to_cpu(lep->address)); /* * Not adding a new entry, so we really want to find * the name given to us. @@ -745,7 +750,8 @@ xfs_dir2_leafn_lookup_for_entry( curbp = state->extrablk.bp; } else { error = xfs_dir3_data_read(tp, dp, - xfs_dir2_db_to_da(mp, newdb), + xfs_dir2_db_to_da(args->geo, + newdb), -1, &curbp); if (error) return error; @@ -757,7 +763,8 @@ xfs_dir2_leafn_lookup_for_entry( * Point to the data entry. */ dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr + - xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); /* * Compare the entry and if it's an exact match, return * EEXIST immediately. If it's the first case-insensitive @@ -782,7 +789,7 @@ xfs_dir2_leafn_lookup_for_entry( curbp->b_ops = &xfs_dir3_data_buf_ops; xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); if (cmp == XFS_CMP_EXACT) - return XFS_ERROR(EEXIST); + return -EEXIST; } } ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT)); @@ -805,7 +812,7 @@ xfs_dir2_leafn_lookup_for_entry( state->extravalid = 0; } *indexp = index; - return XFS_ERROR(ENOENT); + return -ENOENT; } /* @@ -843,7 +850,6 @@ xfs_dir3_leafn_moveents( int start_d,/* destination leaf index */ int count) /* count of leaves to copy */ { - struct xfs_trans *tp = args->trans; int stale; /* count stale leaves copied */ trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); @@ -862,7 +868,7 @@ xfs_dir3_leafn_moveents( if (start_d < dhdr->count) { memmove(&dents[start_d + count], &dents[start_d], (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, start_d + count, + xfs_dir3_leaf_log_ents(args, bp_d, start_d + count, count + dhdr->count - 1); } /* @@ -884,8 +890,7 @@ xfs_dir3_leafn_moveents( */ memcpy(&dents[start_d], &sents[start_s], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, - start_d, start_d + count - 1); + xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1); /* * If there are source entries after the ones we copied, @@ -894,8 +899,7 @@ xfs_dir3_leafn_moveents( if (start_s + count < shdr->count) { memmove(&sents[start_s], &sents[start_s + count], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_s, - start_s, start_s + count - 1); + xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1); } /* @@ -1031,8 +1035,8 @@ xfs_dir2_leafn_rebalance( /* log the changes made when moving the entries */ dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); - xfs_dir3_leaf_log_header(args->trans, dp, blk1->bp); - xfs_dir3_leaf_log_header(args->trans, dp, blk2->bp); + xfs_dir3_leaf_log_header(args, blk1->bp); + xfs_dir3_leaf_log_header(args, blk2->bp); xfs_dir3_leaf_check(dp, blk1->bp); xfs_dir3_leaf_check(dp, blk2->bp); @@ -1075,7 +1079,6 @@ xfs_dir3_data_block_free( struct xfs_buf *fbp, int longest) { - struct xfs_trans *tp = args->trans; int logfree = 0; __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; @@ -1089,7 +1092,7 @@ xfs_dir3_data_block_free( * value. */ bests[findex] = cpu_to_be16(longest); - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(args, fbp, findex, findex); return 0; } @@ -1117,7 +1120,7 @@ xfs_dir3_data_block_free( } dp->d_ops->free_hdr_to_disk(free, &freehdr); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir2_free_log_header(args, fbp); /* * If there are no useful entries left in the block, get rid of the @@ -1130,7 +1133,7 @@ xfs_dir3_data_block_free( if (error == 0) { fbp = NULL; logfree = 0; - } else if (error != ENOSPC || args->total != 0) + } else if (error != -ENOSPC || args->total != 0) return error; /* * It's possible to get ENOSPC if there is no @@ -1141,7 +1144,7 @@ xfs_dir3_data_block_free( /* Log the free entry that changed, unless we got rid of it. */ if (logfree) - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(args, fbp, findex, findex); return 0; } @@ -1192,9 +1195,9 @@ xfs_dir2_leafn_remove( /* * Extract the data block and offset from the entry. */ - db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); ASSERT(dblk->blkno == db); - off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)); + off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)); ASSERT(dblk->index == off); /* @@ -1203,10 +1206,10 @@ xfs_dir2_leafn_remove( */ leafhdr.stale++; dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, bp); + xfs_dir3_leaf_log_header(args, bp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(tp, dp, bp, index, index); + xfs_dir3_leaf_log_ents(args, bp, index, index); /* * Make the data entry free. Keep track of the longest freespace @@ -1218,7 +1221,7 @@ xfs_dir2_leafn_remove( bf = dp->d_ops->data_bestfree_p(hdr); longest = be16_to_cpu(bf[0].length); needlog = needscan = 0; - xfs_dir2_data_make_free(tp, dp, dbp, off, + xfs_dir2_data_make_free(args, dbp, off, dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* * Rescan the data block freespaces for bestfree. @@ -1227,7 +1230,7 @@ xfs_dir2_leafn_remove( if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); xfs_dir3_data_check(dp, dbp); /* * If the longest data block freespace changes, need to update @@ -1244,8 +1247,9 @@ xfs_dir2_leafn_remove( * Convert the data block number to a free block, * read in the free block. */ - fdb = dp->d_ops->db_to_fdb(mp, db); - error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb), + fdb = dp->d_ops->db_to_fdb(args->geo, db); + error = xfs_dir2_free_read(tp, dp, + xfs_dir2_db_to_da(args->geo, fdb), &fbp); if (error) return error; @@ -1254,20 +1258,21 @@ xfs_dir2_leafn_remove( { struct xfs_dir3_icfree_hdr freehdr; dp->d_ops->free_hdr_from_disk(&freehdr, free); - ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(mp) * - (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); + ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) * + (fdb - xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET))); } #endif /* * Calculate which entry we need to fix. */ - findex = dp->d_ops->db_to_fdindex(mp, db); + findex = dp->d_ops->db_to_fdindex(args->geo, db); longest = be16_to_cpu(bf[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ - if (longest == mp->m_dirblksize - + if (longest == args->geo->blksize - dp->d_ops->data_entry_offset) { /* * Try to punch out the data block. @@ -1282,7 +1287,7 @@ xfs_dir2_leafn_remove( * In this case just drop the buffer and some one else * will eventually get rid of the empty block. */ - else if (!(error == ENOSPC && args->total == 0)) + else if (!(error == -ENOSPC && args->total == 0)) return error; } /* @@ -1302,7 +1307,7 @@ xfs_dir2_leafn_remove( */ *rval = (dp->d_ops->leaf_hdr_size + (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < - mp->m_dir_magicpct; + args->geo->magicpct; return 0; } @@ -1335,7 +1340,7 @@ xfs_dir2_leafn_split( /* * Initialize the new leaf block. */ - error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(mp, blkno), + error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno), &newblk->bp, XFS_DIR2_LEAFN_MAGIC); if (error) return error; @@ -1409,7 +1414,7 @@ xfs_dir2_leafn_toosmall( count = leafhdr.count - leafhdr.stale; bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); - if (bytes > (state->blocksize >> 1)) { + if (bytes > (state->args->geo->blksize >> 1)) { /* * Blk over 50%, don't try to join. */ @@ -1462,7 +1467,8 @@ xfs_dir2_leafn_toosmall( * Count bytes in the two blocks combined. */ count = leafhdr.count - leafhdr.stale; - bytes = state->blocksize - (state->blocksize >> 2); + bytes = state->args->geo->blksize - + (state->args->geo->blksize >> 2); leaf = bp->b_addr; dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); @@ -1559,8 +1565,8 @@ xfs_dir2_leafn_unbalance( /* log the changes made when moving the entries */ dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); - xfs_dir3_leaf_log_header(args->trans, dp, save_blk->bp); - xfs_dir3_leaf_log_header(args->trans, dp, drop_blk->bp); + xfs_dir3_leaf_log_header(args, save_blk->bp); + xfs_dir3_leaf_log_header(args, drop_blk->bp); xfs_dir3_leaf_check(dp, save_blk->bp); xfs_dir3_leaf_check(dp, drop_blk->bp); @@ -1586,8 +1592,6 @@ xfs_dir2_node_addname( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_dirblksize; - state->node_ents = state->mp->m_dir_node_ents; /* * Look up the name. We're not supposed to find it, but * this gives us the insertion point. @@ -1595,7 +1599,7 @@ xfs_dir2_node_addname( error = xfs_da3_node_lookup_int(state, &rval); if (error) rval = error; - if (rval != ENOENT) { + if (rval != -ENOENT) { goto done; } /* @@ -1624,7 +1628,7 @@ xfs_dir2_node_addname( * It didn't work, we need to split the leaf block. */ if (args->total == 0) { - ASSERT(rval == ENOSPC); + ASSERT(rval == -ENOSPC); goto done; } /* @@ -1726,9 +1730,9 @@ xfs_dir2_node_addname_int( if (dbno == -1) { xfs_fileoff_t fo; /* freespace block number */ - if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) + if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) return error; - lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo); + lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); fbno = ifbno; } /* @@ -1746,7 +1750,8 @@ xfs_dir2_node_addname_int( * us a freespace block to start with. */ if (++fbno == 0) - fbno = XFS_DIR2_FREE_FIRSTDB(mp); + fbno = xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET); /* * If it's ifbno we already looked at it. */ @@ -1764,8 +1769,8 @@ xfs_dir2_node_addname_int( * to avoid it. */ error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(mp, fbno), - &fbp); + xfs_dir2_db_to_da(args->geo, fbno), + &fbp); if (error) return error; if (!fbp) @@ -1810,7 +1815,7 @@ xfs_dir2_node_addname_int( * Not allowed to allocate, return failure. */ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) - return XFS_ERROR(ENOSPC); + return -ENOSPC; /* * Allocate and initialize the new data block. @@ -1833,10 +1838,10 @@ xfs_dir2_node_addname_int( * Get the freespace block corresponding to the data block * that was just allocated. */ - fbno = dp->d_ops->db_to_fdb(mp, dbno); + fbno = dp->d_ops->db_to_fdb(args->geo, dbno); error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(mp, fbno), - &fbp); + xfs_dir2_db_to_da(args->geo, fbno), + &fbp); if (error) return error; @@ -1850,12 +1855,13 @@ xfs_dir2_node_addname_int( if (error) return error; - if (unlikely(dp->d_ops->db_to_fdb(mp, dbno) != fbno)) { + if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { xfs_alert(mp, "%s: dir ino %llu needed freesp block %lld for\n" " data block %lld, got %lld ifbno %llu lastfbno %d", __func__, (unsigned long long)dp->i_ino, - (long long)dp->d_ops->db_to_fdb(mp, dbno), + (long long)dp->d_ops->db_to_fdb( + args->geo, dbno), (long long)dbno, (long long)fbno, (unsigned long long)ifbno, lastfbno); if (fblk) { @@ -1870,13 +1876,13 @@ xfs_dir2_node_addname_int( } XFS_ERROR_REPORT("xfs_dir2_node_addname_int", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } /* * Get a buffer for the new block. */ - error = xfs_dir3_free_get_buf(tp, dp, fbno, &fbp); + error = xfs_dir3_free_get_buf(args, fbno, &fbp); if (error) return error; free = fbp->b_addr; @@ -1886,8 +1892,10 @@ xfs_dir2_node_addname_int( /* * Remember the first slot as our empty slot. */ - freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * - dp->d_ops->free_max_bests(mp); + freehdr.firstdb = + (fbno - xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET)) * + dp->d_ops->free_max_bests(args->geo); } else { free = fbp->b_addr; bests = dp->d_ops->free_bests_p(free); @@ -1897,13 +1905,13 @@ xfs_dir2_node_addname_int( /* * Set the freespace block index from the data block number. */ - findex = dp->d_ops->db_to_fdindex(mp, dbno); + findex = dp->d_ops->db_to_fdindex(args->geo, dbno); /* * If it's after the end of the current entries in the * freespace block, extend that table. */ if (findex >= freehdr.nvalid) { - ASSERT(findex < dp->d_ops->free_max_bests(mp)); + ASSERT(findex < dp->d_ops->free_max_bests(args->geo)); freehdr.nvalid = findex + 1; /* * Tag new entry so nused will go up. @@ -1917,7 +1925,7 @@ xfs_dir2_node_addname_int( if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { freehdr.nused++; dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir2_free_log_header(args, fbp); } /* * Update the real value in the table. @@ -1942,7 +1950,8 @@ xfs_dir2_node_addname_int( /* * Read the data block in. */ - error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno), + error = xfs_dir3_data_read(tp, dp, + xfs_dir2_db_to_da(args->geo, dbno), -1, &dbp); if (error) return error; @@ -1960,7 +1969,7 @@ xfs_dir2_node_addname_int( /* * Mark the first part of the unused space, inuse for us. */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, + xfs_dir2_data_use_free(args, dbp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* @@ -1973,7 +1982,7 @@ xfs_dir2_node_addname_int( dp->d_ops->data_put_ftype(dep, args->filetype); tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir2_data_log_entry(args, dbp, dep); /* * Rescan the block for bestfree if needed. */ @@ -1983,7 +1992,7 @@ xfs_dir2_node_addname_int( * Log the data block header if needed. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(args, dbp); /* * If the freespace entry is now wrong, update it. */ @@ -1996,7 +2005,7 @@ xfs_dir2_node_addname_int( * Log the freespace entry if needed. */ if (logfree) - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(args, fbp, findex, findex); /* * Return the data block and offset in args, then drop the data block. */ @@ -2027,16 +2036,14 @@ xfs_dir2_node_lookup( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_dirblksize; - state->node_ents = state->mp->m_dir_node_ents; /* * Fill in the path to the entry in the cursor. */ error = xfs_da3_node_lookup_int(state, &rval); if (error) rval = error; - else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) { - /* If a CI match, dup the actual name and return EEXIST */ + else if (rval == -ENOENT && args->cmpresult == XFS_CMP_CASE) { + /* If a CI match, dup the actual name and return -EEXIST */ xfs_dir2_data_entry_t *dep; dep = (xfs_dir2_data_entry_t *) @@ -2067,12 +2074,12 @@ xfs_dir2_node_lookup( */ int /* error */ xfs_dir2_node_removename( - xfs_da_args_t *args) /* operation arguments */ + struct xfs_da_args *args) /* operation arguments */ { - xfs_da_state_blk_t *blk; /* leaf block */ + struct xfs_da_state_blk *blk; /* leaf block */ int error; /* error return value */ int rval; /* operation return value */ - xfs_da_state_t *state; /* btree cursor */ + struct xfs_da_state *state; /* btree cursor */ trace_xfs_dir2_node_removename(args); @@ -2082,21 +2089,18 @@ xfs_dir2_node_removename( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_dirblksize; - state->node_ents = state->mp->m_dir_node_ents; - /* - * Look up the entry we're deleting, set up the cursor. - */ + + /* Look up the entry we're deleting, set up the cursor. */ error = xfs_da3_node_lookup_int(state, &rval); if (error) - rval = error; - /* - * Didn't find it, upper layer screwed up. - */ - if (rval != EEXIST) { - xfs_da_state_free(state); - return rval; + goto out_free; + + /* Didn't find it, upper layer screwed up. */ + if (rval != -EEXIST) { + error = rval; + goto out_free; } + blk = &state->path.blk[state->path.active - 1]; ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); ASSERT(state->extravalid); @@ -2107,7 +2111,7 @@ xfs_dir2_node_removename( error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, &state->extrablk, &rval); if (error) - return error; + goto out_free; /* * Fix the hash values up the btree. */ @@ -2122,6 +2126,7 @@ xfs_dir2_node_removename( */ if (!error) error = xfs_dir2_node_to_leaf(state); +out_free: xfs_da_state_free(state); return error; } @@ -2152,8 +2157,6 @@ xfs_dir2_node_replace( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; - state->blocksize = state->mp->m_dirblksize; - state->node_ents = state->mp->m_dir_node_ents; inum = args->inumber; /* * Lookup the entry to change in the btree. @@ -2166,7 +2169,7 @@ xfs_dir2_node_replace( * It should be found, since the vnodeops layer has looked it up * and locked it. But paranoia is good. */ - if (rval == EEXIST) { + if (rval == -EEXIST) { struct xfs_dir2_leaf_entry *ents; /* * Find the leaf entry. @@ -2185,15 +2188,15 @@ xfs_dir2_node_replace( hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); dep = (xfs_dir2_data_entry_t *) ((char *)hdr + - xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(args->geo, + be32_to_cpu(lep->address))); ASSERT(inum != be64_to_cpu(dep->inumber)); /* * Fill in the new inode number and log the entry. */ dep->inumber = cpu_to_be64(inum); args->dp->d_ops->data_put_ftype(dep, args->filetype); - xfs_dir2_data_log_entry(args->trans, args->dp, - state->extrablk.bp, dep); + xfs_dir2_data_log_entry(args, state->extrablk.bp, dep); rval = 0; } /* @@ -2261,15 +2264,15 @@ xfs_dir2_node_trim_free( /* * Blow the block away. */ - if ((error = - xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo), - bp))) { + error = xfs_dir2_shrink_inode(args, + xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp); + if (error) { /* * Can't fail with ENOSPC since that only happens with no * space reservation, when breaking up an extent into two * pieces. This is the last block of an extent. */ - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); xfs_trans_brelse(tp, bp); return error; } diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 8b9d2281f85b..27ce0794d196 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -20,6 +20,140 @@ struct dir_context; +/* + * Directory offset/block conversion functions. + * + * DB blocks here are logical directory block numbers, not filesystem blocks. + */ + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) +{ + return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr. It had better be aligned. + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) +{ + return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return (xfs_dir2_db_t)(by >> geo->blklog); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ + return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return ((xfs_dir2_off_t)db << geo->blklog) + o; +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ + return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o) +{ + return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ + return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ + return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); +} + +/* + * Directory tail pointer accessor functions. Based on block geometry. + */ +static inline struct xfs_dir2_block_tail * +xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) +{ + return ((struct xfs_dir2_block_tail *) + ((char *)hdr + geo->blksize)) - 1; +} + +static inline struct xfs_dir2_leaf_tail * +xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) +{ + return (struct xfs_dir2_leaf_tail *) + ((char *)lp + geo->blksize - + sizeof(struct xfs_dir2_leaf_tail)); +} + /* xfs_dir2.c */ extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, @@ -54,8 +188,8 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); -extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mapped_bno); +extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, + xfs_daddr_t mapped_bno); extern struct xfs_dir2_data_free * xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, @@ -77,9 +211,9 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, struct xfs_buf **bpp, __uint16_t magic); -extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args, struct xfs_buf *bp, int first, int last); -extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args, struct xfs_buf *bp); extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index aafc6e46cb58..5079e051ef08 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -51,10 +51,9 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args); #else #define xfs_dir2_sf_check(args) #endif /* DEBUG */ -#if XFS_BIG_INUMS + static void xfs_dir2_sf_toino4(xfs_da_args_t *args); static void xfs_dir2_sf_toino8(xfs_da_args_t *args); -#endif /* XFS_BIG_INUMS */ /* * Given a block directory (dp/block), calculate its size as a shortform (sf) @@ -82,8 +81,10 @@ xfs_dir2_block_sfsize( xfs_ino_t parent = 0; /* parent inode number */ int size=0; /* total computed size */ int has_ftype; + struct xfs_da_geometry *geo; mp = dp->i_mount; + geo = mp->m_dir_geo; /* * if there is a filetype field, add the extra byte to the namelen @@ -92,7 +93,7 @@ xfs_dir2_block_sfsize( has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0; count = i8count = namelen = 0; - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(geo, hdr); blp = xfs_dir2_block_leaf_p(btp); /* @@ -104,8 +105,8 @@ xfs_dir2_block_sfsize( /* * Calculate the pointer to the entry at hand. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); + dep = (xfs_dir2_data_entry_t *)((char *)hdr + + xfs_dir2_dataptr_to_off(geo, addr)); /* * Detect . and .., so we can special-case them. * . is not included in sf directories. @@ -115,10 +116,10 @@ xfs_dir2_block_sfsize( isdotdot = dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.'; -#if XFS_BIG_INUMS + if (!isdot) i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM; -#endif + /* take into account the file type field */ if (!isdot && !isdotdot) { count++; @@ -170,6 +171,7 @@ xfs_dir2_block_to_sf( char *ptr; /* current data pointer */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ + xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */ trace_xfs_dir2_block_to_sf(args); @@ -177,39 +179,24 @@ xfs_dir2_block_to_sf( mp = dp->i_mount; /* - * Make a copy of the block data, so we can shrink the inode - * and add local data. + * allocate a temporary destination buffer the size of the inode + * to format the data into. Once we have formatted the data, we + * can free the block and copy the formatted data into the inode literal + * area. */ - hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); - memcpy(hdr, bp->b_addr, mp->m_dirblksize); - logflags = XFS_ILOG_CORE; - if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { - ASSERT(error != ENOSPC); - goto out; - } + dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); + hdr = bp->b_addr; /* - * The buffer is now unconditionally gone, whether - * xfs_dir2_shrink_inode worked or not. - * - * Convert the inode to local format. - */ - dp->i_df.if_flags &= ~XFS_IFEXTENTS; - dp->i_df.if_flags |= XFS_IFINLINE; - dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; - ASSERT(dp->i_df.if_bytes == 0); - xfs_idata_realloc(dp, size, XFS_DATA_FORK); - logflags |= XFS_ILOG_DDATA; - /* * Copy the header into the newly allocate local space. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dst; memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); - dp->i_d.di_size = size; + /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(args->geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); sfep = xfs_dir2_sf_firstentry(sfp); @@ -258,10 +245,34 @@ xfs_dir2_block_to_sf( ptr += dp->d_ops->data_entsize(dep->namelen); } ASSERT((char *)sfep - (char *)sfp == size); + + /* now we are done with the block, we can shrink the inode */ + logflags = XFS_ILOG_CORE; + error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp); + if (error) { + ASSERT(error != -ENOSPC); + goto out; + } + + /* + * The buffer is now unconditionally gone, whether + * xfs_dir2_shrink_inode worked or not. + * + * Convert the inode to local format and copy the data in. + */ + dp->i_df.if_flags &= ~XFS_IFEXTENTS; + dp->i_df.if_flags |= XFS_IFINLINE; + dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; + ASSERT(dp->i_df.if_bytes == 0); + xfs_idata_realloc(dp, size, XFS_DATA_FORK); + + logflags |= XFS_ILOG_DDATA; + memcpy(dp->i_df.if_u1.if_data, dst, size); + dp->i_d.di_size = size; xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(hdr); + kmem_free(dst); return error; } @@ -275,21 +286,19 @@ int /* error */ xfs_dir2_sf_addname( xfs_da_args_t *args) /* operation arguments */ { - int add_entsize; /* size of the new entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ int incr_isize; /* total change in size */ int new_isize; /* di_size after adding name */ int objchange; /* changing to 8-byte inodes */ xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ - int old_isize; /* di_size before adding name */ int pick; /* which algorithm to use */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ trace_xfs_dir2_sf_addname(args); - ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); + ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT); dp = args->dp; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); /* @@ -297,7 +306,7 @@ xfs_dir2_sf_addname( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); @@ -306,29 +315,24 @@ xfs_dir2_sf_addname( /* * Compute entry (and change in) size. */ - add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen); - incr_isize = add_entsize; + incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen); objchange = 0; -#if XFS_BIG_INUMS + /* * Do we have to change to 8 byte inodes? */ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { /* - * Yes, adjust the entry size and the total size. + * Yes, adjust the inode size. old count + (parent + new) */ - add_entsize += - (uint)sizeof(xfs_dir2_ino8_t) - - (uint)sizeof(xfs_dir2_ino4_t); incr_isize += (sfp->count + 2) * ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); objchange = 1; } -#endif - old_isize = (int)dp->i_d.di_size; - new_isize = old_isize + incr_isize; + + new_isize = (int)dp->i_d.di_size + incr_isize; /* * Won't fit as shortform any more (due to size), * or the pick routine says it won't (due to offset values). @@ -340,7 +344,7 @@ xfs_dir2_sf_addname( * Just checking or no space reservation, it doesn't fit. */ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) - return XFS_ERROR(ENOSPC); + return -ENOSPC; /* * Convert to block form then add the name. */ @@ -365,10 +369,8 @@ xfs_dir2_sf_addname( */ else { ASSERT(pick == 2); -#if XFS_BIG_INUMS if (objchange) xfs_dir2_sf_toino8(args); -#endif xfs_dir2_sf_addname_hard(args, objchange, new_isize); } xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); @@ -420,10 +422,8 @@ xfs_dir2_sf_addname_easy( * Update the header and inode. */ sfp->count++; -#if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) sfp->i8count++; -#endif dp->i_d.di_size = new_isize; xfs_dir2_sf_check(args); } @@ -511,10 +511,8 @@ xfs_dir2_sf_addname_hard( dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); dp->d_ops->sf_put_ftype(sfep, args->filetype); sfp->count++; -#if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) sfp->i8count++; -#endif /* * If there's more left to copy, do that. */ @@ -583,22 +581,17 @@ xfs_dir2_sf_addname_pick( * we'll go back, convert to block, then try the insert and convert * to leaf. */ - if (used + (holefit ? 0 : size) > mp->m_dirblksize) + if (used + (holefit ? 0 : size) > args->geo->blksize) return 0; /* * If changing the inode number size, do it the hard way. */ -#if XFS_BIG_INUMS - if (objchange) { + if (objchange) return 2; - } -#else - ASSERT(objchange == 0); -#endif /* * If it won't fit at the end then do it the hard way (use the hole). */ - if (used + size > mp->m_dirblksize) + if (used + size > args->geo->blksize) return 2; /* * Do it the easy way. @@ -645,11 +638,10 @@ xfs_dir2_sf_check( ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); } ASSERT(i8count == sfp->i8count); - ASSERT(XFS_BIG_INUMS || i8count == 0); ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); ASSERT(offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + - (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dirblksize); + (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize); } #endif /* DEBUG */ @@ -733,7 +725,7 @@ xfs_dir2_sf_lookup( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); @@ -746,7 +738,7 @@ xfs_dir2_sf_lookup( args->inumber = dp->i_ino; args->cmpresult = XFS_CMP_EXACT; args->filetype = XFS_DIR3_FT_DIR; - return XFS_ERROR(EEXIST); + return -EEXIST; } /* * Special case for .. @@ -756,7 +748,7 @@ xfs_dir2_sf_lookup( args->inumber = dp->d_ops->sf_get_parent_ino(sfp); args->cmpresult = XFS_CMP_EXACT; args->filetype = XFS_DIR3_FT_DIR; - return XFS_ERROR(EEXIST); + return -EEXIST; } /* * Loop over all the entries trying to match ours. @@ -776,20 +768,20 @@ xfs_dir2_sf_lookup( args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); args->filetype = dp->d_ops->sf_get_ftype(sfep); if (cmp == XFS_CMP_EXACT) - return XFS_ERROR(EEXIST); + return -EEXIST; ci_sfep = sfep; } } ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); /* * Here, we can only be doing a lookup (not a rename or replace). - * If a case-insensitive match was not found, return ENOENT. + * If a case-insensitive match was not found, return -ENOENT. */ if (!ci_sfep) - return XFS_ERROR(ENOENT); + return -ENOENT; /* otherwise process the CI match as required by the caller */ error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen); - return XFS_ERROR(error); + return error; } /* @@ -819,7 +811,7 @@ xfs_dir2_sf_removename( */ if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == oldsize); ASSERT(dp->i_df.if_u1.if_data != NULL); @@ -842,7 +834,7 @@ xfs_dir2_sf_removename( * Didn't find it. */ if (i == sfp->count) - return XFS_ERROR(ENOENT); + return -ENOENT; /* * Calculate sizes. */ @@ -865,7 +857,6 @@ xfs_dir2_sf_removename( */ xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; -#if XFS_BIG_INUMS /* * Are we changing inode number size? */ @@ -875,7 +866,6 @@ xfs_dir2_sf_removename( else sfp->i8count--; } -#endif xfs_dir2_sf_check(args); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); return 0; @@ -890,12 +880,8 @@ xfs_dir2_sf_replace( { xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ -#if XFS_BIG_INUMS || defined(DEBUG) xfs_ino_t ino=0; /* entry old inode number */ -#endif -#if XFS_BIG_INUMS int i8elevated; /* sf_toino8 set i8count=1 */ -#endif xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ @@ -909,13 +895,13 @@ xfs_dir2_sf_replace( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); -#if XFS_BIG_INUMS + /* * New inode number is large, and need to convert to 8-byte inodes. */ @@ -946,17 +932,15 @@ xfs_dir2_sf_replace( sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; } else i8elevated = 0; -#endif + ASSERT(args->namelen != 1 || args->name[0] != '.'); /* * Replace ..'s entry. */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { -#if XFS_BIG_INUMS || defined(DEBUG) ino = dp->d_ops->sf_get_parent_ino(sfp); ASSERT(args->inumber != ino); -#endif dp->d_ops->sf_put_parent_ino(sfp, args->inumber); } /* @@ -967,10 +951,8 @@ xfs_dir2_sf_replace( i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { -#if XFS_BIG_INUMS || defined(DEBUG) ino = dp->d_ops->sf_get_ino(sfp, sfep); ASSERT(args->inumber != ino); -#endif dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); dp->d_ops->sf_put_ftype(sfep, args->filetype); break; @@ -981,14 +963,11 @@ xfs_dir2_sf_replace( */ if (i == sfp->count) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); -#if XFS_BIG_INUMS if (i8elevated) xfs_dir2_sf_toino4(args); -#endif - return XFS_ERROR(ENOENT); + return -ENOENT; } } -#if XFS_BIG_INUMS /* * See if the old number was large, the new number is small. */ @@ -1015,13 +994,11 @@ xfs_dir2_sf_replace( if (!i8elevated) sfp->i8count++; } -#endif xfs_dir2_sf_check(args); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); return 0; } -#if XFS_BIG_INUMS /* * Convert from 8-byte inode numbers to 4-byte inode numbers. * The last 8-byte inode number is gone, but the count is still 1. @@ -1100,9 +1077,9 @@ xfs_dir2_sf_toino4( } /* - * Convert from 4-byte inode numbers to 8-byte inode numbers. - * The new 8-byte inode number is not there yet, we leave with the - * count 1 but no corresponding entry. + * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers. + * The new entry w/ an 8-byte inode number is not there yet; we leave with + * i8count set to 1, but no corresponding 8-byte entry. */ static void xfs_dir2_sf_toino8( @@ -1135,7 +1112,7 @@ xfs_dir2_sf_toino8( ASSERT(oldsfp->i8count == 0); memcpy(buf, oldsfp, oldsize); /* - * Compute the new inode size. + * Compute the new inode size (nb: entry count + 1 for parent) */ newsize = oldsize + @@ -1176,4 +1153,3 @@ xfs_dir2_sf_toino8( dp->i_d.di_size = newsize; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); } -#endif /* XFS_BIG_INUMS */ diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index d401457d2f25..bb969337efc8 100644 --- a/fs/xfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -35,7 +35,6 @@ int xfs_calc_dquots_per_chunk( - struct xfs_mount *mp, unsigned int nbblks) /* basic block units */ { unsigned int ndquots; @@ -194,7 +193,7 @@ xfs_dquot_buf_verify_crc( if (mp->m_quotainfo) ndquots = mp->m_quotainfo->qi_dqperchunk; else - ndquots = xfs_calc_dquots_per_chunk(mp, + ndquots = xfs_calc_dquots_per_chunk( XFS_BB_TO_FSB(mp, bp->b_length)); for (i = 0; i < ndquots; i++, d++) { @@ -225,7 +224,7 @@ xfs_dquot_buf_verify( if (mp->m_quotainfo) ndquots = mp->m_quotainfo->qi_dqperchunk; else - ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length); + ndquots = xfs_calc_dquots_per_chunk(bp->b_length); /* * On the first read of the buffer, verify that each dquot is valid. @@ -257,10 +256,13 @@ xfs_dquot_buf_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (!xfs_dquot_buf_verify_crc(mp, bp)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_dquot_buf_verify(mp, bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } /* @@ -275,8 +277,8 @@ xfs_dquot_buf_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } } diff --git a/fs/xfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index b6ab5a3cfa12..7e42bba9a420 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -68,11 +68,7 @@ struct xfs_ifork; #define XFS_RTLOBIT(w) xfs_lowbit32(w) #define XFS_RTHIBIT(w) xfs_highbit32(w) -#if XFS_BIG_BLKNOS #define XFS_RTBLOCKLOG(b) xfs_highbit64(b) -#else -#define XFS_RTBLOCKLOG(b) xfs_highbit32(b) -#endif /* * Dquot and dquot block format definitions @@ -145,6 +141,8 @@ struct xfs_dsymlink_hdr { __be64 sl_lsn; }; +#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc) + /* * The maximum pathlen is 1024 bytes. Since the minimum file system * blocksize is 512 bytes, we can get a max of 3 extents back from @@ -200,6 +198,8 @@ typedef __be32 xfs_alloc_ptr_t; */ #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ #define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ +#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */ +#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */ typedef __uint64_t xfs_inofree_t; #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) @@ -242,7 +242,17 @@ typedef __be32 xfs_inobt_ptr_t; * block numbers in the AG. */ #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) -#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) +#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) + +/* + * The first data block of an AG depends on whether the filesystem was formatted + * with the finobt feature. If so, account for the finobt reserved root btree + * block. + */ +#define XFS_PREALLOC_BLOCKS(mp) \ + (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ + XFS_FIBT_BLOCK(mp) + 1 : \ + XFS_IBT_BLOCK(mp) + 1) @@ -290,23 +300,15 @@ typedef struct xfs_bmbt_rec_host { * Values and macros for delayed-allocation startblock fields. */ #define STARTBLOCKVALBITS 17 -#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20) -#define DSTARTBLOCKMASKBITS (15 + 20) +#define STARTBLOCKMASKBITS (15 + 20) #define STARTBLOCKMASK \ (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) -#define DSTARTBLOCKMASK \ - (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) static inline int isnullstartblock(xfs_fsblock_t x) { return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; } -static inline int isnulldstartblock(xfs_dfsbno_t x) -{ - return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; -} - static inline xfs_fsblock_t nullstartblock(int k) { ASSERT(k < (1 << STARTBLOCKVALBITS)); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index e87719c5bebe..b62771f1f4b5 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -52,7 +52,7 @@ xfs_ialloc_cluster_alignment( { if (xfs_sb_version_hasalign(&args->mp->m_sb) && args->mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) + XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) return args->mp->m_sb.sb_inoalignmt; return 1; } @@ -112,6 +112,66 @@ xfs_inobt_get_rec( } /* + * Insert a single inobt record. Cursor must already point to desired location. + */ +STATIC int +xfs_inobt_insert_rec( + struct xfs_btree_cur *cur, + __int32_t freecount, + xfs_inofree_t free, + int *stat) +{ + cur->bc_rec.i.ir_freecount = freecount; + cur->bc_rec.i.ir_free = free; + return xfs_btree_insert(cur, stat); +} + +/* + * Insert records describing a newly allocated inode chunk into the inobt. + */ +STATIC int +xfs_inobt_insert( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t newino, + xfs_agino_t newlen, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agino_t thisino; + int i; + int error; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); + + for (thisino = newino; + thisino < newino + newlen; + thisino += XFS_INODES_PER_CHUNK) { + error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i); + if (error) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 0); + + error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, + XFS_INOBT_ALL_FREE, &i); + if (error) { + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; + } + ASSERT(i == 1); + } + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + + return 0; +} + +/* * Verify that the number of free inodes in the AGI is correct. */ #ifdef DEBUG @@ -170,27 +230,20 @@ xfs_ialloc_inode_init( { struct xfs_buf *fbuf; struct xfs_dinode *free; - int blks_per_cluster, nbufs, ninodes; + int nbufs, blks_per_cluster, inodes_per_cluster; int version; int i, j; xfs_daddr_t d; xfs_ino_t ino = 0; /* - * Loop over the new block(s), filling in the inodes. - * For small block sizes, manipulate the inodes in buffers - * which are multiples of the blocks size. + * Loop over the new block(s), filling in the inodes. For small block + * sizes, manipulate the inodes in buffers which are multiples of the + * blocks size. */ - if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { - blks_per_cluster = 1; - nbufs = length; - ninodes = mp->m_sb.sb_inopblock; - } else { - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / - mp->m_sb.sb_blocksize; - nbufs = length / blks_per_cluster; - ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; - } + blks_per_cluster = xfs_icluster_size_fsb(mp); + inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; + nbufs = length / blks_per_cluster; /* * Figure out what version number to use in the inodes we create. If @@ -225,12 +278,10 @@ xfs_ialloc_inode_init( * they track in the AIL as if they were physically logged. */ if (tp) - xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), + xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, mp->m_sb.sb_inodesize, length, gen); - } else if (xfs_sb_version_hasnlink(&mp->m_sb)) + } else version = 2; - else - version = 1; for (j = 0; j < nbufs; j++) { /* @@ -241,12 +292,12 @@ xfs_ialloc_inode_init( mp->m_bsize * blks_per_cluster, XBF_UNMAPPED); if (!fbuf) - return ENOMEM; + return -ENOMEM; /* Initialize the inode buffers and log them appropriately. */ fbuf->b_ops = &xfs_inode_buf_ops; xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); - for (i = 0; i < ninodes; i++) { + for (i = 0; i < inodes_per_cluster; i++) { int ioffset = i << mp->m_sb.sb_inodelog; uint isize = xfs_dinode_size(version); @@ -310,13 +361,10 @@ xfs_ialloc_ag_alloc( { xfs_agi_t *agi; /* allocation group header */ xfs_alloc_arg_t args; /* allocation argument structure */ - xfs_btree_cur_t *cur; /* inode btree cursor */ xfs_agnumber_t agno; int error; - int i; xfs_agino_t newino; /* new first inode's number */ xfs_agino_t newlen; /* new number of inodes */ - xfs_agino_t thisino; /* current inode number, for loop */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ struct xfs_perag *pag; @@ -329,11 +377,11 @@ xfs_ialloc_ag_alloc( * Locking will ensure that we don't have two callers in here * at one time. */ - newlen = XFS_IALLOC_INODES(args.mp); + newlen = args.mp->m_ialloc_inos; if (args.mp->m_maxicount && args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) - return XFS_ERROR(ENOSPC); - args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); + return -ENOSPC; + args.minlen = args.maxlen = args.mp->m_ialloc_blks; /* * First try to allocate inodes contiguous with the last-allocated * chunk of inodes. If the filesystem is striped, this will fill @@ -343,7 +391,7 @@ xfs_ialloc_ag_alloc( newino = be32_to_cpu(agi->agi_newino); agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + - XFS_IALLOC_BLOCKS(args.mp); + args.mp->m_ialloc_blks; if (likely(newino != NULLAGINO && (args.agbno < be32_to_cpu(agi->agi_length)))) { args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); @@ -370,6 +418,18 @@ xfs_ialloc_ag_alloc( args.minleft = args.mp->m_in_maxlevels - 1; if ((error = xfs_alloc_vextent(&args))) return error; + + /* + * This request might have dirtied the transaction if the AG can + * satisfy the request, but the exact block was not available. + * If the allocation did fail, subsequent requests will relax + * the exact agbno requirement and increase the alignment + * instead. It is critical that the total size of the request + * (len + alignment + slop) does not increase from this point + * on, so reset minalignslop to ensure it is not included in + * subsequent requests. + */ + args.minalignslop = 0; } else args.fsbno = NULLFSBLOCK; @@ -454,29 +514,19 @@ xfs_ialloc_ag_alloc( agi->agi_newino = cpu_to_be32(newino); /* - * Insert records describing the new inode chunk into the btree. + * Insert records describing the new inode chunk into the btrees. */ - cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); - for (thisino = newino; - thisino < newino + newlen; - thisino += XFS_INODES_PER_CHUNK) { - cur->bc_rec.i.ir_startino = thisino; - cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; - cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; - error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; - } - ASSERT(i == 0); - error = xfs_btree_insert(cur, &i); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_INO); + if (error) + return error; + + if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_FINO); + if (error) return error; - } - ASSERT(i == 1); } - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); /* * Log allocation group header fields */ @@ -585,7 +635,7 @@ xfs_ialloc_ag_select( * Is there enough free space for the file plus a block of * inodes? (if we need to allocate some)? */ - ineed = XFS_IALLOC_BLOCKS(mp); + ineed = mp->m_ialloc_blks; longest = pag->pagf_longest; if (!longest) longest = pag->pagf_flcount > 0; @@ -670,13 +720,10 @@ xfs_ialloc_get_rec( } /* - * Allocate an inode. - * - * The caller selected an AG for us, and made sure that free inodes are - * available. + * Allocate an inode using the inobt-only algorithm. */ STATIC int -xfs_dialloc_ag( +xfs_dialloc_ag_inobt( struct xfs_trans *tp, struct xfs_buf *agbp, xfs_ino_t parent, @@ -702,7 +749,7 @@ xfs_dialloc_ag( ASSERT(pag->pagi_freecount > 0); restart_pagno: - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -935,6 +982,294 @@ error0: } /* + * Use the free inode btree to allocate an inode based on distance from the + * parent. Note that the provided cursor may be deleted and replaced. + */ +STATIC int +xfs_dialloc_ag_finobt_near( + xfs_agino_t pagino, + struct xfs_btree_cur **ocur, + struct xfs_inobt_rec_incore *rec) +{ + struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ + struct xfs_btree_cur *rcur; /* right search cursor */ + struct xfs_inobt_rec_incore rrec; + int error; + int i, j; + + error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); + if (error) + return error; + + if (i == 1) { + error = xfs_inobt_get_rec(lcur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + /* + * See if we've landed in the parent inode record. The finobt + * only tracks chunks with at least one free inode, so record + * existence is enough. + */ + if (pagino >= rec->ir_startino && + pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) + return 0; + } + + error = xfs_btree_dup_cursor(lcur, &rcur); + if (error) + return error; + + error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); + if (error) + goto error_rcur; + if (j == 1) { + error = xfs_inobt_get_rec(rcur, &rrec, &j); + if (error) + goto error_rcur; + XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); + } + + XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); + if (i == 1 && j == 1) { + /* + * Both the left and right records are valid. Choose the closer + * inode chunk to the target. + */ + if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > + (rrec.ir_startino - pagino)) { + *rec = rrec; + xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); + *ocur = rcur; + } else { + xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); + } + } else if (j == 1) { + /* only the right record is valid */ + *rec = rrec; + xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); + *ocur = rcur; + } else if (i == 1) { + /* only the left record is valid */ + xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); + } + + return 0; + +error_rcur: + xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Use the free inode btree to find a free inode based on a newino hint. If + * the hint is NULL, find the first free inode in the AG. + */ +STATIC int +xfs_dialloc_ag_finobt_newino( + struct xfs_agi *agi, + struct xfs_btree_cur *cur, + struct xfs_inobt_rec_incore *rec) +{ + int error; + int i; + + if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { + error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, + &i); + if (error) + return error; + if (i == 1) { + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + return 0; + } + } + + /* + * Find the first inode available in the AG. + */ + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + error = xfs_inobt_get_rec(cur, rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + return 0; +} + +/* + * Update the inobt based on a modification made to the finobt. Also ensure that + * the records from both trees are equivalent post-modification. + */ +STATIC int +xfs_dialloc_ag_update_inobt( + struct xfs_btree_cur *cur, /* inobt cursor */ + struct xfs_inobt_rec_incore *frec, /* finobt record */ + int offset) /* inode offset */ +{ + struct xfs_inobt_rec_incore rec; + int error; + int i; + + error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + + XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && + (rec.ir_freecount == frec->ir_freecount)); + + error = xfs_inobt_update(cur, &rec); + if (error) + return error; + + return 0; +} + +/* + * Allocate an inode using the free inode btree, if available. Otherwise, fall + * back to the inobt search algorithm. + * + * The caller selected an AG for us, and made sure that free inodes are + * available. + */ +STATIC int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); + xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); + struct xfs_perag *pag; + struct xfs_btree_cur *cur; /* finobt cursor */ + struct xfs_btree_cur *icur; /* inobt cursor */ + struct xfs_inobt_rec_incore rec; + xfs_ino_t ino; + int error; + int offset; + int i; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); + + pag = xfs_perag_get(mp, agno); + + /* + * If pagino is 0 (this is the root inode allocation) use newino. + * This must work because we've just allocated some. + */ + if (!pagino) + pagino = be32_to_cpu(agi->agi_newino); + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error_cur; + + /* + * The search algorithm depends on whether we're in the same AG as the + * parent. If so, find the closest available inode to the parent. If + * not, consider the agi hint or find the first free inode in the AG. + */ + if (agno == pagno) + error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); + else + error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); + if (error) + goto error_cur; + + offset = xfs_lowbit64(rec.ir_free); + ASSERT(offset >= 0); + ASSERT(offset < XFS_INODES_PER_CHUNK); + ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % + XFS_INODES_PER_CHUNK) == 0); + ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + + /* + * Modify or remove the finobt record. + */ + rec.ir_free &= ~XFS_INOBT_MASK(offset); + rec.ir_freecount--; + if (rec.ir_freecount) + error = xfs_inobt_update(cur, &rec); + else + error = xfs_btree_delete(cur, &i); + if (error) + goto error_cur; + + /* + * The finobt has now been updated appropriately. We haven't updated the + * agi and superblock yet, so we can create an inobt cursor and validate + * the original freecount. If all is well, make the equivalent update to + * the inobt using the finobt record and offset information. + */ + icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + + error = xfs_check_agi_freecount(icur, agi); + if (error) + goto error_icur; + + error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); + if (error) + goto error_icur; + + /* + * Both trees have now been updated. We must update the perag and + * superblock before we can check the freecount for each btree. + */ + be32_add_cpu(&agi->agi_freecount, -1); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); + pag->pagi_freecount--; + + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + + error = xfs_check_agi_freecount(icur, agi); + if (error) + goto error_icur; + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error_icur; + + xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_perag_put(pag); + *inop = ino; + return 0; + +error_icur: + xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); +error_cur: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_perag_put(pag); + return error; +} + +/* * Allocate an inode on disk. * * Mode is used to tell whether the new inode will need space, and whether it @@ -999,7 +1334,7 @@ xfs_dialloc( * inode. */ if (mp->m_maxicount && - mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { + mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { noroom = 1; okalloc = 0; } @@ -1050,7 +1385,7 @@ xfs_dialloc( if (error) { xfs_trans_brelse(tp, agbp); - if (error != ENOSPC) + if (error != -ENOSPC) goto out_error; xfs_perag_put(pag); @@ -1081,7 +1416,7 @@ nextag: agno = 0; if (agno == start_agno) { *inop = NULLFSINO; - return noroom ? ENOSPC : 0; + return noroom ? -ENOSPC : 0; } } @@ -1090,81 +1425,37 @@ out_alloc: return xfs_dialloc_ag(tp, agbp, parent, inop); out_error: xfs_perag_put(pag); - return XFS_ERROR(error); + return error; } -/* - * Free disk inode. Carefully avoids touching the incore inode, all - * manipulations incore are the caller's responsibility. - * The on-disk inode is not changed by this operation, only the - * btree (free inode mask) is changed. - */ -int -xfs_difree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t inode, /* inode to be freed */ - xfs_bmap_free_t *flist, /* extents to free */ - int *delete, /* set if inode cluster was deleted */ - xfs_ino_t *first_ino) /* first inode in deleted cluster */ +STATIC int +xfs_difree_inobt( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t agino, + struct xfs_bmap_free *flist, + int *deleted, + xfs_ino_t *first_ino, + struct xfs_inobt_rec_incore *orec) { - /* REFERENCED */ - xfs_agblock_t agbno; /* block number containing inode */ - xfs_buf_t *agbp; /* buffer containing allocation group header */ - xfs_agino_t agino; /* inode number relative to allocation group */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_agi_t *agi; /* allocation group header */ - xfs_btree_cur_t *cur; /* inode btree cursor */ - int error; /* error return value */ - int i; /* result code */ - int ilen; /* inodes in an inode cluster */ - xfs_mount_t *mp; /* mount structure for filesystem */ - int off; /* offset of inode in inode chunk */ - xfs_inobt_rec_incore_t rec; /* btree record */ - struct xfs_perag *pag; - - mp = tp->t_mountp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + struct xfs_perag *pag; + struct xfs_btree_cur *cur; + struct xfs_inobt_rec_incore rec; + int ilen; + int error; + int i; + int off; - /* - * Break up inode number into its components. - */ - agno = XFS_INO_TO_AGNO(mp, inode); - if (agno >= mp->m_sb.sb_agcount) { - xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", - __func__, agno, mp->m_sb.sb_agcount); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - agino = XFS_INO_TO_AGINO(mp, inode); - if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { - xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", - __func__, (unsigned long long)inode, - (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agbno >= mp->m_sb.sb_agblocks) { - xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", - __func__, agbno, mp->m_sb.sb_agblocks); - ASSERT(0); - return XFS_ERROR(EINVAL); - } - /* - * Get the allocation group header. - */ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) { - xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", - __func__, error); - return error; - } - agi = XFS_BUF_TO_AGI(agbp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); - ASSERT(agbno < be32_to_cpu(agi->agi_length)); + ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length)); + /* * Initialize the cursor. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); error = xfs_check_agi_freecount(cur, agi); if (error) @@ -1202,9 +1493,9 @@ xfs_difree( * When an inode cluster is free, it becomes eligible for removal */ if (!(mp->m_flags & XFS_MOUNT_IKEEP) && - (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { + (rec.ir_freecount == mp->m_ialloc_inos)) { - *delete = 1; + *deleted = 1; *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); /* @@ -1212,7 +1503,7 @@ xfs_difree( * AGI and Superblock inode counts, and mark the disk space * to be freed when the transaction is committed. */ - ilen = XFS_IALLOC_INODES(mp); + ilen = mp->m_ialloc_inos; be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); @@ -1228,11 +1519,11 @@ xfs_difree( goto error0; } - xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, - agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), - XFS_IALLOC_BLOCKS(mp), flist, mp); + xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, + XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), + mp->m_ialloc_blks, flist, mp); } else { - *delete = 0; + *deleted = 0; error = xfs_inobt_update(cur, &rec); if (error) { @@ -1256,6 +1547,7 @@ xfs_difree( if (error) goto error0; + *orec = rec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return 0; @@ -1264,6 +1556,182 @@ error0: return error; } +/* + * Free an inode in the free inode btree. + */ +STATIC int +xfs_difree_finobt( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agino_t agino, + struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + struct xfs_btree_cur *cur; + struct xfs_inobt_rec_incore rec; + int offset = agino - ibtrec->ir_startino; + int error; + int i; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + + error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + goto error; + if (i == 0) { + /* + * If the record does not exist in the finobt, we must have just + * freed an inode in a previously fully allocated chunk. If not, + * something is out of sync. + */ + XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); + + error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, + ibtrec->ir_free, &i); + if (error) + goto error; + ASSERT(i == 1); + + goto out; + } + + /* + * Read and update the existing record. We could just copy the ibtrec + * across here, but that would defeat the purpose of having redundant + * metadata. By making the modifications independently, we can catch + * corruptions that we wouldn't see if we just copied from one record + * to another. + */ + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error; + XFS_WANT_CORRUPTED_GOTO(i == 1, error); + + rec.ir_free |= XFS_INOBT_MASK(offset); + rec.ir_freecount++; + + XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && + (rec.ir_freecount == ibtrec->ir_freecount), + error); + + /* + * The content of inobt records should always match between the inobt + * and finobt. The lifecycle of records in the finobt is different from + * the inobt in that the finobt only tracks records with at least one + * free inode. Hence, if all of the inodes are free and we aren't + * keeping inode chunks permanently on disk, remove the record. + * Otherwise, update the record with the new information. + */ + if (rec.ir_freecount == mp->m_ialloc_inos && + !(mp->m_flags & XFS_MOUNT_IKEEP)) { + error = xfs_btree_delete(cur, &i); + if (error) + goto error; + ASSERT(i == 1); + } else { + error = xfs_inobt_update(cur, &rec); + if (error) + goto error; + } + +out: + error = xfs_check_agi_freecount(cur, agi); + if (error) + goto error; + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return 0; + +error: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Free disk inode. Carefully avoids touching the incore inode, all + * manipulations incore are the caller's responsibility. + * The on-disk inode is not changed by this operation, only the + * btree (free inode mask) is changed. + */ +int +xfs_difree( + struct xfs_trans *tp, /* transaction pointer */ + xfs_ino_t inode, /* inode to be freed */ + struct xfs_bmap_free *flist, /* extents to free */ + int *deleted,/* set if inode cluster was deleted */ + xfs_ino_t *first_ino)/* first inode in deleted cluster */ +{ + /* REFERENCED */ + xfs_agblock_t agbno; /* block number containing inode */ + struct xfs_buf *agbp; /* buffer for allocation group header */ + xfs_agino_t agino; /* allocation group inode number */ + xfs_agnumber_t agno; /* allocation group number */ + int error; /* error return value */ + struct xfs_mount *mp; /* mount structure for filesystem */ + struct xfs_inobt_rec_incore rec;/* btree record */ + + mp = tp->t_mountp; + + /* + * Break up inode number into its components. + */ + agno = XFS_INO_TO_AGNO(mp, inode); + if (agno >= mp->m_sb.sb_agcount) { + xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", + __func__, agno, mp->m_sb.sb_agcount); + ASSERT(0); + return -EINVAL; + } + agino = XFS_INO_TO_AGINO(mp, inode); + if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { + xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", + __func__, (unsigned long long)inode, + (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); + ASSERT(0); + return -EINVAL; + } + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + if (agbno >= mp->m_sb.sb_agblocks) { + xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", + __func__, agbno, mp->m_sb.sb_agblocks); + ASSERT(0); + return -EINVAL; + } + /* + * Get the allocation group header. + */ + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) { + xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", + __func__, error); + return error; + } + + /* + * Fix up the inode allocation btree. + */ + error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, + &rec); + if (error) + goto error0; + + /* + * Fix up the free inode btree. + */ + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); + if (error) + goto error0; + } + + return 0; + +error0: + return error; +} + STATIC int xfs_imap_lookup( struct xfs_mount *mp, @@ -1295,13 +1763,13 @@ xfs_imap_lookup( * we have a record, we need to ensure it contains the inode number * we are looking up. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i) error = xfs_inobt_get_rec(cur, &rec, &i); if (!error && i == 0) - error = EINVAL; + error = -EINVAL; } xfs_trans_brelse(tp, agbp); @@ -1311,13 +1779,13 @@ xfs_imap_lookup( /* check that the returned record contains the required inode */ if (rec.ir_startino > agino || - rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) - return EINVAL; + rec.ir_startino + mp->m_ialloc_inos <= agino) + return -EINVAL; /* for untrusted inodes check it is allocated first */ if ((flags & XFS_IGET_UNTRUSTED) && (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) - return EINVAL; + return -EINVAL; *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); *offset_agbno = agbno - *chunk_agbno; @@ -1361,7 +1829,7 @@ xfs_imap( * as they can be invalid without implying corruption. */ if (flags & XFS_IGET_UNTRUSTED) - return XFS_ERROR(EINVAL); + return -EINVAL; if (agno >= mp->m_sb.sb_agcount) { xfs_alert(mp, "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", @@ -1381,10 +1849,10 @@ xfs_imap( } xfs_stack_trace(); #endif /* DEBUG */ - return XFS_ERROR(EINVAL); + return -EINVAL; } - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; + blks_per_cluster = xfs_icluster_size_fsb(mp); /* * For bulkstat and handle lookups, we have an untrusted inode number @@ -1405,7 +1873,7 @@ xfs_imap( * If the inode cluster size is the same as the blocksize or * smaller we get to the buffer by simple arithmetics. */ - if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { + if (blks_per_cluster == 1) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); @@ -1454,7 +1922,7 @@ out_map: __func__, (unsigned long long) imap->im_blkno, (unsigned long long) imap->im_len, XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); - return XFS_ERROR(EINVAL); + return -EINVAL; } return 0; } @@ -1483,7 +1951,16 @@ xfs_ialloc_compute_maxlevels( } /* - * Log specified fields for the ag hdr (inode section) + * Log specified fields for the ag hdr (inode section). The growth of the agi + * structure over time requires that we interpret the buffer as two logical + * regions delineated by the end of the unlinked list. This is due to the size + * of the hash table and its location in the middle of the agi. + * + * For example, a request to log a field before agi_unlinked and a field after + * agi_unlinked could cause us to log the entire hash table and use an excessive + * amount of log space. To avoid this behavior, log the region up through + * agi_unlinked in one call and the region after agi_unlinked through the end of + * the structure in another. */ void xfs_ialloc_log_agi( @@ -1506,6 +1983,8 @@ xfs_ialloc_log_agi( offsetof(xfs_agi_t, agi_newino), offsetof(xfs_agi_t, agi_dirino), offsetof(xfs_agi_t, agi_unlinked), + offsetof(xfs_agi_t, agi_free_root), + offsetof(xfs_agi_t, agi_free_level), sizeof(xfs_agi_t) }; #ifdef DEBUG @@ -1514,15 +1993,30 @@ xfs_ialloc_log_agi( agi = XFS_BUF_TO_AGI(bp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif + + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); + /* - * Compute byte offsets for the first and last fields. + * Compute byte offsets for the first and last fields in the first + * region and log the agi buffer. This only logs up through + * agi_unlinked. */ - xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); + if (fields & XFS_AGI_ALL_BITS_R1) { + xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1, + &first, &last); + xfs_trans_log_buf(tp, bp, first, last); + } + /* - * Log the allocation group inode header buffer. + * Mask off the bits in the first region and calculate the first and + * last field offsets for any bits in the second region. */ - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); - xfs_trans_log_buf(tp, bp, first, last); + fields &= ~XFS_AGI_ALL_BITS_R1; + if (fields) { + xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2, + &first, &last); + xfs_trans_log_buf(tp, bp, first, last); + } } #ifdef DEBUG @@ -1575,18 +2069,17 @@ xfs_agi_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - int agi_ok = 1; - if (xfs_sb_version_hascrc(&mp->m_sb)) - agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agi, agi_crc)); - agi_ok = agi_ok && xfs_agi_verify(bp); - - if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, - XFS_RANDOM_IALLOC_READ_AGI))) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, + XFS_ERRTAG_IALLOC_READ_AGI, + XFS_RANDOM_IALLOC_READ_AGI)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -1597,8 +2090,8 @@ xfs_agi_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; if (!xfs_agi_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -1607,8 +2100,7 @@ xfs_agi_write_verify( if (bip) XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agi, agi_crc)); + xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); } const struct xfs_buf_ops xfs_agi_buf_ops = { @@ -1637,7 +2129,6 @@ xfs_read_agi( if (error) return error; - ASSERT(!xfs_buf_geterror(*bpp)); xfs_buf_set_ref(*bpp, XFS_AGI_REF); return 0; } diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index a8f76a5ff418..95ad1c002d60 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -25,17 +25,18 @@ struct xfs_mount; struct xfs_trans; struct xfs_btree_cur; -/* - * Allocation parameters for inode allocation. - */ -#define XFS_IALLOC_INODES(mp) (mp)->m_ialloc_inos -#define XFS_IALLOC_BLOCKS(mp) (mp)->m_ialloc_blks - -/* - * Move inodes in clusters of this size. - */ +/* Move inodes in clusters of this size */ #define XFS_INODE_BIG_CLUSTER_SIZE 8192 -#define XFS_INODE_CLUSTER_SIZE(mp) (mp)->m_inode_cluster_size + +/* Calculate and return the number of filesystem blocks per inode cluster */ +static inline int +xfs_icluster_size_fsb( + struct xfs_mount *mp) +{ + if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) + return 1; + return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; +} /* * Make an inode pointer out of the buffer/offset. @@ -89,7 +90,7 @@ xfs_difree( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t inode, /* inode to be freed */ struct xfs_bmap_free *flist, /* extents to free */ - int *delete, /* set if inode cluster was deleted */ + int *deleted, /* set if inode cluster was deleted */ xfs_ino_t *first_ino); /* first inode in deleted cluster */ /* diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index c8fa5bbb36de..c9b06f30fe86 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -49,7 +49,8 @@ xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno); + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_btnum); } STATIC void @@ -66,12 +67,26 @@ xfs_inobt_set_root( xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); } +STATIC void +xfs_finobt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *nptr, + int inc) /* level change */ +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + + agi->agi_free_root = nptr->s; + be32_add_cpu(&agi->agi_free_level, inc); + xfs_ialloc_log_agi(cur->bc_tp, agbp, + XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); +} + STATIC int xfs_inobt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int length, int *stat) { xfs_alloc_arg_t args; /* block allocation args */ @@ -173,6 +188,17 @@ xfs_inobt_init_ptr_from_cur( ptr->s = agi->agi_root; } +STATIC void +xfs_finobt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + ptr->s = agi->agi_free_root; +} + STATIC __int64_t xfs_inobt_key_diff( struct xfs_btree_cur *cur, @@ -203,6 +229,7 @@ xfs_inobt_verify( */ switch (block->bb_magic) { case cpu_to_be32(XFS_IBT_CRC_MAGIC): + case cpu_to_be32(XFS_FIBT_CRC_MAGIC): if (!xfs_sb_version_hascrc(&mp->m_sb)) return false; if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) @@ -214,6 +241,7 @@ xfs_inobt_verify( return false; /* fall through */ case cpu_to_be32(XFS_IBT_MAGIC): + case cpu_to_be32(XFS_FIBT_MAGIC): break; default: return 0; @@ -243,12 +271,14 @@ static void xfs_inobt_read_verify( struct xfs_buf *bp) { - if (!(xfs_btree_sblock_verify_crc(bp) && - xfs_inobt_verify(bp))) { + if (!xfs_btree_sblock_verify_crc(bp)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_inobt_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) { trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); } } @@ -258,9 +288,9 @@ xfs_inobt_write_verify( { if (!xfs_inobt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_btree_sblock_calc_crc(bp); @@ -315,6 +345,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = { #endif }; +static const struct xfs_btree_ops xfs_finobt_ops = { + .rec_len = sizeof(xfs_inobt_rec_t), + .key_len = sizeof(xfs_inobt_key_t), + + .dup_cursor = xfs_inobt_dup_cursor, + .set_root = xfs_finobt_set_root, + .alloc_block = xfs_inobt_alloc_block, + .free_block = xfs_inobt_free_block, + .get_minrecs = xfs_inobt_get_minrecs, + .get_maxrecs = xfs_inobt_get_maxrecs, + .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_rec_from_key = xfs_inobt_init_rec_from_key, + .init_rec_from_cur = xfs_inobt_init_rec_from_cur, + .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, + .key_diff = xfs_inobt_key_diff, + .buf_ops = &xfs_inobt_buf_ops, +#if defined(DEBUG) || defined(XFS_WARN) + .keys_inorder = xfs_inobt_keys_inorder, + .recs_inorder = xfs_inobt_recs_inorder, +#endif +}; + /* * Allocate a new inode btree cursor. */ @@ -323,7 +375,8 @@ xfs_inobt_init_cursor( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ struct xfs_buf *agbp, /* buffer for agi structure */ - xfs_agnumber_t agno) /* allocation group number */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_btnum_t btnum) /* ialloc or free ino btree */ { struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); struct xfs_btree_cur *cur; @@ -332,11 +385,17 @@ xfs_inobt_init_cursor( cur->bc_tp = tp; cur->bc_mp = mp; - cur->bc_nlevels = be32_to_cpu(agi->agi_level); - cur->bc_btnum = XFS_BTNUM_INO; + cur->bc_btnum = btnum; + if (btnum == XFS_BTNUM_INO) { + cur->bc_nlevels = be32_to_cpu(agi->agi_level); + cur->bc_ops = &xfs_inobt_ops; + } else { + cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); + cur->bc_ops = &xfs_finobt_ops; + } + cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_inobt_ops; if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index f38b22011c4e..d7ebea72c2d0 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -58,7 +58,8 @@ struct xfs_mount; ((index) - 1) * sizeof(xfs_inobt_ptr_t))) extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, - struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); + struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, + xfs_btnum_t); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 4fc9f39dd89e..f18fd2da49f7 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -101,9 +101,8 @@ xfs_inode_buf_verify( return; } - xfs_buf_ioerror(bp, EFSCORRUPTED); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, - mp, dip); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", @@ -175,14 +174,14 @@ xfs_imap_to_bp( (int)imap->im_len, buf_flags, &bp, &xfs_inode_buf_ops); if (error) { - if (error == EAGAIN) { + if (error == -EAGAIN) { ASSERT(buf_flags & XBF_TRYLOCK); return error; } - if (error == EFSCORRUPTED && + if (error == -EFSCORRUPTED && (iget_flags & XFS_IGET_UNTRUSTED)) - return XFS_ERROR(EINVAL); + return -EINVAL; xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", __func__, error); @@ -306,7 +305,7 @@ xfs_dinode_verify( if (!xfs_sb_version_hascrc(&mp->m_sb)) return false; if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, - offsetof(struct xfs_dinode, di_crc))) + XFS_DINODE_CRC_OFF)) return false; if (be64_to_cpu(dip->di_ino) != ip->i_ino) return false; @@ -327,7 +326,7 @@ xfs_dinode_calc_crc( ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, - offsetof(struct xfs_dinode, di_crc)); + XFS_DINODE_CRC_OFF); dip->di_crc = xfs_end_cksum(crc); } @@ -391,7 +390,7 @@ xfs_iread( __func__, ip->i_ino); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto out_brelse; } @@ -438,17 +437,16 @@ xfs_iread( } /* - * The inode format changed when we moved the link count and - * made it 32 bits long. If this is an old format inode, - * convert it in memory to look like a new one. If it gets - * flushed to disk we will convert back before flushing or - * logging it. We zero out the new projid field and the old link - * count field. We'll handle clearing the pad field (the remains - * of the old uuid field) when we actually convert the inode to - * the new format. We don't change the version number so that we - * can distinguish this from a real new format inode. + * Automatically convert version 1 inode formats in memory to version 2 + * inode format. If the inode is modified, it will get logged and + * rewritten as a version 2 inode. We can do this because we set the + * superblock feature bit for v2 inodes unconditionally during mount + * and it means the reast of the code can assume the inode version is 2 + * or higher. */ if (ip->i_d.di_version == 1) { + ip->i_d.di_version = 2; + memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; xfs_set_projid(ip, 0); diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 9308c47f2a52..9308c47f2a52 100644 --- a/fs/xfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index cfee14a83cfe..6a00f7fed69d 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -102,7 +102,7 @@ xfs_iformat_fork( be64_to_cpu(dip->di_nblocks)); XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { @@ -111,7 +111,7 @@ xfs_iformat_fork( dip->di_forkoff); XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && @@ -121,7 +121,7 @@ xfs_iformat_fork( ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } switch (ip->i_d.di_mode & S_IFMT) { @@ -132,7 +132,7 @@ xfs_iformat_fork( if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } ip->i_d.di_size = 0; ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); @@ -153,7 +153,7 @@ xfs_iformat_fork( XFS_CORRUPTION_ERROR("xfs_iformat(4)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } di_size = be64_to_cpu(dip->di_size); @@ -166,7 +166,7 @@ xfs_iformat_fork( XFS_CORRUPTION_ERROR("xfs_iformat(5)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } size = (int)di_size; @@ -181,13 +181,13 @@ xfs_iformat_fork( default: XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } break; default: XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (error) { return error; @@ -211,7 +211,7 @@ xfs_iformat_fork( XFS_CORRUPTION_ERROR("xfs_iformat(8)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); @@ -223,7 +223,7 @@ xfs_iformat_fork( error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); break; default: - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; break; } if (error) { @@ -266,7 +266,7 @@ xfs_iformat_local( XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } ifp = XFS_IFORK_PTR(ip, whichfork); real_size = 0; @@ -322,7 +322,7 @@ xfs_iformat_extents( (unsigned long long) ip->i_ino, nex); XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } ifp->if_real_bytes = 0; @@ -350,7 +350,7 @@ xfs_iformat_extents( XFS_ERROR_REPORT("xfs_iformat_extents(2)", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } } ifp->if_flags |= XFS_IFEXTENTS; @@ -399,7 +399,7 @@ xfs_iformat_btree( (unsigned long long) ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, mp, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } ifp->if_broot_bytes = size; @@ -431,10 +431,12 @@ xfs_iread_extents( xfs_ifork_t *ifp; xfs_extnum_t nextents; + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } nextents = XFS_IFORK_NEXTENTS(ip, whichfork); ifp = XFS_IFORK_PTR(ip, whichfork); @@ -526,7 +528,7 @@ xfs_iroot_realloc( ifp->if_broot_bytes = (int)new_size; ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= XFS_IFORK_SIZE(ip, whichfork)); - memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); + memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t)); return; } @@ -573,7 +575,7 @@ xfs_iroot_realloc( ifp->if_broot_bytes); np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, (int)new_size); - memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); + memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t)); } kmem_free(ifp->if_broot); ifp->if_broot = new_broot; @@ -721,15 +723,16 @@ xfs_idestroy_fork( } /* - * xfs_iextents_copy() + * Convert in-core extents to on-disk form * - * This is called to copy the REAL extents (as opposed to the delayed - * allocation extents) from the inode into the given buffer. It - * returns the number of bytes copied into the buffer. + * For either the data or attr fork in extent format, we need to endian convert + * the in-core extent as we place them into the on-disk inode. * - * If there are no delayed allocation extents, then we can just - * memcpy() the extents into the buffer. Otherwise, we need to - * examine each extent in turn and skip those which are delayed. + * In the case of the data fork, the in-core and on-disk fork sizes can be + * different due to delayed allocation extents. We only copy on-disk extents + * here, so callers must always use the physical fork size to determine the + * size of the buffer passed to this routine. We will return the size actually + * used. */ int xfs_iextents_copy( @@ -795,8 +798,7 @@ xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, xfs_inode_log_item_t *iip, - int whichfork, - xfs_buf_t *bp) + int whichfork) { char *cp; xfs_ifork_t *ifp; @@ -1690,7 +1692,7 @@ xfs_iext_idx_to_irec( } *idxp = page_idx; *erp_idxp = erp_idx; - return(erp); + return erp; } /* diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index eb329a1ea888..7d3b1ed6dcbe 100644 --- a/fs/xfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -127,8 +127,7 @@ typedef struct xfs_ifork { int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, - struct xfs_inode_log_item *, int, - struct xfs_buf *); + struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); void xfs_iroot_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/libxfs/xfs_inum.h index 90efdaf1706f..4ff2278e147a 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/libxfs/xfs_inum.h @@ -54,11 +54,7 @@ struct xfs_mount; #define XFS_OFFBNO_TO_AGINO(mp,b,o) \ ((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o))) -#if XFS_BIG_INUMS #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) -#else -#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) -#endif #define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL)) #endif /* __XFS_INUM_H__ */ diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index f0969c77bdbe..aff12f2d4428 100644 --- a/fs/xfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -380,7 +380,7 @@ typedef struct xfs_icdinode { xfs_ictimestamp_t di_mtime; /* time last modified */ xfs_ictimestamp_t di_ctime; /* time created/inode modified */ xfs_fsize_t di_size; /* number of bytes in file */ - xfs_drfsbno_t di_nblocks; /* # of direct & btree blocks used */ + xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ xfs_extnum_t di_nextents; /* number of extents in data fork */ xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ @@ -516,7 +516,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf) * EFI/EFD log format definitions */ typedef struct xfs_extent { - xfs_dfsbno_t ext_start; + xfs_fsblock_t ext_start; xfs_extlen_t ext_len; } xfs_extent_t; diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 1c55ccbb379d..1c55ccbb379d 100644 --- a/fs/xfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 2af1a0a4d0f1..ee7e0e80246b 100644 --- a/fs/xfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -42,7 +42,7 @@ xfs_log_calc_max_attrsetm_res( int size; int nblks; - size = xfs_attr_leaf_entsize_local_max(mp->m_sb.sb_blocksize) - + size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) - MAXNAMELEN - 1; nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); nblks += XFS_B_TO_FSB(mp, size); diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index b3b2b1065c0f..1b0a08379759 100644 --- a/fs/xfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -98,8 +98,6 @@ typedef __uint16_t xfs_qwarncnt_t; #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ XFS_GQUOTA_ACTIVE | \ XFS_PQUOTA_ACTIVE)) -#define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \ - XFS_PQUOTA_ACTIVE)) #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) #define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) @@ -156,6 +154,6 @@ typedef __uint16_t xfs_qwarncnt_t; extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, xfs_dqid_t id, uint type, uint flags, char *str); -extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks); +extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index b1f2fe8af4a8..f4dd697cac08 100644 --- a/fs/xfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -74,7 +74,6 @@ xfs_rtbuf_get( mp->m_bsize, 0, &bp, NULL); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); *bpp = bp; return 0; } diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index b7c9aea77f8f..ad525a5623a4 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -186,13 +186,13 @@ xfs_mount_validate_sb( */ if (sbp->sb_magicnum != XFS_SB_MAGIC) { xfs_warn(mp, "bad magic number"); - return XFS_ERROR(EWRONGFS); + return -EWRONGFS; } if (!xfs_sb_good_version(sbp)) { xfs_warn(mp, "bad version"); - return XFS_ERROR(EWRONGFS); + return -EWRONGFS; } /* @@ -201,10 +201,6 @@ xfs_mount_validate_sb( * write validation, we don't need to check feature masks. */ if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) { - xfs_alert(mp, -"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n" -"Use of these features in this kernel is at your own risk!"); - if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { xfs_warn(mp, @@ -224,7 +220,7 @@ xfs_mount_validate_sb( xfs_warn(mp, "Attempted to mount read-only compatible filesystem read-write.\n" "Filesystem can only be safely mounted read only."); - return XFS_ERROR(EINVAL); + return -EINVAL; } } if (xfs_sb_has_incompat_feature(sbp, @@ -234,7 +230,7 @@ xfs_mount_validate_sb( "Filesystem can not be safely mounted by this kernel.", (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_UNKNOWN)); - return XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -242,13 +238,13 @@ xfs_mount_validate_sb( if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { xfs_notice(mp, "Version 5 of Super block has XFS_OQUOTA bits."); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { xfs_notice(mp, "Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely( @@ -256,7 +252,7 @@ xfs_mount_validate_sb( xfs_warn(mp, "filesystem is marked as having an external log; " "specify logdev on the mount command line."); - return XFS_ERROR(EINVAL); + return -EINVAL; } if (unlikely( @@ -264,7 +260,7 @@ xfs_mount_validate_sb( xfs_warn(mp, "filesystem is marked as having an internal log; " "do not specify logdev on the mount command line."); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* @@ -288,16 +284,17 @@ xfs_mount_validate_sb( sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodesize != (1 << sbp->sb_inodelog) || + sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || sbp->sb_dblocks == 0 || sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || - sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { - XFS_CORRUPTION_ERROR("SB sanity check failed", - XFS_ERRLEVEL_LOW, mp, sbp); - return XFS_ERROR(EFSCORRUPTED); + sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) || + sbp->sb_shared_vn != 0)) { + xfs_notice(mp, "SB sanity check failed"); + return -EFSCORRUPTED; } /* @@ -308,7 +305,7 @@ xfs_mount_validate_sb( "File system with blocksize %d bytes. " "Only pagesize (%ld) or less will currently work.", sbp->sb_blocksize, PAGE_SIZE); - return XFS_ERROR(ENOSYS); + return -ENOSYS; } /* @@ -323,29 +320,20 @@ xfs_mount_validate_sb( default: xfs_warn(mp, "inode size of %d bytes not supported", sbp->sb_inodesize); - return XFS_ERROR(ENOSYS); + return -ENOSYS; } if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { xfs_warn(mp, "file system too large to be mounted on this system."); - return XFS_ERROR(EFBIG); + return -EFBIG; } if (check_inprogress && sbp->sb_inprogress) { xfs_warn(mp, "Offline file system operation in progress!"); - return XFS_ERROR(EFSCORRUPTED); - } - - /* - * Version 1 directory format has never worked on Linux. - */ - if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { - xfs_warn(mp, "file system using version 1 directory format"); - return XFS_ERROR(ENOSYS); + return -EFSCORRUPTED; } - return 0; } @@ -398,10 +386,11 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) } } -void -xfs_sb_from_disk( +static void +__xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from) + xfs_dsb_t *from, + bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); to->sb_blocksize = be32_to_cpu(from->sb_blocksize); @@ -457,6 +446,17 @@ xfs_sb_from_disk( to->sb_pad = 0; to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_lsn = be64_to_cpu(from->sb_lsn); + /* Convert on-disk flags to in-memory flags? */ + if (convert_xquota) + xfs_sb_quota_from_disk(to); +} + +void +xfs_sb_from_disk( + struct xfs_sb *to, + xfs_dsb_t *from) +{ + __xfs_sb_from_disk(to, from, true); } static inline void @@ -495,10 +495,16 @@ xfs_sb_quota_to_disk( } /* - * GQUOTINO and PQUOTINO cannot be used together in versions - * of superblock that do not have pquotino. from->sb_flags - * tells us which quota is active and should be copied to - * disk. + * GQUOTINO and PQUOTINO cannot be used together in versions of + * superblock that do not have pquotino. from->sb_flags tells us which + * quota is active and should be copied to disk. If neither are active, + * make sure we write NULLFSINO to the sb_gquotino field as a quota + * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature + * bit is set. + * + * Note that we don't need to handle the sb_uquotino or sb_pquotino here + * as they do not require any translation. Hence the main sb field loop + * will write them appropriately from the in-core superblock. */ if ((*fields & XFS_SB_GQUOTINO) && (from->sb_qflags & XFS_GQUOTA_ACCT)) @@ -506,6 +512,17 @@ xfs_sb_quota_to_disk( else if ((*fields & XFS_SB_PQUOTINO) && (from->sb_qflags & XFS_PQUOTA_ACCT)) to->sb_gquotino = cpu_to_be64(from->sb_pquotino); + else { + /* + * We can't rely on just the fields being logged to tell us + * that it is safe to write NULLFSINO - we should only do that + * if quotas are not actually enabled. Hence only write + * NULLFSINO if both in-core quota inodes are NULL. + */ + if (from->sb_gquotino == NULLFSINO && + from->sb_pquotino == NULLFSINO) + to->sb_gquotino = cpu_to_be64(NULLFSINO); + } *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); } @@ -572,7 +589,11 @@ xfs_sb_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_sb sb; - xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); + /* + * Use call variant which doesn't convert quota flags from disk + * format, because xfs_mount_validate_sb checks the on-disk flags. + */ + __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); /* * Only check the in progress field for the primary superblock as @@ -611,12 +632,11 @@ xfs_sb_read_verify( XFS_SB_VERSION_5) || dsb->sb_crc != 0)) { - if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), - offsetof(struct xfs_sb, sb_crc))) { + if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { /* Only fail bad secondaries on a known V5 filesystem */ - if (bp->b_bn != XFS_SB_DADDR && + if (bp->b_bn == XFS_SB_DADDR || xfs_sb_version_hascrc(&mp->m_sb)) { - error = EFSCORRUPTED; + error = -EFSBADCRC; goto out_error; } } @@ -625,10 +645,9 @@ xfs_sb_read_verify( out_error: if (error) { - if (error != EWRONGFS) - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - mp, bp->b_addr); xfs_buf_ioerror(bp, error); + if (error == -EFSCORRUPTED || error == -EFSBADCRC) + xfs_verifier_error(bp); } } @@ -644,14 +663,13 @@ xfs_sb_quiet_read_verify( { struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); - if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ xfs_sb_read_verify(bp); return; } /* quietly fail */ - xfs_buf_ioerror(bp, EWRONGFS); + xfs_buf_ioerror(bp, -EWRONGFS); } static void @@ -664,9 +682,8 @@ xfs_sb_write_verify( error = xfs_sb_verify(bp, false); if (error) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - mp, bp->b_addr); xfs_buf_ioerror(bp, error); + xfs_verifier_error(bp); return; } @@ -676,8 +693,7 @@ xfs_sb_write_verify( if (bip) XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_sb, sb_crc)); + xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); } const struct xfs_buf_ops xfs_sb_buf_ops = { diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 35061d4b614c..2e739708afd3 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -36,8 +36,6 @@ struct xfs_trans; #define XFS_SB_VERSION_5 5 /* CRC enabled filesystem */ #define XFS_SB_VERSION_NUMBITS 0x000f #define XFS_SB_VERSION_ALLFBITS 0xfff0 -#define XFS_SB_VERSION_SASHFBITS 0xf000 -#define XFS_SB_VERSION_REALFBITS 0x0ff0 #define XFS_SB_VERSION_ATTRBIT 0x0010 #define XFS_SB_VERSION_NLINKBIT 0x0020 #define XFS_SB_VERSION_QUOTABIT 0x0040 @@ -50,24 +48,15 @@ struct xfs_trans; #define XFS_SB_VERSION_DIRV2BIT 0x2000 #define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */ #define XFS_SB_VERSION_MOREBITSBIT 0x8000 -#define XFS_SB_VERSION_OKSASHFBITS \ - (XFS_SB_VERSION_EXTFLGBIT | \ - XFS_SB_VERSION_DIRV2BIT | \ - XFS_SB_VERSION_BORGBIT) -#define XFS_SB_VERSION_OKREALFBITS \ - (XFS_SB_VERSION_ATTRBIT | \ - XFS_SB_VERSION_NLINKBIT | \ - XFS_SB_VERSION_QUOTABIT | \ - XFS_SB_VERSION_ALIGNBIT | \ - XFS_SB_VERSION_DALIGNBIT | \ - XFS_SB_VERSION_SHAREDBIT | \ - XFS_SB_VERSION_LOGV2BIT | \ - XFS_SB_VERSION_SECTORBIT | \ - XFS_SB_VERSION_MOREBITSBIT) -#define XFS_SB_VERSION_OKREALBITS \ - (XFS_SB_VERSION_NUMBITS | \ - XFS_SB_VERSION_OKREALFBITS | \ - XFS_SB_VERSION_OKSASHFBITS) + +/* + * Supported feature bit list is just all bits in the versionnum field because + * we've used them all up and understand them all. Except, of course, for the + * shared superblock bit, which nobody knows what it does and so is unsupported. + */ +#define XFS_SB_VERSION_OKBITS \ + ((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \ + ~XFS_SB_VERSION_SHAREDBIT) /* * There are two words to hold XFS "feature" bits: the original @@ -76,7 +65,6 @@ struct xfs_trans; * * These defines represent bits in sb_features2. */ -#define XFS_SB_VERSION2_REALFBITS 0x00ffffff /* Mask: features */ #define XFS_SB_VERSION2_RESERVED1BIT 0x00000001 #define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 @@ -86,16 +74,11 @@ struct xfs_trans; #define XFS_SB_VERSION2_CRCBIT 0x00000100 /* metadata CRCs */ #define XFS_SB_VERSION2_FTYPE 0x00000200 /* inode type in dir */ -#define XFS_SB_VERSION2_OKREALFBITS \ +#define XFS_SB_VERSION2_OKBITS \ (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ XFS_SB_VERSION2_ATTR2BIT | \ XFS_SB_VERSION2_PROJID32BIT | \ XFS_SB_VERSION2_FTYPE) -#define XFS_SB_VERSION2_OKSASHFBITS \ - (0) -#define XFS_SB_VERSION2_OKREALBITS \ - (XFS_SB_VERSION2_OKREALFBITS | \ - XFS_SB_VERSION2_OKSASHFBITS ) /* * Superblock - in core version. Must match the ondisk version below. @@ -104,11 +87,11 @@ struct xfs_trans; typedef struct xfs_sb { __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ __uint32_t sb_blocksize; /* logical block size, bytes */ - xfs_drfsbno_t sb_dblocks; /* number of data blocks */ - xfs_drfsbno_t sb_rblocks; /* number of realtime blocks */ - xfs_drtbno_t sb_rextents; /* number of realtime extents */ + xfs_rfsblock_t sb_dblocks; /* number of data blocks */ + xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ + xfs_rtblock_t sb_rextents; /* number of realtime extents */ uuid_t sb_uuid; /* file system unique id */ - xfs_dfsbno_t sb_logstart; /* starting block of log if internal */ + xfs_fsblock_t sb_logstart; /* starting block of log if internal */ xfs_ino_t sb_rootino; /* root inode number */ xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */ xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */ @@ -182,6 +165,8 @@ typedef struct xfs_sb { /* must be padded to 64 bit alignment */ } xfs_sb_t; +#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) + /* * Superblock - on disk version. Must match the in core version above. * Must be padded to 64 bit alignment. @@ -343,214 +328,140 @@ typedef enum { #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -static inline int xfs_sb_good_version(xfs_sb_t *sbp) -{ - /* We always support version 1-3 */ - if (sbp->sb_versionnum >= XFS_SB_VERSION_1 && - sbp->sb_versionnum <= XFS_SB_VERSION_3) - return 1; - - /* We support version 4 if all feature bits are supported */ - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) { - if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || - ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && - (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) - return 0; - - if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN) - return 0; - return 1; - } - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) - return 1; - - return 0; -} - /* - * Detect a mismatched features2 field. Older kernels read/wrote - * this into the wrong slot, so to be safe we keep them in sync. + * The first XFS version we support is a v4 superblock with V2 directories. */ -static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp) +static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp) { - return (sbp->sb_bad_features2 != sbp->sb_features2); -} - -static inline unsigned xfs_sb_version_tonew(unsigned v) -{ - if (v == XFS_SB_VERSION_1) - return XFS_SB_VERSION_4; - - if (v == XFS_SB_VERSION_2) - return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; + if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) + return false; - return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT | - XFS_SB_VERSION_NLINKBIT; -} + /* check for unknown features in the fs */ + if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || + ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && + (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) + return false; -static inline unsigned xfs_sb_version_toold(unsigned v) -{ - if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) - return 0; - if (v & XFS_SB_VERSION_NLINKBIT) - return XFS_SB_VERSION_3; - if (v & XFS_SB_VERSION_ATTRBIT) - return XFS_SB_VERSION_2; - return XFS_SB_VERSION_1; + return true; } -static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp) +static inline bool xfs_sb_good_version(struct xfs_sb *sbp) { - return sbp->sb_versionnum == XFS_SB_VERSION_2 || - sbp->sb_versionnum == XFS_SB_VERSION_3 || - (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) + return true; + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) + return xfs_sb_good_v4_features(sbp); + return false; } -static inline void xfs_sb_version_addattr(xfs_sb_t *sbp) +/* + * Detect a mismatched features2 field. Older kernels read/wrote + * this into the wrong slot, so to be safe we keep them in sync. + */ +static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) { - if (sbp->sb_versionnum == XFS_SB_VERSION_1) - sbp->sb_versionnum = XFS_SB_VERSION_2; - else if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4) - sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; - else - sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; + return sbp->sb_bad_features2 != sbp->sb_features2; } -static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp) { - return sbp->sb_versionnum == XFS_SB_VERSION_3 || - (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); + return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT); } -static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp) +static inline void xfs_sb_version_addattr(struct xfs_sb *sbp) { - if (sbp->sb_versionnum <= XFS_SB_VERSION_2) - sbp->sb_versionnum = XFS_SB_VERSION_3; - else - sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT; + sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; } -static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); + return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); } -static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) +static inline void xfs_sb_version_addquota(struct xfs_sb *sbp) { - if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4) - sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; - else - sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) | - XFS_SB_VERSION_QUOTABIT; + sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; } -static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && + return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT)); } -static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); -} - -static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); + return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); } -static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) +static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || + (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); } -static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT)); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || + (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); } -static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)); + return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); } -static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); + return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); } -static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); -} - -static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && - (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT)); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || + (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); } /* * sb_features2 bit version macros. - * - * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro: - * - * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp) - * ((xfs_sb_version_hasmorebits(sbp) && - * ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT) */ - -static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) +static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp) { return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || (xfs_sb_version_hasmorebits(sbp) && (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); } -static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp) { return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || (xfs_sb_version_hasmorebits(sbp) && (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)); } -static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) +static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; + sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT; } -static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) +static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) { sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; + sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; if (!sbp->sb_features2) sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; } -static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp) { return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || (xfs_sb_version_hasmorebits(sbp) && (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)); } -static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp) +static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; @@ -585,7 +496,9 @@ xfs_sb_has_compat_feature( return (sbp->sb_features_compat & feature) != 0; } -#define XFS_SB_FEAT_RO_COMPAT_ALL 0 +#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ +#define XFS_SB_FEAT_RO_COMPAT_ALL \ + (XFS_SB_FEAT_RO_COMPAT_FINOBT) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( @@ -621,12 +534,12 @@ xfs_sb_has_incompat_log_feature( /* * V5 superblock specific feature checks */ -static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp) +static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } -static inline int xfs_sb_version_has_pquotino(xfs_sb_t *sbp) +static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } @@ -639,6 +552,12 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp) (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); } +static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) +{ + return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && + (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); +} + /* * end of superblock version macros */ diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 8c5035a13df1..82404da2ca67 100644 --- a/fs/xfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; #define XFS_TRANS_SB_COUNT 41 #define XFS_TRANS_CHECKPOINT 42 #define XFS_TRANS_ICREATE 43 -#define XFS_TRANS_TYPE_MAX 43 +#define XFS_TRANS_CREATE_TMPFILE 44 +#define XFS_TRANS_TYPE_MAX 44 /* new transaction types need to be reflected in xfs_logprint(8) */ #define XFS_TRANS_TYPES \ @@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ { XFS_TRANS_INACTIVE, "INACTIVE" }, \ { XFS_TRANS_CREATE, "CREATE" }, \ + { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ { XFS_TRANS_REMOVE, "REMOVE" }, \ @@ -236,7 +238,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *); int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); -bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, +bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index bf59a2b45f8c..5782f037eab4 100644 --- a/fs/xfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -80,7 +80,6 @@ xfs_symlink_hdr_set( */ bool xfs_symlink_hdr_ok( - struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, uint32_t size, @@ -133,12 +132,13 @@ xfs_symlink_read_verify( if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_dsymlink_hdr, sl_crc)) || - !xfs_symlink_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } + if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) + xfs_buf_ioerror(bp, -EFSBADCRC); + else if (!xfs_symlink_verify(bp)) + xfs_buf_ioerror(bp, -EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -153,8 +153,8 @@ xfs_symlink_write_verify( return; if (!xfs_symlink_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -162,8 +162,7 @@ xfs_symlink_write_verify( struct xfs_dsymlink_hdr *dsl = bp->b_addr; dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); } - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_dsymlink_hdr, sl_crc)); + xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); } const struct xfs_buf_ops xfs_symlink_buf_ops = { diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 2fd59c0dae66..f2bda7c76b8a 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -26,6 +26,7 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap_btree.h" #include "xfs_ialloc.h" @@ -81,20 +82,69 @@ xfs_calc_buf_res( * on disk. Hence we need an inode reservation function that calculates all this * correctly. So, we log: * - * - log op headers for object + * - 4 log op headers for object + * - for the ilf, the inode core and 2 forks * - inode log format object - * - the entire inode contents (core + 2 forks) - * - two bmap btree block headers + * - the inode core + * - two inode forks containing bmap btree root blocks. + * - the btree data contained by both forks will fit into the inode size, + * hence when combined with the inode core above, we have a total of the + * actual inode size. + * - the BMBT headers need to be accounted separately, as they are + * additional to the records and pointers that fit inside the inode + * forks. */ STATIC uint xfs_calc_inode_res( struct xfs_mount *mp, uint ninodes) { - return ninodes * (sizeof(struct xlog_op_header) + - sizeof(struct xfs_inode_log_format) + - mp->m_sb.sb_inodesize + - 2 * XFS_BMBT_BLOCK_LEN(mp)); + return ninodes * + (4 * sizeof(struct xlog_op_header) + + sizeof(struct xfs_inode_log_format) + + mp->m_sb.sb_inodesize + + 2 * XFS_BMBT_BLOCK_LEN(mp)); +} + +/* + * The free inode btree is a conditional feature and the log reservation + * requirements differ slightly from that of the traditional inode allocation + * btree. The finobt tracks records for inode chunks with at least one free + * inode. A record can be removed from the tree for an inode allocation + * or free and thus the finobt reservation is unconditional across: + * + * - inode allocation + * - inode free + * - inode chunk allocation + * + * The 'modify' param indicates to include the record modification scenario. The + * 'alloc' param indicates to include the reservation for free space btree + * modifications on behalf of finobt modifications. This is required only for + * transactions that do not already account for free space btree modifications. + * + * the free inode btree: max depth * block size + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the free inode btree entry: block size + */ +STATIC uint +xfs_calc_finobt_res( + struct xfs_mount *mp, + int alloc, + int modify) +{ + uint res; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return 0; + + res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); + if (alloc) + res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); + if (modify) + res += (uint)XFS_FSB_TO_B(mp, 1); + + return res; } /* @@ -174,7 +224,7 @@ xfs_calc_itruncate_reservation( xfs_calc_buf_res(5, 0) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + + xfs_calc_buf_res(2 + mp->m_ialloc_blks + mp->m_in_maxlevels, 0))); } @@ -204,6 +254,19 @@ xfs_calc_rename_reservation( } /* + * For removing an inode from unlinked list at first, we can modify: + * the agi hash list and counters: sector size + * the on disk inode before ours in the agi hash list: inode cluster size + */ +STATIC uint +xfs_calc_iunlink_remove_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); +} + +/* * For creating a link to an inode: * the parent directory inode: inode size * the linked inode: inode size @@ -220,6 +283,7 @@ xfs_calc_link_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + + xfs_calc_iunlink_remove_reservation(mp) + MAX((xfs_calc_inode_res(mp, 2) + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), @@ -229,6 +293,18 @@ xfs_calc_link_reservation( } /* + * For adding an inode to unlinked list we can modify: + * the agi hash list: sector size + * the unlinked inode: inode size + */ +STATIC uint +xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) +{ + return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_inode_res(mp, 1); +} + +/* * For removing a directory entry we can modify: * the parent directory inode: inode size * the removed inode: inode size @@ -245,10 +321,11 @@ xfs_calc_remove_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((xfs_calc_inode_res(mp, 2) + + xfs_calc_iunlink_add_reservation(mp) + + MAX((xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + + (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), XFS_FSB_TO_B(mp, 1)))); } @@ -267,6 +344,7 @@ xfs_calc_remove_reservation( * the superblock for the nlink flag: sector size * the directory btree: (max depth + v2) * dir block size * the directory inode's bmap btree: (max depth + v2) * block size + * the finobt (record modification and allocation btrees) */ STATIC uint xfs_calc_create_resv_modify( @@ -275,14 +353,15 @@ xfs_calc_create_resv_modify( return xfs_calc_inode_res(mp, 2) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + (uint)XFS_FSB_TO_B(mp, 1) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 1, 1); } /* * For create we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize + * the inode blocks allocated: mp->m_ialloc_blks * blocksize * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size */ @@ -292,7 +371,7 @@ xfs_calc_create_resv_alloc( { return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), XFS_FSB_TO_B(mp, 1)); @@ -313,6 +392,7 @@ __xfs_calc_create_reservation( * the superblock for the nlink flag: sector size * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size + * the finobt (record insertion) */ STATIC uint xfs_calc_icreate_resv_alloc( @@ -322,7 +402,8 @@ xfs_calc_icreate_resv_alloc( mp->m_sb.sb_sectsize + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 0, 0); } STATIC uint @@ -343,6 +424,20 @@ xfs_calc_create_reservation( } +STATIC uint +xfs_calc_create_tmpfile_reservation( + struct xfs_mount *mp) +{ + uint res = XFS_DQUOT_LOGRES(mp); + + if (xfs_sb_version_hascrc(&mp->m_sb)) + res += xfs_calc_icreate_resv_alloc(mp); + else + res += xfs_calc_create_resv_alloc(mp); + + return res + xfs_calc_iunlink_add_reservation(mp); +} + /* * Making a new directory is the same as creating a new file. */ @@ -376,6 +471,7 @@ xfs_calc_symlink_reservation( * the on disk inode before ours in the agi hash list: inode cluster size * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size + * the finobt (record insertion, removal or modification) */ STATIC uint xfs_calc_ifree_reservation( @@ -383,14 +479,15 @@ xfs_calc_ifree_reservation( { return XFS_DQUOT_LOGRES(mp) + xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + - max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + + xfs_calc_iunlink_remove_reservation(mp) + xfs_calc_buf_res(1, 0) + - xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + + xfs_calc_buf_res(2 + mp->m_ialloc_blks + mp->m_in_maxlevels, 0) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)); + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_finobt_res(mp, 0, 1); } /* @@ -513,7 +610,7 @@ xfs_calc_addafork_reservation( return XFS_DQUOT_LOGRES(mp) + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(1, mp->m_dirblksize) + + xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), @@ -644,15 +741,14 @@ xfs_calc_qm_setqlim_reservation( /* * Allocating quota on disk if needed. - * the write transaction log space: M_RES(mp)->tr_write.tr_logres + * the write transaction log space for quota file extent allocation * the unit of quota allocation: one system block size */ STATIC uint xfs_calc_qm_dqalloc_reservation( struct xfs_mount *mp) { - ASSERT(M_RES(mp)->tr_write.tr_logres); - return M_RES(mp)->tr_write.tr_logres + + return xfs_calc_write_reservation(mp) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); } @@ -729,6 +825,11 @@ xfs_trans_resv_calc( resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + resp->tr_create_tmpfile.tr_logres = + xfs_calc_create_tmpfile_reservation(mp); + resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT; + resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES; + resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; @@ -784,7 +885,6 @@ xfs_trans_resv_calc( /* The following transaction are logged in logical format */ resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); - resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp); resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index de7de9aaad8a..1097d14cd583 100644 --- a/fs/xfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h @@ -38,11 +38,11 @@ struct xfs_trans_resv { struct xfs_trans_res tr_remove; /* unlink trans */ struct xfs_trans_res tr_symlink; /* symlink trans */ struct xfs_trans_res tr_create; /* create trans */ + struct xfs_trans_res tr_create_tmpfile; /* create O_TMPFILE trans */ struct xfs_trans_res tr_mkdir; /* mkdir trans */ struct xfs_trans_res tr_ifree; /* inode free trans */ struct xfs_trans_res tr_ichange; /* inode update trans */ struct xfs_trans_res tr_growdata; /* fs data section grow trans */ - struct xfs_trans_res tr_swrite; /* sync write inode trans */ struct xfs_trans_res tr_addafork; /* add inode attr fork trans */ struct xfs_trans_res tr_writeid; /* write setuid/setgid file */ struct xfs_trans_res tr_attrinval; /* attr fork buffer @@ -100,6 +100,7 @@ struct xfs_trans_resv { #define XFS_ITRUNCATE_LOG_COUNT 2 #define XFS_INACTIVE_LOG_COUNT 2 #define XFS_CREATE_LOG_COUNT 2 +#define XFS_CREATE_TMPFILE_LOG_COUNT 2 #define XFS_MKDIR_LOG_COUNT 3 #define XFS_SYMLINK_LOG_COUNT 3 #define XFS_REMOVE_LOG_COUNT 2 diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 7d2c920dfb9c..bf9c4579334d 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -28,7 +28,8 @@ (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ XFS_EXTENTADD_SPACE_RES(mp,w)) -#define XFS_DAENTER_1B(mp,w) ((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1) +#define XFS_DAENTER_1B(mp,w) \ + ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1) #define XFS_DAENTER_DBS(mp,w) \ (XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0)) #define XFS_DAENTER_BLOCKS(mp,w) \ @@ -47,13 +48,15 @@ #define XFS_DIRREMOVE_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ - (XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels - 1) + ((mp)->m_ialloc_blks + \ + (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ + ((mp)->m_in_maxlevels - 1))) /* * Space reservation values for various transactions. */ #define XFS_ADDAFORK_SPACE_RES(mp) \ - ((mp)->m_dirblkfsbs + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)) + ((mp)->m_dir_geo->fsbcount + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)) #define XFS_ATTRRM_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK) /* This macro is not used - see inline code in xfs_attr_set */ @@ -82,5 +85,8 @@ (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) +#define XFS_IFREE_SPACE_RES(mp) \ + (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0) + #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 370eb3e121d1..a65fa5dde6e9 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -124,16 +124,12 @@ struct posix_acl * xfs_get_acl(struct inode *inode, int type) { struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl; + struct posix_acl *acl = NULL; struct xfs_acl *xfs_acl; unsigned char *ea_name; int error; int len; - acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) - return acl; - trace_xfs_get_acl(ip); switch (type) { @@ -156,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type) if (!xfs_acl) return ERR_PTR(-ENOMEM); - error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, + error = xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, &len, ATTR_ROOT); if (error) { /* @@ -164,10 +160,8 @@ xfs_get_acl(struct inode *inode, int type) * cache entry, for any other error assume it is transient and * leave the cache entry as ACL_NOT_CACHED. */ - if (error == -ENOATTR) { - acl = NULL; + if (error == -ENOATTR) goto out_update_cache; - } goto out; } @@ -183,15 +177,12 @@ out: } STATIC int -xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) { struct xfs_inode *ip = XFS_I(inode); unsigned char *ea_name; int error; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - switch (type) { case ACL_TYPE_ACCESS: ea_name = SGI_ACL_FILE; @@ -219,7 +210,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) len -= sizeof(struct xfs_acl_entry) * (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count); - error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, + error = xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, len, ATTR_ROOT); kmem_free(xfs_acl); @@ -227,7 +218,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) /* * A NULL ACL argument means we want to remove the ACL. */ - error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); + error = xfs_attr_remove(ip, ea_name, ATTR_ROOT); /* * If the attribute didn't exist to start with that's fine. @@ -253,7 +244,7 @@ xfs_set_mode(struct inode *inode, umode_t mode) iattr.ia_mode = mode; iattr.ia_ctime = current_fs_time(inode->i_sb); - error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } return error; @@ -282,131 +273,23 @@ posix_acl_default_exists(struct inode *inode) return xfs_acl_exists(inode, SGI_ACL_DEFAULT); } -/* - * No need for i_mutex because the inode is not yet exposed to the VFS. - */ int -xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) +xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - umode_t mode = inode->i_mode; - int error = 0, inherit = 0; - - if (S_ISDIR(inode->i_mode)) { - error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto out; - } - - error = posix_acl_create(&acl, GFP_KERNEL, &mode); - if (error < 0) - return error; - - /* - * If posix_acl_create returns a positive value we need to - * inherit a permission that can't be represented using the Unix - * mode bits and we actually need to set an ACL. - */ - if (error > 0) - inherit = 1; - - error = xfs_set_mode(inode, mode); - if (error) - goto out; - - if (inherit) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); - -out: - posix_acl_release(acl); - return error; -} - -int -xfs_acl_chmod(struct inode *inode) -{ - struct posix_acl *acl; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - - error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); - if (error) - return error; - - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); - posix_acl_release(acl); - return error; -} - -static int -xfs_xattr_acl_get(struct dentry *dentry, const char *name, - void *value, size_t size, int type) -{ - struct posix_acl *acl; - int error; - - acl = xfs_get_acl(dentry->d_inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - - error = posix_acl_to_xattr(&init_user_ns, acl, value, size); - posix_acl_release(acl); - - return error; -} - -static int -xfs_xattr_acl_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - struct inode *inode = dentry->d_inode; - struct posix_acl *acl = NULL; int error = 0; - if (flags & XATTR_CREATE) - return -EINVAL; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if (!inode_owner_or_capable(inode)) - return -EPERM; - - if (!value) + if (!acl) goto set_acl; - acl = posix_acl_from_xattr(&init_user_ns, value, size); - if (!acl) { - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. - */ - goto out; - } - if (IS_ERR(acl)) { - error = PTR_ERR(acl); - goto out; - } - - error = posix_acl_valid(acl); - if (error) - goto out_release; - - error = -EINVAL; + error = -E2BIG; if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) - goto out_release; + return error; if (type == ACL_TYPE_ACCESS) { umode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); if (error <= 0) { - posix_acl_release(acl); acl = NULL; if (error < 0) @@ -415,27 +298,9 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, error = xfs_set_mode(inode, mode); if (error) - goto out_release; + return error; } set_acl: - error = xfs_set_acl(inode, type, acl); - out_release: - posix_acl_release(acl); - out: - return error; + return __xfs_set_acl(inode, type, acl); } - -const struct xattr_handler xfs_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .flags = ACL_TYPE_ACCESS, - .get = xfs_xattr_acl_get, - .set = xfs_xattr_acl_set, -}; - -const struct xattr_handler xfs_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .get = xfs_xattr_acl_get, - .set = xfs_xattr_acl_set, -}; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 4016a567b83c..5dc163744511 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -60,20 +60,15 @@ struct xfs_acl { #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); -extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); -extern int xfs_acl_chmod(struct inode *inode); +extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); extern int posix_acl_access_exists(struct inode *inode); extern int posix_acl_default_exists(struct inode *inode); - -extern const struct xattr_handler xfs_xattr_acl_access_handler; -extern const struct xattr_handler xfs_xattr_acl_default_handler; #else static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) { return NULL; } -# define xfs_inherit_acl(inode, default_acl) 0 -# define xfs_acl_chmod(inode) 0 +# define xfs_set_acl NULL # define posix_acl_access_exists(inode) 0 # define posix_acl_default_exists(inode) 0 #endif /* CONFIG_XFS_POSIX_ACL */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 71c8c9d2b882..b984647c24db 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -240,7 +240,7 @@ xfs_end_io( done: if (error) - ioend->io_error = -error; + ioend->io_error = error; xfs_destroy_ioend(ioend); } @@ -308,14 +308,14 @@ xfs_map_blocks( int nimaps = 1; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; if (type == XFS_IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { if (nonblocking) - return -XFS_ERROR(EAGAIN); + return -EAGAIN; xfs_ilock(ip, XFS_ILOCK_SHARED); } @@ -332,14 +332,14 @@ xfs_map_blocks( xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) - return -XFS_ERROR(error); + return error; if (type == XFS_IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { error = xfs_iomap_write_allocate(ip, offset, imap); if (!error) trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); - return -XFS_ERROR(error); + return error; } #ifdef DEBUG @@ -407,7 +407,7 @@ xfs_alloc_ioend_bio( struct bio *bio = bio_alloc(GFP_NOIO, nvecs); ASSERT(bio->bi_private == NULL); - bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_bdev = bh->b_bdev; return bio; } @@ -502,7 +502,7 @@ xfs_submit_ioend( * time. */ if (fail) { - ioend->io_error = -fail; + ioend->io_error = fail; xfs_finish_ioend(ioend); continue; } @@ -632,38 +632,46 @@ xfs_map_at_offset( } /* - * Test if a given page is suitable for writing as part of an unwritten - * or delayed allocate extent. + * Test if a given page contains at least one buffer of a given @type. + * If @check_all_buffers is true, then we walk all the buffers in the page to + * try to find one of the type passed in. If it is not set, then the caller only + * needs to check the first buffer on the page for a match. */ -STATIC int +STATIC bool xfs_check_page_type( struct page *page, - unsigned int type) + unsigned int type, + bool check_all_buffers) { - if (PageWriteback(page)) - return 0; + struct buffer_head *bh; + struct buffer_head *head; - if (page->mapping && page_has_buffers(page)) { - struct buffer_head *bh, *head; - int acceptable = 0; + if (PageWriteback(page)) + return false; + if (!page->mapping) + return false; + if (!page_has_buffers(page)) + return false; - bh = head = page_buffers(page); - do { - if (buffer_unwritten(bh)) - acceptable += (type == XFS_IO_UNWRITTEN); - else if (buffer_delay(bh)) - acceptable += (type == XFS_IO_DELALLOC); - else if (buffer_dirty(bh) && buffer_mapped(bh)) - acceptable += (type == XFS_IO_OVERWRITE); - else - break; - } while ((bh = bh->b_this_page) != head); + bh = head = page_buffers(page); + do { + if (buffer_unwritten(bh)) { + if (type == XFS_IO_UNWRITTEN) + return true; + } else if (buffer_delay(bh)) { + if (type == XFS_IO_DELALLOC) + return true; + } else if (buffer_dirty(bh) && buffer_mapped(bh)) { + if (type == XFS_IO_OVERWRITE) + return true; + } - if (acceptable) - return 1; - } + /* If we are only checking the first buffer, we are done now. */ + if (!check_all_buffers) + break; + } while ((bh = bh->b_this_page) != head); - return 0; + return false; } /* @@ -697,7 +705,7 @@ xfs_convert_page( goto fail_unlock_page; if (page->mapping != inode->i_mapping) goto fail_unlock_page; - if (!xfs_check_page_type(page, (*ioendp)->io_type)) + if (!xfs_check_page_type(page, (*ioendp)->io_type, false)) goto fail_unlock_page; /* @@ -742,6 +750,15 @@ xfs_convert_page( p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; page_dirty = p_offset / len; + /* + * The moment we find a buffer that doesn't match our current type + * specification or can't be written, abort the loop and start + * writeback. As per the above xfs_imap_valid() check, only + * xfs_vm_writepage() can handle partial page writeback fully - we are + * limited here to the buffers that are contiguous with the current + * ioend, and hence a buffer we can't write breaks that contiguity and + * we have to defer the rest of the IO to xfs_vm_writepage(). + */ bh = head = page_buffers(page); do { if (offset >= end_offset) @@ -750,7 +767,7 @@ xfs_convert_page( uptodate = 0; if (!(PageUptodate(page) || buffer_uptodate(bh))) { done = 1; - continue; + break; } if (buffer_unwritten(bh) || buffer_delay(bh) || @@ -762,10 +779,11 @@ xfs_convert_page( else type = XFS_IO_OVERWRITE; - if (!xfs_imap_valid(inode, imap, offset)) { - done = 1; - continue; - } + /* + * imap should always be valid because of the above + * partial page end_offset check on the imap. + */ + ASSERT(xfs_imap_valid(inode, imap, offset)); lock_buffer(bh); if (type != XFS_IO_OVERWRITE) @@ -777,6 +795,7 @@ xfs_convert_page( count++; } else { done = 1; + break; } } while (offset += len, (bh = bh->b_this_page) != head); @@ -868,7 +887,7 @@ xfs_aops_discard_page( struct buffer_head *bh, *head; loff_t offset = page_offset(page); - if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) + if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true)) goto out_invalidate; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -956,14 +975,39 @@ xfs_vm_writepage( * Given that we do not allow direct reclaim to call us, we should * never be called while in a filesystem transaction. */ - if (WARN_ON(current->flags & PF_FSTRANS)) + if (WARN_ON_ONCE(current->flags & PF_FSTRANS)) goto redirty; /* Is this page beyond the end of the file? */ offset = i_size_read(inode); end_index = offset >> PAGE_CACHE_SHIFT; last_index = (offset - 1) >> PAGE_CACHE_SHIFT; - if (page->index >= end_index) { + + /* + * The page index is less than the end_index, adjust the end_offset + * to the highest offset that this page should represent. + * ----------------------------------------------------- + * | file mapping | <EOF> | + * ----------------------------------------------------- + * | Page ... | Page N-2 | Page N-1 | Page N | | + * ^--------------------------------^----------|-------- + * | desired writeback range | see else | + * ---------------------------------^------------------| + */ + if (page->index < end_index) + end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT; + else { + /* + * Check whether the page to write out is beyond or straddles + * i_size or not. + * ------------------------------------------------------- + * | file mapping | <EOF> | + * ------------------------------------------------------- + * | Page ... | Page N-2 | Page N-1 | Page N | Beyond | + * ^--------------------------------^-----------|--------- + * | | Straddles | + * ---------------------------------^-----------|--------| + */ unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); /* @@ -971,24 +1015,36 @@ xfs_vm_writepage( * truncate operation that is in progress. We must redirty the * page so that reclaim stops reclaiming it. Otherwise * xfs_vm_releasepage() is called on it and gets confused. + * + * Note that the end_index is unsigned long, it would overflow + * if the given offset is greater than 16TB on 32-bit system + * and if we do check the page is fully outside i_size or not + * via "if (page->index >= end_index + 1)" as "end_index + 1" + * will be evaluated to 0. Hence this page will be redirtied + * and be written out repeatedly which would result in an + * infinite loop, the user program that perform this operation + * will hang. Instead, we can verify this situation by checking + * if the page to write is totally beyond the i_size or if it's + * offset is just equal to the EOF. */ - if (page->index >= end_index + 1 || offset_into_page == 0) + if (page->index > end_index || + (page->index == end_index && offset_into_page == 0)) goto redirty; /* * The page straddles i_size. It must be zeroed out on each * and every writepage invocation because it may be mmapped. * "A file is mapped in multiples of the page size. For a file - * that is not a multiple of the page size, the remaining + * that is not a multiple of the page size, the remaining * memory is zeroed when mapped, and writes to that region are * not written out to the file." */ zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE); + + /* Adjust the end_offset to the end of file */ + end_offset = offset; } - end_offset = min_t(unsigned long long, - (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, - offset); len = 1 << inode->i_blkbits; bh = head = page_buffers(page); @@ -1169,9 +1225,9 @@ xfs_vm_releasepage( xfs_count_page_state(page, &delalloc, &unwritten); - if (WARN_ON(delalloc)) + if (WARN_ON_ONCE(delalloc)) return 0; - if (WARN_ON(unwritten)) + if (WARN_ON_ONCE(unwritten)) return 0; return try_to_free_buffers(page); @@ -1197,7 +1253,7 @@ __xfs_get_blocks( int new = 0; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; offset = (xfs_off_t)iblock << inode->i_blkbits; ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); @@ -1217,7 +1273,7 @@ __xfs_get_blocks( lockmode = XFS_ILOCK_EXCL; xfs_ilock(ip, lockmode); } else { - lockmode = xfs_ilock_map_shared(ip); + lockmode = xfs_ilock_data_map_shared(ip); } ASSERT(offset <= mp->m_super->s_maxbytes); @@ -1246,7 +1302,7 @@ __xfs_get_blocks( error = xfs_iomap_write_direct(ip, offset, size, &imap, nimaps); if (error) - return -error; + return error; new = 1; } else { /* @@ -1325,6 +1381,14 @@ __xfs_get_blocks( /* * If this is O_DIRECT or the mpage code calling tell them how large * the mapping is, so that we can avoid repeated get_blocks calls. + * + * If the mapping spans EOF, then we have to break the mapping up as the + * mapping for blocks beyond EOF must be marked new so that sub block + * regions can be correctly zeroed. We can't do this for mappings within + * EOF unless the mapping was just allocated or is unwritten, otherwise + * the callers would overwrite existing data with zeros. Hence we have + * to split the mapping into a range up to and including EOF, and a + * second mapping for beyond EOF. */ if (direct || size > (1 << inode->i_blkbits)) { xfs_off_t mapping_size; @@ -1335,6 +1399,12 @@ __xfs_get_blocks( ASSERT(mapping_size > 0); if (mapping_size > size) mapping_size = size; + if (offset < i_size_read(inode) && + offset + mapping_size >= i_size_read(inode)) { + /* limit mapping to block that spans EOF */ + mapping_size = roundup_64(i_size_read(inode) - offset, + 1 << inode->i_blkbits); + } if (mapping_size > LONG_MAX) mapping_size = LONG_MAX; @@ -1345,7 +1415,7 @@ __xfs_get_blocks( out_unlock: xfs_iunlock(ip, lockmode); - return -error; + return error; } int @@ -1416,9 +1486,8 @@ STATIC ssize_t xfs_vm_direct_IO( int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); @@ -1426,7 +1495,7 @@ xfs_vm_direct_IO( ssize_t ret; if (rw & WRITE) { - size_t size = iov_length(iov, nr_segs); + size_t size = iov_iter_count(iter); /* * We cannot preallocate a size update transaction here as we @@ -1438,16 +1507,15 @@ xfs_vm_direct_IO( if (offset + size > XFS_I(inode)->i_d.di_size) ioend->io_isdirect = 1; - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct_write, NULL, 0); + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, + xfs_end_io_direct_write, NULL, + DIO_ASYNC_EXTEND); if (ret != -EIOCBQUEUED && iocb->private) goto out_destroy_ioend; } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, NULL, NULL, 0); } @@ -1546,6 +1614,16 @@ xfs_vm_write_failed( xfs_vm_kill_delalloc_range(inode, block_offset, block_offset + bh->b_size); + + /* + * This buffer does not contain data anymore. make sure anyone + * who finds it knows that for certain. + */ + clear_buffer_delay(bh); + clear_buffer_uptodate(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_dirty(bh); } } @@ -1579,12 +1657,21 @@ xfs_vm_write_begin( status = __block_write_begin(page, pos, len, xfs_get_blocks); if (unlikely(status)) { struct inode *inode = mapping->host; + size_t isize = i_size_read(inode); xfs_vm_write_failed(inode, page, pos, len); unlock_page(page); - if (pos + len > i_size_read(inode)) - truncate_pagecache(inode, i_size_read(inode)); + /* + * If the write is beyond EOF, we only want to kill blocks + * allocated in this write, not blocks that were previously + * written successfully. + */ + if (pos + len > isize) { + ssize_t start = max_t(ssize_t, pos, isize); + + truncate_pagecache_range(inode, start, pos + len); + } page_cache_release(page); page = NULL; @@ -1595,9 +1682,12 @@ xfs_vm_write_begin( } /* - * On failure, we only need to kill delalloc blocks beyond EOF because they - * will never be written. For blocks within EOF, generic_write_end() zeros them - * so they are safe to leave alone and be written with all the other valid data. + * On failure, we only need to kill delalloc blocks beyond EOF in the range of + * this specific write because they will never be written. Previous writes + * beyond EOF where block allocation succeeded do not need to be trashed, so + * only new blocks from this write should be trashed. For blocks within + * EOF, generic_write_end() zeros them so they are safe to leave alone and be + * written with all the other valid data. */ STATIC int xfs_vm_write_end( @@ -1620,8 +1710,11 @@ xfs_vm_write_end( loff_t to = pos + len; if (to > isize) { - truncate_pagecache(inode, isize); + /* only kill blocks in this write beyond EOF */ + if (pos > isize) + isize = pos; xfs_vm_kill_delalloc_range(inode, isize, to); + truncate_pagecache_range(inode, isize, to); } } return ret; @@ -1660,11 +1753,72 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } +/* + * This is basically a copy of __set_page_dirty_buffers() with one + * small tweak: buffers beyond EOF do not get marked dirty. If we mark them + * dirty, we'll never be able to clean them because we don't write buffers + * beyond EOF, and that means we can't invalidate pages that span EOF + * that have been marked dirty. Further, the dirty state can leak into + * the file interior if the file is extended, resulting in all sorts of + * bad things happening as the state does not match the underlying data. + * + * XXX: this really indicates that bufferheads in XFS need to die. Warts like + * this only exist because of bufferheads and how the generic code manages them. + */ +STATIC int +xfs_vm_set_page_dirty( + struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + loff_t end_offset; + loff_t offset; + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + end_offset = i_size_read(inode); + offset = page_offset(page); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (offset < end_offset) + set_buffer_dirty(bh); + bh = bh->b_this_page; + offset += 1 << inode->i_blkbits; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) { + /* sigh - __set_page_dirty() is static, so copy it here, too */ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return newly_dirty; +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, + .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 09480c57f069..aa2a8b1838a2 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -76,7 +76,7 @@ xfs_attr3_leaf_freextent( error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) { - return(error); + return error; } ASSERT(nmap == 1); ASSERT(map.br_startblock != DELAYSTARTBLOCK); @@ -95,21 +95,21 @@ xfs_attr3_leaf_freextent( dp->i_mount->m_ddev_targp, dblkno, dblkcnt, 0); if (!bp) - return ENOMEM; + return -ENOMEM; xfs_trans_binval(*trans, bp); /* * Roll to next transaction. */ error = xfs_trans_roll(trans, dp); if (error) - return (error); + return error; } tblkno += map.br_blockcount; tblkcnt -= map.br_blockcount; } - return(0); + return 0; } /* @@ -227,7 +227,7 @@ xfs_attr3_node_inactive( */ if (level > XFS_DA_NODE_MAXDEPTH) { xfs_trans_brelse(*trans, bp); /* no locks for later trans */ - return XFS_ERROR(EIO); + return -EIO; } node = bp->b_addr; @@ -256,7 +256,7 @@ xfs_attr3_node_inactive( error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp, XFS_ATTR_FORK); if (error) - return(error); + return error; if (child_bp) { /* save for re-read later */ child_blkno = XFS_BUF_ADDR(child_bp); @@ -277,7 +277,7 @@ xfs_attr3_node_inactive( child_bp); break; default: - error = XFS_ERROR(EIO); + error = -EIO; xfs_trans_brelse(*trans, child_bp); break; } @@ -360,7 +360,7 @@ xfs_attr3_root_inactive( error = xfs_attr3_leaf_inactive(trans, dp, bp); break; default: - error = XFS_ERROR(EIO); + error = -EIO; xfs_trans_brelse(*trans, bp); break; } @@ -414,7 +414,7 @@ xfs_attr_inactive(xfs_inode_t *dp) error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); if (error) { xfs_trans_cancel(trans, 0); - return(error); + return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -443,10 +443,10 @@ xfs_attr_inactive(xfs_inode_t *dp) error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; out: xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; } diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 2d174b128153..62db83ab6cbc 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -50,11 +50,11 @@ xfs_attr_shortform_compare(const void *a, const void *b) sa = (xfs_attr_sf_sort_t *)a; sb = (xfs_attr_sf_sort_t *)b; if (sa->hash < sb->hash) { - return(-1); + return -1; } else if (sa->hash > sb->hash) { - return(1); + return 1; } else { - return(sa->entno - sb->entno); + return sa->entno - sb->entno; } } @@ -86,7 +86,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; ASSERT(sf != NULL); if (!sf->hdr.count) - return(0); + return 0; cursor = context->cursor; ASSERT(cursor != NULL); @@ -124,7 +124,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } trace_xfs_attr_list_sf_all(context); - return(0); + return 0; } /* do no more for a search callback */ @@ -150,7 +150,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) XFS_ERRLEVEL_LOW, context->dp->i_mount, sfe); kmem_free(sbuf); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } sbp->entno = i; @@ -188,7 +188,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } if (i == nsbuf) { kmem_free(sbuf); - return(0); + return 0; } /* @@ -213,7 +213,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } kmem_free(sbuf); - return(0); + return 0; } STATIC int @@ -243,8 +243,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) if (cursor->blkno > 0) { error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); - if ((error != 0) && (error != EFSCORRUPTED)) - return(error); + if ((error != 0) && (error != -EFSCORRUPTED)) + return error; if (bp) { struct xfs_attr_leaf_entry *entries; @@ -295,7 +295,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) cursor->blkno, -1, &bp, XFS_ATTR_FORK); if (error) - return(error); + return error; node = bp->b_addr; magic = be16_to_cpu(node->hdr.info.magic); if (magic == XFS_ATTR_LEAF_MAGIC || @@ -308,7 +308,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) context->dp->i_mount, node); xfs_trans_brelse(NULL, bp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } dp->d_ops->node_hdr_from_disk(&nodehdr, node); @@ -444,9 +444,11 @@ xfs_attr3_leaf_list_int( xfs_da_args_t args; memset((char *)&args, 0, sizeof(args)); + args.geo = context->dp->i_mount->m_attr_geo; args.dp = context->dp; args.whichfork = XFS_ATTR_FORK; args.valuelen = valuelen; + args.rmtvaluelen = valuelen; args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); args.rmtblkno = be32_to_cpu(name_rmt->valueblk); args.rmtblkcnt = xfs_attr3_rmt_blocks( @@ -494,11 +496,11 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) context->cursor->blkno = 0; error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp); if (error) - return XFS_ERROR(error); + return error; error = xfs_attr3_leaf_list_int(bp, context); xfs_trans_brelse(NULL, bp); - return XFS_ERROR(error); + return error; } int @@ -507,17 +509,17 @@ xfs_attr_list_int( { int error; xfs_inode_t *dp = context->dp; + uint lock_mode; XFS_STATS_INC(xs_attr_list); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return EIO; - - xfs_ilock(dp, XFS_ILOCK_SHARED); + return -EIO; /* * Decide on what work routines to call based on the inode size. */ + lock_mode = xfs_ilock_attr_map_shared(dp); if (!xfs_inode_hasattr(dp)) { error = 0; } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { @@ -527,9 +529,7 @@ xfs_attr_list_int( } else { error = xfs_attr_node_list(context); } - - xfs_iunlock(dp, XFS_ILOCK_SHARED); - + xfs_iunlock(dp, lock_mode); return error; } @@ -616,16 +616,16 @@ xfs_attr_list( * Validate the cursor. */ if (cursor->pad1 || cursor->pad2) - return(XFS_ERROR(EINVAL)); + return -EINVAL; if ((cursor->initted == 0) && (cursor->hashval || cursor->blkno || cursor->offset)) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * Check for a properly aligned buffer. */ if (((long)buffer) & (sizeof(int)-1)) - return XFS_ERROR(EFAULT); + return -EFAULT; if (flags & ATTR_KERNOVAL) bufsize = 0; @@ -648,6 +648,6 @@ xfs_attr_list( alist->al_offset[0] = context.bufsize; error = xfs_attr_list_int(&context); - ASSERT(error >= 0); + ASSERT(error <= 0); return error; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5887e41c0323..1707980f9a4b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -133,7 +133,7 @@ xfs_bmap_finish( mp = ntp->t_mountp; if (!XFS_FORCED_SHUTDOWN(mp)) xfs_force_shutdown(mp, - (error == EFSCORRUPTED) ? + (error == -EFSCORRUPTED) ? SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR); return error; @@ -249,48 +249,6 @@ xfs_bmap_rtalloc( } /* - * Stack switching interfaces for allocation - */ -static void -xfs_bmapi_allocate_worker( - struct work_struct *work) -{ - struct xfs_bmalloca *args = container_of(work, - struct xfs_bmalloca, work); - unsigned long pflags; - - /* we are in a transaction context here */ - current_set_flags_nested(&pflags, PF_FSTRANS); - - args->result = __xfs_bmapi_allocate(args); - complete(args->done); - - current_restore_flags_nested(&pflags, PF_FSTRANS); -} - -/* - * Some allocation requests often come in with little stack to work on. Push - * them off to a worker thread so there is lots of stack to use. Otherwise just - * call directly to avoid the context switch overhead here. - */ -int -xfs_bmapi_allocate( - struct xfs_bmalloca *args) -{ - DECLARE_COMPLETION_ONSTACK(done); - - if (!args->stack_switch) - return __xfs_bmapi_allocate(args); - - - args->done = &done; - INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); - queue_work(xfs_alloc_wq, &args->work); - wait_for_completion(&done); - return args->result; -} - -/* * Check if the endoff is outside the last extent. If so the caller will grow * the allocation to a stripe unit boundary. All offsets are considered outside * the end of file for an empty fork, so 1 is returned in *eof in that case. @@ -407,7 +365,7 @@ xfs_bmap_count_tree( xfs_trans_brelse(tp, bp); XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } xfs_trans_brelse(tp, bp); } else { @@ -467,14 +425,14 @@ xfs_bmap_count_blocks( ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); + ASSERT(bno != NULLFSBLOCK); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; @@ -566,13 +524,13 @@ xfs_getbmap( if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EINVAL); + return -EINVAL; } else if (unlikely( ip->i_d.di_aformat != 0 && ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } prealloced = 0; @@ -581,7 +539,7 @@ xfs_getbmap( if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_format != XFS_DINODE_FMT_BTREE && ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EINVAL); + return -EINVAL; if (xfs_get_extsz_hint(ip) || ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ @@ -601,38 +559,43 @@ xfs_getbmap( bmv->bmv_entries = 0; return 0; } else if (bmv->bmv_length < 0) { - return XFS_ERROR(EINVAL); + return -EINVAL; } nex = bmv->bmv_count - 1; if (nex <= 0) - return XFS_ERROR(EINVAL); + return -EINVAL; bmvend = bmv->bmv_offset + bmv->bmv_length; if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) - return XFS_ERROR(ENOMEM); + return -ENOMEM; out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); if (!out) - return XFS_ERROR(ENOMEM); + return -ENOMEM; xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { - if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) { - error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (whichfork == XFS_DATA_FORK) { + if (!(iflags & BMV_IF_DELALLOC) && + (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (error) goto out_unlock_iolock; + + /* + * Even after flushing the inode, there can still be + * delalloc blocks on the inode beyond EOF due to + * speculative preallocation. These are not removed + * until the release function is called or the inode + * is inactivated. Hence we cannot assert here that + * ip->i_delayed_blks == 0. + */ } - /* - * even after flushing the inode, there can still be delalloc - * blocks on the inode beyond EOF due to speculative - * preallocation. These are not removed until the release - * function is called or the inode is inactivated. Hence we - * cannot assert here that ip->i_delayed_blks == 0. - */ - } - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); + } else { + lock = xfs_ilock_attr_map_shared(ip); + } /* * Don't let nex be bigger than the number of extents @@ -648,7 +611,7 @@ xfs_getbmap( /* * Allocate enough space to handle "subnex" maps at a time. */ - error = ENOMEM; + error = -ENOMEM; subnex = 16; map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); if (!map) @@ -737,7 +700,7 @@ xfs_getbmap( out_free_map: kmem_free(map); out_unlock_ilock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -846,7 +809,7 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) * have speculative prealloc/delalloc blocks to remove. */ if (VFS_I(ip)->i_size == 0 && - VN_CACHED(VFS_I(ip)) == 0 && + VFS_I(ip)->i_mapping->nrpages == 0 && ip->i_delayed_blks == 0) return false; @@ -919,7 +882,7 @@ xfs_free_eofblocks( if (need_iolock) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { xfs_trans_cancel(tp, 0); - return EAGAIN; + return -EAGAIN; } } @@ -992,14 +955,14 @@ xfs_alloc_file_space( trace_xfs_alloc_file_space(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; error = xfs_qm_dqattach(ip, 0); if (error) return error; if (len <= 0) - return XFS_ERROR(EINVAL); + return -EINVAL; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); @@ -1065,7 +1028,7 @@ xfs_alloc_file_space( /* * Free the transaction structure. */ - ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); + ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); break; } @@ -1102,7 +1065,7 @@ xfs_alloc_file_space( allocated_fsb = imapp->br_blockcount; if (nimaps == 0) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; break; } @@ -1163,14 +1126,20 @@ xfs_zero_remaining_bytes( mp->m_rtdev_targp : mp->m_ddev_targp, BTOBB(mp->m_sb.sb_blocksize), 0); if (!bp) - return XFS_ERROR(ENOMEM); + return -ENOMEM; xfs_buf_unlock(bp); for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { + uint lock_mode; + offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; + + lock_mode = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); + xfs_iunlock(ip, lock_mode); + if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); @@ -1187,7 +1156,12 @@ xfs_zero_remaining_bytes( XFS_BUF_UNWRITE(bp); XFS_BUF_READ(bp); XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); - xfsbdstrat(mp, bp); + + if (XFS_FORCED_SHUTDOWN(mp)) { + error = -EIO; + break; + } + xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, @@ -1200,7 +1174,12 @@ xfs_zero_remaining_bytes( XFS_BUF_UNDONE(bp); XFS_BUF_UNREAD(bp); XFS_BUF_WRITE(bp); - xfsbdstrat(mp, bp); + + if (XFS_FORCED_SHUTDOWN(mp)) { + error = -EIO; + break; + } + xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, @@ -1255,7 +1234,7 @@ xfs_free_file_space( rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); - error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, -1); if (error) goto out; @@ -1327,7 +1306,6 @@ xfs_free_file_space( * the freeing of the space succeeds at ENOSPC. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); /* @@ -1337,7 +1315,7 @@ xfs_free_file_space( /* * Free the transaction structure. */ - ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); + ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); break; } @@ -1397,6 +1375,8 @@ xfs_zero_file_space( xfs_off_t end_boundary; int error; + trace_xfs_zero_file_space(ip); + granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); /* @@ -1411,9 +1391,18 @@ xfs_zero_file_space( ASSERT(end_boundary <= offset + len); if (start_boundary < end_boundary - 1) { - /* punch out the page cache over the conversion range */ + /* + * punch out delayed allocation blocks and the page cache over + * the conversion range + */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_bmap_punch_delalloc_range(ip, + XFS_B_TO_FSBT(mp, start_boundary), + XFS_B_TO_FSB(mp, end_boundary - start_boundary)); + xfs_iunlock(ip, XFS_ILOCK_EXCL); truncate_pagecache_range(VFS_I(ip), start_boundary, end_boundary - 1); + /* convert the blocks */ error = xfs_alloc_file_space(ip, start_boundary, end_boundary - start_boundary - 1, @@ -1446,6 +1435,120 @@ out: } /* + * xfs_collapse_file_space() + * This routine frees disk space and shift extent for the given file. + * The first thing we do is to free data blocks in the specified range + * by calling xfs_free_file_space(). It would also sync dirty data + * and invalidate page cache over the region on which collapse range + * is working. And Shift extent records to the left to cover a hole. + * RETURNS: + * 0 on success + * errno on error + * + */ +int +xfs_collapse_file_space( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t len) +{ + int done = 0; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + xfs_extnum_t current_ext = 0; + struct xfs_bmap_free free_list; + xfs_fsblock_t first_block; + int committed; + xfs_fileoff_t start_fsb; + xfs_fileoff_t shift_fsb; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + + trace_xfs_collapse_file_space(ip); + + start_fsb = XFS_B_TO_FSB(mp, offset + len); + shift_fsb = XFS_B_TO_FSB(mp, len); + + /* + * Writeback the entire file and force remove any post-eof blocks. The + * writeback prevents changes to the extent list via concurrent + * writeback and the eofblocks trim prevents the extent shift algorithm + * from running into a post-eof delalloc extent. + * + * XXX: This is a temporary fix until the extent shift loop below is + * converted to use offsets and lookups within the ILOCK rather than + * carrying around the index into the extent list for the next + * iteration. + */ + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (error) + return error; + if (xfs_can_free_eofblocks(ip, true)) { + error = xfs_free_eofblocks(mp, ip, false); + if (error) + return error; + } + + error = xfs_free_file_space(ip, offset, len); + if (error) + return error; + + while (!error && !done) { + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + /* + * We would need to reserve permanent block for transaction. + * This will come into picture when after shifting extent into + * hole we found that adjacent extents can be merged which + * may lead to freeing of a block during record update. + */ + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); + if (error) { + xfs_trans_cancel(tp, 0); + break; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, + ip->i_gdquot, ip->i_pdquot, + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + goto out; + + xfs_trans_ijoin(tp, ip, 0); + + xfs_bmap_init(&free_list, &first_block); + + /* + * We are using the write transaction in which max 2 bmbt + * updates are allowed + */ + error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, + shift_fsb, ¤t_ext, + &first_block, &free_list, + XFS_BMAP_MAX_SHIFT_EXTENTS); + if (error) + goto out; + + error = xfs_bmap_finish(&tp, &free_list, &committed); + if (error) + goto out; + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + + return error; + +out: + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + +/* * We need to check that the format of the data fork in the temporary inode is * valid for the target inode before doing the swap. This is not a problem with * attr1 because of the fixed fork offset, but attr2 has a dynamically sized @@ -1474,14 +1577,14 @@ xfs_swap_extents_check_format( /* Should never get a local format */ if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) - return EINVAL; + return -EINVAL; /* * if the target inode has less extents that then temporary inode then * why did userspace call us? */ if (ip->i_d.di_nextents < tip->i_d.di_nextents) - return EINVAL; + return -EINVAL; /* * if the target inode is in extent form and the temp inode is in btree @@ -1490,19 +1593,19 @@ xfs_swap_extents_check_format( */ if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && tip->i_d.di_format == XFS_DINODE_FMT_BTREE) - return EINVAL; + return -EINVAL; /* Check temp in extent form to max in target */ if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) - return EINVAL; + return -EINVAL; /* Check target in extent form to max in temp */ if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) - return EINVAL; + return -EINVAL; /* * If we are in a btree format, check that the temp root block will fit @@ -1516,26 +1619,50 @@ xfs_swap_extents_check_format( if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { if (XFS_IFORK_BOFF(ip) && XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) - return EINVAL; + return -EINVAL; if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) - return EINVAL; + return -EINVAL; } /* Reciprocal target->temp btree format checks */ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { if (XFS_IFORK_BOFF(tip) && XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) - return EINVAL; + return -EINVAL; if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) - return EINVAL; + return -EINVAL; } return 0; } int +xfs_swap_extent_flush( + struct xfs_inode *ip) +{ + int error; + + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (error) + return error; + truncate_pagecache_range(VFS_I(ip), 0, -1); + + /* Verify O_DIRECT for ftmp */ + if (VFS_I(ip)->i_mapping->nrpages) + return -EINVAL; + + /* + * Don't try to swap extents on mmap()d files because we can't lock + * out races against page faults safely. + */ + if (mapping_mapped(VFS_I(ip)->i_mapping)) + return -EBUSY; + return 0; +} + +int xfs_swap_extents( xfs_inode_t *ip, /* target inode */ xfs_inode_t *tip, /* tmp inode */ @@ -1550,51 +1677,57 @@ xfs_swap_extents( int aforkblks = 0; int taforkblks = 0; __uint64_t tmp; + int lock_flags; tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { - error = XFS_ERROR(ENOMEM); + error = -ENOMEM; goto out; } /* - * we have to do two separate lock calls here to keep lockdep - * happy. If we try to get all the locks in one call, lock will - * report false positives when we drop the ILOCK and regain them - * below. + * Lock up the inodes against other IO and truncate to begin with. + * Then we can ensure the inodes are flushed and have no page cache + * safely. Once we have done this we can take the ilocks and do the rest + * of the checks. */ + lock_flags = XFS_IOLOCK_EXCL; xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } /* Verify both files are either real-time or non-realtime */ if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } - error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); + error = xfs_swap_extent_flush(ip); + if (error) + goto out_unlock; + error = xfs_swap_extent_flush(tip); if (error) goto out_unlock; - truncate_pagecache_range(VFS_I(tip), 0, -1); - /* Verify O_DIRECT for ftmp */ - if (VN_CACHED(VFS_I(tip)) != 0) { - error = XFS_ERROR(EINVAL); + tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); goto out_unlock; } + xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); + lock_flags |= XFS_ILOCK_EXCL; /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || sxp->sx_length != ip->i_d.di_size || sxp->sx_length != tip->i_d.di_size) { - error = XFS_ERROR(EFAULT); - goto out_unlock; + error = -EFAULT; + goto out_trans_cancel; } trace_xfs_swap_extent_before(ip, 0); @@ -1606,7 +1739,7 @@ xfs_swap_extents( xfs_notice(mp, "%s: inode 0x%llx format is incompatible for exchanging.", __func__, ip->i_ino); - goto out_unlock; + goto out_trans_cancel; } /* @@ -1620,43 +1753,9 @@ xfs_swap_extents( (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { - error = XFS_ERROR(EBUSY); - goto out_unlock; + error = -EBUSY; + goto out_trans_cancel; } - - /* We need to fail if the file is memory mapped. Once we have tossed - * all existing pages, the page fault will have no option - * but to go to the filesystem for pages. By making the page fault call - * vop_read (or write in the case of autogrow) they block on the iolock - * until we have switched the extents. - */ - if (VN_MAPPED(VFS_I(ip))) { - error = XFS_ERROR(EBUSY); - goto out_unlock; - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_iunlock(tip, XFS_ILOCK_EXCL); - - /* - * There is a race condition here since we gave up the - * ilock. However, the data fork will not change since - * we have the iolock (locked for truncation too) so we - * are safe. We don't really care if non-io related - * fields change. - */ - truncate_pagecache_range(VFS_I(ip), 0, -1); - - tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - xfs_iunlock(tip, XFS_IOLOCK_EXCL); - xfs_trans_cancel(tp, 0); - goto out; - } - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); - /* * Count the number of extended attribute blocks */ @@ -1674,8 +1773,8 @@ xfs_swap_extents( goto out_trans_cancel; } - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_trans_ijoin(tp, ip, lock_flags); + xfs_trans_ijoin(tp, tip, lock_flags); /* * Before we've swapped the forks, lets set the owners of the forks @@ -1804,8 +1903,8 @@ out: return error; out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_iunlock(ip, lock_flags); + xfs_iunlock(tip, lock_flags); goto out; out_trans_cancel: diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 900747b25772..2fdb72d2c908 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -50,12 +50,11 @@ struct xfs_bmalloca { xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ - char eof; /* set if allocating past last extent */ - char wasdel; /* replacing a delayed allocation */ - char userdata;/* set if is user data */ - char aeof; /* allocated space at eof */ - char conv; /* overwriting unwritten extents */ - char stack_switch; + bool eof; /* set if allocating past last extent */ + bool wasdel; /* replacing a delayed allocation */ + bool userdata;/* set if is user data */ + bool aeof; /* allocated space at eof */ + bool conv; /* overwriting unwritten extents */ int flags; struct completion *done; struct work_struct work; @@ -65,8 +64,6 @@ struct xfs_bmalloca { int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, int *committed); int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); -int xfs_bmapi_allocate(struct xfs_bmalloca *args); -int __xfs_bmapi_allocate(struct xfs_bmalloca *args); int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, @@ -99,6 +96,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); +int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, + xfs_off_t len); /* EOF block manipulation functions */ bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c7f0b77dcb00..cd7b8ca9b064 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -130,7 +130,7 @@ xfs_buf_get_maps( bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map), KM_NOFS); if (!bp->b_maps) - return ENOMEM; + return -ENOMEM; return 0; } @@ -216,8 +216,7 @@ _xfs_buf_alloc( STATIC int _xfs_buf_get_pages( xfs_buf_t *bp, - int page_count, - xfs_buf_flags_t flags) + int page_count) { /* Make sure that we have a page list */ if (bp->b_pages == NULL) { @@ -330,7 +329,7 @@ use_alloc_page: end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT; page_count = end - start; - error = _xfs_buf_get_pages(bp, page_count, flags); + error = _xfs_buf_get_pages(bp, page_count); if (unlikely(error)) return error; @@ -345,7 +344,7 @@ retry: if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; - error = ENOMEM; + error = -ENOMEM; goto out_free_pages; } @@ -396,7 +395,17 @@ _xfs_buf_map_pages( bp->b_addr = NULL; } else { int retried = 0; + unsigned noio_flag; + /* + * vm_map_ram() will allocate auxillary structures (e.g. + * pagetables) with GFP_KERNEL, yet we are likely to be under + * GFP_NOFS context here. Hence we need to tell memory reclaim + * that we are in such a context via PF_MEMALLOC_NOIO to prevent + * memory reclaim re-entering the filesystem here and + * potentially deadlocking. + */ + noio_flag = memalloc_noio_save(); do { bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, -1, PAGE_KERNEL); @@ -404,6 +413,7 @@ _xfs_buf_map_pages( break; vm_unmap_aliases(); } while (retried++ <= 1); + memalloc_noio_restore(noio_flag); if (!bp->b_addr) return -ENOMEM; @@ -445,8 +455,8 @@ _xfs_buf_find( numbytes = BBTOB(numblks); /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(numbytes < (1 << btp->bt_sshift))); - ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); + ASSERT(!(numbytes < btp->bt_meta_sectorsize)); + ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask)); /* * Corrupted block numbers can get through to here, unfortunately, so we @@ -455,7 +465,7 @@ _xfs_buf_find( eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); if (blkno >= eofs) { /* - * XXX (dgc): we should really be returning EFSCORRUPTED here, + * XXX (dgc): we should really be returning -EFSCORRUPTED here, * but none of the higher level infrastructure supports * returning a specific error on buffer lookup failures. */ @@ -698,7 +708,11 @@ xfs_buf_read_uncached( bp->b_flags |= XBF_READ; bp->b_ops = ops; - xfsbdstrat(target->bt_mount, bp); + if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { + xfs_buf_relse(bp); + return NULL; + } + xfs_buf_iorequest(bp); xfs_buf_iowait(bp); return bp; } @@ -763,7 +777,7 @@ xfs_buf_associate_memory( bp->b_pages = NULL; bp->b_addr = mem; - rval = _xfs_buf_get_pages(bp, page_count, 0); + rval = _xfs_buf_get_pages(bp, page_count); if (rval) return rval; @@ -796,7 +810,7 @@ xfs_buf_get_uncached( goto fail; page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT; - error = _xfs_buf_get_pages(bp, page_count, 0); + error = _xfs_buf_get_pages(bp, page_count); if (error) goto fail_free_buf; @@ -1038,8 +1052,8 @@ xfs_buf_ioerror( xfs_buf_t *bp, int error) { - ASSERT(error >= 0 && error <= 0xffff); - bp->b_error = (unsigned short)error; + ASSERT(error <= 0 && error >= -1000); + bp->b_error = error; trace_xfs_buf_ioerror(bp, error, _RET_IP_); } @@ -1050,7 +1064,7 @@ xfs_buf_ioerror_alert( { xfs_alert(bp->b_target->bt_mount, "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", - (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); + (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); } /* @@ -1069,7 +1083,7 @@ xfs_bioerror( /* * No need to wait until the buffer is unpinned, we aren't flushing it. */ - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); /* * We're calling xfs_buf_ioend, so delete XBF_DONE flag. @@ -1080,7 +1094,7 @@ xfs_bioerror( xfs_buf_ioend(bp, 0); - return EIO; + return -EIO; } /* @@ -1089,7 +1103,7 @@ xfs_bioerror( * This is meant for userdata errors; metadata bufs come with * iodone functions attached, so that we can track down errors. */ -STATIC int +int xfs_bioerror_relse( struct xfs_buf *bp) { @@ -1113,13 +1127,13 @@ xfs_bioerror_relse( * There's no reason to mark error for * ASYNC buffers. */ - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); complete(&bp->b_iowait); } else { xfs_buf_relse(bp); } - return EIO; + return -EIO; } STATIC int @@ -1152,7 +1166,7 @@ xfs_bwrite( ASSERT(xfs_buf_islocked(bp)); bp->b_flags |= XBF_WRITE; - bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); + bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL); xfs_bdstrat_cb(bp); @@ -1164,25 +1178,6 @@ xfs_bwrite( return error; } -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem. Typically user data goes thru this - * path; one of the exceptions is the superblock. - */ -void -xfsbdstrat( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(mp)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - xfs_bioerror_relse(bp); - return; - } - - xfs_buf_iorequest(bp); -} - STATIC void _xfs_buf_ioend( xfs_buf_t *bp, @@ -1204,7 +1199,7 @@ xfs_buf_bio_end_io( * buffers that require multiple bios to complete. */ if (!bp->b_error) - xfs_buf_ioerror(bp, -error); + xfs_buf_ioerror(bp, error); if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); @@ -1255,7 +1250,7 @@ next_chunk: bio = bio_alloc(GFP_NOIO, nr_pages); bio->bi_bdev = bp->b_target->bt_bdev; - bio->bi_sector = sector; + bio->bi_iter.bi_sector = sector; bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; @@ -1277,7 +1272,7 @@ next_chunk: total_nr_pages--; } - if (likely(bio->bi_size)) { + if (likely(bio->bi_iter.bi_size)) { if (xfs_buf_is_vmapped(bp)) { flush_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); @@ -1291,7 +1286,7 @@ next_chunk: * because the caller (xfs_buf_iorequest) holds a count itself. */ atomic_dec(&bp->b_io_remaining); - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); bio_put(bio); } @@ -1335,6 +1330,20 @@ _xfs_buf_ioapply( SHUTDOWN_CORRUPT_INCORE); return; } + } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { + struct xfs_mount *mp = bp->b_target->bt_mount; + + /* + * non-crc filesystems don't attach verifiers during + * log recovery, so don't warn for such filesystems. + */ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + xfs_warn(mp, + "%s: no ops on block 0x%llx/0x%x", + __func__, bp->b_bn, bp->b_length); + xfs_hex_dump(bp->b_addr, 64); + dump_stack(); + } } } else if (bp->b_flags & XBF_READ_AHEAD) { rw = READA; @@ -1376,21 +1385,29 @@ xfs_buf_iorequest( xfs_buf_wait_unpin(bp); xfs_buf_hold(bp); - /* Set the count to 1 initially, this will stop an I/O + /* + * Set the count to 1 initially, this will stop an I/O * completion callout which happens before we have started * all the I/O from calling xfs_buf_ioend too early. */ atomic_set(&bp->b_io_remaining, 1); _xfs_buf_ioapply(bp); - _xfs_buf_ioend(bp, 1); + /* + * If _xfs_buf_ioapply failed, we'll get back here with + * only the reference we took above. _xfs_buf_ioend will + * drop it to zero, so we'd better not queue it for later, + * or we'll free it before it's done. + */ + _xfs_buf_ioend(bp, bp->b_error ? 0 : 1); xfs_buf_rele(bp); } /* * Waits for I/O to complete on the buffer supplied. It returns immediately if - * no I/O is pending or there is already a pending error on the buffer. It - * returns the I/O error code, if any, or 0 if there was no error. + * no I/O is pending or there is already a pending error on the buffer, in which + * case nothing will ever complete. It returns the I/O error code, if any, or + * 0 if there was no error. */ int xfs_buf_iowait( @@ -1516,6 +1533,12 @@ xfs_wait_buftarg( struct xfs_buf *bp; bp = list_first_entry(&dispose, struct xfs_buf, b_lru); list_del_init(&bp->b_lru); + if (bp->b_flags & XBF_WRITE_FAIL) { + xfs_alert(btp->bt_mount, +"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n" +"Please run xfs_repair to determine the extent of the problem.", + (long long)bp->b_bn); + } xfs_buf_rele(bp); } if (loop++ != 0) @@ -1602,16 +1625,14 @@ xfs_free_buftarg( kmem_free(btp); } -STATIC int -xfs_setsize_buftarg_flags( +int +xfs_setsize_buftarg( xfs_buftarg_t *btp, - unsigned int blocksize, - unsigned int sectorsize, - int verbose) + unsigned int sectorsize) { - btp->bt_bsize = blocksize; - btp->bt_sshift = ffs(sectorsize) - 1; - btp->bt_smask = sectorsize - 1; + /* Set up metadata sector size info */ + btp->bt_meta_sectorsize = sectorsize; + btp->bt_meta_sectormask = sectorsize - 1; if (set_blocksize(btp->bt_bdev, sectorsize)) { char name[BDEVNAME_SIZE]; @@ -1621,41 +1642,33 @@ xfs_setsize_buftarg_flags( xfs_warn(btp->bt_mount, "Cannot set_blocksize to %u on device %s", sectorsize, name); - return EINVAL; + return -EINVAL; } + /* Set up device logical sector size mask */ + btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev); + btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1; + return 0; } /* - * When allocating the initial buffer target we have not yet - * read in the superblock, so don't know what sized sectors - * are being used at this early stage. Play safe. + * When allocating the initial buffer target we have not yet + * read in the superblock, so don't know what sized sectors + * are being used at this early stage. Play safe. */ STATIC int xfs_setsize_buftarg_early( xfs_buftarg_t *btp, struct block_device *bdev) { - return xfs_setsize_buftarg_flags(btp, - PAGE_SIZE, bdev_logical_block_size(bdev), 0); -} - -int -xfs_setsize_buftarg( - xfs_buftarg_t *btp, - unsigned int blocksize, - unsigned int sectorsize) -{ - return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); + return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev)); } xfs_buftarg_t * xfs_alloc_buftarg( struct xfs_mount *mp, - struct block_device *bdev, - int external, - const char *fsname) + struct block_device *bdev) { xfs_buftarg_t *btp; @@ -1799,7 +1812,7 @@ __xfs_buf_delwri_submit( blk_start_plug(&plug); list_for_each_entry_safe(bp, n, io_list, b_list) { - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); + bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); bp->b_flags |= XBF_WRITE; if (!wait) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index e65683361017..c753183900b3 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -45,6 +45,7 @@ typedef enum { #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ +#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ /* I/O hints for the BIO layer */ #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ @@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t; { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ + { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ { XBF_SYNCIO, "SYNCIO" }, \ { XBF_FUA, "FUA" }, \ { XBF_FLUSH, "FLUSH" }, \ @@ -80,19 +82,34 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_DELWRI_Q, "DELWRI_Q" }, \ { _XBF_COMPOUND, "COMPOUND" } + /* * Internal state flags. */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ +/* + * The xfs_buftarg contains 2 notions of "sector size" - + * + * 1) The metadata sector size, which is the minimum unit and + * alignment of IO which will be performed by metadata operations. + * 2) The device logical sector size + * + * The first is specified at mkfs time, and is stored on-disk in the + * superblock's sb_sectsize. + * + * The latter is derived from the underlying device, and controls direct IO + * alignment constraints. + */ typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; struct backing_dev_info *bt_bdi; struct xfs_mount *bt_mount; - unsigned int bt_bsize; - unsigned int bt_sshift; - size_t bt_smask; + unsigned int bt_meta_sectorsize; + size_t bt_meta_sectormask; + size_t bt_logical_sectorsize; + size_t bt_logical_sectormask; /* LRU control structures */ struct shrinker bt_shrinker; @@ -161,7 +178,7 @@ typedef struct xfs_buf { atomic_t b_io_remaining; /* #outstanding I/O requests */ unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ - unsigned short b_error; /* error code on I/O */ + int b_error; /* error code on I/O */ const struct xfs_buf_ops *b_ops; #ifdef XFS_BUF_LOCK_TRACKING @@ -269,9 +286,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_buf *bp); - -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); - extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); @@ -282,10 +296,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, #define xfs_buf_zero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) -static inline int xfs_buf_geterror(xfs_buf_t *bp) -{ - return bp ? bp->b_error : ENOMEM; -} +extern int xfs_bioerror_relse(struct xfs_buf *); /* Buffer Utility Routines */ extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); @@ -301,7 +312,8 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_ZEROFLAGS(bp) \ ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ - XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) + XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \ + XBF_WRITE_FAIL)) void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) @@ -352,14 +364,28 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) xfs_buf_rele(bp); } +static inline int +xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset) +{ + return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), + cksum_offset); +} + +static inline void +xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) +{ + xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), + cksum_offset); +} + /* * Handling of buftargs. */ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, - struct block_device *, int, const char *); + struct block_device *); extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); -extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); +extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int); #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a64f67ba25d3..76007deed31f 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -182,21 +182,47 @@ xfs_buf_item_size( trace_xfs_buf_item_size(bip); } -static struct xfs_log_iovec * +static inline void +xfs_buf_item_copy_iovec( + struct xfs_log_vec *lv, + struct xfs_log_iovec **vecp, + struct xfs_buf *bp, + uint offset, + int first_bit, + uint nbits) +{ + offset += first_bit * XFS_BLF_CHUNK; + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK, + xfs_buf_offset(bp, offset), + nbits * XFS_BLF_CHUNK); +} + +static inline bool +xfs_buf_item_straddle( + struct xfs_buf *bp, + uint offset, + int next_bit, + int last_bit) +{ + return xfs_buf_offset(bp, offset + (next_bit << XFS_BLF_SHIFT)) != + (xfs_buf_offset(bp, offset + (last_bit << XFS_BLF_SHIFT)) + + XFS_BLF_CHUNK); +} + +static void xfs_buf_item_format_segment( struct xfs_buf_log_item *bip, - struct xfs_log_iovec *vecp, + struct xfs_log_vec *lv, + struct xfs_log_iovec **vecp, uint offset, struct xfs_buf_log_format *blfp) { struct xfs_buf *bp = bip->bli_buf; uint base_size; - uint nvecs; int first_bit; int last_bit; int next_bit; uint nbits; - uint buffer_offset; /* copy the flags across from the base format item */ blfp->blf_flags = bip->__bli_format.blf_flags; @@ -208,21 +234,17 @@ xfs_buf_item_format_segment( */ base_size = xfs_buf_log_format_size(blfp); - nvecs = 0; first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) { /* * If the map is not be dirty in the transaction, mark * the size as zero and do not advance the vector pointer. */ - goto out; + return; } - vecp->i_addr = blfp; - vecp->i_len = base_size; - vecp->i_type = XLOG_REG_TYPE_BFORMAT; - vecp++; - nvecs = 1; + blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); + blfp->blf_size = 1; if (bip->bli_flags & XFS_BLI_STALE) { /* @@ -232,14 +254,13 @@ xfs_buf_item_format_segment( */ trace_xfs_buf_item_format_stale(bip); ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); - goto out; + return; } /* * Fill in an iovec for each set of contiguous chunks. */ - last_bit = first_bit; nbits = 1; for (;;) { @@ -252,42 +273,22 @@ xfs_buf_item_format_segment( next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, (uint)last_bit + 1); /* - * If we run out of bits fill in the last iovec and get - * out of the loop. - * Else if we start a new set of bits then fill in the - * iovec for the series we were looking at and start - * counting the bits in the new one. - * Else we're still in the same set of bits so just - * keep counting and scanning. + * If we run out of bits fill in the last iovec and get out of + * the loop. Else if we start a new set of bits then fill in + * the iovec for the series we were looking at and start + * counting the bits in the new one. Else we're still in the + * same set of bits so just keep counting and scanning. */ if (next_bit == -1) { - buffer_offset = offset + first_bit * XFS_BLF_CHUNK; - vecp->i_addr = xfs_buf_offset(bp, buffer_offset); - vecp->i_len = nbits * XFS_BLF_CHUNK; - vecp->i_type = XLOG_REG_TYPE_BCHUNK; - nvecs++; + xfs_buf_item_copy_iovec(lv, vecp, bp, offset, + first_bit, nbits); + blfp->blf_size++; break; - } else if (next_bit != last_bit + 1) { - buffer_offset = offset + first_bit * XFS_BLF_CHUNK; - vecp->i_addr = xfs_buf_offset(bp, buffer_offset); - vecp->i_len = nbits * XFS_BLF_CHUNK; - vecp->i_type = XLOG_REG_TYPE_BCHUNK; - nvecs++; - vecp++; - first_bit = next_bit; - last_bit = next_bit; - nbits = 1; - } else if (xfs_buf_offset(bp, offset + - (next_bit << XFS_BLF_SHIFT)) != - (xfs_buf_offset(bp, offset + - (last_bit << XFS_BLF_SHIFT)) + - XFS_BLF_CHUNK)) { - buffer_offset = offset + first_bit * XFS_BLF_CHUNK; - vecp->i_addr = xfs_buf_offset(bp, buffer_offset); - vecp->i_len = nbits * XFS_BLF_CHUNK; - vecp->i_type = XLOG_REG_TYPE_BCHUNK; - nvecs++; - vecp++; + } else if (next_bit != last_bit + 1 || + xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) { + xfs_buf_item_copy_iovec(lv, vecp, bp, offset, + first_bit, nbits); + blfp->blf_size++; first_bit = next_bit; last_bit = next_bit; nbits = 1; @@ -296,9 +297,6 @@ xfs_buf_item_format_segment( nbits++; } } -out: - blfp->blf_size = nvecs; - return vecp; } /* @@ -310,10 +308,11 @@ out: STATIC void xfs_buf_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) + struct xfs_log_vec *lv) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; + struct xfs_log_iovec *vecp = NULL; uint offset = 0; int i; @@ -354,8 +353,8 @@ xfs_buf_item_format( } for (i = 0; i < bip->bli_format_count; i++) { - vecp = xfs_buf_item_format_segment(bip, vecp, offset, - &bip->bli_formats[i]); + xfs_buf_item_format_segment(bip, lv, &vecp, offset, + &bip->bli_formats[i]); offset += bp->b_maps[i].bm_len; } @@ -489,13 +488,21 @@ xfs_buf_item_unpin( xfs_buf_lock(bp); xfs_buf_hold(bp); bp->b_flags |= XBF_ASYNC; - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); XFS_BUF_UNDONE(bp); xfs_buf_stale(bp); xfs_buf_ioend(bp, 0); } } +/* + * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30 + * seconds so as to not spam logs too much on repeated detection of the same + * buffer being bad.. + */ + +DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); + STATIC uint xfs_buf_item_push( struct xfs_log_item *lip, @@ -524,6 +531,14 @@ xfs_buf_item_push( trace_xfs_buf_item_push(bip); + /* has a previous flush failed due to IO errors? */ + if ((bp->b_flags & XBF_WRITE_FAIL) && + ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { + xfs_warn(bp->b_target->bt_mount, +"Detected failing async write on buffer block 0x%llx. Retrying async write.\n", + (long long)bp->b_bn); + } + if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); @@ -710,7 +725,7 @@ xfs_buf_item_get_format( bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), KM_SLEEP); if (!bip->bli_formats) - return ENOMEM; + return -ENOMEM; return 0; } @@ -781,20 +796,6 @@ xfs_buf_item_init( bip->bli_formats[i].blf_map_size = map_size; } -#ifdef XFS_TRANS_DEBUG - /* - * Allocate the arrays for tracking what needs to be logged - * and what our callers request to be logged. bli_orig - * holds a copy of the original, clean buffer for comparison - * against, and bli_logged keeps a 1 bit flag per byte in - * the buffer to indicate which bytes the callers have asked - * to have logged. - */ - bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP); - memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length)); - bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP); -#endif - /* * Put the buf item into the list of items attached to the * buffer at the front. @@ -811,7 +812,6 @@ xfs_buf_item_init( */ static void xfs_buf_item_log_segment( - struct xfs_buf_log_item *bip, uint first, uint last, uint *map) @@ -919,7 +919,7 @@ xfs_buf_item_log( if (end > last) end = last; - xfs_buf_item_log_segment(bip, first, end, + xfs_buf_item_log_segment(first, end, &bip->bli_formats[i].blf_data_map[0]); start += bp->b_maps[i].bm_len; @@ -942,11 +942,6 @@ STATIC void xfs_buf_item_free( xfs_buf_log_item_t *bip) { -#ifdef XFS_TRANS_DEBUG - kmem_free(bip->bli_orig); - kmem_free(bip->bli_logged); -#endif /* XFS_TRANS_DEBUG */ - xfs_buf_item_free_format(bip); kmem_zone_free(xfs_buf_item_zone, bip); } @@ -1057,7 +1052,7 @@ xfs_buf_iodone_callbacks( static ulong lasttime; static xfs_buftarg_t *lasttarg; - if (likely(!xfs_buf_geterror(bp))) + if (likely(!bp->b_error)) goto do_callbacks; /* @@ -1096,8 +1091,9 @@ xfs_buf_iodone_callbacks( xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ - if (!XFS_BUF_ISSTALE(bp)) { - bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; + if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { + bp->b_flags |= XBF_WRITE | XBF_ASYNC | + XBF_DONE | XBF_WRITE_FAIL; xfs_buf_iorequest(bp); } else { xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index c4e50c6ed584..f1b69edcdf31 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -76,27 +76,26 @@ const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { STATIC int xfs_dir2_sf_getdents( - xfs_inode_t *dp, /* incore directory inode */ + struct xfs_da_args *args, struct dir_context *ctx) { int i; /* shortform entry number */ - xfs_mount_t *mp; /* filesystem mount point */ + struct xfs_inode *dp = args->dp; /* incore directory inode */ xfs_dir2_dataptr_t off; /* current entry's offset */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ xfs_dir2_dataptr_t dot_offset; xfs_dir2_dataptr_t dotdot_offset; xfs_ino_t ino; - - mp = dp->i_mount; + struct xfs_da_geometry *geo = args->geo; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); /* * Give up if the directory is way too short. */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - return XFS_ERROR(EIO); + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return -EIO; } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); @@ -109,18 +108,18 @@ xfs_dir2_sf_getdents( /* * If the block number in the offset is out of range, we're done. */ - if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) + if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk) return 0; /* * Precalculate offsets for . and .. as we will always need them. * * XXX(hch): the second argument is sometimes 0 and sometimes - * mp->m_dirdatablk. + * geo->datablk */ - dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + dot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, dp->d_ops->data_dot_offset); - dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + dotdot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, dp->d_ops->data_dotdot_offset); /* @@ -149,7 +148,7 @@ xfs_dir2_sf_getdents( for (i = 0; i < sfp->count; i++) { __uint8_t filetype; - off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, xfs_dir2_sf_get_offset(sfep)); if (ctx->pos > off) { @@ -161,13 +160,13 @@ xfs_dir2_sf_getdents( filetype = dp->d_ops->sf_get_ftype(sfep); ctx->pos = off & 0x7fffffff; if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino, - xfs_dir3_get_dtype(mp, filetype))) + xfs_dir3_get_dtype(dp->i_mount, filetype))) return 0; sfep = dp->d_ops->sf_nextentry(sfp, sfep); } - ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & - 0x7fffffff; + ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & + 0x7fffffff; return 0; } @@ -176,9 +175,10 @@ xfs_dir2_sf_getdents( */ STATIC int xfs_dir2_block_getdents( - xfs_inode_t *dp, /* incore inode */ + struct xfs_da_args *args, struct dir_context *ctx) { + struct xfs_inode *dp = args->dp; /* incore directory inode */ xfs_dir2_data_hdr_t *hdr; /* block header */ struct xfs_buf *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ @@ -186,16 +186,15 @@ xfs_dir2_block_getdents( xfs_dir2_data_unused_t *dup; /* block unused entry */ char *endptr; /* end of the data entries */ int error; /* error return value */ - xfs_mount_t *mp; /* filesystem mount point */ char *ptr; /* current data entry */ int wantoff; /* starting block offset */ xfs_off_t cook; + struct xfs_da_geometry *geo = args->geo; - mp = dp->i_mount; /* * If the block number in the offset is out of range, we're done. */ - if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) + if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk) return 0; error = xfs_dir3_block_read(NULL, dp, &bp); @@ -206,13 +205,13 @@ xfs_dir2_block_getdents( * Extract the byte offset we start at from the seek pointer. * We'll skip entries before this. */ - wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos); + wantoff = xfs_dir2_dataptr_to_off(geo, ctx->pos); hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); /* * Set up values for the loop. */ - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); @@ -244,7 +243,7 @@ xfs_dir2_block_getdents( if ((char *)dep - (char *)hdr < wantoff) continue; - cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, + cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, (char *)dep - (char *)hdr); ctx->pos = cook & 0x7fffffff; @@ -254,7 +253,7 @@ xfs_dir2_block_getdents( */ if (!dir_emit(ctx, (char *)dep->name, dep->namelen, be64_to_cpu(dep->inumber), - xfs_dir3_get_dtype(mp, filetype))) { + xfs_dir3_get_dtype(dp->i_mount, filetype))) { xfs_trans_brelse(NULL, bp); return 0; } @@ -264,8 +263,8 @@ xfs_dir2_block_getdents( * Reached the end of the block. * Set the offset to a non-existent block 1 and return. */ - ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & - 0x7fffffff; + ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & + 0x7fffffff; xfs_trans_brelse(NULL, bp); return 0; } @@ -286,13 +285,13 @@ struct xfs_dir2_leaf_map_info { STATIC int xfs_dir2_leaf_readbuf( - struct xfs_inode *dp, + struct xfs_da_args *args, size_t bufsize, struct xfs_dir2_leaf_map_info *mip, xfs_dir2_off_t *curoff, struct xfs_buf **bpp) { - struct xfs_mount *mp = dp->i_mount; + struct xfs_inode *dp = args->dp; struct xfs_buf *bp = *bpp; struct xfs_bmbt_irec *map = mip->map; struct blk_plug plug; @@ -300,6 +299,7 @@ xfs_dir2_leaf_readbuf( int length; int i; int j; + struct xfs_da_geometry *geo = args->geo; /* * If we have a buffer, we need to release it and @@ -309,12 +309,12 @@ xfs_dir2_leaf_readbuf( if (bp) { xfs_trans_brelse(NULL, bp); bp = NULL; - mip->map_blocks -= mp->m_dirblkfsbs; + mip->map_blocks -= geo->fsbcount; /* * Loop to get rid of the extents for the * directory block. */ - for (i = mp->m_dirblkfsbs; i > 0; ) { + for (i = geo->fsbcount; i > 0; ) { j = min_t(int, map->br_blockcount, i); map->br_blockcount -= j; map->br_startblock += j; @@ -333,8 +333,7 @@ xfs_dir2_leaf_readbuf( /* * Recalculate the readahead blocks wanted. */ - mip->ra_want = howmany(bufsize + mp->m_dirblksize, - mp->m_sb.sb_blocksize) - 1; + mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1; ASSERT(mip->ra_want >= 0); /* @@ -342,14 +341,14 @@ xfs_dir2_leaf_readbuf( * run out of data blocks, get some more mappings. */ if (1 + mip->ra_want > mip->map_blocks && - mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) { + mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) { /* * Get more bmaps, fill in after the ones * we already have in the table. */ mip->nmap = mip->map_size - mip->map_valid; error = xfs_bmapi_read(dp, mip->map_off, - xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) - + xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) - mip->map_off, &map[mip->map_valid], &mip->nmap, 0); @@ -370,7 +369,7 @@ xfs_dir2_leaf_readbuf( i = mip->map_valid + mip->nmap - 1; mip->map_off = map[i].br_startoff + map[i].br_blockcount; } else - mip->map_off = xfs_dir2_byte_to_da(mp, + mip->map_off = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); /* @@ -396,18 +395,18 @@ xfs_dir2_leaf_readbuf( * No valid mappings, so no more data blocks. */ if (!mip->map_valid) { - *curoff = xfs_dir2_da_to_byte(mp, mip->map_off); + *curoff = xfs_dir2_da_to_byte(geo, mip->map_off); goto out; } /* * Read the directory block starting at the first mapping. */ - mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff); + mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff); error = xfs_dir3_data_read(NULL, dp, map->br_startoff, - map->br_blockcount >= mp->m_dirblkfsbs ? - XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp); - + map->br_blockcount >= geo->fsbcount ? + XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) : + -1, &bp); /* * Should just skip over the data block instead of giving up. */ @@ -419,7 +418,7 @@ xfs_dir2_leaf_readbuf( * was previously ra. */ if (mip->ra_current) - mip->ra_current -= mp->m_dirblkfsbs; + mip->ra_current -= geo->fsbcount; /* * Do we need more readahead? @@ -427,16 +426,16 @@ xfs_dir2_leaf_readbuf( blk_start_plug(&plug); for (mip->ra_index = mip->ra_offset = i = 0; mip->ra_want > mip->ra_current && i < mip->map_blocks; - i += mp->m_dirblkfsbs) { + i += geo->fsbcount) { ASSERT(mip->ra_index < mip->map_valid); /* * Read-ahead a contiguous directory block. */ if (i > mip->ra_current && - map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { - xfs_dir3_data_readahead(NULL, dp, + map[mip->ra_index].br_blockcount >= geo->fsbcount) { + xfs_dir3_data_readahead(dp, map[mip->ra_index].br_startoff + mip->ra_offset, - XFS_FSB_TO_DADDR(mp, + XFS_FSB_TO_DADDR(dp->i_mount, map[mip->ra_index].br_startblock + mip->ra_offset)); mip->ra_current = i; @@ -447,7 +446,7 @@ xfs_dir2_leaf_readbuf( * use our mapping, but this is a very rare case. */ else if (i > mip->ra_current) { - xfs_dir3_data_readahead(NULL, dp, + xfs_dir3_data_readahead(dp, map[mip->ra_index].br_startoff + mip->ra_offset, -1); mip->ra_current = i; @@ -456,15 +455,14 @@ xfs_dir2_leaf_readbuf( /* * Advance offset through the mapping table. */ - for (j = 0; j < mp->m_dirblkfsbs; j++) { + for (j = 0; j < geo->fsbcount; j += length ) { /* * The rest of this extent but not more than a dir * block. */ - length = min_t(int, mp->m_dirblkfsbs, + length = min_t(int, geo->fsbcount, map[mip->ra_index].br_blockcount - mip->ra_offset); - j += length; mip->ra_offset += length; /* @@ -489,22 +487,23 @@ out: */ STATIC int xfs_dir2_leaf_getdents( - xfs_inode_t *dp, /* incore directory inode */ + struct xfs_da_args *args, struct dir_context *ctx, size_t bufsize) { + struct xfs_inode *dp = args->dp; struct xfs_buf *bp = NULL; /* data block buffer */ xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ int error = 0; /* error return value */ int length; /* temporary length value */ - xfs_mount_t *mp; /* filesystem mount point */ int byteoff; /* offset in current block */ xfs_dir2_off_t curoff; /* current overall offset */ xfs_dir2_off_t newoff; /* new curoff after new blk */ char *ptr = NULL; /* pointer to current data */ struct xfs_dir2_leaf_map_info *map_info; + struct xfs_da_geometry *geo = args->geo; /* * If the offset is at or past the largest allowed value, @@ -513,15 +512,12 @@ xfs_dir2_leaf_getdents( if (ctx->pos >= XFS_DIR2_MAX_DATAPTR) return 0; - mp = dp->i_mount; - /* * Set up to bmap a number of blocks based on the caller's * buffer size, the directory block size, and the filesystem * block size. */ - length = howmany(bufsize + mp->m_dirblksize, - mp->m_sb.sb_blocksize); + length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + (length * sizeof(struct xfs_bmbt_irec)), KM_SLEEP | KM_NOFS); @@ -531,14 +527,14 @@ xfs_dir2_leaf_getdents( * Inside the loop we keep the main offset value as a byte offset * in the directory file. */ - curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos); + curoff = xfs_dir2_dataptr_to_byte(ctx->pos); /* * Force this conversion through db so we truncate the offset * down to get the start of the data block. */ - map_info->map_off = xfs_dir2_db_to_da(mp, - xfs_dir2_byte_to_db(mp, curoff)); + map_info->map_off = xfs_dir2_db_to_da(geo, + xfs_dir2_byte_to_db(geo, curoff)); /* * Loop over directory entries until we reach the end offset. @@ -551,9 +547,9 @@ xfs_dir2_leaf_getdents( * If we have no buffer, or we're off the end of the * current buffer, need to get another one. */ - if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) { + if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { - error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info, + error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, &curoff, &bp); if (error || !map_info->map_valid) break; @@ -561,7 +557,8 @@ xfs_dir2_leaf_getdents( /* * Having done a read, we need to set a new offset. */ - newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0); + newoff = xfs_dir2_db_off_to_byte(geo, + map_info->curdb, 0); /* * Start of the current block. */ @@ -571,7 +568,7 @@ xfs_dir2_leaf_getdents( * Make sure we're in the right block. */ else if (curoff > newoff) - ASSERT(xfs_dir2_byte_to_db(mp, curoff) == + ASSERT(xfs_dir2_byte_to_db(geo, curoff) == map_info->curdb); hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); @@ -579,7 +576,7 @@ xfs_dir2_leaf_getdents( * Find our position in the block. */ ptr = (char *)dp->d_ops->data_entry_p(hdr); - byteoff = xfs_dir2_byte_to_off(mp, curoff); + byteoff = xfs_dir2_byte_to_off(geo, curoff); /* * Skip past the header. */ @@ -608,10 +605,10 @@ xfs_dir2_leaf_getdents( * Now set our real offset. */ curoff = - xfs_dir2_db_off_to_byte(mp, - xfs_dir2_byte_to_db(mp, curoff), + xfs_dir2_db_off_to_byte(geo, + xfs_dir2_byte_to_db(geo, curoff), (char *)ptr - (char *)hdr); - if (ptr >= (char *)hdr + mp->m_dirblksize) { + if (ptr >= (char *)hdr + geo->blksize) { continue; } } @@ -635,10 +632,10 @@ xfs_dir2_leaf_getdents( length = dp->d_ops->data_entsize(dep->namelen); filetype = dp->d_ops->data_get_ftype(dep); - ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; + ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; if (!dir_emit(ctx, (char *)dep->name, dep->namelen, be64_to_cpu(dep->inumber), - xfs_dir3_get_dtype(mp, filetype))) + xfs_dir3_get_dtype(dp->i_mount, filetype))) break; /* @@ -653,10 +650,10 @@ xfs_dir2_leaf_getdents( /* * All done. Set output offset value to current offset. */ - if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) + if (curoff > xfs_dir2_dataptr_to_byte(XFS_DIR2_MAX_DATAPTR)) ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; else - ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; + ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; kmem_free(map_info); if (bp) xfs_trans_brelse(NULL, bp); @@ -668,28 +665,36 @@ xfs_dir2_leaf_getdents( */ int xfs_readdir( - xfs_inode_t *dp, - struct dir_context *ctx, - size_t bufsize) + struct xfs_inode *dp, + struct dir_context *ctx, + size_t bufsize) { - int rval; /* return value */ - int v; /* type-checking value */ + struct xfs_da_args args = { NULL }; + int rval; + int v; + uint lock_mode; trace_xfs_readdir(dp); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return XFS_ERROR(EIO); + return -EIO; ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_getdents); + args.dp = dp; + args.geo = dp->i_mount->m_dir_geo; + + lock_mode = xfs_ilock_data_map_shared(dp); if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) - rval = xfs_dir2_sf_getdents(dp, ctx); - else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) + rval = xfs_dir2_sf_getdents(&args, ctx); + else if ((rval = xfs_dir2_isblock(&args, &v))) ; else if (v) - rval = xfs_dir2_block_getdents(dp, ctx); + rval = xfs_dir2_block_getdents(&args, ctx); else - rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize); + rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize); + xfs_iunlock(dp, lock_mode); + return rval; } diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 4f11ef011139..13d08a1b390e 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -124,7 +124,7 @@ xfs_trim_extents( } trace_xfs_discard_extent(mp, agno, fbno, flen); - error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); + error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); if (error) goto out_del_cursor; *blocks_trimmed += flen; @@ -166,11 +166,11 @@ xfs_ioc_trim( int error, last_error = 0; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (!blk_queue_discard(q)) - return -XFS_ERROR(EOPNOTSUPP); + return -EOPNOTSUPP; if (copy_from_user(&range, urange, sizeof(range))) - return -XFS_ERROR(EFAULT); + return -EFAULT; /* * Truncating down the len isn't actually quite correct, but using @@ -182,7 +182,7 @@ xfs_ioc_trim( if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || range.len < mp->m_sb.sb_blocksize) - return -XFS_ERROR(EINVAL); + return -EINVAL; start = BTOBB(range.start); end = start + BTOBBT(range.len) - 1; @@ -195,7 +195,7 @@ xfs_ioc_trim( end_agno = xfs_daddr_to_agno(mp, end); for (agno = start_agno; agno <= end_agno; agno++) { - error = -xfs_trim_extents(mp, agno, start, end, minlen, + error = xfs_trim_extents(mp, agno, start, end, minlen, &blocks_trimmed); if (error) last_error = error; @@ -206,7 +206,7 @@ xfs_ioc_trim( range.len = XFS_FSB_TO_B(mp, blocks_trimmed); if (copy_to_user(urange, &range, sizeof(range))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -222,11 +222,11 @@ xfs_discard_extents( trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, busyp->length); - error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), GFP_NOFS, 0); - if (error && error != EOPNOTSUPP) { + if (error && error != -EOPNOTSUPP) { xfs_info(mp, "discard failed for extent [0x%llu,%u], error %d", (unsigned long long)busyp->bno, diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 6b1e695caf0e..63c2de49f61d 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -327,7 +327,7 @@ xfs_qm_dqalloc( */ if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { xfs_iunlock(quotip, XFS_ILOCK_EXCL); - return (ESRCH); + return -ESRCH; } xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); @@ -353,10 +353,10 @@ xfs_qm_dqalloc( dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0); - - error = xfs_buf_geterror(bp); - if (error) + if (!bp) { + error = -ENOMEM; goto error1; + } bp->b_ops = &xfs_dquot_buf_ops; /* @@ -400,7 +400,7 @@ xfs_qm_dqalloc( error0: xfs_iunlock(quotip, XFS_ILOCK_EXCL); - return (error); + return error; } STATIC int @@ -426,7 +426,7 @@ xfs_qm_dqrepair( if (error) { ASSERT(*bpp == NULL); - return XFS_ERROR(error); + return error; } (*bpp)->b_ops = &xfs_dquot_buf_ops; @@ -442,7 +442,7 @@ xfs_qm_dqrepair( if (error) { /* repair failed, we're screwed */ xfs_trans_brelse(tp, *bpp); - return XFS_ERROR(EIO); + return -EIO; } } @@ -469,17 +469,18 @@ xfs_qm_dqtobp( struct xfs_mount *mp = dqp->q_mount; xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); struct xfs_trans *tp = (tpp ? *tpp : NULL); + uint lock_mode; dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; - xfs_ilock(quotip, XFS_ILOCK_SHARED); + lock_mode = xfs_ilock_data_map_shared(quotip); if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { /* * Return if this type of quotas is turned off while we * didn't have the quota inode lock. */ - xfs_iunlock(quotip, XFS_ILOCK_SHARED); - return ESRCH; + xfs_iunlock(quotip, lock_mode); + return -ESRCH; } /* @@ -488,7 +489,7 @@ xfs_qm_dqtobp( error = xfs_bmapi_read(quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); - xfs_iunlock(quotip, XFS_ILOCK_SHARED); + xfs_iunlock(quotip, lock_mode); if (error) return error; @@ -507,7 +508,7 @@ xfs_qm_dqtobp( * We don't allocate unless we're asked to */ if (!(flags & XFS_QMOPT_DQALLOC)) - return ENOENT; + return -ENOENT; ASSERT(tp); error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, @@ -529,7 +530,7 @@ xfs_qm_dqtobp( mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); - if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { + if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * mp->m_quotainfo->qi_dqperchunk; ASSERT(bp == NULL); @@ -538,7 +539,7 @@ xfs_qm_dqtobp( if (error) { ASSERT(bp == NULL); - return XFS_ERROR(error); + return error; } } @@ -546,7 +547,7 @@ xfs_qm_dqtobp( *O_bpp = bp; *O_ddpp = bp->b_addr + dqp->q_bufoffset; - return (0); + return 0; } @@ -614,7 +615,7 @@ xfs_qm_dqread( if (flags & XFS_QMOPT_DQALLOC) { tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm, + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc, XFS_QM_DQALLOC_SPACE_RES(mp), 0); if (error) goto error1; @@ -714,7 +715,7 @@ xfs_qm_dqget( if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { - return (ESRCH); + return -ESRCH; } #ifdef DEBUG @@ -722,7 +723,7 @@ xfs_qm_dqget( if ((xfs_dqerror_target == mp->m_ddev_targp) && (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { xfs_debug(mp, "Returning error in dqget"); - return (EIO); + return -EIO; } } @@ -795,14 +796,14 @@ restart: } else { /* inode stays locked on return */ xfs_qm_dqdestroy(dqp); - return XFS_ERROR(ESRCH); + return -ESRCH; } } mutex_lock(&qi->qi_tree_lock); - error = -radix_tree_insert(tree, id, dqp); + error = radix_tree_insert(tree, id, dqp); if (unlikely(error)) { - WARN_ON(error != EEXIST); + WARN_ON(error != -EEXIST); /* * Duplicate found. Just throw away the new dquot and start @@ -828,48 +829,7 @@ restart: ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); trace_xfs_dqget_miss(dqp); *O_dqpp = dqp; - return (0); -} - - -STATIC void -xfs_qm_dqput_final( - struct xfs_dquot *dqp) -{ - struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; - struct xfs_dquot *gdqp; - struct xfs_dquot *pdqp; - - trace_xfs_dqput_free(dqp); - - if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) - XFS_STATS_INC(xs_qm_dquot_unused); - - /* - * If we just added a udquot to the freelist, then we want to release - * the gdquot/pdquot reference that it (probably) has. Otherwise it'll - * keep the gdquot/pdquot from getting reclaimed. - */ - gdqp = dqp->q_gdquot; - if (gdqp) { - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - - pdqp = dqp->q_pdquot; - if (pdqp) { - xfs_dqlock(pdqp); - dqp->q_pdquot = NULL; - } - xfs_dqunlock(dqp); - - /* - * If we had a group/project quota hint, release it now. - */ - if (gdqp) - xfs_qm_dqput(gdqp); - if (pdqp) - xfs_qm_dqput(pdqp); + return 0; } /* @@ -887,10 +847,14 @@ xfs_qm_dqput( trace_xfs_dqput(dqp); - if (--dqp->q_nrefs > 0) - xfs_dqunlock(dqp); - else - xfs_qm_dqput_final(dqp); + if (--dqp->q_nrefs == 0) { + struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; + trace_xfs_dqput_free(dqp); + + if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) + XFS_STATS_INC(xs_qm_dquot_unused); + } + xfs_dqunlock(dqp); } /* @@ -1002,7 +966,7 @@ xfs_qm_dqflush( SHUTDOWN_CORRUPT_INCORE); else spin_unlock(&mp->m_ail->xa_lock); - error = XFS_ERROR(EIO); + error = -EIO; goto out_unlock; } @@ -1010,7 +974,8 @@ xfs_qm_dqflush( * Get the buffer containing the on-disk dquot */ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); + mp->m_quotainfo->qi_dqchunklen, 0, &bp, + &xfs_dquot_buf_ops); if (error) goto out_unlock; @@ -1028,7 +993,7 @@ xfs_qm_dqflush( xfs_buf_relse(bp); xfs_dqfunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return XFS_ERROR(EIO); + return -EIO; } /* This is the only portion of data that needs to persist */ @@ -1081,7 +1046,7 @@ xfs_qm_dqflush( out_unlock: xfs_dqfunlock(dqp); - return XFS_ERROR(EIO); + return -EIO; } /* diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index d22ed0053c32..c24c67e22a2a 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -52,8 +52,6 @@ typedef struct xfs_dquot { int q_bufoffset; /* off of dq in buffer (# dquots) */ xfs_fileoff_t q_fileoffset; /* offset in quotas file */ - struct xfs_dquot*q_gdquot; /* group dquot, hint only */ - struct xfs_dquot*q_pdquot; /* project dquot, hint only */ xfs_disk_dquot_t q_core; /* actual usage & quotas */ xfs_dq_logitem_t q_logitem; /* dquot log item */ xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ @@ -141,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) } } +/* + * Check whether a dquot is under low free space conditions. We assume the quota + * is enabled and enforced. + */ +static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp) +{ + int64_t freesp; + + freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount; + if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT]) + return true; + + return false; +} + #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 92e5f62eefc6..f33fbaaa4d8a 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -57,20 +57,24 @@ xfs_qm_dquot_logitem_size( STATIC void xfs_qm_dquot_logitem_format( struct xfs_log_item *lip, - struct xfs_log_iovec *logvec) + struct xfs_log_vec *lv) { struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - - logvec->i_addr = &qlip->qli_format; - logvec->i_len = sizeof(xfs_dq_logformat_t); - logvec->i_type = XLOG_REG_TYPE_QFORMAT; - logvec++; - logvec->i_addr = &qlip->qli_dquot->q_core; - logvec->i_len = sizeof(xfs_disk_dquot_t); - logvec->i_type = XLOG_REG_TYPE_DQUOT; - - qlip->qli_format.qlf_size = 2; - + struct xfs_log_iovec *vecp = NULL; + struct xfs_dq_logformat *qlf; + + qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT); + qlf->qlf_type = XFS_LI_DQUOT; + qlf->qlf_size = 2; + qlf->qlf_id = be32_to_cpu(qlip->qli_dquot->q_core.d_id); + qlf->qlf_blkno = qlip->qli_dquot->q_blkno; + qlf->qlf_len = 1; + qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset; + xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat)); + + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, + &qlip->qli_dquot->q_core, + sizeof(struct xfs_disk_dquot)); } /* @@ -257,18 +261,6 @@ xfs_qm_dquot_logitem_init( xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, &xfs_dquot_item_ops); lp->qli_dquot = dqp; - lp->qli_format.qlf_type = XFS_LI_DQUOT; - lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); - lp->qli_format.qlf_blkno = dqp->q_blkno; - lp->qli_format.qlf_len = 1; - /* - * This is just the offset of this dquot within its buffer - * (which is currently 1 FSB and probably won't change). - * Hence 32 bits for this offset should be just fine. - * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) - * here, and recompute it at recovery time. - */ - lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset; } /*------------------ QUOTAOFF LOG ITEMS -------------------*/ @@ -294,26 +286,20 @@ xfs_qm_qoff_logitem_size( *nbytes += sizeof(struct xfs_qoff_logitem); } -/* - * This is called to fill in the vector of log iovecs for the - * given quotaoff log item. We use only 1 iovec, and we point that - * at the quotaoff_log_format structure embedded in the quotaoff item. - * It is at this point that we assert that all of the extent - * slots in the quotaoff item have been filled. - */ STATIC void xfs_qm_qoff_logitem_format( struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) + struct xfs_log_vec *lv) { struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); - - ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); - - log_vector->i_addr = &qflip->qql_format; - log_vector->i_len = sizeof(xfs_qoff_logitem_t); - log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; - qflip->qql_format.qf_size = 1; + struct xfs_log_iovec *vecp = NULL; + struct xfs_qoff_logformat *qlf; + + qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF); + qlf->qf_type = XFS_LI_QUOTAOFF; + qlf->qf_size = 1; + qlf->qf_flags = qflip->qql_flags; + xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem)); } /* @@ -453,8 +439,7 @@ xfs_qm_qoff_logitem_init( xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); qf->qql_item.li_mountp = mp; - qf->qql_format.qf_type = XFS_LI_QUOTAOFF; - qf->qql_format.qf_flags = flags; qf->qql_start_lip = start; + qf->qql_flags = flags; return qf; } diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 5acae2ada70b..502e9464634a 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -27,13 +27,12 @@ typedef struct xfs_dq_logitem { xfs_log_item_t qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ - xfs_dq_logformat_t qli_format; /* logged structure */ } xfs_dq_logitem_t; typedef struct xfs_qoff_logitem { xfs_log_item_t qql_item; /* common portion */ struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ - xfs_qoff_logformat_t qql_format; /* logged structure */ + unsigned int qql_flags; } xfs_qoff_logitem_t; diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 9995b807d627..b92fd7bc49e3 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -27,29 +27,6 @@ #ifdef DEBUG -int xfs_etrap[XFS_ERROR_NTRAP] = { - 0, -}; - -int -xfs_error_trap(int e) -{ - int i; - - if (!e) - return 0; - for (i = 0; i < XFS_ERROR_NTRAP; i++) { - if (xfs_etrap[i] == 0) - break; - if (e != xfs_etrap[i]) - continue; - xfs_notice(NULL, "%s: error %d", __func__, e); - BUG(); - break; - } - return e; -} - int xfs_etest[XFS_NUM_INJECT_ERROR]; int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; @@ -156,7 +133,7 @@ xfs_error_report( { if (level <= xfs_error_level) { xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, - "Internal error %s at line %d of file %s. Caller 0x%p", + "Internal error %s at line %d of file %s. Caller %pF", tag, linenum, filename, ra); xfs_stack_trace(); @@ -178,3 +155,28 @@ xfs_corruption_error( xfs_error_report(tag, level, mp, filename, linenum, ra); xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); } + +/* + * Warnings specifically for verifier errors. Differentiate CRC vs. invalid + * values, and omit the stack trace unless the error level is tuned high. + */ +void +xfs_verifier_error( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx", + bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", + __return_address, bp->b_bn); + + xfs_alert(mp, "Unmount and run xfs_repair"); + + if (xfs_error_level >= XFS_ERRLEVEL_LOW) { + xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); + xfs_hex_dump(xfs_buf_offset(bp, 0), 64); + } + + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) + xfs_stack_trace(); +} diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 079a367f44ee..279a76e52791 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -18,15 +18,6 @@ #ifndef __XFS_ERROR_H__ #define __XFS_ERROR_H__ -#ifdef DEBUG -#define XFS_ERROR_NTRAP 10 -extern int xfs_etrap[XFS_ERROR_NTRAP]; -extern int xfs_error_trap(int); -#define XFS_ERROR(e) xfs_error_trap(e) -#else -#define XFS_ERROR(e) (e) -#endif - struct xfs_mount; extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, @@ -34,6 +25,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, extern void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp, void *p, const char *filename, int linenum, inst_t *ra); +extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_ERROR_REPORT(e, lvl, mp) \ xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) @@ -55,7 +47,7 @@ extern void xfs_corruption_error(const char *tag, int level, if (unlikely(!fs_is_ok)) { \ XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ XFS_ERRLEVEL_LOW, NULL); \ - error = XFS_ERROR(EFSCORRUPTED); \ + error = -EFSCORRUPTED; \ goto l; \ } \ } @@ -67,7 +59,7 @@ extern void xfs_corruption_error(const char *tag, int level, if (unlikely(!fs_is_ok)) { \ XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ XFS_ERRLEVEL_LOW, NULL); \ - return XFS_ERROR(EFSCORRUPTED); \ + return -EFSCORRUPTED; \ } \ } diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 1399e187d425..5a6bd5d8779a 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -147,9 +147,9 @@ xfs_nfs_get_inode( * We don't use ESTALE directly down the chain to not * confuse applications using bulkstat that expect EINVAL. */ - if (error == EINVAL || error == ENOENT) - error = ESTALE; - return ERR_PTR(-error); + if (error == -EINVAL || error == -ENOENT) + error = -ESTALE; + return ERR_PTR(error); } if (ip->i_d.di_gen != generation) { @@ -217,7 +217,7 @@ xfs_fs_get_parent( error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); if (unlikely(error)) - return ERR_PTR(-error); + return ERR_PTR(error); return d_obtain_alias(VFS_I(cip)); } diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3680d04f973f..c4327419dc5c 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -26,6 +26,7 @@ #include "xfs_trans_priv.h" #include "xfs_buf_item.h" #include "xfs_extfree_item.h" +#include "xfs_log.h" kmem_zone_t *xfs_efi_zone; @@ -101,9 +102,10 @@ xfs_efi_item_size( STATIC void xfs_efi_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) + struct xfs_log_vec *lv) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&efip->efi_next_extent) == efip->efi_format.efi_nextents); @@ -111,10 +113,9 @@ xfs_efi_item_format( efip->efi_format.efi_type = XFS_LI_EFI; efip->efi_format.efi_size = 1; - log_vector->i_addr = &efip->efi_format; - log_vector->i_len = xfs_efi_item_sizeof(efip); - log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; - ASSERT(log_vector->i_len >= sizeof(xfs_efi_log_format_t)); + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, + &efip->efi_format, + xfs_efi_item_sizeof(efip)); } @@ -297,7 +298,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } - return EFSCORRUPTED; + return -EFSCORRUPTED; } /* @@ -368,19 +369,19 @@ xfs_efd_item_size( STATIC void xfs_efd_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) + struct xfs_log_vec *lv) { struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); efdp->efd_format.efd_type = XFS_LI_EFD; efdp->efd_format.efd_size = 1; - log_vector->i_addr = &efdp->efd_format; - log_vector->i_len = xfs_efd_item_sizeof(efdp); - log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; - ASSERT(log_vector->i_len >= sizeof(xfs_efd_log_format_t)); + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, + &efdp->efd_format, + xfs_efd_item_sizeof(efdp)); } /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 52c91e143725..de5368c803f9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -38,6 +38,7 @@ #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_dinode.h" +#include "xfs_icache.h" #include <linux/aio.h> #include <linux/dcache.h> @@ -179,7 +180,7 @@ xfs_file_fsync( return error; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; xfs_iflags_clear(ip, XFS_ITRUNCATED); @@ -225,46 +226,40 @@ xfs_file_fsync( !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); - return -error; + return error; } STATIC ssize_t -xfs_file_aio_read( +xfs_file_read_iter( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) + struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - size_t size = 0; + size_t size = iov_iter_count(to); ssize_t ret = 0; int ioflags = 0; xfs_fsize_t n; + loff_t pos = iocb->ki_pos; XFS_STATS_INC(xs_read_calls); - BUG_ON(iocb->ki_pos != pos); - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; + ioflags |= XFS_IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; - ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); - if (ret < 0) - return ret; - - if (unlikely(ioflags & IO_ISDIRECT)) { + if (unlikely(ioflags & XFS_IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->bt_smask) || (size & target->bt_smask)) { + /* DIO must be aligned to device logical sector size */ + if ((pos | size) & target->bt_logical_sectormask) { if (pos == i_size_read(inode)) return 0; - return -XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -289,26 +284,36 @@ xfs_file_aio_read( * proceeed concurrently without serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { + if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { - ret = -filemap_write_and_wait_range( + ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + size - 1); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + size - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } trace_xfs_file_read(ip, size, pos, ioflags); - ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); + ret = generic_file_read_iter(iocb, to); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -331,7 +336,7 @@ xfs_file_splice_read( XFS_STATS_INC(xs_read_calls); if (infilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; @@ -349,47 +354,6 @@ xfs_file_splice_read( } /* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t count, - unsigned int flags) -{ - struct inode *inode = outfilp->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_write_calls); - - if (outfilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - - ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); - if (ret > 0) - XFS_STATS_ADD(xs_write_bytes, ret); - - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; -} - -/* * This routine is called to handle zeroing any space in the last block of the * file that is beyond the EOF. We do this since the size is being increased * without writing anything to that block and we don't want to read the @@ -571,7 +535,7 @@ restart: xfs_rw_ilock(ip, *iolock); goto restart; } - error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); + error = xfs_zero_eof(ip, *pos, i_size_read(inode)); if (error) return error; } @@ -624,10 +588,7 @@ restart: STATIC ssize_t xfs_file_dio_aio_write( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -635,15 +596,18 @@ xfs_file_dio_aio_write( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; - size_t count = ocount; int unaligned_io = 0; int iolock; + size_t count = iov_iter_count(from); + loff_t pos = iocb->ki_pos; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->bt_smask) || (count & target->bt_smask)) - return -XFS_ERROR(EINVAL); + /* DIO must be aligned to device logical sector size */ + if ((pos | count) & target->bt_logical_sectormask) + return -EINVAL; + /* "unaligned" here means not aligned to a filesystem block */ if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) unaligned_io = 1; @@ -674,13 +638,23 @@ xfs_file_dio_aio_write( ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); if (ret) goto out; + iov_iter_truncate(from, count); if (mapping->nrpages) { - ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, -1); + ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + pos, pos + count - 1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + count - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } /* @@ -695,8 +669,7 @@ xfs_file_dio_aio_write( } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, &iocb->ki_pos, count, ocount); + ret = generic_file_direct_write(iocb, from, pos); out: xfs_rw_iunlock(ip, iolock); @@ -709,10 +682,7 @@ out: STATIC ssize_t xfs_file_buffered_aio_write( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -721,7 +691,8 @@ xfs_file_buffered_aio_write( ssize_t ret; int enospc = 0; int iolock = XFS_IOLOCK_EXCL; - size_t count = ocount; + loff_t pos = iocb->ki_pos; + size_t count = iov_iter_count(from); xfs_rw_ilock(ip, iolock); @@ -729,22 +700,37 @@ xfs_file_buffered_aio_write( if (ret) goto out; + iov_iter_truncate(from, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_buffered_write(iocb, iovp, nr_segs, - pos, &iocb->ki_pos, count, 0); + ret = generic_perform_write(file, from, pos); + if (likely(ret >= 0)) + iocb->ki_pos = pos + ret; /* - * If we just got an ENOSPC, try to write back all dirty inodes to - * convert delalloc space to free up some of the excess reserved - * metadata space. + * If we hit a space limit, try to free up some lingering preallocated + * space before returning an error. In the case of ENOSPC, first try to + * write back all dirty inodes to free up some of the excess reserved + * metadata space. This reduces the chances that the eofblocks scan + * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this + * also behaves as a filter to prevent too many eofblocks scans from + * running at the same time. */ - if (ret == -ENOSPC && !enospc) { + if (ret == -EDQUOT && !enospc) { + enospc = xfs_inode_free_quota_eofblocks(ip); + if (enospc) + goto write_retry; + } else if (ret == -ENOSPC && !enospc) { + struct xfs_eofblocks eofb = {0}; + enospc = 1; xfs_flush_inodes(ip->i_mount); + eofb.eof_scan_owner = ip->i_ino; /* for locking */ + eofb.eof_flags = XFS_EOF_FLAGS_SYNC; + xfs_icache_free_eofblocks(ip->i_mount, &eofb); goto write_retry; } @@ -755,40 +741,29 @@ out: } STATIC ssize_t -xfs_file_aio_write( +xfs_file_write_iter( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); ssize_t ret; - size_t ocount = 0; + size_t ocount = iov_iter_count(from); XFS_STATS_INC(xs_write_calls); - BUG_ON(iocb->ki_pos != pos); - - ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - if (ocount == 0) return 0; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - ret = -EIO; - goto out; - } + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; if (unlikely(file->f_flags & O_DIRECT)) - ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); + ret = xfs_file_dio_aio_write(iocb, from); else - ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, - ocount); + ret = xfs_file_buffered_aio_write(iocb, from); if (ret > 0) { ssize_t err; @@ -796,12 +771,10 @@ xfs_file_aio_write( XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } - -out: return ret; } @@ -820,7 +793,8 @@ xfs_file_fallocate( if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; xfs_ilock(ip, XFS_IOLOCK_EXCL); @@ -828,17 +802,42 @@ xfs_file_fallocate( error = xfs_free_file_space(ip, offset, len); if (error) goto out_unlock; + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + + if (offset & blksize_mask || len & blksize_mask) { + error = -EINVAL; + goto out_unlock; + } + + /* + * There is no need to overlap collapse range with EOF, + * in which case it is effectively a truncate operation + */ + if (offset + len >= i_size_read(inode)) { + error = -EINVAL; + goto out_unlock; + } + + new_size = i_size_read(inode) - len; + + error = xfs_collapse_file_space(ip, offset, len); + if (error) + goto out_unlock; } else { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; - error = -inode_newsize_ok(inode, new_size); + error = inode_newsize_ok(inode, new_size); if (error) goto out_unlock; } - error = xfs_alloc_file_space(ip, offset, len, - XFS_BMAPI_PREALLOC); + if (mode & FALLOC_FL_ZERO_RANGE) + error = xfs_zero_file_space(ip, offset, len); + else + error = xfs_alloc_file_space(ip, offset, len, + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } @@ -856,7 +855,7 @@ xfs_file_fallocate( if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; - if (!(mode & FALLOC_FL_PUNCH_HOLE)) + if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -879,7 +878,7 @@ xfs_file_fallocate( out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return -error; + return error; } @@ -912,9 +911,9 @@ xfs_dir_open( * If there are any blocks, read-ahead block 0 as we're almost * certain to have the next operation be a read there. */ - mode = xfs_ilock_map_shared(ip); + mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - xfs_dir3_data_readahead(NULL, ip, 0, -1); + xfs_dir3_data_readahead(ip, 0, -1); xfs_iunlock(ip, mode); return 0; } @@ -924,7 +923,7 @@ xfs_file_release( struct inode *inode, struct file *filp) { - return -xfs_release(XFS_I(inode)); + return xfs_release(XFS_I(inode)); } STATIC int @@ -953,7 +952,7 @@ xfs_file_readdir( error = xfs_readdir(ip, ctx, bufsize); if (error) - return -error; + return error; return 0; } @@ -1215,11 +1214,11 @@ xfs_seek_data( uint lock; int error; - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1241,7 +1240,7 @@ xfs_seek_data( /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1272,7 +1271,7 @@ xfs_seek_data( * we are reading after EOF if nothing in map[1]. */ if (nmap == 1) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1285,7 +1284,7 @@ xfs_seek_data( fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; start = XFS_FSB_TO_B(mp, fsbno); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } } @@ -1294,10 +1293,10 @@ out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); if (error) - return -error; + return error; return offset; } @@ -1317,13 +1316,13 @@ xfs_seek_hole( int error; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1342,7 +1341,7 @@ xfs_seek_hole( /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1402,10 +1401,10 @@ out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); if (error) - return -error; + return error; return offset; } @@ -1431,12 +1430,12 @@ xfs_file_llseek( const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read, - .aio_write = xfs_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = xfs_file_read_iter, + .write_iter = xfs_file_write_iter, .splice_read = xfs_file_splice_read, - .splice_write = xfs_file_splice_write, + .splice_write = iter_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, @@ -1462,6 +1461,7 @@ const struct file_operations xfs_dir_file_operations = { static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = xfs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 12b6e7701985..e92730c1d3ca 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006-2007 Silicon Graphics, Inc. + * Copyright (c) 2014 Christoph Hellwig. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -32,100 +33,20 @@ #include "xfs_filestream.h" #include "xfs_trace.h" -#ifdef XFS_FILESTREAMS_TRACE - -ktrace_t *xfs_filestreams_trace_buf; - -STATIC void -xfs_filestreams_trace( - xfs_mount_t *mp, /* mount point */ - int type, /* type of trace */ - const char *func, /* source function */ - int line, /* source line number */ - __psunsigned_t arg0, - __psunsigned_t arg1, - __psunsigned_t arg2, - __psunsigned_t arg3, - __psunsigned_t arg4, - __psunsigned_t arg5) -{ - ktrace_enter(xfs_filestreams_trace_buf, - (void *)(__psint_t)(type | (line << 16)), - (void *)func, - (void *)(__psunsigned_t)current_pid(), - (void *)mp, - (void *)(__psunsigned_t)arg0, - (void *)(__psunsigned_t)arg1, - (void *)(__psunsigned_t)arg2, - (void *)(__psunsigned_t)arg3, - (void *)(__psunsigned_t)arg4, - (void *)(__psunsigned_t)arg5, - NULL, NULL, NULL, NULL, NULL, NULL); -} - -#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0) -#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0) -#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0) -#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0) -#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) -#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) -#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ - xfs_filestreams_trace(mp, t, __func__, __LINE__, \ - (__psunsigned_t)a0, (__psunsigned_t)a1, \ - (__psunsigned_t)a2, (__psunsigned_t)a3, \ - (__psunsigned_t)a4, (__psunsigned_t)a5) - -#define TRACE_AG_SCAN(mp, ag, ag2) \ - TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2); -#define TRACE_AG_PICK1(mp, max_ag, maxfree) \ - TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree); -#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \ - TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \ - cnt, free, scan, flag) -#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \ - TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2) -#define TRACE_FREE(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt) -#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt) -#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \ - TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt) -#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \ - TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt) -#define TRACE_ORPHAN(mp, ip, ag) \ - TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag); - - -#else -#define TRACE_AG_SCAN(mp, ag, ag2) -#define TRACE_AG_PICK1(mp, max_ag, maxfree) -#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) -#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) -#define TRACE_FREE(mp, ip, pip, ag, cnt) -#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) -#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) -#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) -#define TRACE_ORPHAN(mp, ip, ag) -#endif - -static kmem_zone_t *item_zone; +struct xfs_fstrm_item { + struct xfs_mru_cache_elem mru; + struct xfs_inode *ip; + xfs_agnumber_t ag; /* AG in use for this directory */ +}; -/* - * Structure for associating a file or a directory with an allocation group. - * The parent directory pointer is only needed for files, but since there will - * generally be vastly more files than directories in the cache, using the same - * data structure simplifies the code with very little memory overhead. - */ -typedef struct fstrm_item -{ - xfs_agnumber_t ag; /* AG currently in use for the file/directory. */ - xfs_inode_t *ip; /* inode self-pointer. */ - xfs_inode_t *pip; /* Parent directory inode pointer. */ -} fstrm_item_t; +enum xfs_fstrm_alloc { + XFS_PICK_USERDATA = 1, + XFS_PICK_LOWSPACE = 2, +}; /* * Allocation group filestream associations are tracked with per-ag atomic - * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a + * counters. These counters allow xfs_filestream_pick_ag() to tell whether a * particular AG already has active filestreams associated with it. The mount * point's m_peraglock is used to protect these counters from per-ag array * re-allocation during a growfs operation. When xfs_growfs_data_private() is @@ -160,7 +81,7 @@ typedef struct fstrm_item * the cache that reference per-ag array elements that have since been * reallocated. */ -static int +int xfs_filestream_peek_ag( xfs_mount_t *mp, xfs_agnumber_t agno) @@ -200,23 +121,40 @@ xfs_filestream_put_ag( xfs_perag_put(pag); } +static void +xfs_fstrm_free_func( + struct xfs_mru_cache_elem *mru) +{ + struct xfs_fstrm_item *item = + container_of(mru, struct xfs_fstrm_item, mru); + + xfs_filestream_put_ag(item->ip->i_mount, item->ag); + + trace_xfs_filestream_free(item->ip, item->ag); + + kmem_free(item); +} + /* * Scan the AGs starting at startag looking for an AG that isn't in use and has * at least minlen blocks free. */ static int -_xfs_filestream_pick_ag( - xfs_mount_t *mp, - xfs_agnumber_t startag, - xfs_agnumber_t *agp, - int flags, - xfs_extlen_t minlen) +xfs_filestream_pick_ag( + struct xfs_inode *ip, + xfs_agnumber_t startag, + xfs_agnumber_t *agp, + int flags, + xfs_extlen_t minlen) { - int streams, max_streams; - int err, trylock, nscan; - xfs_extlen_t longest, free, minfree, maxfree = 0; - xfs_agnumber_t ag, max_ag = NULLAGNUMBER; - struct xfs_perag *pag; + struct xfs_mount *mp = ip->i_mount; + struct xfs_fstrm_item *item; + struct xfs_perag *pag; + xfs_extlen_t longest, free = 0, minfree, maxfree = 0; + xfs_agnumber_t ag, max_ag = NULLAGNUMBER; + int err, trylock, nscan; + + ASSERT(S_ISDIR(ip->i_d.di_mode)); /* 2% of an AG's blocks must be free for it to be chosen. */ minfree = mp->m_sb.sb_agblocks / 50; @@ -228,8 +166,9 @@ _xfs_filestream_pick_ag( trylock = XFS_ALLOC_FLAG_TRYLOCK; for (nscan = 0; 1; nscan++) { + trace_xfs_filestream_scan(ip, ag); + pag = xfs_perag_get(mp, ag); - TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms)); if (!pag->pagf_init) { err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); @@ -246,7 +185,6 @@ _xfs_filestream_pick_ag( /* Keep track of the AG with the most free blocks. */ if (pag->pagf_freeblks > maxfree) { maxfree = pag->pagf_freeblks; - max_streams = atomic_read(&pag->pagf_fstrms); max_ag = ag; } @@ -269,7 +207,6 @@ _xfs_filestream_pick_ag( /* Break out, retaining the reference on the AG. */ free = pag->pagf_freeblks; - streams = atomic_read(&pag->pagf_fstrms); xfs_perag_put(pag); *agp = ag; break; @@ -305,317 +242,98 @@ next_ag: */ if (max_ag != NULLAGNUMBER) { xfs_filestream_get_ag(mp, max_ag); - TRACE_AG_PICK1(mp, max_ag, maxfree); - streams = max_streams; free = maxfree; *agp = max_ag; break; } /* take AG 0 if none matched */ - TRACE_AG_PICK1(mp, max_ag, maxfree); + trace_xfs_filestream_pick(ip, *agp, free, nscan); *agp = 0; return 0; } - TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); - - return 0; -} + trace_xfs_filestream_pick(ip, *agp, free, nscan); -/* - * Set the allocation group number for a file or a directory, updating inode - * references and per-AG references as appropriate. - */ -static int -_xfs_filestream_update_ag( - xfs_inode_t *ip, - xfs_inode_t *pip, - xfs_agnumber_t ag) -{ - int err = 0; - xfs_mount_t *mp; - xfs_mru_cache_t *cache; - fstrm_item_t *item; - xfs_agnumber_t old_ag; - xfs_inode_t *old_pip; - - /* - * Either ip is a regular file and pip is a directory, or ip is a - * directory and pip is NULL. - */ - ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip && - S_ISDIR(pip->i_d.di_mode)) || - (S_ISDIR(ip->i_d.di_mode) && !pip))); - - mp = ip->i_mount; - cache = mp->m_filestream; - - item = xfs_mru_cache_lookup(cache, ip->i_ino); - if (item) { - ASSERT(item->ip == ip); - old_ag = item->ag; - item->ag = ag; - old_pip = item->pip; - item->pip = pip; - xfs_mru_cache_done(cache); - - /* - * If the AG has changed, drop the old ref and take a new one, - * effectively transferring the reference from old to new AG. - */ - if (ag != old_ag) { - xfs_filestream_put_ag(mp, old_ag); - xfs_filestream_get_ag(mp, ag); - } - - /* - * If ip is a file and its pip has changed, drop the old ref and - * take a new one. - */ - if (pip && pip != old_pip) { - IRELE(old_pip); - IHOLD(pip); - } - - TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag), - ag, xfs_filestream_peek_ag(mp, ag)); + if (*agp == NULLAGNUMBER) return 0; - } - item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); + err = -ENOMEM; + item = kmem_alloc(sizeof(*item), KM_MAYFAIL); if (!item) - return ENOMEM; + goto out_put_ag; - item->ag = ag; + item->ag = *agp; item->ip = ip; - item->pip = pip; - err = xfs_mru_cache_insert(cache, ip->i_ino, item); + err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); if (err) { - kmem_zone_free(item_zone, item); - return err; + if (err == -EEXIST) + err = 0; + goto out_free_item; } - /* Take a reference on the AG. */ - xfs_filestream_get_ag(mp, ag); - - /* - * Take a reference on the inode itself regardless of whether it's a - * regular file or a directory. - */ - IHOLD(ip); - - /* - * In the case of a regular file, take a reference on the parent inode - * as well to ensure it remains in-core. - */ - if (pip) - IHOLD(pip); - - TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag), - ag, xfs_filestream_peek_ag(mp, ag)); - return 0; -} - -/* xfs_fstrm_free_func(): callback for freeing cached stream items. */ -STATIC void -xfs_fstrm_free_func( - unsigned long ino, - void *data) -{ - fstrm_item_t *item = (fstrm_item_t *)data; - xfs_inode_t *ip = item->ip; - - ASSERT(ip->i_ino == ino); - - xfs_iflags_clear(ip, XFS_IFILESTREAM); - - /* Drop the reference taken on the AG when the item was added. */ - xfs_filestream_put_ag(ip->i_mount, item->ag); - - TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, - xfs_filestream_peek_ag(ip->i_mount, item->ag)); - - /* - * _xfs_filestream_update_ag() always takes a reference on the inode - * itself, whether it's a file or a directory. Release it here. - * This can result in the inode being freed and so we must - * not hold any inode locks when freeing filesstreams objects - * otherwise we can deadlock here. - */ - IRELE(ip); - - /* - * In the case of a regular file, _xfs_filestream_update_ag() also - * takes a ref on the parent inode to keep it in-core. Release that - * too. - */ - if (item->pip) - IRELE(item->pip); - - /* Finally, free the memory allocated for the item. */ - kmem_zone_free(item_zone, item); -} - -/* - * xfs_filestream_init() is called at xfs initialisation time to set up the - * memory zone that will be used for filestream data structure allocation. - */ -int -xfs_filestream_init(void) -{ - item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); - if (!item_zone) - return -ENOMEM; - - return 0; -} - -/* - * xfs_filestream_uninit() is called at xfs termination time to destroy the - * memory zone that was used for filestream data structure allocation. - */ -void -xfs_filestream_uninit(void) -{ - kmem_zone_destroy(item_zone); -} - -/* - * xfs_filestream_mount() is called when a file system is mounted with the - * filestream option. It is responsible for allocating the data structures - * needed to track the new file system's file streams. - */ -int -xfs_filestream_mount( - xfs_mount_t *mp) -{ - int err; - unsigned int lifetime, grp_count; - - /* - * The filestream timer tunable is currently fixed within the range of - * one second to four minutes, with five seconds being the default. The - * group count is somewhat arbitrary, but it'd be nice to adhere to the - * timer tunable to within about 10 percent. This requires at least 10 - * groups. - */ - lifetime = xfs_fstrm_centisecs * 10; - grp_count = 10; - - err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, - xfs_fstrm_free_func); +out_free_item: + kmem_free(item); +out_put_ag: + xfs_filestream_put_ag(mp, *agp); return err; } -/* - * xfs_filestream_unmount() is called when a file system that was mounted with - * the filestream option is unmounted. It drains the data structures created - * to track the file system's file streams and frees all the memory that was - * allocated. - */ -void -xfs_filestream_unmount( - xfs_mount_t *mp) +static struct xfs_inode * +xfs_filestream_get_parent( + struct xfs_inode *ip) { - xfs_mru_cache_destroy(mp->m_filestream); -} + struct inode *inode = VFS_I(ip), *dir = NULL; + struct dentry *dentry, *parent; -/* - * Return the AG of the filestream the file or directory belongs to, or - * NULLAGNUMBER otherwise. - */ -xfs_agnumber_t -xfs_filestream_lookup_ag( - xfs_inode_t *ip) -{ - xfs_mru_cache_t *cache; - fstrm_item_t *item; - xfs_agnumber_t ag; - int ref; - - if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) { - ASSERT(0); - return NULLAGNUMBER; - } + dentry = d_find_alias(inode); + if (!dentry) + goto out; - cache = ip->i_mount->m_filestream; - item = xfs_mru_cache_lookup(cache, ip->i_ino); - if (!item) { - TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0); - return NULLAGNUMBER; - } + parent = dget_parent(dentry); + if (!parent) + goto out_dput; - ASSERT(ip == item->ip); - ag = item->ag; - ref = xfs_filestream_peek_ag(ip->i_mount, ag); - xfs_mru_cache_done(cache); + dir = igrab(parent->d_inode); + dput(parent); - TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); - return ag; +out_dput: + dput(dentry); +out: + return dir ? XFS_I(dir) : NULL; } /* - * xfs_filestream_associate() should only be called to associate a regular file - * with its parent directory. Calling it with a child directory isn't - * appropriate because filestreams don't apply to entire directory hierarchies. - * Creating a file in a child directory of an existing filestream directory - * starts a new filestream with its own allocation group association. + * Find the right allocation group for a file, either by finding an + * existing file stream or creating a new one. * - * Returns < 0 on error, 0 if successful association occurred, > 0 if - * we failed to get an association because of locking issues. + * Returns NULLAGNUMBER in case of an error. */ -int -xfs_filestream_associate( - xfs_inode_t *pip, - xfs_inode_t *ip) +xfs_agnumber_t +xfs_filestream_lookup_ag( + struct xfs_inode *ip) { - xfs_mount_t *mp; - xfs_mru_cache_t *cache; - fstrm_item_t *item; - xfs_agnumber_t ag, rotorstep, startag; - int err = 0; + struct xfs_mount *mp = ip->i_mount; + struct xfs_inode *pip = NULL; + xfs_agnumber_t startag, ag = NULLAGNUMBER; + struct xfs_mru_cache_elem *mru; - ASSERT(S_ISDIR(pip->i_d.di_mode)); ASSERT(S_ISREG(ip->i_d.di_mode)); - if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode)) - return -EINVAL; - mp = pip->i_mount; - cache = mp->m_filestream; + pip = xfs_filestream_get_parent(ip); + if (!pip) + goto out; - /* - * We have a problem, Houston. - * - * Taking the iolock here violates inode locking order - we already - * hold the ilock. Hence if we block getting this lock we may never - * wake. Unfortunately, that means if we can't get the lock, we're - * screwed in terms of getting a stream association - we can't spin - * waiting for the lock because someone else is waiting on the lock we - * hold and we cannot drop that as we are in a transaction here. - * - * Lucky for us, this inversion is not a problem because it's a - * directory inode that we are trying to lock here. - * - * So, if we can't get the iolock without sleeping then just give up - */ - if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) - return 1; - - /* If the parent directory is already in the cache, use its AG. */ - item = xfs_mru_cache_lookup(cache, pip->i_ino); - if (item) { - ASSERT(item->ip == pip); - ag = item->ag; - xfs_mru_cache_done(cache); - - TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag)); - err = _xfs_filestream_update_ag(ip, pip, ag); + mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); + if (mru) { + ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; + xfs_mru_cache_done(mp->m_filestream); - goto exit; + trace_xfs_filestream_lookup(ip, ag); + goto out; } /* @@ -623,202 +341,94 @@ xfs_filestream_associate( * use the directory inode's AG. */ if (mp->m_flags & XFS_MOUNT_32BITINODES) { - rotorstep = xfs_rotorstep; + xfs_agnumber_t rotorstep = xfs_rotorstep; startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; mp->m_agfrotor = (mp->m_agfrotor + 1) % (mp->m_sb.sb_agcount * rotorstep); } else startag = XFS_INO_TO_AGNO(mp, pip->i_ino); - /* Pick a new AG for the parent inode starting at startag. */ - err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); - if (err || ag == NULLAGNUMBER) - goto exit_did_pick; - - /* Associate the parent inode with the AG. */ - err = _xfs_filestream_update_ag(pip, NULL, ag); - if (err) - goto exit_did_pick; - - /* Associate the file inode with the AG. */ - err = _xfs_filestream_update_ag(ip, pip, ag); - if (err) - goto exit_did_pick; - - TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag)); - -exit_did_pick: - /* - * If _xfs_filestream_pick_ag() returned a valid AG, remove the - * reference it took on it, since the file and directory will have taken - * their own now if they were successfully cached. - */ - if (ag != NULLAGNUMBER) - xfs_filestream_put_ag(mp, ag); - -exit: - xfs_iunlock(pip, XFS_IOLOCK_EXCL); - return -err; + if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0)) + ag = NULLAGNUMBER; +out: + IRELE(pip); + return ag; } /* - * Pick a new allocation group for the current file and its file stream. This - * function is called by xfs_bmap_filestreams() with the mount point's per-ag - * lock held. + * Pick a new allocation group for the current file and its file stream. + * + * This is called when the allocator can't find a suitable extent in the + * current AG, and we have to move the stream into a new AG with more space. */ int xfs_filestream_new_ag( struct xfs_bmalloca *ap, xfs_agnumber_t *agp) { - int flags, err; - xfs_inode_t *ip, *pip = NULL; - xfs_mount_t *mp; - xfs_mru_cache_t *cache; - xfs_extlen_t minlen; - fstrm_item_t *dir, *file; - xfs_agnumber_t ag = NULLAGNUMBER; - - ip = ap->ip; - mp = ip->i_mount; - cache = mp->m_filestream; - minlen = ap->length; - *agp = NULLAGNUMBER; + struct xfs_inode *ip = ap->ip, *pip; + struct xfs_mount *mp = ip->i_mount; + xfs_extlen_t minlen = ap->length; + xfs_agnumber_t startag = 0; + int flags, err = 0; + struct xfs_mru_cache_elem *mru; - /* - * Look for the file in the cache, removing it if it's found. Doing - * this allows it to be held across the dir lookup that follows. - */ - file = xfs_mru_cache_remove(cache, ip->i_ino); - if (file) { - ASSERT(ip == file->ip); - - /* Save the file's parent inode and old AG number for later. */ - pip = file->pip; - ag = file->ag; - - /* Look for the file's directory in the cache. */ - dir = xfs_mru_cache_lookup(cache, pip->i_ino); - if (dir) { - ASSERT(pip == dir->ip); - - /* - * If the directory has already moved on to a new AG, - * use that AG as the new AG for the file. Don't - * forget to twiddle the AG refcounts to match the - * movement. - */ - if (dir->ag != file->ag) { - xfs_filestream_put_ag(mp, file->ag); - xfs_filestream_get_ag(mp, dir->ag); - *agp = file->ag = dir->ag; - } - - xfs_mru_cache_done(cache); - } + *agp = NULLAGNUMBER; - /* - * Put the file back in the cache. If this fails, the free - * function needs to be called to tidy up in the same way as if - * the item had simply expired from the cache. - */ - err = xfs_mru_cache_insert(cache, ip->i_ino, file); - if (err) { - xfs_fstrm_free_func(ip->i_ino, file); - return err; - } + pip = xfs_filestream_get_parent(ip); + if (!pip) + goto exit; - /* - * If the file's AG was moved to the directory's new AG, there's - * nothing more to be done. - */ - if (*agp != NULLAGNUMBER) { - TRACE_MOVEAG(mp, ip, pip, - ag, xfs_filestream_peek_ag(mp, ag), - *agp, xfs_filestream_peek_ag(mp, *agp)); - return 0; - } + mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino); + if (mru) { + struct xfs_fstrm_item *item = + container_of(mru, struct xfs_fstrm_item, mru); + startag = (item->ag + 1) % mp->m_sb.sb_agcount; } - /* - * If the file's parent directory is known, take its iolock in exclusive - * mode to prevent two sibling files from racing each other to migrate - * themselves and their parent to different AGs. - * - * Note that we lock the parent directory iolock inside the child - * iolock here. That's fine as we never hold both parent and child - * iolock in any other place. This is different from the ilock, - * which requires locking of the child after the parent for namespace - * operations. - */ - if (pip) - xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); - - /* - * A new AG needs to be found for the file. If the file's parent - * directory is also known, it will be moved to the new AG as well to - * ensure that files created inside it in future use the new AG. - */ - ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount; flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); - err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); - if (err || *agp == NULLAGNUMBER) - goto exit; + err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); /* - * If the file wasn't found in the file cache, then its parent directory - * inode isn't known. For this to have happened, the file must either - * be pre-existing, or it was created long enough ago that its cache - * entry has expired. This isn't the sort of usage that the filestreams - * allocator is trying to optimise, so there's no point trying to track - * its new AG somehow in the filestream data structures. + * Only free the item here so we skip over the old AG earlier. */ - if (!pip) { - TRACE_ORPHAN(mp, ip, *agp); - goto exit; - } - - /* Associate the parent inode with the AG. */ - err = _xfs_filestream_update_ag(pip, NULL, *agp); - if (err) - goto exit; - - /* Associate the file inode with the AG. */ - err = _xfs_filestream_update_ag(ip, pip, *agp); - if (err) - goto exit; - - TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0, - *agp, xfs_filestream_peek_ag(mp, *agp)); + if (mru) + xfs_fstrm_free_func(mru); + IRELE(pip); exit: - /* - * If _xfs_filestream_pick_ag() returned a valid AG, remove the - * reference it took on it, since the file and directory will have taken - * their own now if they were successfully cached. - */ - if (*agp != NULLAGNUMBER) - xfs_filestream_put_ag(mp, *agp); - else + if (*agp == NULLAGNUMBER) *agp = 0; - - if (pip) - xfs_iunlock(pip, XFS_IOLOCK_EXCL); - return err; } -/* - * Remove an association between an inode and a filestream object. - * Typically this is done on last close of an unlinked file. - */ void xfs_filestream_deassociate( - xfs_inode_t *ip) + struct xfs_inode *ip) { - xfs_mru_cache_t *cache = ip->i_mount->m_filestream; + xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); +} + +int +xfs_filestream_mount( + xfs_mount_t *mp) +{ + /* + * The filestream timer tunable is currently fixed within the range of + * one second to four minutes, with five seconds being the default. The + * group count is somewhat arbitrary, but it'd be nice to adhere to the + * timer tunable to within about 10 percent. This requires at least 10 + * groups. + */ + return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10, + 10, xfs_fstrm_free_func); +} - xfs_mru_cache_delete(cache, ip->i_ino); +void +xfs_filestream_unmount( + xfs_mount_t *mp) +{ + xfs_mru_cache_destroy(mp->m_filestream); } diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 6d61dbee8564..2ef43406e53b 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -20,50 +20,20 @@ struct xfs_mount; struct xfs_inode; -struct xfs_perag; struct xfs_bmalloca; -#ifdef XFS_FILESTREAMS_TRACE -#define XFS_FSTRM_KTRACE_INFO 1 -#define XFS_FSTRM_KTRACE_AGSCAN 2 -#define XFS_FSTRM_KTRACE_AGPICK1 3 -#define XFS_FSTRM_KTRACE_AGPICK2 4 -#define XFS_FSTRM_KTRACE_UPDATE 5 -#define XFS_FSTRM_KTRACE_FREE 6 -#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7 -#define XFS_FSTRM_KTRACE_ASSOCIATE 8 -#define XFS_FSTRM_KTRACE_MOVEAG 9 -#define XFS_FSTRM_KTRACE_ORPHAN 10 - -#define XFS_FSTRM_KTRACE_SIZE 16384 -extern ktrace_t *xfs_filestreams_trace_buf; - -#endif - -/* allocation selection flags */ -typedef enum xfs_fstrm_alloc { - XFS_PICK_USERDATA = 1, - XFS_PICK_LOWSPACE = 2, -} xfs_fstrm_alloc_t; - -/* prototypes for filestream.c */ -int xfs_filestream_init(void); -void xfs_filestream_uninit(void); int xfs_filestream_mount(struct xfs_mount *mp); void xfs_filestream_unmount(struct xfs_mount *mp); -xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); -int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); void xfs_filestream_deassociate(struct xfs_inode *ip); +xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); +int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno); - -/* filestreams for the inode? */ static inline int xfs_inode_is_filestream( struct xfs_inode *ip) { return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || - xfs_iflags_test(ip, XFS_IFILESTREAM) || (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM); } diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c5fc116dfaa3..18dc721ca19f 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ +#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ /* * Minimum and maximum sizes need for growth checks. @@ -254,8 +255,8 @@ typedef struct xfs_fsop_resblks { ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) /* Used for sanity checks on superblock */ -#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks) -#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \ +#define XFS_MAX_DBLOCKS(s) ((xfs_rfsblock_t)(s)->sb_agcount * (s)->sb_agblocks) +#define XFS_MIN_DBLOCKS(s) ((xfs_rfsblock_t)((s)->sb_agcount - 1) * \ (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) /* @@ -374,6 +375,9 @@ struct xfs_fs_eofblocks { #define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ #define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ #define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ +#define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm; + * kernel only, not included in + * valid mask */ #define XFS_EOF_FLAGS_VALID \ (XFS_EOF_FLAGS_SYNC | \ XFS_EOF_FLAGS_UID | \ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 02fb943cbf22..f91de1ef05e1 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -24,6 +24,8 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" @@ -74,23 +76,18 @@ xfs_fs_geometry( } if (new_version >= 3) { geo->version = XFS_FSOP_GEOM_VERSION; - geo->flags = + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | + XFS_FSOP_GEOM_FLAGS_DIRV2 | (xfs_sb_version_hasattr(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_ATTR : 0) | - (xfs_sb_version_hasnlink(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_NLINK : 0) | (xfs_sb_version_hasquota(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | (xfs_sb_version_hasalign(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | (xfs_sb_version_hasdalign(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | - (xfs_sb_version_hasshared(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_SHARED : 0) | (xfs_sb_version_hasextflgbit(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | - (xfs_sb_version_hasdirv2(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | (xfs_sb_version_hassector(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | (xfs_sb_version_hasasciici(&mp->m_sb) ? @@ -104,11 +101,13 @@ xfs_fs_geometry( (xfs_sb_version_hascrc(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_V5SB : 0) | (xfs_sb_version_hasftype(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FTYPE : 0); + XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | + (xfs_sb_version_hasfinobt(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_FINOBT : 0); geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? mp->m_sb.sb_logsectsize : BBSIZE; geo->rtsectsize = mp->m_sb.sb_blocksize; - geo->dirblocksize = mp->m_dirblksize; + geo->dirblocksize = mp->m_dir_geo->blksize; } if (new_version >= 4) { geo->flags |= @@ -169,7 +168,7 @@ xfs_growfs_data_private( nb = in->newblocks; pct = in->imaxpct; if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100) - return XFS_ERROR(EINVAL); + return -EINVAL; if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) return error; dpct = pct - mp->m_sb.sb_imax_pct; @@ -177,7 +176,7 @@ xfs_growfs_data_private( XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), XFS_FSS_TO_BB(mp, 1), 0, NULL); if (!bp) - return EIO; + return -EIO; if (bp->b_error) { error = bp->b_error; xfs_buf_relse(bp); @@ -192,7 +191,7 @@ xfs_growfs_data_private( nagcount--; nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; if (nb < mp->m_sb.sb_dblocks) - return XFS_ERROR(EINVAL); + return -EINVAL; } new = nb - mp->m_sb.sb_dblocks; oagcount = mp->m_sb.sb_agcount; @@ -230,7 +229,7 @@ xfs_growfs_data_private( XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -271,7 +270,7 @@ xfs_growfs_data_private( XFS_FSS_TO_BB(mp, 1), 0, &xfs_agfl_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -299,7 +298,7 @@ xfs_growfs_data_private( XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -316,6 +315,10 @@ xfs_growfs_data_private( agi->agi_dirino = cpu_to_be32(NULLAGINO); if (xfs_sb_version_hascrc(&mp->m_sb)) uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); + agi->agi_free_level = cpu_to_be32(1); + } for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); @@ -333,7 +336,7 @@ xfs_growfs_data_private( &xfs_allocbt_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -362,7 +365,7 @@ xfs_growfs_data_private( BTOBB(mp->m_sb.sb_blocksize), 0, &xfs_allocbt_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -392,7 +395,7 @@ xfs_growfs_data_private( BTOBB(mp->m_sb.sb_blocksize), 0, &xfs_inobt_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -407,6 +410,34 @@ xfs_growfs_data_private( xfs_buf_relse(bp); if (error) goto error0; + + /* + * FINO btree root block + */ + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + bp = xfs_growfs_get_hdr_buf(mp, + XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)), + BTOBB(mp->m_sb.sb_blocksize), 0, + &xfs_inobt_buf_ops); + if (!bp) { + error = -ENOMEM; + goto error0; + } + + if (xfs_sb_version_hascrc(&mp->m_sb)) + xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, + 0, 0, agno, + XFS_BTREE_CRC_BLOCKS); + else + xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, + 0, agno, 0); + + error = xfs_bwrite(bp); + xfs_buf_relse(bp); + if (error) + goto error0; + } + } xfs_trans_agblocks_delta(tp, nfree); /* @@ -500,7 +531,7 @@ xfs_growfs_data_private( bp->b_ops = &xfs_sb_buf_ops; xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); } else - error = ENOMEM; + error = -ENOMEM; } /* @@ -545,17 +576,17 @@ xfs_growfs_log_private( nb = in->newblocks; if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES)) - return XFS_ERROR(EINVAL); + return -EINVAL; if (nb == mp->m_sb.sb_logblocks && in->isint == (mp->m_sb.sb_logstart != 0)) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * Moving the log is hard, need new interfaces to sync * the log first, hold off all activity while moving it. * Can have shorter or longer log in the same space, * or transform internal to external log or vice versa. */ - return XFS_ERROR(ENOSYS); + return -ENOSYS; } /* @@ -573,9 +604,9 @@ xfs_growfs_data( int error; if (!capable(CAP_SYS_ADMIN)) - return XFS_ERROR(EPERM); + return -EPERM; if (!mutex_trylock(&mp->m_growlock)) - return XFS_ERROR(EWOULDBLOCK); + return -EWOULDBLOCK; error = xfs_growfs_data_private(mp, in); mutex_unlock(&mp->m_growlock); return error; @@ -589,9 +620,9 @@ xfs_growfs_log( int error; if (!capable(CAP_SYS_ADMIN)) - return XFS_ERROR(EPERM); + return -EPERM; if (!mutex_trylock(&mp->m_growlock)) - return XFS_ERROR(EWOULDBLOCK); + return -EWOULDBLOCK; error = xfs_growfs_log_private(mp, in); mutex_unlock(&mp->m_growlock); return error; @@ -643,7 +674,7 @@ xfs_reserve_blocks( /* If inval is null, report current values and return */ if (inval == (__uint64_t *)NULL) { if (!outval) - return EINVAL; + return -EINVAL; outval->resblks = mp->m_resblks; outval->resblks_avail = mp->m_resblks_avail; return 0; @@ -726,7 +757,7 @@ out: int error; error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0); - if (error == ENOSPC) + if (error == -ENOSPC) goto retry; } return 0; @@ -787,7 +818,7 @@ xfs_fs_goingdown( SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR); break; default: - return XFS_ERROR(EINVAL); + return -EINVAL; } return 0; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 98d35244eecc..981b2cf51985 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -33,6 +33,9 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_bmap_util.h" +#include "xfs_quota.h" +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" #include <linux/kthread.h> #include <linux/freezer.h> @@ -158,7 +161,7 @@ xfs_iget_cache_hit( if (ip->i_ino != ino) { trace_xfs_iget_skip(ip); XFS_STATS_INC(xs_ig_frecycle); - error = EAGAIN; + error = -EAGAIN; goto out_error; } @@ -176,7 +179,7 @@ xfs_iget_cache_hit( if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { trace_xfs_iget_skip(ip); XFS_STATS_INC(xs_ig_frecycle); - error = EAGAIN; + error = -EAGAIN; goto out_error; } @@ -184,7 +187,7 @@ xfs_iget_cache_hit( * If lookup is racing with unlink return an error immediately. */ if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { - error = ENOENT; + error = -ENOENT; goto out_error; } @@ -206,7 +209,7 @@ xfs_iget_cache_hit( spin_unlock(&ip->i_flags_lock); rcu_read_unlock(); - error = -inode_init_always(mp->m_super, inode); + error = inode_init_always(mp->m_super, inode); if (error) { /* * Re-initializing the inode failed, and we are in deep @@ -243,7 +246,7 @@ xfs_iget_cache_hit( /* If the VFS inode is being torn down, pause and try again. */ if (!igrab(inode)) { trace_xfs_iget_skip(ip); - error = EAGAIN; + error = -EAGAIN; goto out_error; } @@ -285,7 +288,7 @@ xfs_iget_cache_miss( ip = xfs_inode_alloc(mp, ino); if (!ip) - return ENOMEM; + return -ENOMEM; error = xfs_iread(mp, tp, ip, flags); if (error) @@ -294,7 +297,7 @@ xfs_iget_cache_miss( trace_xfs_iget_miss(ip); if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { - error = ENOENT; + error = -ENOENT; goto out_destroy; } @@ -305,7 +308,7 @@ xfs_iget_cache_miss( * recurse into the file system. */ if (radix_tree_preload(GFP_NOFS)) { - error = EAGAIN; + error = -EAGAIN; goto out_destroy; } @@ -341,7 +344,7 @@ xfs_iget_cache_miss( if (unlikely(error)) { WARN_ON(error != -EEXIST); XFS_STATS_INC(xs_ig_dup); - error = EAGAIN; + error = -EAGAIN; goto out_preload_end; } spin_unlock(&pag->pag_ici_lock); @@ -408,7 +411,7 @@ xfs_iget( /* reject inode numbers outside existing AGs */ if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) - return EINVAL; + return -EINVAL; /* get the perag structure and ensure that it's inode capable */ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); @@ -445,7 +448,7 @@ again: return 0; out_error_or_again: - if (error == EAGAIN) { + if (error == -EAGAIN) { delay(1); goto again; } @@ -489,26 +492,25 @@ xfs_inode_ag_walk_grab( /* nothing to sync during shutdown */ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return EFSCORRUPTED; + return -EFSCORRUPTED; /* If we can't grab the inode, it must on it's way to reclaim. */ if (!igrab(inode)) - return ENOENT; + return -ENOENT; /* inode is valid */ return 0; out_unlock_noent: spin_unlock(&ip->i_flags_lock); - return ENOENT; + return -ENOENT; } STATIC int xfs_inode_ag_walk( struct xfs_mount *mp, struct xfs_perag *pag, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags, + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, @@ -582,18 +584,18 @@ restart: for (i = 0; i < nr_found; i++) { if (!batch[i]) continue; - error = execute(batch[i], pag, flags, args); + error = execute(batch[i], flags, args); IRELE(batch[i]); - if (error == EAGAIN) { + if (error == -EAGAIN) { skipped++; continue; } - if (error && last_error != EFSCORRUPTED) + if (error && last_error != -EFSCORRUPTED) last_error = error; } /* bail out if the filesystem is corrupted. */ - if (error == EFSCORRUPTED) + if (error == -EFSCORRUPTED) break; cond_resched(); @@ -636,8 +638,7 @@ xfs_eofblocks_worker( int xfs_inode_ag_iterator( struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags, + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args) @@ -654,18 +655,17 @@ xfs_inode_ag_iterator( xfs_perag_put(pag); if (error) { last_error = error; - if (error == EFSCORRUPTED) + if (error == -EFSCORRUPTED) break; } } - return XFS_ERROR(last_error); + return last_error; } int xfs_inode_ag_iterator_tag( struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags, + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, @@ -683,11 +683,11 @@ xfs_inode_ag_iterator_tag( xfs_perag_put(pag); if (error) { last_error = error; - if (error == EFSCORRUPTED) + if (error == -EFSCORRUPTED) break; } } - return XFS_ERROR(last_error); + return last_error; } /* @@ -947,7 +947,7 @@ restart: * see the stale flag set on the inode. */ error = xfs_iflush(ip, &bp); - if (error == EAGAIN) { + if (error == -EAGAIN) { xfs_iunlock(ip, XFS_ILOCK_EXCL); /* backoff longer than in xfs_ifree_cluster */ delay(2); @@ -1000,7 +1000,7 @@ out: xfs_iflags_clear(ip, XFS_IRECLAIM); xfs_iunlock(ip, XFS_ILOCK_EXCL); /* - * We could return EAGAIN here to make reclaim rescan the inode tree in + * We could return -EAGAIN here to make reclaim rescan the inode tree in * a short while. However, this just burns CPU time scanning the tree * waiting for IO to complete and the reclaim work never goes back to * the idle state. Instead, return 0 to let the next scheduled @@ -1103,7 +1103,7 @@ restart: if (!batch[i]) continue; error = xfs_reclaim_inode(batch[i], pag, flags); - if (error && last_error != EFSCORRUPTED) + if (error && last_error != -EFSCORRUPTED) last_error = error; } @@ -1132,7 +1132,7 @@ restart: trylock = 0; goto restart; } - return XFS_ERROR(last_error); + return last_error; } int @@ -1206,15 +1206,42 @@ xfs_inode_match_id( return 1; } +/* + * A union-based inode filtering algorithm. Process the inode if any of the + * criteria match. This is for global/internal scans only. + */ +STATIC int +xfs_inode_match_id_union( + struct xfs_inode *ip, + struct xfs_eofblocks *eofb) +{ + if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) && + uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid)) + return 1; + + if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) && + gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid)) + return 1; + + if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) && + xfs_get_projid(ip) == eofb->eof_prid) + return 1; + + return 0; +} + STATIC int xfs_inode_free_eofblocks( struct xfs_inode *ip, - struct xfs_perag *pag, int flags, void *args) { int ret; struct xfs_eofblocks *eofb = args; + bool need_iolock = true; + int match; + + ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); if (!xfs_can_free_eofblocks(ip, false)) { /* inode could be preallocated or append-only */ @@ -1232,19 +1259,31 @@ xfs_inode_free_eofblocks( return 0; if (eofb) { - if (!xfs_inode_match_id(ip, eofb)) + if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) + match = xfs_inode_match_id_union(ip, eofb); + else + match = xfs_inode_match_id(ip, eofb); + if (!match) return 0; /* skip the inode if the file size is too small */ if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; + + /* + * A scan owner implies we already hold the iolock. Skip it in + * xfs_free_eofblocks() to avoid deadlock. This also eliminates + * the possibility of EAGAIN being returned. + */ + if (eofb->eof_scan_owner == ip->i_ino) + need_iolock = false; } - ret = xfs_free_eofblocks(ip->i_mount, ip, true); + ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); /* don't revisit the inode if we're not waiting */ - if (ret == EAGAIN && !(flags & SYNC_WAIT)) + if (ret == -EAGAIN && !(flags & SYNC_WAIT)) ret = 0; return ret; @@ -1264,6 +1303,55 @@ xfs_icache_free_eofblocks( eofb, XFS_ICI_EOFBLOCKS_TAG); } +/* + * Run eofblocks scans on the quotas applicable to the inode. For inodes with + * multiple quotas, we don't know exactly which quota caused an allocation + * failure. We make a best effort by including each quota under low free space + * conditions (less than 1% free space) in the scan. + */ +int +xfs_inode_free_quota_eofblocks( + struct xfs_inode *ip) +{ + int scan = 0; + struct xfs_eofblocks eofb = {0}; + struct xfs_dquot *dq; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + + /* + * Set the scan owner to avoid a potential livelock. Otherwise, the scan + * can repeatedly trylock on the inode we're currently processing. We + * run a sync scan to increase effectiveness and use the union filter to + * cover all applicable quotas in a single scan. + */ + eofb.eof_scan_owner = ip->i_ino; + eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; + + if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { + dq = xfs_inode_dquot(ip, XFS_DQ_USER); + if (dq && xfs_dquot_lowsp(dq)) { + eofb.eof_uid = VFS_I(ip)->i_uid; + eofb.eof_flags |= XFS_EOF_FLAGS_UID; + scan = 1; + } + } + + if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) { + dq = xfs_inode_dquot(ip, XFS_DQ_GROUP); + if (dq && xfs_dquot_lowsp(dq)) { + eofb.eof_gid = VFS_I(ip)->i_gid; + eofb.eof_flags |= XFS_EOF_FLAGS_GID; + scan = 1; + } + } + + if (scan) + xfs_icache_free_eofblocks(ip->i_mount, &eofb); + + return scan; +} + void xfs_inode_set_eofblocks_tag( xfs_inode_t *ip) diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 9ed68bb750f5..46748b86b12f 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -27,6 +27,7 @@ struct xfs_eofblocks { kgid_t eof_gid; prid_t eof_prid; __u64 eof_min_file_size; + xfs_ino_t eof_scan_owner; }; #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ @@ -57,15 +58,14 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); +int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip); void xfs_eofblocks_worker(struct work_struct *); int xfs_inode_ag_iterator(struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, - int flags, void *args), + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, - int flags, void *args), + int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, int tag); static inline int @@ -74,31 +74,32 @@ xfs_fs_eofblocks_from_user( struct xfs_eofblocks *dst) { if (src->eof_version != XFS_EOFBLOCKS_VERSION) - return EINVAL; + return -EINVAL; if (src->eof_flags & ~XFS_EOF_FLAGS_VALID) - return EINVAL; + return -EINVAL; if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) || memchr_inv(src->pad64, 0, sizeof(src->pad64))) - return EINVAL; + return -EINVAL; dst->eof_flags = src->eof_flags; dst->eof_prid = src->eof_prid; dst->eof_min_file_size = src->eof_min_file_size; + dst->eof_scan_owner = NULLFSINO; dst->eof_uid = INVALID_UID; if (src->eof_flags & XFS_EOF_FLAGS_UID) { dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid); if (!uid_valid(dst->eof_uid)) - return EINVAL; + return -EINVAL; } dst->eof_gid = INVALID_GID; if (src->eof_flags & XFS_EOF_FLAGS_GID) { dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid); if (!gid_valid(dst->eof_gid)) - return EINVAL; + return -EINVAL; } return 0; } diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index d2eaccfa73f4..7e4549233251 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -28,6 +28,7 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" #include "xfs_icreate_item.h" +#include "xfs_log.h" kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ @@ -58,13 +59,14 @@ xfs_icreate_item_size( STATIC void xfs_icreate_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) + struct xfs_log_vec *lv) { struct xfs_icreate_item *icp = ICR_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; - log_vector->i_addr = (xfs_caddr_t)&icp->ic_format; - log_vector->i_len = sizeof(struct xfs_icreate_log); - log_vector->i_type = XLOG_REG_TYPE_ICREATE; + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE, + &icp->ic_format, + sizeof(struct xfs_icreate_log)); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 001aa893ed59..fea3c92fb3f0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -42,7 +42,6 @@ #include "xfs_bmap_util.h" #include "xfs_error.h" #include "xfs_quota.h" -#include "xfs_dinode.h" #include "xfs_filestream.h" #include "xfs_cksum.h" #include "xfs_trace.h" @@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone; STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); +STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *); + /* * helper function to extract extent size hint from inode */ @@ -77,48 +78,44 @@ xfs_get_extsz_hint( } /* - * This is a wrapper routine around the xfs_ilock() routine used to centralize - * some grungy code. It is used in places that wish to lock the inode solely - * for reading the extents. The reason these places can't just call - * xfs_ilock(SHARED) is that the inode lock also guards to bringing in of the - * extents from disk for a file in b-tree format. If the inode is in b-tree - * format, then we need to lock the inode exclusively until the extents are read - * in. Locking it exclusively all the time would limit our parallelism - * unnecessarily, though. What we do instead is check to see if the extents - * have been read in yet, and only lock the inode exclusively if they have not. + * These two are wrapper routines around the xfs_ilock() routine used to + * centralize some grungy code. They are used in places that wish to lock the + * inode solely for reading the extents. The reason these places can't just + * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to + * bringing in of the extents from disk for a file in b-tree format. If the + * inode is in b-tree format, then we need to lock the inode exclusively until + * the extents are read in. Locking it exclusively all the time would limit + * our parallelism unnecessarily, though. What we do instead is check to see + * if the extents have been read in yet, and only lock the inode exclusively + * if they have not. * - * The function returns a value which should be given to the corresponding - * xfs_iunlock_map_shared(). This value is the mode in which the lock was - * actually taken. + * The functions return a value which should be given to the corresponding + * xfs_iunlock() call. */ uint -xfs_ilock_map_shared( - xfs_inode_t *ip) +xfs_ilock_data_map_shared( + struct xfs_inode *ip) { - uint lock_mode; + uint lock_mode = XFS_ILOCK_SHARED; - if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && - ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { + if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && + (ip->i_df.if_flags & XFS_IFEXTENTS) == 0) lock_mode = XFS_ILOCK_EXCL; - } else { - lock_mode = XFS_ILOCK_SHARED; - } - xfs_ilock(ip, lock_mode); - return lock_mode; } -/* - * This is simply the unlock routine to go with xfs_ilock_map_shared(). - * All it does is call xfs_iunlock() with the given lock_mode. - */ -void -xfs_iunlock_map_shared( - xfs_inode_t *ip, - unsigned int lock_mode) +uint +xfs_ilock_attr_map_shared( + struct xfs_inode *ip) { - xfs_iunlock(ip, lock_mode); + uint lock_mode = XFS_ILOCK_SHARED; + + if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE && + (ip->i_afp->if_flags & XFS_IFEXTENTS) == 0) + lock_mode = XFS_ILOCK_EXCL; + xfs_ilock(ip, lock_mode); + return lock_mode; } /* @@ -586,11 +583,11 @@ xfs_lookup( trace_xfs_lookup(dp, name); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return XFS_ERROR(EIO); + return -EIO; - lock_mode = xfs_ilock_map_shared(dp); + lock_mode = xfs_ilock_data_map_shared(dp); error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); - xfs_iunlock_map_shared(dp, lock_mode); + xfs_iunlock(dp, lock_mode); if (error) goto out; @@ -658,7 +655,6 @@ xfs_ialloc( uint flags; int error; timespec_t tv; - int filestreams = 0; /* * Call the space management code to pick @@ -685,6 +681,14 @@ xfs_ialloc( return error; ASSERT(ip != NULL); + /* + * We always convert v1 inodes to v2 now - we only support filesystems + * with >= v2 inode capability, so there is no reason for ever leaving + * an inode in v1 format. + */ + if (ip->i_d.di_version == 1) + ip->i_d.di_version = 2; + ip->i_d.di_mode = mode; ip->i_d.di_onlink = 0; ip->i_d.di_nlink = nlink; @@ -694,27 +698,6 @@ xfs_ialloc( xfs_set_projid(ip, prid); memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - /* - * If the superblock version is up to where we support new format - * inodes and this is currently an old format inode, then change - * the inode version number now. This way we only do the conversion - * here rather than here and in the flush/logging code. - */ - if (xfs_sb_version_hasnlink(&mp->m_sb) && - ip->i_d.di_version == 1) { - ip->i_d.di_version = 2; - /* - * We've already zeroed the old link count, the projid field, - * and the pad field. - */ - } - - /* - * Project ids won't be stored on disk if we are using a version 1 inode. - */ - if ((prid != 0) && (ip->i_d.di_version == 1)) - xfs_bump_ino_vers2(tp, ip); - if (pip && XFS_INHERIT_GID(pip)) { ip->i_d.di_gid = pip->i_d.di_gid; if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) { @@ -775,13 +758,6 @@ xfs_ialloc( flags |= XFS_ILOG_DEV; break; case S_IFREG: - /* - * we can't set up filestreams until after the VFS inode - * is set up properly. - */ - if (pip && xfs_inode_is_filestream(pip)) - filestreams = 1; - /* fall through */ case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { uint di_flags = 0; @@ -847,15 +823,6 @@ xfs_ialloc( /* now that we have an i_mode we can setup inode ops and unlock */ xfs_setup_inode(ip); - /* now we have set up the vfs inode we can associate the filestream */ - if (filestreams) { - error = xfs_filestream_associate(pip, ip); - if (error < 0) - return -error; - if (!error) - xfs_iflags_set(ip, XFS_IFILESTREAM); - } - *ipp = ip; return 0; } @@ -926,7 +893,7 @@ xfs_dir_ialloc( } if (!ialloc_context && !ip) { *ipp = NULL; - return XFS_ERROR(ENOSPC); + return -ENOSPC; } /* @@ -1076,40 +1043,6 @@ xfs_droplink( } /* - * This gets called when the inode's version needs to be changed from 1 to 2. - * Currently this happens when the nlink field overflows the old 16-bit value - * or when chproj is called to change the project for the first time. - * As a side effect the superblock version will also get rev'd - * to contain the NLINK bit. - */ -void -xfs_bump_ino_vers2( - xfs_trans_t *tp, - xfs_inode_t *ip) -{ - xfs_mount_t *mp; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_d.di_version == 1); - - ip->i_d.di_version = 2; - ip->i_d.di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - mp = tp->t_mountp; - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - spin_lock(&mp->m_sb_lock); - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - xfs_sb_version_addnlink(&mp->m_sb); - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, XFS_SB_VERSIONNUM); - } else { - spin_unlock(&mp->m_sb_lock); - } - } - /* Caller must log the inode */ -} - -/* * Increment the link count on an inode & log the change. */ int @@ -1119,22 +1052,10 @@ xfs_bumplink( { xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - ASSERT(ip->i_d.di_nlink > 0); + ASSERT(ip->i_d.di_version > 1); + ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE)); ip->i_d.di_nlink++; inc_nlink(VFS_I(ip)); - if ((ip->i_d.di_version == 1) && - (ip->i_d.di_nlink > XFS_MAXLINK_1)) { - /* - * The inode has increased its number of links beyond - * what can fit in an old format inode. It now needs - * to be converted to a version 2 inode with a 32 bit - * link count. If this is the first inode in the file - * system to do this, then we need to bump the superblock - * version number as well. - */ - xfs_bump_ino_vers2(tp, ip); - } - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); return 0; } @@ -1167,12 +1088,9 @@ xfs_create( trace_xfs_create(dp, name); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; - if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - prid = xfs_get_projid(dp); - else - prid = XFS_PROJID_DEFAULT; + prid = xfs_get_initial_prid(dp); /* * Make sure that we have allocated dquot(s) on disk. @@ -1207,12 +1125,12 @@ xfs_create( */ tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; error = xfs_trans_reserve(tp, &tres, resblks, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { /* flush outstanding delalloc blocks and retry */ xfs_flush_inodes(mp); error = xfs_trans_reserve(tp, &tres, resblks, 0); } - if (error == ENOSPC) { + if (error == -ENOSPC) { /* No space at all so try a "no-allocation" reservation */ resblks = 0; error = xfs_trans_reserve(tp, &tres, 0, 0); @@ -1247,7 +1165,7 @@ xfs_create( error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, resblks > 0, &ip, &committed); if (error) { - if (error == ENOSPC) + if (error == -ENOSPC) goto out_trans_cancel; goto out_trans_abort; } @@ -1266,7 +1184,7 @@ xfs_create( &first_block, &free_list, resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); goto out_trans_abort; } xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -1337,6 +1255,114 @@ xfs_create( } int +xfs_create_tmpfile( + struct xfs_inode *dp, + struct dentry *dentry, + umode_t mode, + struct xfs_inode **ipp) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_inode *ip = NULL; + struct xfs_trans *tp = NULL; + int error; + uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES; + prid_t prid; + struct xfs_dquot *udqp = NULL; + struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *pdqp = NULL; + struct xfs_trans_res *tres; + uint resblks; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + prid = xfs_get_initial_prid(dp); + + /* + * Make sure that we have allocated dquot(s) on disk. + */ + error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), + xfs_kgid_to_gid(current_fsgid()), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); + if (error) + return error; + + resblks = XFS_IALLOC_SPACE_RES(mp); + tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE); + + tres = &M_RES(mp)->tr_create_tmpfile; + error = xfs_trans_reserve(tp, tres, resblks, 0); + if (error == -ENOSPC) { + /* No space at all so try a "no-allocation" reservation */ + resblks = 0; + error = xfs_trans_reserve(tp, tres, 0, 0); + } + if (error) { + cancel_flags = 0; + goto out_trans_cancel; + } + + error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, + pdqp, resblks, 1, 0); + if (error) + goto out_trans_cancel; + + error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, + prid, resblks > 0, &ip, NULL); + if (error) { + if (error == -ENOSPC) + goto out_trans_cancel; + goto out_trans_abort; + } + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + + /* + * Attach the dquot(s) to the inodes and modify them incore. + * These ids of the inode couldn't have changed since the new + * inode has been locked ever since it was created. + */ + xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); + + ip->i_d.di_nlink--; + error = xfs_iunlink(tp, ip); + if (error) + goto out_trans_abort; + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) + goto out_release_inode; + + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); + + *ipp = ip; + return 0; + + out_trans_abort: + cancel_flags |= XFS_TRANS_ABORT; + out_trans_cancel: + xfs_trans_cancel(tp, cancel_flags); + out_release_inode: + /* + * Wait until after the current transaction is aborted to + * release the inode. This prevents recursive transactions + * and deadlocks from xfs_inactive. + */ + if (ip) + IRELE(ip); + + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); + + return error; +} + +int xfs_link( xfs_inode_t *tdp, xfs_inode_t *sip, @@ -1356,7 +1382,7 @@ xfs_link( ASSERT(!S_ISDIR(sip->i_d.di_mode)); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; error = xfs_qm_dqattach(sip, 0); if (error) @@ -1370,7 +1396,7 @@ xfs_link( cancel_flags = XFS_TRANS_RELEASE_LOG_RES; resblks = XFS_LINK_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { resblks = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); } @@ -1391,7 +1417,7 @@ xfs_link( */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { - error = XFS_ERROR(EXDEV); + error = -EXDEV; goto error_return; } @@ -1401,6 +1427,12 @@ xfs_link( xfs_bmap_init(&free_list, &first_block); + if (sip->i_d.di_nlink == 0) { + error = xfs_iunlink_remove(tp, sip); + if (error) + goto abort_return; + } + error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, &first_block, &free_list, resblks); if (error) @@ -1591,16 +1623,6 @@ xfs_release( int truncated; /* - * If we are using filestreams, and we have an unlinked - * file that we are processing the last close on, then nothing - * will be able to reopen and write to this file. Purge this - * inode from the filestreams cache so that it doesn't delay - * teardown of the inode. - */ - if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) - xfs_filestream_deassociate(ip); - - /* * If we previously truncated this file and removed old data * in the process, we want to initiate "early" writeout on * the last close. This is an attempt to combat the notorious @@ -1613,8 +1635,8 @@ xfs_release( truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); if (truncated) { xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); - if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) { - error = -filemap_flush(VFS_I(ip)->i_mapping); + if (ip->i_delayed_blks > 0) { + error = filemap_flush(VFS_I(ip)->i_mapping); if (error) return error; } @@ -1651,7 +1673,7 @@ xfs_release( return 0; error = xfs_free_eofblocks(mp, ip, true); - if (error && error != EAGAIN) + if (error && error != -EAGAIN) return error; /* delalloc blocks after truncation means it really is dirty */ @@ -1730,9 +1752,33 @@ xfs_inactive_ifree( int error; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); + + /* + * The ifree transaction might need to allocate blocks for record + * insertion to the finobt. We don't want to fail here at ENOSPC, so + * allow ifree to dip into the reserved block pool if necessary. + * + * Freeing large sets of inodes generally means freeing inode chunks, + * directory and file data blocks, so this should be relatively safe. + * Only under severe circumstances should it be possible to free enough + * inodes to exhaust the reserve block pool via finobt expansion while + * at the same time not creating free space in the filesystem. + * + * Send a warning if the reservation does happen to fail, as the inode + * now remains allocated and sits on the unlinked list until the fs is + * repaired. + */ + tp->t_flags |= XFS_TRANS_RESERVE; + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, + XFS_IFREE_SPACE_RES(mp), 0); if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + if (error == -ENOSPC) { + xfs_warn_ratelimited(mp, + "Failed to remove inode(s) from unlinked list. " + "Please free space, unmount and run xfs_repair."); + } else { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); return error; } @@ -2141,8 +2187,8 @@ xfs_ifree_cluster( { xfs_mount_t *mp = free_ip->i_mount; int blks_per_cluster; + int inodes_per_cluster; int nbufs; - int ninodes; int i, j; xfs_daddr_t blkno; xfs_buf_t *bp; @@ -2152,18 +2198,11 @@ xfs_ifree_cluster( struct xfs_perag *pag; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); - if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { - blks_per_cluster = 1; - ninodes = mp->m_sb.sb_inopblock; - nbufs = XFS_IALLOC_BLOCKS(mp); - } else { - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / - mp->m_sb.sb_blocksize; - ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; - nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; - } + blks_per_cluster = xfs_icluster_size_fsb(mp); + inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; + nbufs = mp->m_ialloc_blks / blks_per_cluster; - for (j = 0; j < nbufs; j++, inum += ninodes) { + for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) { blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), XFS_INO_TO_AGBNO(mp, inum)); @@ -2180,7 +2219,7 @@ xfs_ifree_cluster( XBF_UNMAPPED); if (!bp) - return ENOMEM; + return -ENOMEM; /* * This buffer may not have been correctly initialised as we @@ -2225,7 +2264,7 @@ xfs_ifree_cluster( * transaction stale above, which means there is no point in * even trying to lock them. */ - for (i = 0; i < ninodes; i++) { + for (i = 0; i < inodes_per_cluster; i++) { retry: rcu_read_lock(); ip = radix_tree_lookup(&pag->pag_ici_root, @@ -2452,7 +2491,7 @@ xfs_remove( trace_xfs_remove(dp, name); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; error = xfs_qm_dqattach(dp, 0); if (error) @@ -2482,12 +2521,12 @@ xfs_remove( */ resblks = XFS_REMOVE_SPACE_RES(mp); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { resblks = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0); } if (error) { - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); cancel_flags = 0; goto out_trans_cancel; } @@ -2504,11 +2543,11 @@ xfs_remove( if (is_dir) { ASSERT(ip->i_d.di_nlink >= 2); if (ip->i_d.di_nlink != 2) { - error = XFS_ERROR(ENOTEMPTY); + error = -ENOTEMPTY; goto out_trans_cancel; } if (!xfs_dir_isempty(ip)) { - error = XFS_ERROR(ENOTEMPTY); + error = -ENOTEMPTY; goto out_trans_cancel; } @@ -2543,7 +2582,7 @@ xfs_remove( error = xfs_dir_removename(tp, dp, name, ip->i_ino, &first_block, &free_list, resblks); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); goto out_bmap_cancel; } @@ -2563,13 +2602,7 @@ xfs_remove( if (error) goto std_return; - /* - * If we are using filestreams, kill the stream association. - * If the file is still open it may get a new one but that - * will get killed on last close in xfs_close() so we don't - * have to worry about that. - */ - if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) + if (is_dir && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); return 0; @@ -2669,7 +2702,7 @@ xfs_rename( cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); } @@ -2714,7 +2747,7 @@ xfs_rename( */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { - error = XFS_ERROR(EXDEV); + error = -EXDEV; goto error_return; } @@ -2737,7 +2770,7 @@ xfs_rename( error = xfs_dir_createname(tp, target_dp, target_name, src_ip->i_ino, &first_block, &free_list, spaceres); - if (error == ENOSPC) + if (error == -ENOSPC) goto error_return; if (error) goto abort_return; @@ -2762,7 +2795,7 @@ xfs_rename( */ if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { - error = XFS_ERROR(EEXIST); + error = -EEXIST; goto error_return; } } @@ -2814,7 +2847,7 @@ xfs_rename( error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, target_dp->i_ino, &first_block, &free_list, spaceres); - ASSERT(error != EEXIST); + ASSERT(error != -EEXIST); if (error) goto abort_return; } @@ -2906,13 +2939,13 @@ xfs_iflush_cluster( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; + inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) goto out_put; - mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); + mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; rcu_read_lock(); /* really need a gang lookup range call here */ @@ -3022,7 +3055,7 @@ cluster_corrupt_out: if (bp->b_iodone) { XFS_BUF_UNDONE(bp); xfs_buf_stale(bp); - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); xfs_buf_ioend(bp, 0); } else { xfs_buf_stale(bp); @@ -3036,7 +3069,7 @@ cluster_corrupt_out: xfs_iflush_abort(iq, false); kmem_free(ilist); xfs_perag_put(pag); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } /* @@ -3091,7 +3124,7 @@ xfs_iflush( * as we wait for an empty AIL as part of the unmount process. */ if (XFS_FORCED_SHUTDOWN(mp)) { - error = XFS_ERROR(EIO); + error = -EIO; goto abort_out; } @@ -3134,7 +3167,7 @@ corrupt_out: xfs_buf_relse(bp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); cluster_corrupt_out: - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; abort_out: /* * Unlocks the flush lock @@ -3157,6 +3190,7 @@ xfs_iflush_int( ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); ASSERT(iip != NULL && iip->ili_fields != 0); + ASSERT(ip->i_d.di_version > 1); /* set *dip = inode's place in the buffer */ dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -3217,7 +3251,7 @@ xfs_iflush_int( } /* - * Inode item log recovery for v1/v2 inodes are dependent on the + * Inode item log recovery for v2 inodes are dependent on the * di_flushiter count for correct sequencing. We bump the flush * iteration count so we can detect flushes which postdate a log record * during recovery. This is redundant as we now log every change and @@ -3240,40 +3274,9 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - /* - * If this is really an old format inode and the superblock version - * has not been updated to support only new format inodes, then - * convert back to the old inode format. If the superblock version - * has been updated, then make the conversion permanent. - */ - ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == 1) { - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - /* - * Convert it back. - */ - ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); - } else { - /* - * The superblock version has already been bumped, - * so just make the conversion to the new inode - * format permanent. - */ - ip->i_d.di_version = 2; - dip->di_version = 2; - ip->i_d.di_onlink = 0; - dip->di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - memset(&(dip->di_pad[0]), 0, - sizeof(dip->di_pad)); - ASSERT(xfs_get_projid(ip) == 0); - } - } - - xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); + xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); if (XFS_IFORK_Q(ip)) - xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); + xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); xfs_inobp_check(mp, bp); /* @@ -3328,5 +3331,5 @@ xfs_iflush_int( return 0; corrupt_out: - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 9e6efccbae04..c10e3fadd9af 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -20,6 +20,7 @@ #include "xfs_inode_buf.h" #include "xfs_inode_fork.h" +#include "xfs_dinode.h" /* * Kernel only inode definitions @@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip, ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); } +static inline prid_t +xfs_get_initial_prid(struct xfs_inode *dp) +{ + if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + return xfs_get_projid(dp); + + return XFS_PROJID_DEFAULT; +} + /* * In-core inode flags. */ @@ -199,7 +209,6 @@ xfs_set_projid(struct xfs_inode *ip, #define XFS_ISTALE (1 << 1) /* inode has been staled */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ #define XFS_INEW (1 << 3) /* inode has just been allocated */ -#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */ #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -215,8 +224,7 @@ xfs_set_projid(struct xfs_inode *ip, */ #define XFS_IRECLAIM_RESET_FLAGS \ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ - XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ - XFS_IFILESTREAM); + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED) /* * Synchronize processes attempting to flush the in-core inode back to disk. @@ -323,6 +331,8 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); +int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, + umode_t mode, struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, @@ -337,8 +347,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint); void xfs_iunlock(xfs_inode_t *, uint); void xfs_ilock_demote(xfs_inode_t *, uint); int xfs_isilocked(xfs_inode_t *, uint); -uint xfs_ilock_map_shared(xfs_inode_t *); -void xfs_iunlock_map_shared(xfs_inode_t *, uint); +uint xfs_ilock_data_map_shared(struct xfs_inode *); +uint xfs_ilock_attr_map_shared(struct xfs_inode *); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, xfs_nlink_t, xfs_dev_t, prid_t, int, struct xfs_buf **, xfs_inode_t **); @@ -367,7 +377,6 @@ int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, struct xfs_inode **, int *); int xfs_droplink(struct xfs_trans *, struct xfs_inode *); int xfs_bumplink(struct xfs_trans *, struct xfs_inode *); -void xfs_bump_ino_vers2(struct xfs_trans *, struct xfs_inode *); /* from xfs_file.c */ int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); @@ -389,4 +398,14 @@ do { \ extern struct kmem_zone *xfs_inode_zone; +/* + * Flags for read/write calls + */ +#define XFS_IO_ISDIRECT 0x00001 /* bypass page cache */ +#define XFS_IO_INVIS 0x00002 /* don't update inode timestamps */ + +#define XFS_IO_FLAGS \ + { XFS_IO_ISDIRECT, "DIRECT" }, \ + { XFS_IO_INVIS, "INVIS"} + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7c0d391f9a6e..de5a7be36e60 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -30,6 +30,7 @@ #include "xfs_trace.h" #include "xfs_trans_priv.h" #include "xfs_dinode.h" +#include "xfs_log.h" kmem_zone_t *xfs_ili_zone; /* inode log item zone */ @@ -39,27 +40,14 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_inode_log_item, ili_item); } - -/* - * This returns the number of iovecs needed to log the given inode item. - * - * We need one iovec for the inode log format structure, one for the - * inode core, and possibly one for the inode data/extents/b-tree root - * and one for the inode attribute data/extents/b-tree root. - */ STATIC void -xfs_inode_item_size( - struct xfs_log_item *lip, +xfs_inode_item_data_fork_size( + struct xfs_inode_log_item *iip, int *nvecs, int *nbytes) { - struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - *nvecs += 2; - *nbytes += sizeof(struct xfs_inode_log_format) + - xfs_icdinode_size(ip->i_d.di_version); - switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: if ((iip->ili_fields & XFS_ILOG_DEXT) && @@ -70,7 +58,6 @@ xfs_inode_item_size( *nvecs += 1; } break; - case XFS_DINODE_FMT_BTREE: if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { @@ -78,7 +65,6 @@ xfs_inode_item_size( *nvecs += 1; } break; - case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { @@ -90,19 +76,20 @@ xfs_inode_item_size( case XFS_DINODE_FMT_DEV: case XFS_DINODE_FMT_UUID: break; - default: ASSERT(0); break; } +} - if (!XFS_IFORK_Q(ip)) - return; - +STATIC void +xfs_inode_item_attr_fork_size( + struct xfs_inode_log_item *iip, + int *nvecs, + int *nbytes) +{ + struct xfs_inode *ip = iip->ili_inode; - /* - * Log any necessary attribute data. - */ switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: if ((iip->ili_fields & XFS_ILOG_AEXT) && @@ -113,7 +100,6 @@ xfs_inode_item_size( *nvecs += 1; } break; - case XFS_DINODE_FMT_BTREE: if ((iip->ili_fields & XFS_ILOG_ABROOT) && ip->i_afp->if_broot_bytes > 0) { @@ -121,7 +107,6 @@ xfs_inode_item_size( *nvecs += 1; } break; - case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_afp->if_bytes > 0) { @@ -129,7 +114,6 @@ xfs_inode_item_size( *nvecs += 1; } break; - default: ASSERT(0); break; @@ -137,98 +121,39 @@ xfs_inode_item_size( } /* - * xfs_inode_item_format_extents - convert in-core extents to on-disk form - * - * For either the data or attr fork in extent format, we need to endian convert - * the in-core extent as we place them into the on-disk inode. In this case, we - * need to do this conversion before we write the extents into the log. Because - * we don't have the disk inode to write into here, we allocate a buffer and - * format the extents into it via xfs_iextents_copy(). We free the buffer in - * the unlock routine after the copy for the log has been made. + * This returns the number of iovecs needed to log the given inode item. * - * In the case of the data fork, the in-core and on-disk fork sizes can be - * different due to delayed allocation extents. We only log on-disk extents - * here, so always use the physical fork size to determine the size of the - * buffer we need to allocate. + * We need one iovec for the inode log format structure, one for the + * inode core, and possibly one for the inode data/extents/b-tree root + * and one for the inode attribute data/extents/b-tree root. */ STATIC void -xfs_inode_item_format_extents( - struct xfs_inode *ip, - struct xfs_log_iovec *vecp, - int whichfork, - int type) +xfs_inode_item_size( + struct xfs_log_item *lip, + int *nvecs, + int *nbytes) { - xfs_bmbt_rec_t *ext_buffer; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; - ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); - if (whichfork == XFS_DATA_FORK) - ip->i_itemp->ili_extents_buf = ext_buffer; - else - ip->i_itemp->ili_aextents_buf = ext_buffer; + *nvecs += 2; + *nbytes += sizeof(struct xfs_inode_log_format) + + xfs_icdinode_size(ip->i_d.di_version); - vecp->i_addr = ext_buffer; - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); - vecp->i_type = type; + xfs_inode_item_data_fork_size(iip, nvecs, nbytes); + if (XFS_IFORK_Q(ip)) + xfs_inode_item_attr_fork_size(iip, nvecs, nbytes); } -/* - * This is called to fill in the vector of log iovecs for the - * given inode log item. It fills the first item with an inode - * log format structure, the second with the on-disk inode structure, - * and a possible third and/or fourth with the inode data/extents/b-tree - * root and inode attributes data/extents/b-tree root. - */ STATIC void -xfs_inode_item_format( - struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) +xfs_inode_item_format_data_fork( + struct xfs_inode_log_item *iip, + struct xfs_inode_log_format *ilf, + struct xfs_log_vec *lv, + struct xfs_log_iovec **vecp) { - struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - uint nvecs; size_t data_bytes; - xfs_mount_t *mp; - - vecp->i_addr = &iip->ili_format; - vecp->i_len = sizeof(xfs_inode_log_format_t); - vecp->i_type = XLOG_REG_TYPE_IFORMAT; - vecp++; - nvecs = 1; - - vecp->i_addr = &ip->i_d; - vecp->i_len = xfs_icdinode_size(ip->i_d.di_version); - vecp->i_type = XLOG_REG_TYPE_ICORE; - vecp++; - nvecs++; - - /* - * If this is really an old format inode, then we need to - * log it as such. This means that we have to copy the link - * count from the new field to the old. We don't have to worry - * about the new fields, because nothing trusts them as long as - * the old inode version number is there. If the superblock already - * has a new version number, then we don't bother converting back. - */ - mp = ip->i_mount; - ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == 1) { - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - /* - * Convert it back. - */ - ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - ip->i_d.di_onlink = ip->i_d.di_nlink; - } else { - /* - * The superblock version has already been bumped, - * so just make the conversion to the new inode - * format permanent. - */ - ip->i_d.di_version = 2; - ip->i_d.di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - } - } switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: @@ -239,36 +164,23 @@ xfs_inode_item_format( if ((iip->ili_fields & XFS_ILOG_DEXT) && ip->i_d.di_nextents > 0 && ip->i_df.if_bytes > 0) { + struct xfs_bmbt_rec *p; + ASSERT(ip->i_df.if_u1.if_extents != NULL); ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); - ASSERT(iip->ili_extents_buf == NULL); - -#ifdef XFS_NATIVE_HOST - if (ip->i_d.di_nextents == ip->i_df.if_bytes / - (uint)sizeof(xfs_bmbt_rec_t)) { - /* - * There are no delayed allocation - * extents, so just point to the - * real extents array. - */ - vecp->i_addr = ip->i_df.if_u1.if_extents; - vecp->i_len = ip->i_df.if_bytes; - vecp->i_type = XLOG_REG_TYPE_IEXT; - } else -#endif - { - xfs_inode_item_format_extents(ip, vecp, - XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); - } - ASSERT(vecp->i_len <= ip->i_df.if_bytes); - iip->ili_format.ilf_dsize = vecp->i_len; - vecp++; - nvecs++; + + p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); + data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); + xlog_finish_iovec(lv, *vecp, data_bytes); + + ASSERT(data_bytes <= ip->i_df.if_bytes); + + ilf->ilf_dsize = data_bytes; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_DEXT; } break; - case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | @@ -277,80 +189,70 @@ xfs_inode_item_format( if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { ASSERT(ip->i_df.if_broot != NULL); - vecp->i_addr = ip->i_df.if_broot; - vecp->i_len = ip->i_df.if_broot_bytes; - vecp->i_type = XLOG_REG_TYPE_IBROOT; - vecp++; - nvecs++; - iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT, + ip->i_df.if_broot, + ip->i_df.if_broot_bytes); + ilf->ilf_dsize = ip->i_df.if_broot_bytes; + ilf->ilf_size++; } else { ASSERT(!(iip->ili_fields & XFS_ILOG_DBROOT)); iip->ili_fields &= ~XFS_ILOG_DBROOT; } break; - case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { - ASSERT(ip->i_df.if_u1.if_data != NULL); - ASSERT(ip->i_d.di_size > 0); - - vecp->i_addr = ip->i_df.if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_df.if_bytes, 4); - ASSERT((ip->i_df.if_real_bytes == 0) || - (ip->i_df.if_real_bytes == data_bytes)); - vecp->i_len = (int)data_bytes; - vecp->i_type = XLOG_REG_TYPE_ILOCAL; - vecp++; - nvecs++; - iip->ili_format.ilf_dsize = (unsigned)data_bytes; + ASSERT(ip->i_df.if_real_bytes == 0 || + ip->i_df.if_real_bytes == data_bytes); + ASSERT(ip->i_df.if_u1.if_data != NULL); + ASSERT(ip->i_d.di_size > 0); + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, + ip->i_df.if_u1.if_data, data_bytes); + ilf->ilf_dsize = (unsigned)data_bytes; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_DDATA; } break; - case XFS_DINODE_FMT_DEV: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_UUID); - if (iip->ili_fields & XFS_ILOG_DEV) { - iip->ili_format.ilf_u.ilfu_rdev = - ip->i_df.if_u2.if_rdev; - } + if (iip->ili_fields & XFS_ILOG_DEV) + ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; break; - case XFS_DINODE_FMT_UUID: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DEV); - if (iip->ili_fields & XFS_ILOG_UUID) { - iip->ili_format.ilf_u.ilfu_uuid = - ip->i_df.if_u2.if_uuid; - } + if (iip->ili_fields & XFS_ILOG_UUID) + ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; break; - default: ASSERT(0); break; } +} - /* - * If there are no attributes associated with the file, then we're done. - */ - if (!XFS_IFORK_Q(ip)) { - iip->ili_fields &= - ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); - goto out; - } +STATIC void +xfs_inode_item_format_attr_fork( + struct xfs_inode_log_item *iip, + struct xfs_inode_log_format *ilf, + struct xfs_log_vec *lv, + struct xfs_log_iovec **vecp) +{ + struct xfs_inode *ip = iip->ili_inode; + size_t data_bytes; switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: @@ -360,30 +262,22 @@ xfs_inode_item_format( if ((iip->ili_fields & XFS_ILOG_AEXT) && ip->i_d.di_anextents > 0 && ip->i_afp->if_bytes > 0) { + struct xfs_bmbt_rec *p; + ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == ip->i_d.di_anextents); ASSERT(ip->i_afp->if_u1.if_extents != NULL); -#ifdef XFS_NATIVE_HOST - /* - * There are not delayed allocation extents - * for attributes, so just point at the array. - */ - vecp->i_addr = ip->i_afp->if_u1.if_extents; - vecp->i_len = ip->i_afp->if_bytes; - vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; -#else - ASSERT(iip->ili_aextents_buf == NULL); - xfs_inode_item_format_extents(ip, vecp, - XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); -#endif - iip->ili_format.ilf_asize = vecp->i_len; - vecp++; - nvecs++; + + p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); + data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); + xlog_finish_iovec(lv, *vecp, data_bytes); + + ilf->ilf_asize = data_bytes; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_AEXT; } break; - case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); @@ -392,61 +286,89 @@ xfs_inode_item_format( ip->i_afp->if_broot_bytes > 0) { ASSERT(ip->i_afp->if_broot != NULL); - vecp->i_addr = ip->i_afp->if_broot; - vecp->i_len = ip->i_afp->if_broot_bytes; - vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; - vecp++; - nvecs++; - iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT, + ip->i_afp->if_broot, + ip->i_afp->if_broot_bytes); + ilf->ilf_asize = ip->i_afp->if_broot_bytes; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_ABROOT; } break; - case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_afp->if_bytes > 0) { - ASSERT(ip->i_afp->if_u1.if_data != NULL); - - vecp->i_addr = ip->i_afp->if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); - ASSERT((ip->i_afp->if_real_bytes == 0) || - (ip->i_afp->if_real_bytes == data_bytes)); - vecp->i_len = (int)data_bytes; - vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; - vecp++; - nvecs++; - iip->ili_format.ilf_asize = (unsigned)data_bytes; + ASSERT(ip->i_afp->if_real_bytes == 0 || + ip->i_afp->if_real_bytes == data_bytes); + ASSERT(ip->i_afp->if_u1.if_data != NULL); + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, + ip->i_afp->if_u1.if_data, + data_bytes); + ilf->ilf_asize = (unsigned)data_bytes; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_ADATA; } break; - default: ASSERT(0); break; } - -out: - /* - * Now update the log format that goes out to disk from the in-core - * values. We always write the inode core to make the arithmetic - * games in recovery easier, which isn't a big deal as just about any - * transaction would dirty it anyway. - */ - iip->ili_format.ilf_fields = XFS_ILOG_CORE | - (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); - iip->ili_format.ilf_size = nvecs; } +/* + * This is called to fill in the vector of log iovecs for the given inode + * log item. It fills the first item with an inode log format structure, + * the second with the on-disk inode structure, and a possible third and/or + * fourth with the inode data/extents/b-tree root and inode attributes + * data/extents/b-tree root. + */ +STATIC void +xfs_inode_item_format( + struct xfs_log_item *lip, + struct xfs_log_vec *lv) +{ + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; + struct xfs_inode_log_format *ilf; + struct xfs_log_iovec *vecp = NULL; + + ASSERT(ip->i_d.di_version > 1); + + ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); + ilf->ilf_type = XFS_LI_INODE; + ilf->ilf_ino = ip->i_ino; + ilf->ilf_blkno = ip->i_imap.im_blkno; + ilf->ilf_len = ip->i_imap.im_len; + ilf->ilf_boffset = ip->i_imap.im_boffset; + ilf->ilf_fields = XFS_ILOG_CORE; + ilf->ilf_size = 2; /* format + core */ + xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); + + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE, + &ip->i_d, + xfs_icdinode_size(ip->i_d.di_version)); + + xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); + if (XFS_IFORK_Q(ip)) { + xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp); + } else { + iip->ili_fields &= + ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); + } + + /* update the format with the exact fields we actually logged */ + ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); +} /* * This is called to pin the inode associated with the inode log @@ -563,27 +485,6 @@ xfs_inode_item_unlock( ASSERT(ip->i_itemp != NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - /* - * If the inode needed a separate buffer with which to log - * its extents, then free it now. - */ - if (iip->ili_extents_buf != NULL) { - ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); - ASSERT(ip->i_d.di_nextents > 0); - ASSERT(iip->ili_fields & XFS_ILOG_DEXT); - ASSERT(ip->i_df.if_bytes > 0); - kmem_free(iip->ili_extents_buf); - iip->ili_extents_buf = NULL; - } - if (iip->ili_aextents_buf != NULL) { - ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); - ASSERT(ip->i_d.di_anextents > 0); - ASSERT(iip->ili_fields & XFS_ILOG_AEXT); - ASSERT(ip->i_afp->if_bytes > 0); - kmem_free(iip->ili_aextents_buf); - iip->ili_aextents_buf = NULL; - } - lock_flags = iip->ili_lock_flags; iip->ili_lock_flags = 0; if (lock_flags) @@ -670,11 +571,6 @@ xfs_inode_item_init( iip->ili_inode = ip; xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, &xfs_inode_item_ops); - iip->ili_format.ilf_type = XFS_LI_INODE; - iip->ili_format.ilf_ino = ip->i_ino; - iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; - iip->ili_format.ilf_len = ip->i_imap.im_len; - iip->ili_format.ilf_boffset = ip->i_imap.im_boffset; } /* @@ -892,5 +788,5 @@ xfs_inode_item_format_convert( in_f->ilf_boffset = in_f64->ilf_boffset; return 0; } - return EFSCORRUPTED; + return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index dce4d656768c..488d81254e28 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -34,11 +34,6 @@ typedef struct xfs_inode_log_item { unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ unsigned int ili_fields; /* fields to be logged */ - struct xfs_bmbt_rec *ili_extents_buf; /* array of logged - data exts */ - struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged - attr exts */ - xfs_inode_log_format_t ili_format; /* logged structure */ } xfs_inode_log_item_t; static inline int xfs_inode_clean(xfs_inode_t *ip) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 33ad9a77791f..3799695b9249 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -112,15 +112,11 @@ xfs_find_handle( memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); hsize = sizeof(xfs_fsid_t); } else { - int lock_mode; - - lock_mode = xfs_ilock_map_shared(ip); handle.ha_fid.fid_len = sizeof(xfs_fid_t) - sizeof(handle.ha_fid.fid_len); handle.ha_fid.fid_pad = 0; handle.ha_fid.fid_gen = ip->i_d.di_gen; handle.ha_fid.fid_ino = ip->i_ino; - xfs_iunlock_map_shared(ip, lock_mode); hsize = XFS_HSIZE(handle); } @@ -211,7 +207,7 @@ xfs_open_by_handle( struct path path; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; dentry = xfs_handlereq_to_dentry(parfilp, hreq); if (IS_ERR(dentry)) @@ -220,7 +216,7 @@ xfs_open_by_handle( /* Restrict xfs_open_by_handle to directories & regular files. */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { - error = -XFS_ERROR(EPERM); + error = -EPERM; goto out_dput; } @@ -232,18 +228,18 @@ xfs_open_by_handle( fmode = OPEN_FMODE(permflag); if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && (fmode & FMODE_WRITE) && IS_APPEND(inode)) { - error = -XFS_ERROR(EPERM); + error = -EPERM; goto out_dput; } if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { - error = -XFS_ERROR(EACCES); + error = -EACCES; goto out_dput; } /* Can't write directories. */ if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { - error = -XFS_ERROR(EISDIR); + error = -EISDIR; goto out_dput; } @@ -275,32 +271,6 @@ xfs_open_by_handle( return error; } -/* - * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's - * unused first argument. - */ -STATIC int -do_readlink( - char __user *buffer, - int buflen, - const char *link) -{ - int len; - - len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; - - len = strlen(link); - if (len > (unsigned) buflen) - len = buflen; - if (copy_to_user(buffer, link, len)) - len = -EFAULT; - out: - return len; -} - - int xfs_readlink_by_handle( struct file *parfilp, @@ -312,7 +282,7 @@ xfs_readlink_by_handle( int error; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; dentry = xfs_handlereq_to_dentry(parfilp, hreq); if (IS_ERR(dentry)) @@ -320,25 +290,25 @@ xfs_readlink_by_handle( /* Restrict this handle operation to symlinks only. */ if (!S_ISLNK(dentry->d_inode->i_mode)) { - error = -XFS_ERROR(EINVAL); + error = -EINVAL; goto out_dput; } if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { - error = -XFS_ERROR(EFAULT); + error = -EFAULT; goto out_dput; } link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); if (!link) { - error = -XFS_ERROR(ENOMEM); + error = -ENOMEM; goto out_dput; } - error = -xfs_readlink(XFS_I(dentry->d_inode), link); + error = xfs_readlink(XFS_I(dentry->d_inode), link); if (error) goto out_kfree; - error = do_readlink(hreq->ohandle, olen, link); + error = readlink_copy(hreq->ohandle, olen, link); if (error) goto out_kfree; @@ -360,10 +330,10 @@ xfs_set_dmattrs( int error; if (!capable(CAP_SYS_ADMIN)) - return XFS_ERROR(EPERM); + return -EPERM; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); @@ -394,9 +364,9 @@ xfs_fssetdm_by_handle( struct dentry *dentry; if (!capable(CAP_MKNOD)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(parfilp); if (error) @@ -409,16 +379,16 @@ xfs_fssetdm_by_handle( } if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { - error = -XFS_ERROR(EPERM); + error = -EPERM; goto out; } if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { - error = -XFS_ERROR(EFAULT); + error = -EFAULT; goto out; } - error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, + error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, fsd.fsd_dmstate); out: @@ -439,18 +409,18 @@ xfs_attrlist_by_handle( char *kbuf; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || al_hreq.buflen > XATTR_LIST_MAX) - return -XFS_ERROR(EINVAL); + return -EINVAL; /* * Reject flags, only allow namespaces. */ if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -XFS_ERROR(EINVAL); + return -EINVAL; dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); if (IS_ERR(dentry)) @@ -461,7 +431,7 @@ xfs_attrlist_by_handle( goto out_dput; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, + error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, al_hreq.flags, cursor); if (error) goto out_kfree; @@ -485,20 +455,20 @@ xfs_attrmulti_attr_get( __uint32_t flags) { unsigned char *kbuf; - int error = EFAULT; + int error = -EFAULT; if (*len > XATTR_SIZE_MAX) - return EINVAL; + return -EINVAL; kbuf = kmem_zalloc_large(*len, KM_SLEEP); if (!kbuf) - return ENOMEM; + return -ENOMEM; error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); if (error) goto out_kfree; if (copy_to_user(ubuf, kbuf, *len)) - error = EFAULT; + error = -EFAULT; out_kfree: kmem_free(kbuf); @@ -514,20 +484,17 @@ xfs_attrmulti_attr_set( __uint32_t flags) { unsigned char *kbuf; - int error = EFAULT; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return EPERM; + return -EPERM; if (len > XATTR_SIZE_MAX) - return EINVAL; + return -EINVAL; kbuf = memdup_user(ubuf, len); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); - - return error; + return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); } int @@ -537,7 +504,7 @@ xfs_attrmulti_attr_remove( __uint32_t flags) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return EPERM; + return -EPERM; return xfs_attr_remove(XFS_I(inode), name, flags); } @@ -554,9 +521,9 @@ xfs_attrmulti_by_handle( unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; /* overflow check */ if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) @@ -566,7 +533,7 @@ xfs_attrmulti_by_handle( if (IS_ERR(dentry)) return PTR_ERR(dentry); - error = E2BIG; + error = -E2BIG; size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); if (!size || size > 16 * PAGE_SIZE) goto out_dput; @@ -577,6 +544,7 @@ xfs_attrmulti_by_handle( goto out_dput; } + error = -ENOMEM; attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); if (!attr_name) goto out_kfree_ops; @@ -617,19 +585,19 @@ xfs_attrmulti_by_handle( mnt_drop_write_file(parfilp); break; default: - ops[i].am_error = EINVAL; + ops[i].am_error = -EINVAL; } } if (copy_to_user(am_hreq.ops, ops, size)) - error = XFS_ERROR(EFAULT); + error = -EFAULT; kfree(attr_name); out_kfree_ops: kfree(ops); out_dput: dput(dentry); - return -error; + return error; } int @@ -654,16 +622,16 @@ xfs_ioc_space( */ if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && !capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) - return -XFS_ERROR(EPERM); + return -EPERM; if (!(filp->f_mode & FMODE_WRITE)) - return -XFS_ERROR(EBADF); + return -EBADF; if (!S_ISREG(inode->i_mode)) - return -XFS_ERROR(EINVAL); + return -EINVAL; error = mnt_want_write_file(filp); if (error) @@ -681,7 +649,7 @@ xfs_ioc_space( bf->l_start += XFS_ISIZE(ip); break; default: - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } @@ -698,7 +666,7 @@ xfs_ioc_space( case XFS_IOC_UNRESVSP: case XFS_IOC_UNRESVSP64: if (bf->l_len <= 0) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } break; @@ -711,7 +679,7 @@ xfs_ioc_space( bf->l_start > mp->m_super->s_maxbytes || bf->l_start + bf->l_len < 0 || bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } @@ -752,7 +720,7 @@ xfs_ioc_space( break; default: ASSERT(0); - error = XFS_ERROR(EINVAL); + error = -EINVAL; } if (error) @@ -768,7 +736,7 @@ xfs_ioc_space( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - if (!(ioflags & IO_INVIS)) { + if (!(ioflags & XFS_IO_INVIS)) { ip->i_d.di_mode &= ~S_ISUID; if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; @@ -788,7 +756,7 @@ xfs_ioc_space( out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); mnt_drop_write_file(filp); - return -error; + return error; } STATIC int @@ -810,41 +778,41 @@ xfs_ioc_bulkstat( return -EPERM; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if ((count = bulkreq.icount) <= 0) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (bulkreq.ubuffer == NULL) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (cmd == XFS_IOC_FSINUMBERS) error = xfs_inumbers(mp, &inlast, &count, bulkreq.ubuffer, xfs_inumbers_fmt); else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) - error = xfs_bulkstat_single(mp, &inlast, - bulkreq.ubuffer, &done); + error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, + sizeof(xfs_bstat_t), NULL, &done); else /* XFS_IOC_FSBULKSTAT */ error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, sizeof(xfs_bstat_t), bulkreq.ubuffer, &done); if (error) - return -error; + return error; if (bulkreq.ocount != NULL) { if (copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -XFS_ERROR(EFAULT); + return -EFAULT; } return 0; @@ -860,7 +828,7 @@ xfs_ioc_fsgeometry_v1( error = xfs_fs_geometry(mp, &fsgeo, 3); if (error) - return -error; + return error; /* * Caller should have passed an argument of type @@ -868,7 +836,7 @@ xfs_ioc_fsgeometry_v1( * xfs_fsop_geom_t that xfs_fs_geometry() fills in. */ if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -882,10 +850,10 @@ xfs_ioc_fsgeometry( error = xfs_fs_geometry(mp, &fsgeo, 4); if (error) - return -error; + return error; if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1070,16 +1038,16 @@ xfs_ioctl_setattr( trace_xfs_ioctl_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + return -EROFS; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; /* * Disallow 32bit project ids when projid32bit feature is not enabled. */ if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * If disk quotas is on, we make sure that the dquots do exist on disk, @@ -1117,7 +1085,7 @@ xfs_ioctl_setattr( * CAP_FSETID capability is applicable. */ if (!inode_owner_or_capable(VFS_I(ip))) { - code = XFS_ERROR(EPERM); + code = -EPERM; goto error_return; } @@ -1128,7 +1096,7 @@ xfs_ioctl_setattr( */ if (mask & FSX_PROJID) { if (current_user_ns() != &init_user_ns) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } @@ -1151,7 +1119,7 @@ xfs_ioctl_setattr( if (ip->i_d.di_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ + code = -EINVAL; /* EFBIG? */ goto error_return; } @@ -1170,7 +1138,7 @@ xfs_ioctl_setattr( extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); if (extsize_fsb > MAXEXTLEN) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } @@ -1182,13 +1150,13 @@ xfs_ioctl_setattr( } else { size = mp->m_sb.sb_blocksize; if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } } if (fa->fsx_extsize % size) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } } @@ -1202,7 +1170,7 @@ xfs_ioctl_setattr( if ((ip->i_d.di_nextents || ip->i_delayed_blks) && (XFS_IS_REALTIME_INODE(ip)) != (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ + code = -EINVAL; /* EFBIG? */ goto error_return; } @@ -1213,7 +1181,7 @@ xfs_ioctl_setattr( if ((mp->m_sb.sb_rblocks == 0) || (mp->m_sb.sb_rextsize == 0) || (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } } @@ -1227,7 +1195,7 @@ xfs_ioctl_setattr( (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && !capable(CAP_LINUX_IMMUTABLE)) { - code = XFS_ERROR(EPERM); + code = -EPERM; goto error_return; } } @@ -1245,7 +1213,7 @@ xfs_ioctl_setattr( * cleared upon successful return from chown() */ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !inode_capable(VFS_I(ip), CAP_FSETID)) + !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); /* @@ -1257,15 +1225,8 @@ xfs_ioctl_setattr( olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_pdquot, pdqp); } + ASSERT(ip->i_d.di_version > 1); xfs_set_projid(ip, fa->fsx_projid); - - /* - * We may have to rev the inode as well as - * the superblock version number since projids didn't - * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. - */ - if (ip->i_d.di_version == 1) - xfs_bump_ino_vers2(tp, ip); } } @@ -1337,7 +1298,7 @@ xfs_ioc_fssetxattr( return error; error = xfs_ioctl_setattr(ip, &fa, mask); mnt_drop_write_file(filp); - return -error; + return error; } STATIC int @@ -1382,7 +1343,7 @@ xfs_ioc_setxflags( return error; error = xfs_ioctl_setattr(ip, &fa, mask); mnt_drop_write_file(filp); - return -error; + return error; } STATIC int @@ -1392,7 +1353,7 @@ xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) /* copy only getbmap portion (not getbmapx) */ if (copy_to_user(base, bmv, sizeof(struct getbmap))) - return XFS_ERROR(EFAULT); + return -EFAULT; *ap += sizeof(struct getbmap); return 0; @@ -1409,23 +1370,23 @@ xfs_ioc_getbmap( int error; if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (bmx.bmv_count < 2) - return -XFS_ERROR(EINVAL); + return -EINVAL; bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); - if (ioflags & IO_INVIS) + if (ioflags & XFS_IO_INVIS) bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, (struct getbmap *)arg+1); if (error) - return -error; + return error; /* copy back header - only size of getbmap */ if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1435,7 +1396,7 @@ xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) struct getbmapx __user *base = *ap; if (copy_to_user(base, bmv, sizeof(struct getbmapx))) - return XFS_ERROR(EFAULT); + return -EFAULT; *ap += sizeof(struct getbmapx); return 0; @@ -1450,22 +1411,22 @@ xfs_ioc_getbmapx( int error; if (copy_from_user(&bmx, arg, sizeof(bmx))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (bmx.bmv_count < 2) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (bmx.bmv_iflags & (~BMV_IF_VALID)) - return -XFS_ERROR(EINVAL); + return -EINVAL; error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, (struct getbmapx *)arg+1); if (error) - return -error; + return error; /* copy back header */ if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1481,33 +1442,33 @@ xfs_ioc_swapext( /* Pull information for the target fd */ f = fdget((int)sxp->sx_fdtarget); if (!f.file) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out; } if (!(f.file->f_mode & FMODE_WRITE) || !(f.file->f_mode & FMODE_READ) || (f.file->f_flags & O_APPEND)) { - error = XFS_ERROR(EBADF); + error = -EBADF; goto out_put_file; } tmp = fdget((int)sxp->sx_fdtmp); if (!tmp.file) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_put_file; } if (!(tmp.file->f_mode & FMODE_WRITE) || !(tmp.file->f_mode & FMODE_READ) || (tmp.file->f_flags & O_APPEND)) { - error = XFS_ERROR(EBADF); + error = -EBADF; goto out_put_tmp_file; } if (IS_SWAPFILE(file_inode(f.file)) || IS_SWAPFILE(file_inode(tmp.file))) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_put_tmp_file; } @@ -1515,17 +1476,17 @@ xfs_ioc_swapext( tip = XFS_I(file_inode(tmp.file)); if (ip->i_mount != tip->i_mount) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_put_tmp_file; } if (ip->i_ino == tip->i_ino) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_put_tmp_file; } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - error = XFS_ERROR(EIO); + error = -EIO; goto out_put_tmp_file; } @@ -1559,7 +1520,7 @@ xfs_file_ioctl( int error; if (filp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; trace_xfs_file_ioctl(ip); @@ -1578,7 +1539,7 @@ xfs_file_ioctl( xfs_flock64_t bf; if (copy_from_user(&bf, arg, sizeof(bf))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); } case XFS_IOC_DIOINFO: { @@ -1587,11 +1548,11 @@ xfs_file_ioctl( XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - da.d_mem = da.d_miniosz = 1 << target->bt_sshift; + da.d_mem = da.d_miniosz = target->bt_logical_sectorsize; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1624,7 +1585,7 @@ xfs_file_ioctl( struct fsdmidata dmi; if (copy_from_user(&dmi, arg, sizeof(dmi))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) @@ -1633,7 +1594,7 @@ xfs_file_ioctl( error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, dmi.fsd_dmstate); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_GETBMAP: @@ -1649,14 +1610,14 @@ xfs_file_ioctl( xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(hreq))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_find_handle(cmd, &hreq); } case XFS_IOC_OPEN_BY_HANDLE: { xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_open_by_handle(filp, &hreq); } case XFS_IOC_FSSETDM_BY_HANDLE: @@ -1666,7 +1627,7 @@ xfs_file_ioctl( xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_readlink_by_handle(filp, &hreq); } case XFS_IOC_ATTRLIST_BY_HANDLE: @@ -1679,13 +1640,13 @@ xfs_file_ioctl( struct xfs_swapext sxp; if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_ioc_swapext(&sxp); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSCOUNTS: { @@ -1693,10 +1654,10 @@ xfs_file_ioctl( error = xfs_fs_counts(mp, &out); if (error) - return -error; + return error; if (copy_to_user(arg, &out, sizeof(out))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1708,10 +1669,10 @@ xfs_file_ioctl( return -EPERM; if (mp->m_flags & XFS_MOUNT_RDONLY) - return -XFS_ERROR(EROFS); + return -EROFS; if (copy_from_user(&inout, arg, sizeof(inout))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) @@ -1722,10 +1683,10 @@ xfs_file_ioctl( error = xfs_reserve_blocks(mp, &in, &inout); mnt_drop_write_file(filp); if (error) - return -error; + return error; if (copy_to_user(arg, &inout, sizeof(inout))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1737,10 +1698,10 @@ xfs_file_ioctl( error = xfs_reserve_blocks(mp, NULL, &out); if (error) - return -error; + return error; if (copy_to_user(arg, &out, sizeof(out))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1749,42 +1710,42 @@ xfs_file_ioctl( xfs_growfs_data_t in; if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_data(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_log(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_rt(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_GOINGDOWN: { @@ -1794,10 +1755,9 @@ xfs_file_ioctl( return -EPERM; if (get_user(in, (__uint32_t __user *)arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; - error = xfs_fs_goingdown(mp, in); - return -error; + return xfs_fs_goingdown(mp, in); } case XFS_IOC_ERROR_INJECTION: { @@ -1807,18 +1767,16 @@ xfs_file_ioctl( return -EPERM; if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); + return -EFAULT; - error = xfs_errortag_add(in.errtag, mp); - return -error; + return xfs_errortag_add(in.errtag, mp); } case XFS_IOC_ERROR_CLEARALL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = xfs_errortag_clearall(mp, 1); - return -error; + return xfs_errortag_clearall(mp, 1); case XFS_IOC_FREE_EOFBLOCKS: { struct xfs_fs_eofblocks eofb; @@ -1828,16 +1786,16 @@ xfs_file_ioctl( return -EPERM; if (mp->m_flags & XFS_MOUNT_RDONLY) - return -XFS_ERROR(EROFS); + return -EROFS; if (copy_from_user(&eofb, arg, sizeof(eofb))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = xfs_fs_eofblocks_from_user(&eofb, &keofb); if (error) - return -error; + return error; - return -xfs_icache_free_eofblocks(mp, &keofb); + return xfs_icache_free_eofblocks(mp, &keofb); } default: diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index a7992f8de9d3..a554646ff141 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -28,7 +28,6 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_vnode.h" #include "xfs_inode.h" #include "xfs_itable.h" #include "xfs_error.h" @@ -56,7 +55,7 @@ xfs_compat_flock64_copyin( get_user(bf->l_sysid, &arg32->l_sysid) || get_user(bf->l_pid, &arg32->l_pid) || copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -70,10 +69,10 @@ xfs_compat_ioc_fsgeometry_v1( error = xfs_fs_geometry(mp, &fsgeo, 3); if (error) - return -error; + return error; /* The 32-bit variant simply has some padding at the end */ if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -84,7 +83,7 @@ xfs_compat_growfs_data_copyin( { if (get_user(in->newblocks, &arg32->newblocks) || get_user(in->imaxpct, &arg32->imaxpct)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -95,14 +94,14 @@ xfs_compat_growfs_rt_copyin( { if (get_user(in->newblocks, &arg32->newblocks) || get_user(in->extsize, &arg32->extsize)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } STATIC int xfs_inumbers_fmt_compat( void __user *ubuffer, - const xfs_inogrp_t *buffer, + const struct xfs_inogrp *buffer, long count, long *written) { @@ -113,7 +112,7 @@ xfs_inumbers_fmt_compat( if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -XFS_ERROR(EFAULT); + return -EFAULT; } *written = count * sizeof(*p32); return 0; @@ -132,7 +131,7 @@ xfs_ioctl32_bstime_copyin( if (get_user(sec32, &bstime32->tv_sec) || get_user(bstime->tv_nsec, &bstime32->tv_nsec)) - return -XFS_ERROR(EFAULT); + return -EFAULT; bstime->tv_sec = sec32; return 0; } @@ -164,7 +163,7 @@ xfs_ioctl32_bstat_copyin( get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || get_user(bstat->bs_aextents, &bstat32->bs_aextents)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -180,7 +179,7 @@ xfs_bstime_store_compat( sec32 = p->tv_sec; if (put_user(sec32, &p32->tv_sec) || put_user(p->tv_nsec, &p32->tv_nsec)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -195,7 +194,7 @@ xfs_bulkstat_one_fmt_compat( compat_xfs_bstat_t __user *p32 = ubuffer; if (ubsize < sizeof(*p32)) - return XFS_ERROR(ENOMEM); + return -ENOMEM; if (put_user(buffer->bs_ino, &p32->bs_ino) || put_user(buffer->bs_mode, &p32->bs_mode) || @@ -218,7 +217,7 @@ xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) - return XFS_ERROR(EFAULT); + return -EFAULT; if (ubused) *ubused = sizeof(*p32); return 0; @@ -256,30 +255,30 @@ xfs_compat_ioc_bulkstat( /* should be called again (unused here, but used in dmapi) */ if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; if (get_user(addr, &p32->lastip)) - return -XFS_ERROR(EFAULT); + return -EFAULT; bulkreq.lastip = compat_ptr(addr); if (get_user(bulkreq.icount, &p32->icount) || get_user(addr, &p32->ubuffer)) - return -XFS_ERROR(EFAULT); + return -EFAULT; bulkreq.ubuffer = compat_ptr(addr); if (get_user(addr, &p32->ocount)) - return -XFS_ERROR(EFAULT); + return -EFAULT; bulkreq.ocount = compat_ptr(addr); if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if ((count = bulkreq.icount) <= 0) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (bulkreq.ubuffer == NULL) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (cmd == XFS_IOC_FSINUMBERS_32) { error = xfs_inumbers(mp, &inlast, &count, @@ -294,17 +293,17 @@ xfs_compat_ioc_bulkstat( xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, &done); } else - error = XFS_ERROR(EINVAL); + error = -EINVAL; if (error) - return -error; + return error; if (bulkreq.ocount != NULL) { if (copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -XFS_ERROR(EFAULT); + return -EFAULT; } return 0; @@ -318,7 +317,7 @@ xfs_compat_handlereq_copyin( compat_xfs_fsop_handlereq_t hreq32; if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; hreq->fd = hreq32.fd; hreq->path = compat_ptr(hreq32.path); @@ -352,19 +351,19 @@ xfs_compat_attrlist_by_handle( char *kbuf; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&al_hreq, arg, sizeof(compat_xfs_fsop_attrlist_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || al_hreq.buflen > XATTR_LIST_MAX) - return -XFS_ERROR(EINVAL); + return -EINVAL; /* * Reject flags, only allow namespaces. */ if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -XFS_ERROR(EINVAL); + return -EINVAL; dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); if (IS_ERR(dentry)) @@ -376,7 +375,7 @@ xfs_compat_attrlist_by_handle( goto out_dput; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, + error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, al_hreq.flags, cursor); if (error) goto out_kfree; @@ -404,10 +403,10 @@ xfs_compat_attrmulti_by_handle( unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&am_hreq, arg, sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; /* overflow check */ if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t)) @@ -417,17 +416,18 @@ xfs_compat_attrmulti_by_handle( if (IS_ERR(dentry)) return PTR_ERR(dentry); - error = E2BIG; + error = -E2BIG; size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); if (!size || size > 16 * PAGE_SIZE) goto out_dput; ops = memdup_user(compat_ptr(am_hreq.ops), size); if (IS_ERR(ops)) { - error = PTR_ERR(ops); + error = -PTR_ERR(ops); goto out_dput; } + error = -ENOMEM; attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); if (!attr_name) goto out_kfree_ops; @@ -469,19 +469,19 @@ xfs_compat_attrmulti_by_handle( mnt_drop_write_file(parfilp); break; default: - ops[i].am_error = EINVAL; + ops[i].am_error = -EINVAL; } } if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) - error = XFS_ERROR(EFAULT); + error = -EFAULT; kfree(attr_name); out_kfree_ops: kfree(ops); out_dput: dput(dentry); - return -error; + return error; } STATIC int @@ -495,26 +495,26 @@ xfs_compat_fssetdm_by_handle( struct dentry *dentry; if (!capable(CAP_MKNOD)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&dmhreq, arg, sizeof(compat_xfs_fsop_setdm_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); if (IS_ERR(dentry)) return PTR_ERR(dentry); if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { - error = -XFS_ERROR(EPERM); + error = -EPERM; goto out; } if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { - error = -XFS_ERROR(EFAULT); + error = -EFAULT; goto out; } - error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, + error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, fsd.fsd_dmstate); out: @@ -536,7 +536,7 @@ xfs_file_compat_ioctl( int error; if (filp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; trace_xfs_file_compat_ioctl(ip); @@ -587,7 +587,7 @@ xfs_file_compat_ioctl( struct xfs_flock64 bf; if (xfs_compat_flock64_copyin(&bf, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; cmd = _NATIVE_IOC(cmd, struct xfs_flock64); return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); } @@ -597,25 +597,25 @@ xfs_file_compat_ioctl( struct xfs_growfs_data in; if (xfs_compat_growfs_data_copyin(&in, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_data(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSGROWFSRT_32: { struct xfs_growfs_rt in; if (xfs_compat_growfs_rt_copyin(&in, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_rt(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } #endif /* long changes size, but xfs only copiese out 32 bits */ @@ -632,13 +632,13 @@ xfs_file_compat_ioctl( if (copy_from_user(&sxp, sxu, offsetof(struct xfs_swapext, sx_stat)) || xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_ioc_swapext(&sxp); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: @@ -650,7 +650,7 @@ xfs_file_compat_ioctl( struct xfs_fsop_handlereq hreq; if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); return xfs_find_handle(cmd, &hreq); } @@ -658,14 +658,14 @@ xfs_file_compat_ioctl( struct xfs_fsop_handlereq hreq; if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_open_by_handle(filp, &hreq); } case XFS_IOC_READLINK_BY_HANDLE_32: { struct xfs_fsop_handlereq hreq; if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_readlink_by_handle(filp, &hreq); } case XFS_IOC_ATTRLIST_BY_HANDLE_32: @@ -675,6 +675,6 @@ xfs_file_compat_ioctl( case XFS_IOC_FSSETDM_BY_HANDLE_32: return xfs_compat_fssetdm_by_handle(filp, arg); default: - return -XFS_ERROR(ENOIOCTLCMD); + return -ENOIOCTLCMD; } } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 22d1cbea283d..e9c47b6f5e5a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -110,7 +110,7 @@ xfs_alert_fsblock_zero( (unsigned long long)imap->br_startoff, (unsigned long long)imap->br_blockcount, imap->br_state); - return EFSCORRUPTED; + return -EFSCORRUPTED; } int @@ -128,7 +128,6 @@ xfs_iomap_write_direct( xfs_fsblock_t firstfsb; xfs_extlen_t extsz, temp; int nimaps; - int bmapi_flag; int quota_flag; int rt; xfs_trans_t *tp; @@ -139,7 +138,7 @@ xfs_iomap_write_direct( error = xfs_qm_dqattach(ip, 0); if (error) - return XFS_ERROR(error); + return error; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); @@ -149,7 +148,7 @@ xfs_iomap_write_direct( if ((offset + count) > XFS_ISIZE(ip)) { error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) - return XFS_ERROR(error); + return error; } else { if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) @@ -189,7 +188,7 @@ xfs_iomap_write_direct( */ if (error) { xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -200,18 +199,15 @@ xfs_iomap_write_direct( xfs_trans_ijoin(tp, ip, 0); - bmapi_flag = 0; - if (offset < XFS_ISIZE(ip) || extsz) - bmapi_flag |= XFS_BMAPI_PREALLOC; - /* * From this point onwards we overwrite the imap pointer that the * caller gave to us. */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, - &firstfsb, 0, imap, &nimaps, &free_list); + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + XFS_BMAPI_PREALLOC, &firstfsb, 0, + imap, &nimaps, &free_list); if (error) goto out_bmap_cancel; @@ -229,7 +225,7 @@ xfs_iomap_write_direct( * Copy any maps to caller's array and return any error. */ if (nimaps == 0) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto out_unlock; } @@ -401,7 +397,8 @@ xfs_quota_calc_throttle( struct xfs_inode *ip, int type, xfs_fsblock_t *qblocks, - int *qshift) + int *qshift, + int64_t *qfreesp) { int64_t freesp; int shift = 0; @@ -410,6 +407,7 @@ xfs_quota_calc_throttle( /* over hi wmark, squash the prealloc completely */ if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { *qblocks = 0; + *qfreesp = 0; return; } @@ -422,6 +420,9 @@ xfs_quota_calc_throttle( shift += 2; } + if (freesp < *qfreesp) + *qfreesp = freesp; + /* only overwrite the throttle values if we are more aggressive */ if ((freesp >> shift) < (*qblocks >> *qshift)) { *qblocks = freesp; @@ -480,15 +481,18 @@ xfs_iomap_prealloc_size( } /* - * Check each quota to cap the prealloc size and provide a shift - * value to throttle with. + * Check each quota to cap the prealloc size, provide a shift value to + * throttle with and adjust amount of available space. */ if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift, + &freesp); if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift, + &freesp); if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift, + &freesp); /* * The final prealloc size is set to the minimum of free space available @@ -556,7 +560,7 @@ xfs_iomap_write_delay( */ error = xfs_qm_dqattach_locked(ip, 0); if (error) - return XFS_ERROR(error); + return error; extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -600,11 +604,11 @@ retry: imap, &nimaps, XFS_BMAPI_ENTIRE); switch (error) { case 0: - case ENOSPC: - case EDQUOT: + case -ENOSPC: + case -EDQUOT: break; default: - return XFS_ERROR(error); + return error; } /* @@ -618,7 +622,7 @@ retry: error = 0; goto retry; } - return XFS_ERROR(error ? error : ENOSPC); + return error ? error : -ENOSPC; } if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) @@ -667,7 +671,7 @@ xfs_iomap_write_allocate( */ error = xfs_qm_dqattach(ip, 0); if (error) - return XFS_ERROR(error); + return error; offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = imap->br_blockcount; @@ -694,7 +698,7 @@ xfs_iomap_write_allocate( nres, 0); if (error) { xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); @@ -734,7 +738,7 @@ xfs_iomap_write_allocate( */ nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); - error = xfs_bmap_last_offset(NULL, ip, &last_block, + error = xfs_bmap_last_offset(ip, &last_block, XFS_DATA_FORK); if (error) goto trans_cancel; @@ -743,7 +747,7 @@ xfs_iomap_write_allocate( if ((map_start_fsb + count_fsb) > last_block) { count_fsb = last_block - map_start_fsb; if (count_fsb == 0) { - error = EAGAIN; + error = -EAGAIN; goto trans_cancel; } } @@ -753,8 +757,7 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, - XFS_BMAPI_STACK_SWITCH, + count_fsb, 0, &first_block, 1, imap, &nimaps, &free_list); if (error) @@ -798,7 +801,7 @@ trans_cancel: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); - return XFS_ERROR(error); + return error; } int @@ -858,7 +861,7 @@ xfs_iomap_write_unwritten( resblks, 0); if (error) { xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -897,7 +900,7 @@ xfs_iomap_write_unwritten( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) - return XFS_ERROR(error); + return error; if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) return xfs_alert_fsblock_zero(ip, &imap); @@ -920,5 +923,5 @@ error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_iunlock(ip, XFS_ILOCK_EXCL); - return XFS_ERROR(error); + return error; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 27e0e544e963..72129493e9d3 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -39,6 +39,7 @@ #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" #include "xfs_dinode.h" +#include "xfs_trans_space.h" #include <linux/capability.h> #include <linux/xattr.h> @@ -48,6 +49,18 @@ #include <linux/fiemap.h> #include <linux/slab.h> +/* + * Directories have different lock order w.r.t. mmap_sem compared to regular + * files. This is due to readdir potentially triggering page faults on a user + * buffer inside filldir(), and this happens with the ilock on the directory + * held. For regular files, the lock order is the other way around - the + * mmap_sem is taken during the page fault, and then we lock the ilock to do + * block mapping. Hence we need a different class for the directory ilock so + * that lockdep can tell them apart. + */ +static struct lock_class_key xfs_nondir_ilock_class; +static struct lock_class_key xfs_dir_ilock_class; + static int xfs_initxattrs( struct inode *inode, @@ -60,7 +73,7 @@ xfs_initxattrs( for (xattr = xattr_array; xattr->name != NULL; xattr++) { error = xfs_attr_set(ip, xattr->name, xattr->value, - xattr->value_len, ATTR_SECURE); + xattr->value_len, ATTR_SECURE); if (error < 0) break; } @@ -81,7 +94,7 @@ xfs_init_security( const struct qstr *qstr) { return security_inode_init_security(inode, dir, qstr, - &xfs_initxattrs, NULL); + &xfs_initxattrs, NULL); } static void @@ -111,19 +124,19 @@ xfs_cleanup_inode( xfs_dentry_to_name(&teardown, dentry, 0); xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); - iput(inode); } STATIC int -xfs_vn_mknod( +xfs_generic_create( struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t rdev) + dev_t rdev, + bool tmpfile) /* unnamed file */ { struct inode *inode; struct xfs_inode *ip = NULL; - struct posix_acl *default_acl = NULL; + struct posix_acl *default_acl, *acl; struct xfs_name name; int error; @@ -139,17 +152,16 @@ xfs_vn_mknod( rdev = 0; } - if (IS_POSIXACL(dir)) { - default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(default_acl)) - return PTR_ERR(default_acl); + error = posix_acl_create(dir, &mode, &default_acl, &acl); + if (error) + return error; - if (!default_acl) - mode &= ~current_umask(); + if (!tmpfile) { + xfs_dentry_to_name(&name, dentry, mode); + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + } else { + error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); } - - xfs_dentry_to_name(&name, dentry, mode); - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); if (unlikely(error)) goto out_free_acl; @@ -159,22 +171,46 @@ xfs_vn_mknod( if (unlikely(error)) goto out_cleanup_inode; +#ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { - error = -xfs_inherit_acl(inode, default_acl); - default_acl = NULL; - if (unlikely(error)) + error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + if (error) + goto out_cleanup_inode; + } + if (acl) { + error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); + if (error) goto out_cleanup_inode; } +#endif + if (tmpfile) + d_tmpfile(dentry, inode); + else + d_instantiate(dentry, inode); - d_instantiate(dentry, inode); - return -error; + out_free_acl: + if (default_acl) + posix_acl_release(default_acl); + if (acl) + posix_acl_release(acl); + return error; out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry); - out_free_acl: - posix_acl_release(default_acl); - return -error; + if (!tmpfile) + xfs_cleanup_inode(dir, inode, dentry); + iput(inode); + goto out_free_acl; +} + +STATIC int +xfs_vn_mknod( + struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev) +{ + return xfs_generic_create(dir, dentry, mode, rdev, false); } STATIC int @@ -212,8 +248,8 @@ xfs_vn_lookup( xfs_dentry_to_name(&name, dentry, 0); error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); + if (unlikely(error != -ENOENT)) + return ERR_PTR(error); d_add(dentry, NULL); return NULL; } @@ -239,8 +275,8 @@ xfs_vn_ci_lookup( xfs_dentry_to_name(&xname, dentry, 0); error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); + if (unlikely(error != -ENOENT)) + return ERR_PTR(error); /* * call d_add(dentry, NULL) here when d_drop_negative_children * is called in xfs_vn_mknod (ie. allow negative dentries @@ -275,7 +311,7 @@ xfs_vn_link( error = xfs_link(XFS_I(dir), XFS_I(inode), &name); if (unlikely(error)) - return -error; + return error; ihold(inode); d_instantiate(dentry, inode); @@ -292,7 +328,7 @@ xfs_vn_unlink( xfs_dentry_to_name(&name, dentry, 0); - error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); + error = xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); if (error) return error; @@ -337,8 +373,9 @@ xfs_vn_symlink( out_cleanup_inode: xfs_cleanup_inode(dir, inode, dentry); + iput(inode); out: - return -error; + return error; } STATIC int @@ -355,8 +392,8 @@ xfs_vn_rename( xfs_dentry_to_name(&oname, odentry, 0); xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); - return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), - XFS_I(ndir), &nname, new_inode ? + return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), + XFS_I(ndir), &nname, new_inode ? XFS_I(new_inode) : NULL); } @@ -377,7 +414,7 @@ xfs_vn_follow_link( if (!link) goto out_err; - error = -xfs_readlink(XFS_I(dentry->d_inode), link); + error = xfs_readlink(XFS_I(dentry->d_inode), link); if (unlikely(error)) goto out_kfree; @@ -391,18 +428,6 @@ xfs_vn_follow_link( return NULL; } -STATIC void -xfs_vn_put_link( - struct dentry *dentry, - struct nameidata *nd, - void *p) -{ - char *s = nd_get_link(nd); - - if (!IS_ERR(s)) - kfree(s); -} - STATIC int xfs_vn_getattr( struct vfsmount *mnt, @@ -416,7 +441,7 @@ xfs_vn_getattr( trace_xfs_getattr(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; stat->size = XFS_ISIZE(ip); stat->dev = inode->i_sb->s_dev; @@ -459,14 +484,12 @@ xfs_vn_getattr( static void xfs_setattr_mode( - struct xfs_trans *tp, struct xfs_inode *ip, struct iattr *iattr) { - struct inode *inode = VFS_I(ip); - umode_t mode = iattr->ia_mode; + struct inode *inode = VFS_I(ip); + umode_t mode = iattr->ia_mode; - ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ip->i_d.di_mode &= S_IFMT; @@ -476,6 +499,32 @@ xfs_setattr_mode( inode->i_mode |= mode & ~S_IFMT; } +static void +xfs_setattr_time( + struct xfs_inode *ip, + struct iattr *iattr) +{ + struct inode *inode = VFS_I(ip); + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (iattr->ia_valid & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + } + if (iattr->ia_valid & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + } + if (iattr->ia_valid & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + } +} + int xfs_setattr_nonsize( struct xfs_inode *ip, @@ -497,14 +546,14 @@ xfs_setattr_nonsize( /* If acls are being inherited, we already have this checked */ if (!(flags & XFS_ATTR_NOACL)) { if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + return -EROFS; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; - error = -inode_change_ok(inode, iattr); + error = inode_change_ok(inode, iattr); if (error) - return XFS_ERROR(error); + return error; } ASSERT((mask & ATTR_SIZE) == 0); @@ -618,7 +667,8 @@ xfs_setattr_nonsize( } if (!gid_eq(igid, gid)) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || + !XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); olddquot2 = xfs_qm_vop_chown(tp, ip, @@ -629,30 +679,10 @@ xfs_setattr_nonsize( } } - /* - * Change file access modes. - */ if (mask & ATTR_MODE) - xfs_setattr_mode(tp, ip, iattr); - - /* - * Change file access or modified times. - */ - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - } - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - } + xfs_setattr_mode(ip, iattr); + if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -673,7 +703,7 @@ xfs_setattr_nonsize( xfs_qm_dqrele(gdqp); if (error) - return XFS_ERROR(error); + return error; /* * XXX(hch): Updating the ACL entries is not atomic vs the i_mode @@ -683,9 +713,9 @@ xfs_setattr_nonsize( * Posix ACL code seems to care about this issue either. */ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - error = -xfs_acl_chmod(inode); + error = posix_acl_chmod(inode, inode->i_mode); if (error) - return XFS_ERROR(error); + return error; } return 0; @@ -709,7 +739,6 @@ xfs_setattr_size( { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; xfs_off_t oldsize, newsize; struct xfs_trans *tp; int error; @@ -719,19 +748,19 @@ xfs_setattr_size( trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + return -EROFS; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; - error = -inode_change_ok(inode, iattr); + error = inode_change_ok(inode, iattr); if (error) - return XFS_ERROR(error); + return error; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| + ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); oldsize = inode->i_size; newsize = iattr->ia_size; @@ -740,7 +769,7 @@ xfs_setattr_size( * Short circuit the truncate case for zero length files. */ if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { - if (!(mask & (ATTR_CTIME|ATTR_MTIME))) + if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) return 0; /* @@ -789,7 +818,7 @@ xfs_setattr_size( * care about here. */ if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { - error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ip->i_d.di_size, newsize); if (error) return error; @@ -800,22 +829,34 @@ xfs_setattr_size( */ inode_dio_wait(inode); - error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); + /* + * Do all the page cache truncate work outside the transaction context + * as the "lock" order is page lock->log space reservation. i.e. + * locking pages inside the transaction can ABBA deadlock with + * writeback. We have to do the VFS inode size update before we truncate + * the pagecache, however, to avoid racing with page faults beyond the + * new EOF they are not serialised against truncate operations except by + * page locks and size updates. + * + * Hence we are in a situation where a truncate can fail with ENOMEM + * from xfs_trans_reserve(), but having already truncated the in-memory + * version of the file (i.e. made user visible changes). There's not + * much we can do about this, except to hope that the caller sees ENOMEM + * and retries the truncate operation. + */ + error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) return error; + truncate_setsize(inode, newsize); tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) goto out_trans_cancel; - truncate_setsize(inode, newsize); - commit_flags = XFS_TRANS_RELEASE_LOG_RES; lock_flags |= XFS_ILOCK_EXCL; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); /* @@ -828,10 +869,11 @@ xfs_setattr_size( * these flags set. For all other operations the VFS set these flags * explicitly if it wants a timestamp update. */ - if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + if (newsize != oldsize && + !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; + iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } /* @@ -867,22 +909,10 @@ xfs_setattr_size( xfs_inode_clear_eofblocks_tag(ip); } - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) - xfs_setattr_mode(tp, ip, iattr); - - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - } + if (iattr->ia_valid & ATTR_MODE) + xfs_setattr_mode(ip, iattr); + if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -920,7 +950,7 @@ xfs_vn_setattr( error = xfs_setattr_nonsize(ip, iattr, 0); } - return -error; + return error; } STATIC int @@ -940,7 +970,7 @@ xfs_vn_update_time( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); if (error) { xfs_trans_cancel(tp, 0); - return -error; + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -961,7 +991,7 @@ xfs_vn_update_time( } xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); - return -xfs_trans_commit(tp, 0); + return xfs_trans_commit(tp, 0); } #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) @@ -1006,7 +1036,7 @@ xfs_fiemap_format( *full = 1; /* user array now full */ } - return -error; + return error; } STATIC int @@ -1025,12 +1055,12 @@ xfs_vn_fiemap( return error; /* Set up bmap header for xfs internal routine */ - bm.bmv_offset = BTOBB(start); + bm.bmv_offset = BTOBBT(start); /* Special case for whole file */ if (length == FIEMAP_MAX_OFFSET) bm.bmv_length = -1LL; else - bm.bmv_length = BTOBB(length); + bm.bmv_length = BTOBB(start + length) - bm.bmv_offset; /* We add one because in getbmap world count includes the header */ bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : @@ -1045,13 +1075,23 @@ xfs_vn_fiemap( error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); if (error) - return -error; + return error; return 0; } +STATIC int +xfs_vn_tmpfile( + struct inode *dir, + struct dentry *dentry, + umode_t mode) +{ + return xfs_generic_create(dir, dentry, mode, 0, true); +} + static const struct inode_operations xfs_inode_operations = { .get_acl = xfs_get_acl, + .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1079,6 +1119,7 @@ static const struct inode_operations xfs_dir_inode_operations = { .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, .get_acl = xfs_get_acl, + .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1086,6 +1127,7 @@ static const struct inode_operations xfs_dir_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, + .tmpfile = xfs_vn_tmpfile, }; static const struct inode_operations xfs_dir_ci_inode_operations = { @@ -1105,6 +1147,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, .get_acl = xfs_get_acl, + .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1112,13 +1155,13 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, + .tmpfile = xfs_vn_tmpfile, }; static const struct inode_operations xfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = xfs_vn_follow_link, - .put_link = xfs_vn_put_link, - .get_acl = xfs_get_acl, + .put_link = kfree_put_link, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1205,6 +1248,7 @@ xfs_setup_inode( xfs_diflags_to_iflags(inode, ip); ip->d_ops = ip->i_mount->m_nondir_inode_ops; + lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; @@ -1212,6 +1256,7 @@ xfs_setup_inode( inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: + lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) inode->i_op = &xfs_dir_ci_inode_operations; else diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index d2c5057b5cc4..1c34e4335920 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -30,7 +30,7 @@ extern void xfs_setup_inode(struct xfs_inode *); /* * Internal setattr interfaces. */ -#define XFS_ATTR_NOACL 0x01 /* Don't call xfs_acl_chmod */ +#define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */ extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c237ad15d500..f71be9c68017 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -67,19 +67,17 @@ xfs_bulkstat_one_int( *stat = BULKSTAT_RV_NOTHING; if (!buffer || xfs_internal_inum(mp, ino)) - return XFS_ERROR(EINVAL); + return -EINVAL; buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); if (!buf) - return XFS_ERROR(ENOMEM); + return -ENOMEM; error = xfs_iget(mp, NULL, ino, (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), XFS_ILOCK_SHARED, &ip); - if (error) { - *stat = BULKSTAT_RV_NOTHING; + if (error) goto out_free; - } ASSERT(ip != NULL); ASSERT(ip->i_imap.im_blkno != 0); @@ -136,7 +134,6 @@ xfs_bulkstat_one_int( IRELE(ip); error = formatter(buffer, ubsize, ubused, buf); - if (!error) *stat = BULKSTAT_RV_DIDONE; @@ -154,9 +151,9 @@ xfs_bulkstat_one_fmt( const xfs_bstat_t *buffer) { if (ubsize < sizeof(*buffer)) - return XFS_ERROR(ENOMEM); + return -ENOMEM; if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) - return XFS_ERROR(EFAULT); + return -EFAULT; if (ubused) *ubused = sizeof(*buffer); return 0; @@ -175,9 +172,170 @@ xfs_bulkstat_one( xfs_bulkstat_one_fmt, ubused, stat); } +/* + * Loop over all clusters in a chunk for a given incore inode allocation btree + * record. Do a readahead if there are any allocated inodes in that cluster. + */ +STATIC void +xfs_bulkstat_ichunk_ra( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irec) +{ + xfs_agblock_t agbno; + struct blk_plug plug; + int blks_per_cluster; + int inodes_per_cluster; + int i; /* inode chunk index */ + + agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); + blks_per_cluster = xfs_icluster_size_fsb(mp); + inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; + + blk_start_plug(&plug); + for (i = 0; i < XFS_INODES_PER_CHUNK; + i += inodes_per_cluster, agbno += blks_per_cluster) { + if (xfs_inobt_maskn(i, inodes_per_cluster) & ~irec->ir_free) { + xfs_btree_reada_bufs(mp, agno, agbno, blks_per_cluster, + &xfs_inode_buf_ops); + } + } + blk_finish_plug(&plug); +} + +/* + * Lookup the inode chunk that the given inode lives in and then get the record + * if we found the chunk. If the inode was not the last in the chunk and there + * are some left allocated, update the data for the pointed-to record as well as + * return the count of grabbed inodes. + */ +STATIC int +xfs_bulkstat_grab_ichunk( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t agino, /* starting inode of chunk */ + int *icount,/* return # of inodes grabbed */ + struct xfs_inobt_rec_incore *irec) /* btree record */ +{ + int idx; /* index into inode chunk */ + int stat; + int error = 0; + + /* Lookup the inode chunk that this inode lives in */ + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat); + if (error) + return error; + if (!stat) { + *icount = 0; + return error; + } + + /* Get the record, should always work */ + error = xfs_inobt_get_rec(cur, irec, &stat); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(stat == 1); + + /* Check if the record contains the inode in request */ + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) + return -EINVAL; + + idx = agino - irec->ir_startino + 1; + if (idx < XFS_INODES_PER_CHUNK && + (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) { + int i; + + /* We got a right chunk with some left inodes allocated at it. + * Grab the chunk record. Mark all the uninteresting inodes + * free -- because they're before our start point. + */ + for (i = 0; i < idx; i++) { + if (XFS_INOBT_MASK(i) & ~irec->ir_free) + irec->ir_freecount++; + } + + irec->ir_free |= xfs_inobt_maskn(0, idx); + *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount; + } + + return 0; +} + #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) /* + * Process inodes in chunk with a pointer to a formatter function + * that will iget the inode and fill in the appropriate structure. + */ +int +xfs_bulkstat_ag_ichunk( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irbp, + bulkstat_one_pf formatter, + size_t statstruct_size, + struct xfs_bulkstat_agichunk *acp) +{ + xfs_ino_t lastino = acp->ac_lastino; + char __user **ubufp = acp->ac_ubuffer; + int ubleft = acp->ac_ubleft; + int ubelem = acp->ac_ubelem; + int chunkidx, clustidx; + int error = 0; + xfs_agino_t agino; + + for (agino = irbp->ir_startino, chunkidx = clustidx = 0; + XFS_BULKSTAT_UBLEFT(ubleft) && + irbp->ir_freecount < XFS_INODES_PER_CHUNK; + chunkidx++, clustidx++, agino++) { + int fmterror; /* bulkstat formatter result */ + int ubused; + xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino); + + ASSERT(chunkidx < XFS_INODES_PER_CHUNK); + + /* Skip if this inode is free */ + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { + lastino = ino; + continue; + } + + /* + * Count used inodes as free so we can tell when the + * chunk is used up. + */ + irbp->ir_freecount++; + + /* Get the inode and fill in a single buffer */ + ubused = statstruct_size; + error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror); + if (fmterror == BULKSTAT_RV_NOTHING) { + if (error && error != -ENOENT && error != -EINVAL) { + ubleft = 0; + break; + } + lastino = ino; + continue; + } + if (fmterror == BULKSTAT_RV_GIVEUP) { + ubleft = 0; + ASSERT(error); + break; + } + if (*ubufp) + *ubufp += ubused; + ubleft -= ubused; + ubelem++; + lastino = ino; + } + + acp->ac_lastino = lastino; + acp->ac_ubleft = ubleft; + acp->ac_ubelem = ubelem; + + return error; +} + +/* * Return stat information in bulk (by-inode) for the filesystem. */ int /* error status */ @@ -190,13 +348,10 @@ xfs_bulkstat( char __user *ubuffer, /* buffer with inode stats */ int *done) /* 1 if there are more stats to get */ { - xfs_agblock_t agbno=0;/* allocation group block number */ xfs_buf_t *agbp; /* agi header buffer */ xfs_agi_t *agi; /* agi header data */ xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ - int chunkidx; /* current index into inode chunk */ - int clustidx; /* current index into inode cluster */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ int end_of_ag; /* set if we've seen the ag end */ int error; /* error code */ @@ -209,9 +364,6 @@ xfs_bulkstat( xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ xfs_ino_t lastino; /* last inode number returned */ - int nbcluster; /* # of blocks in a cluster */ - int nicluster; /* # of inodes in a cluster */ - int nimask; /* mask for inode clusters */ int nirbuf; /* size of irbuf */ int rval; /* return value error code */ int tmp; /* result value from btree calls */ @@ -219,7 +371,6 @@ xfs_bulkstat( int ubleft; /* bytes left in user's buffer */ char __user *ubufp; /* pointer into user's buffer */ int ubelem; /* spaces used in user's buffer */ - int ubused; /* bytes used by formatter */ /* * Get the last inode value, see if there's nothing to do. @@ -234,23 +385,16 @@ xfs_bulkstat( *ubcountp = 0; return 0; } - if (!ubcountp || *ubcountp <= 0) { - return EINVAL; - } + ubcount = *ubcountp; /* statstruct's */ ubleft = ubcount * statstruct_size; /* bytes */ *ubcountp = ubelem = 0; *done = 0; fmterror = 0; ubufp = ubuffer; - nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ? - mp->m_sb.sb_inopblock : - (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); - nimask = ~(nicluster - 1); - nbcluster = nicluster >> mp->m_sb.sb_inopblog; irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); if (!irbuf) - return ENOMEM; + return -ENOMEM; nirbuf = irbsize / sizeof(*irbuf); @@ -262,112 +406,50 @@ xfs_bulkstat( while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { cond_resched(); error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) { - /* - * Skip this allocation group and go to the next one. - */ - agno++; - agino = 0; - continue; - } + if (error) + break; agi = XFS_BUF_TO_AGI(agbp); /* * Allocate and initialize a btree cursor for ialloc btree. */ - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, + XFS_BTNUM_INO); irbp = irbuf; irbufend = irbuf + nirbuf; end_of_ag = 0; - /* - * If we're returning in the middle of an allocation group, - * we need to get the remainder of the chunk we're in. - */ + icount = 0; if (agino > 0) { - xfs_inobt_rec_incore_t r; - /* - * Lookup the inode chunk that this inode lives in. + * In the middle of an allocation group, we need to get + * the remainder of the chunk we're in. */ - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, - &tmp); - if (!error && /* no I/O error */ - tmp && /* lookup succeeded */ - /* got the record, should always work */ - !(error = xfs_inobt_get_rec(cur, &r, &i)) && - i == 1 && - /* this is the right chunk */ - agino < r.ir_startino + XFS_INODES_PER_CHUNK && - /* lastino was not last in chunk */ - (chunkidx = agino - r.ir_startino + 1) < - XFS_INODES_PER_CHUNK && - /* there are some left allocated */ - xfs_inobt_maskn(chunkidx, - XFS_INODES_PER_CHUNK - chunkidx) & - ~r.ir_free) { - /* - * Grab the chunk record. Mark all the - * uninteresting inodes (because they're - * before our start point) free. - */ - for (i = 0; i < chunkidx; i++) { - if (XFS_INOBT_MASK(i) & ~r.ir_free) - r.ir_freecount++; - } - r.ir_free |= xfs_inobt_maskn(0, chunkidx); + struct xfs_inobt_rec_incore r; + + error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); + if (error) + break; + if (icount) { irbp->ir_startino = r.ir_startino; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; irbp++; agino = r.ir_startino + XFS_INODES_PER_CHUNK; - icount = XFS_INODES_PER_CHUNK - r.ir_freecount; - } else { - /* - * If any of those tests failed, bump the - * inode number (just in case). - */ - agino++; - icount = 0; } - /* - * In any case, increment to the next record. - */ - if (!error) - error = xfs_btree_increment(cur, 0, &tmp); + /* Increment to the next record */ + error = xfs_btree_increment(cur, 0, &tmp); } else { - /* - * Start of ag. Lookup the first inode chunk. - */ + /* Start of ag. Lookup the first inode chunk */ error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); - icount = 0; } + if (error) + break; + /* * Loop through inode btree records in this ag, * until we run out of inodes or space in the buffer. */ while (irbp < irbufend && icount < ubcount) { - xfs_inobt_rec_incore_t r; - - /* - * Loop as long as we're unable to read the - * inode btree. - */ - while (error) { - agino += XFS_INODES_PER_CHUNK; - if (XFS_AGINO_TO_AGBNO(mp, agino) >= - be32_to_cpu(agi->agi_length)) - break; - error = xfs_inobt_lookup(cur, agino, - XFS_LOOKUP_GE, &tmp); - cond_resched(); - } - /* - * If ran off the end of the ag either with an error, - * or the normal way, set end and stop collecting. - */ - if (error) { - end_of_ag = 1; - break; - } + struct xfs_inobt_rec_incore r; error = xfs_inobt_get_rec(cur, &r, &i); if (error || i == 0) { @@ -380,25 +462,7 @@ xfs_bulkstat( * Also start read-ahead now for this chunk. */ if (r.ir_freecount < XFS_INODES_PER_CHUNK) { - struct blk_plug plug; - /* - * Loop over all clusters in the next chunk. - * Do a readahead if there are any allocated - * inodes in that cluster. - */ - blk_start_plug(&plug); - agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); - for (chunkidx = 0; - chunkidx < XFS_INODES_PER_CHUNK; - chunkidx += nicluster, - agbno += nbcluster) { - if (xfs_inobt_maskn(chunkidx, nicluster) - & ~r.ir_free) - xfs_btree_reada_bufs(mp, agno, - agbno, nbcluster, - &xfs_inode_buf_ops); - } - blk_finish_plug(&plug); + xfs_bulkstat_ichunk_ra(mp, agno, &r); irbp->ir_startino = r.ir_startino; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; @@ -425,57 +489,20 @@ xfs_bulkstat( irbufend = irbp; for (irbp = irbuf; irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { - /* - * Now process this chunk of inodes. - */ - for (agino = irbp->ir_startino, chunkidx = clustidx = 0; - XFS_BULKSTAT_UBLEFT(ubleft) && - irbp->ir_freecount < XFS_INODES_PER_CHUNK; - chunkidx++, clustidx++, agino++) { - ASSERT(chunkidx < XFS_INODES_PER_CHUNK); - - ino = XFS_AGINO_TO_INO(mp, agno, agino); - /* - * Skip if this inode is free. - */ - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { - lastino = ino; - continue; - } - /* - * Count used inodes as free so we can tell - * when the chunk is used up. - */ - irbp->ir_freecount++; - - /* - * Get the inode and fill in a single buffer. - */ - ubused = statstruct_size; - error = formatter(mp, ino, ubufp, ubleft, - &ubused, &fmterror); - if (fmterror == BULKSTAT_RV_NOTHING) { - if (error && error != ENOENT && - error != EINVAL) { - ubleft = 0; - rval = error; - break; - } - lastino = ino; - continue; - } - if (fmterror == BULKSTAT_RV_GIVEUP) { - ubleft = 0; - ASSERT(error); - rval = error; - break; - } - if (ubufp) - ubufp += ubused; - ubleft -= ubused; - ubelem++; - lastino = ino; - } + struct xfs_bulkstat_agichunk ac; + + ac.ac_lastino = lastino; + ac.ac_ubuffer = &ubuffer; + ac.ac_ubleft = ubleft; + ac.ac_ubelem = ubelem; + error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, + formatter, statstruct_size, &ac); + if (error) + rval = error; + + lastino = ac.ac_lastino; + ubleft = ac.ac_ubleft; + ubelem = ac.ac_ubelem; cond_resched(); } @@ -515,58 +542,10 @@ xfs_bulkstat( return rval; } -/* - * Return stat information in bulk (by-inode) for the filesystem. - * Special case for non-sequential one inode bulkstat. - */ -int /* error status */ -xfs_bulkstat_single( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastinop, /* inode to return */ - char __user *buffer, /* buffer with inode stats */ - int *done) /* 1 if there are more stats to get */ -{ - int count; /* count value for bulkstat call */ - int error; /* return value */ - xfs_ino_t ino; /* filesystem inode number */ - int res; /* result from bs1 */ - - /* - * note that requesting valid inode numbers which are not allocated - * to inodes will most likely cause xfs_imap_to_bp to generate warning - * messages about bad magic numbers. This is ok. The fact that - * the inode isn't actually an inode is handled by the - * error check below. Done this way to make the usual case faster - * at the expense of the error case. - */ - - ino = *lastinop; - error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), - NULL, &res); - if (error) { - /* - * Special case way failed, do it the "long" way - * to see if that works. - */ - (*lastinop)--; - count = 1; - if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, - sizeof(xfs_bstat_t), buffer, done)) - return error; - if (count == 0 || (xfs_ino_t)*lastinop != ino) - return error == EFSCORRUPTED ? - XFS_ERROR(EINVAL) : error; - else - return 0; - } - *done = 0; - return 0; -} - int xfs_inumbers_fmt( void __user *ubuffer, /* buffer to write to */ - const xfs_inogrp_t *buffer, /* buffer to read from */ + const struct xfs_inogrp *buffer, /* buffer to read from */ long count, /* # of elements to read */ long *written) /* # of bytes written */ { @@ -581,126 +560,104 @@ xfs_inumbers_fmt( */ int /* error status */ xfs_inumbers( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastino, /* last inode returned */ - int *count, /* size of buffer/count returned */ - void __user *ubuffer,/* buffer with inode descriptions */ - inumbers_fmt_pf formatter) + struct xfs_mount *mp,/* mount point for filesystem */ + xfs_ino_t *lastino,/* last inode returned */ + int *count,/* size of buffer/count returned */ + void __user *ubuffer,/* buffer with inode descriptions */ + inumbers_fmt_pf formatter) { - xfs_buf_t *agbp; - xfs_agino_t agino; - xfs_agnumber_t agno; - int bcount; - xfs_inogrp_t *buffer; - int bufidx; - xfs_btree_cur_t *cur; - int error; - xfs_inobt_rec_incore_t r; - int i; - xfs_ino_t ino; - int left; - int tmp; - - ino = (xfs_ino_t)*lastino; - agno = XFS_INO_TO_AGNO(mp, ino); - agino = XFS_INO_TO_AGINO(mp, ino); - left = *count; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino); + struct xfs_btree_cur *cur = NULL; + struct xfs_buf *agbp = NULL; + struct xfs_inogrp *buffer; + int bcount; + int left = *count; + int bufidx = 0; + int error = 0; + *count = 0; + if (agno >= mp->m_sb.sb_agcount || + *lastino != XFS_AGINO_TO_INO(mp, agno, agino)) + return error; + bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); - error = bufidx = 0; - cur = NULL; - agbp = NULL; - while (left > 0 && agno < mp->m_sb.sb_agcount) { - if (agbp == NULL) { + do { + struct xfs_inobt_rec_incore r; + int stat; + + if (!agbp) { error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) { - /* - * If we can't read the AGI of this ag, - * then just skip to the next one. - */ - ASSERT(cur == NULL); - agbp = NULL; - agno++; - agino = 0; - continue; - } - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); + if (error) + break; + + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, + XFS_BTNUM_INO); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, - &tmp); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - cur = NULL; - xfs_buf_relse(agbp); - agbp = NULL; - /* - * Move up the last inode in the current - * chunk. The lookup_ge will always get - * us the first inode in the next chunk. - */ - agino += XFS_INODES_PER_CHUNK - 1; - continue; - } - } - error = xfs_inobt_get_rec(cur, &r, &i); - if (error || i == 0) { - xfs_buf_relse(agbp); - agbp = NULL; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - cur = NULL; - agno++; - agino = 0; - continue; + &stat); + if (error) + break; + if (!stat) + goto next_ag; } + + error = xfs_inobt_get_rec(cur, &r, &stat); + if (error) + break; + if (!stat) + goto next_ag; + agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, r.ir_startino); buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - r.ir_freecount; buffer[bufidx].xi_allocmask = ~r.ir_free; - bufidx++; - left--; - if (bufidx == bcount) { - long written; - if (formatter(ubuffer, buffer, bufidx, &written)) { - error = XFS_ERROR(EFAULT); + if (++bufidx == bcount) { + long written; + + error = formatter(ubuffer, buffer, bufidx, &written); + if (error) break; - } ubuffer += written; *count += bufidx; bufidx = 0; } - if (left) { - error = xfs_btree_increment(cur, 0, &tmp); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - cur = NULL; - xfs_buf_relse(agbp); - agbp = NULL; - /* - * The agino value has already been bumped. - * Just try to skip up to it. - */ - agino += XFS_INODES_PER_CHUNK; - continue; - } - } - } + if (!--left) + break; + + error = xfs_btree_increment(cur, 0, &stat); + if (error) + break; + if (stat) + continue; + +next_ag: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + cur = NULL; + xfs_buf_relse(agbp); + agbp = NULL; + agino = 0; + } while (++agno < mp->m_sb.sb_agcount); + if (!error) { if (bufidx) { - long written; - if (formatter(ubuffer, buffer, bufidx, &written)) - error = XFS_ERROR(EFAULT); - else + long written; + + error = formatter(ubuffer, buffer, bufidx, &written); + if (!error) *count += bufidx; } *lastino = XFS_AGINO_TO_INO(mp, agno, agino); } + kmem_free(buffer); if (cur) xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR)); if (agbp) xfs_buf_relse(agbp); + return error; } diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 97295d91d170..aaed08022eb9 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -30,6 +30,22 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, int *ubused, int *stat); +struct xfs_bulkstat_agichunk { + xfs_ino_t ac_lastino; /* last inode returned */ + char __user **ac_ubuffer;/* pointer into user's buffer */ + int ac_ubleft; /* bytes left in user's buffer */ + int ac_ubelem; /* spaces used in user's buffer */ +}; + +int +xfs_bulkstat_ag_ichunk( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irbp, + bulkstat_one_pf formatter, + size_t statstruct_size, + struct xfs_bulkstat_agichunk *acp); + /* * Values for stat return value. */ @@ -50,13 +66,6 @@ xfs_bulkstat( char __user *ubuffer,/* buffer with inode stats */ int *done); /* 1 if there are more stats to get */ -int -xfs_bulkstat_single( - xfs_mount_t *mp, - xfs_ino_t *lastinop, - char __user *buffer, - int *done); - typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ void __user *ubuffer, /* buffer to write to */ int ubsize, /* remaining user buffer sz */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index f9bb590acc0e..d10dc8f397c9 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -21,18 +21,6 @@ #include <linux/types.h> /* - * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. - * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. - */ -#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) -# define XFS_BIG_BLKNOS 1 -# define XFS_BIG_INUMS 1 -#else -# define XFS_BIG_BLKNOS 0 -# define XFS_BIG_INUMS 0 -#endif - -/* * Kernel specific type declarations for XFS */ typedef signed char __int8_t; @@ -113,12 +101,13 @@ typedef __uint64_t __psunsigned_t; #include <asm/byteorder.h> #include <asm/unaligned.h> -#include "xfs_vnode.h" +#include "xfs_fs.h" #include "xfs_stats.h" #include "xfs_sysctl.h" #include "xfs_iops.h" #include "xfs_aops.h" #include "xfs_super.h" +#include "xfs_cksum.h" #include "xfs_buf.h" #include "xfs_message.h" @@ -178,6 +167,7 @@ typedef __uint64_t __psunsigned_t; #define ENOATTR ENODATA /* Attribute not found */ #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define SYNCHRONIZE() barrier() #define __return_address __builtin_return_address(0) @@ -189,6 +179,17 @@ typedef __uint64_t __psunsigned_t; #define MAX(a,b) (max(a,b)) #define howmany(x, y) (((x)+((y)-1))/(y)) +/* + * XFS wrapper structure for sysfs support. It depends on external data + * structures and is embedded in various internal data structures to implement + * the XFS sysfs object heirarchy. Define it here for broad access throughout + * the codebase. + */ +struct xfs_kobj { + struct kobject kobject; + struct completion complete; +}; + /* Kernel uid/gid conversion. These are used to convert to/from the on disk * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. * The conversion here is type only, the value will remain the same since we @@ -329,7 +330,7 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) { x += y - 1; do_div(x, y); - return(x * y); + return x * y; } static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 8497a00e399d..ca4fd5bd8522 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -34,6 +34,7 @@ #include "xfs_trace.h" #include "xfs_fsops.h" #include "xfs_cksum.h" +#include "xfs_sysfs.h" kmem_zone_t *xfs_log_ticket_zone; @@ -283,7 +284,7 @@ xlog_grant_head_wait( return 0; shutdown: list_del_init(&tic->t_queue); - return XFS_ERROR(EIO); + return -EIO; } /* @@ -377,7 +378,7 @@ xfs_log_regrant( int error = 0; if (XLOG_FORCED_SHUTDOWN(log)) - return XFS_ERROR(EIO); + return -EIO; XFS_STATS_INC(xs_try_logspace); @@ -446,7 +447,7 @@ xfs_log_reserve( ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); if (XLOG_FORCED_SHUTDOWN(log)) - return XFS_ERROR(EIO); + return -EIO; XFS_STATS_INC(xs_try_logspace); @@ -454,7 +455,7 @@ xfs_log_reserve( tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, KM_SLEEP | KM_MAYFAIL); if (!tic) - return XFS_ERROR(ENOMEM); + return -ENOMEM; tic->t_trans_type = t_type; *ticp = tic; @@ -590,7 +591,7 @@ xfs_log_release_iclog( { if (xlog_state_release_iclog(mp->m_log, iclog)) { xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); - return EIO; + return -EIO; } return 0; @@ -616,17 +617,19 @@ xfs_log_mount( int error = 0; int min_logfsbs; - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) - xfs_notice(mp, "Mounting Filesystem"); - else { + if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { + xfs_notice(mp, "Mounting V%d Filesystem", + XFS_SB_VERSION_NUM(&mp->m_sb)); + } else { xfs_notice(mp, -"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent."); +"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.", + XFS_SB_VERSION_NUM(&mp->m_sb)); ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); if (IS_ERR(mp->m_log)) { - error = -PTR_ERR(mp->m_log); + error = PTR_ERR(mp->m_log); goto out; } @@ -650,18 +653,18 @@ xfs_log_mount( xfs_warn(mp, "Log size %d blocks too small, minimum size is %d blocks", mp->m_sb.sb_logblocks, min_logfsbs); - error = EINVAL; + error = -EINVAL; } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) { xfs_warn(mp, "Log size %d blocks too large, maximum size is %lld blocks", mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS); - error = EINVAL; + error = -EINVAL; } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) { xfs_warn(mp, "log size %lld bytes too large, maximum size is %lld bytes", XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), XFS_MAX_LOG_BYTES); - error = EINVAL; + error = -EINVAL; } if (error) { if (xfs_sb_version_hascrc(&mp->m_sb)) { @@ -705,6 +708,11 @@ xfs_log_mount( } } + error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj, + "log"); + if (error) + goto out_destroy_ail; + /* Normal transactions can now occur */ mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; @@ -945,6 +953,9 @@ xfs_log_unmount( xfs_log_quiesce(mp); xfs_trans_ail_destroy(mp); + + xfs_sysfs_del(&mp->m_log->l_kobj); + xlog_dealloc_log(mp->m_log); } @@ -1163,7 +1174,7 @@ xlog_iodone(xfs_buf_t *bp) /* * Race to shutdown the filesystem if we see an error. */ - if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, + if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { xfs_buf_ioerror_alert(bp, __func__); xfs_buf_stale(bp); @@ -1181,11 +1192,14 @@ xlog_iodone(xfs_buf_t *bp) /* log I/O is always issued ASYNC */ ASSERT(XFS_BUF_ISASYNC(bp)); xlog_state_done_syncing(iclog, aborted); + /* - * do not reference the buffer (bp) here as we could race - * with it being freed after writing the unmount record to the - * log. + * drop the buffer lock now that we are done. Nothing references + * the buffer after this, so an unmount waiting on this lock can now + * tear it down safely. As such, it is unsafe to reference the buffer + * (bp) after the unlock as we could race with it being freed. */ + xfs_buf_unlock(bp); } /* @@ -1308,7 +1322,7 @@ xlog_alloc_log( xlog_in_core_t *iclog, *prev_iclog=NULL; xfs_buf_t *bp; int i; - int error = ENOMEM; + int error = -ENOMEM; uint log2_size = 0; log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL); @@ -1335,7 +1349,7 @@ xlog_alloc_log( xlog_grant_head_init(&log->l_reserve_head); xlog_grant_head_init(&log->l_write_head); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { log2_size = mp->m_sb.sb_logsectlog; if (log2_size < BBSHIFT) { @@ -1364,12 +1378,26 @@ xlog_alloc_log( xlog_get_iclog_buffer_size(mp, log); - error = ENOMEM; - bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); + /* + * Use a NULL block for the extra log buffer used during splits so that + * it will trigger errors if we ever try to do IO on it without first + * having set it up properly. + */ + error = -ENOMEM; + bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL, + BTOBB(log->l_iclog_size), 0); if (!bp) goto out_free_log; - bp->b_iodone = xlog_iodone; + + /* + * The iclogbuf buffer locks are held over IO but we are not going to do + * IO yet. Hence unlock the buffer so that the log IO path can grab it + * when appropriately. + */ ASSERT(xfs_buf_islocked(bp)); + xfs_buf_unlock(bp); + + bp->b_iodone = xlog_iodone; log->l_xbuf = bp; spin_lock_init(&log->l_icloglock); @@ -1398,6 +1426,9 @@ xlog_alloc_log( if (!bp) goto out_free_iclog; + ASSERT(xfs_buf_islocked(bp)); + xfs_buf_unlock(bp); + bp->b_iodone = xlog_iodone; iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; @@ -1422,7 +1453,6 @@ xlog_alloc_log( iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; - ASSERT(xfs_buf_islocked(iclog->ic_bp)); init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); @@ -1448,7 +1478,7 @@ out_free_iclog: out_free_log: kmem_free(log); out: - return ERR_PTR(-error); + return ERR_PTR(error); } /* xlog_alloc_log */ @@ -1631,6 +1661,12 @@ xlog_cksum( * we transition the iclogs to IOERROR state *after* flushing all existing * iclogs to disk. This is because we don't want anymore new transactions to be * started or completed afterwards. + * + * We lock the iclogbufs here so that we can serialise against IO completion + * during unmount. We might be processing a shutdown triggered during unmount, + * and that can occur asynchronously to the unmount thread, and hence we need to + * ensure that completes before tearing down the iclogbufs. Hence we need to + * hold the buffer lock across the log IO to acheive that. */ STATIC int xlog_bdstrat( @@ -1638,14 +1674,16 @@ xlog_bdstrat( { struct xlog_in_core *iclog = bp->b_fspriv; + xfs_buf_lock(bp); if (iclog->ic_state & XLOG_STATE_IOERROR) { - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); xfs_buf_stale(bp); xfs_buf_ioend(bp, 0); /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of - * doing it here. + * doing it here. Similarly, IO completion will unlock the + * buffer, so we don't do it here. */ return 0; } @@ -1847,14 +1885,28 @@ xlog_dealloc_log( xlog_cil_destroy(log); /* - * always need to ensure that the extra buffer does not point to memory - * owned by another log buffer before we free it. + * Cycle all the iclogbuf locks to make sure all log IO completion + * is done before we tear down these buffers. */ + iclog = log->l_iclog; + for (i = 0; i < log->l_iclog_bufs; i++) { + xfs_buf_lock(iclog->ic_bp); + xfs_buf_unlock(iclog->ic_bp); + iclog = iclog->ic_next; + } + + /* + * Always need to ensure that the extra buffer does not point to memory + * owned by another log buffer before we free it. Also, cycle the lock + * first to ensure we've completed IO on it. + */ + xfs_buf_lock(log->l_xbuf); + xfs_buf_unlock(log->l_xbuf); xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); xfs_buf_free(log->l_xbuf); iclog = log->l_iclog; - for (i=0; i<log->l_iclog_bufs; i++) { + for (i = 0; i < log->l_iclog_bufs; i++) { xfs_buf_free(iclog->ic_bp); next_iclog = iclog->ic_next; kmem_free(iclog); @@ -2323,7 +2375,7 @@ xlog_write( ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags); if (!ophdr) - return XFS_ERROR(EIO); + return -EIO; xlog_write_adv_cnt(&ptr, &len, &log_offset, sizeof(struct xlog_op_header)); @@ -2822,7 +2874,7 @@ restart: spin_lock(&log->l_icloglock); if (XLOG_FORCED_SHUTDOWN(log)) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } iclog = log->l_iclog; @@ -3010,7 +3062,7 @@ xlog_state_release_iclog( int sync = 0; /* do we sync? */ if (iclog->ic_state & XLOG_STATE_IOERROR) - return XFS_ERROR(EIO); + return -EIO; ASSERT(atomic_read(&iclog->ic_refcnt) > 0); if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) @@ -3018,7 +3070,7 @@ xlog_state_release_iclog( if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_WANT_SYNC); @@ -3135,7 +3187,7 @@ _xfs_log_force( iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } /* If the head iclog is not active nor dirty, we just attach @@ -3173,7 +3225,7 @@ _xfs_log_force( spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) - return XFS_ERROR(EIO); + return -EIO; if (log_flushed) *log_flushed = 1; @@ -3209,7 +3261,7 @@ maybe_sleep: */ if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } XFS_STATS_INC(xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); @@ -3219,7 +3271,7 @@ maybe_sleep: * and the memory read should be atomic. */ if (iclog->ic_state & XLOG_STATE_IOERROR) - return XFS_ERROR(EIO); + return -EIO; if (log_flushed) *log_flushed = 1; } else { @@ -3287,7 +3339,7 @@ try_again: iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } do { @@ -3338,7 +3390,7 @@ try_again: xlog_state_switch_iclogs(log, iclog, 0); spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) - return XFS_ERROR(EIO); + return -EIO; if (log_flushed) *log_flushed = 1; spin_lock(&log->l_icloglock); @@ -3353,7 +3405,7 @@ try_again: */ if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } XFS_STATS_INC(xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); @@ -3363,7 +3415,7 @@ try_again: * and the memory read should be atomic. */ if (iclog->ic_state & XLOG_STATE_IOERROR) - return XFS_ERROR(EIO); + return -EIO; if (log_flushed) *log_flushed = 1; @@ -3915,11 +3967,14 @@ xfs_log_force_umount( retval = xlog_state_ioerror(log); spin_unlock(&log->l_icloglock); } + /* - * Wake up everybody waiting on xfs_log_force. - * Callback all log item committed functions as if the - * log writes were completed. + * Wake up everybody waiting on xfs_log_force. Wake the CIL push first + * as if the log writes were completed. The abort handling in the log + * item committed callback functions will do this again under lock to + * avoid races. */ + wake_up_all(&log->l_cilp->xc_commit_wait); xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); #ifdef XFSERRORDEBUG diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index e148719e0a5d..84e0deb95abd 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -24,12 +24,65 @@ struct xfs_log_vec { struct xfs_log_iovec *lv_iovecp; /* iovec array */ struct xfs_log_item *lv_item; /* owner */ char *lv_buf; /* formatted buffer */ - int lv_buf_len; /* size of formatted buffer */ + int lv_bytes; /* accounted space in buffer */ + int lv_buf_len; /* aligned size of buffer */ int lv_size; /* size of allocated lv */ }; #define XFS_LOG_VEC_ORDERED (-1) +static inline void * +xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, + uint type) +{ + struct xfs_log_iovec *vec = *vecp; + + if (vec) { + ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); + vec++; + } else { + vec = &lv->lv_iovecp[0]; + } + + vec->i_type = type; + vec->i_addr = lv->lv_buf + lv->lv_buf_len; + + ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t))); + + *vecp = vec; + return vec->i_addr; +} + +/* + * We need to make sure the next buffer is naturally aligned for the biggest + * basic data type we put into it. We already accounted for this padding when + * sizing the buffer. + * + * However, this padding does not get written into the log, and hence we have to + * track the space used by the log vectors separately to prevent log space hangs + * due to inaccurate accounting (i.e. a leak) of the used log space through the + * CIL context ticket. + */ +static inline void +xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) +{ + lv->lv_buf_len += round_up(len, sizeof(uint64_t)); + lv->lv_bytes += len; + vec->i_len = len; +} + +static inline void * +xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, + uint type, void *data, int len) +{ + void *buf; + + buf = xlog_prepare_iovec(lv, vecp, type); + memcpy(buf, data, len); + xlog_finish_iovec(lv, *vecp, len); + return buf; +} + /* * Structure used to pass callback function and the function's argument * to the log manager. @@ -129,7 +182,7 @@ void xlog_iodone(struct xfs_buf *); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); void xfs_log_ticket_put(struct xlog_ticket *ticket); -int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, +void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, xfs_lsn_t *commit_lsn, int flags); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5eb51fc5eb84..f6b79e5325dd 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -78,38 +78,6 @@ xlog_cil_init_post_recovery( { log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); log->l_cilp->xc_ctx->sequence = 1; - log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle, - log->l_curr_block); -} - -STATIC int -xlog_cil_lv_item_format( - struct xfs_log_item *lip, - struct xfs_log_vec *lv) -{ - int index; - char *ptr; - - /* format new vectors into array */ - lip->li_ops->iop_format(lip, lv->lv_iovecp); - - /* copy data into existing array */ - ptr = lv->lv_buf; - for (index = 0; index < lv->lv_niovecs; index++) { - struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; - - memcpy(ptr, vec->i_addr, vec->i_len); - vec->i_addr = ptr; - ptr += vec->i_len; - } - - /* - * some size calculations for log vectors over-estimate, so the caller - * doesn't know the amount of space actually used by the item. Return - * the byte count to the caller so they can check and store it - * appropriately. - */ - return ptr - lv->lv_buf; } /* @@ -127,7 +95,7 @@ xfs_cil_prepare_item( { /* Account for the new LV being passed in */ if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { - *diff_len += lv->lv_buf_len; + *diff_len += lv->lv_bytes; *diff_iovecs += lv->lv_niovecs; } @@ -141,7 +109,7 @@ xfs_cil_prepare_item( else if (old_lv != lv) { ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); - *diff_len -= old_lv->lv_buf_len; + *diff_len -= old_lv->lv_bytes; *diff_iovecs -= old_lv->lv_niovecs; kmem_free(old_lv); } @@ -232,12 +200,28 @@ xlog_cil_insert_format_items( nbytes = 0; } + /* + * We 64-bit align the length of each iovec so that the start + * of the next one is naturally aligned. We'll need to + * account for that slack space here. Then round nbytes up + * to 64-bit alignment so that the initial buffer alignment is + * easy to calculate and verify. + */ + nbytes += niovecs * sizeof(uint64_t); + nbytes = round_up(nbytes, sizeof(uint64_t)); + /* grab the old item if it exists for reservation accounting */ old_lv = lip->li_lv; - /* calc buffer size */ - buf_size = sizeof(struct xfs_log_vec) + nbytes + - niovecs * sizeof(struct xfs_log_iovec); + /* + * The data buffer needs to start 64-bit aligned, so round up + * that space to ensure we can align it appropriately and not + * overrun the buffer. + */ + buf_size = nbytes + + round_up((sizeof(struct xfs_log_vec) + + niovecs * sizeof(struct xfs_log_iovec)), + sizeof(uint64_t)); /* compare to existing item size */ if (lip->li_lv && buf_size <= lip->li_lv->lv_size) { @@ -253,35 +237,31 @@ xlog_cil_insert_format_items( * that the space reservation accounting is correct. */ *diff_iovecs -= lv->lv_niovecs; - *diff_len -= lv->lv_buf_len; - - /* Ensure the lv is set up according to ->iop_size */ - lv->lv_niovecs = niovecs; - lv->lv_buf = (char *)lv + buf_size - nbytes; - - lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv); - goto insert; + *diff_len -= lv->lv_bytes; + } else { + /* allocate new data chunk */ + lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); + lv->lv_item = lip; + lv->lv_size = buf_size; + if (ordered) { + /* track as an ordered logvec */ + ASSERT(lip->li_lv == NULL); + lv->lv_buf_len = XFS_LOG_VEC_ORDERED; + goto insert; + } + lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; } - /* allocate new data chunk */ - lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); - lv->lv_item = lip; - lv->lv_size = buf_size; + /* Ensure the lv is set up according to ->iop_size */ lv->lv_niovecs = niovecs; - if (ordered) { - /* track as an ordered logvec */ - ASSERT(lip->li_lv == NULL); - lv->lv_buf_len = XFS_LOG_VEC_ORDERED; - goto insert; - } - - /* The allocated iovec region lies beyond the log vector. */ - lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; /* The allocated data region lies beyond the iovec region */ + lv->lv_buf_len = 0; + lv->lv_bytes = 0; lv->lv_buf = (char *)lv + buf_size - nbytes; + ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); - lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv); + lip->li_ops->iop_format(lip, lv); insert: ASSERT(lv->lv_buf_len <= nbytes); xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs); @@ -404,7 +384,15 @@ xlog_cil_committed( xfs_extent_busy_clear(mp, &ctx->busy_extents, (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); + /* + * If we are aborting the commit, wake up anyone waiting on the + * committing list. If we don't, then a shutdown we can leave processes + * waiting in xlog_cil_force_lsn() waiting on a sequence commit that + * will never happen because we aborted it. + */ spin_lock(&ctx->cil->xc_push_lock); + if (abort) + wake_up_all(&ctx->cil->xc_commit_wait); list_del(&ctx->committing); spin_unlock(&ctx->cil->xc_push_lock); @@ -518,13 +506,6 @@ xlog_cil_push( cil->xc_ctx = new_ctx; /* - * mirror the new sequence into the cil structure so that we can do - * unlocked checks against the current sequence in log forces without - * risking deferencing a freed context pointer. - */ - cil->xc_current_sequence = new_ctx->sequence; - - /* * The switch is now done, so we can drop the context lock and move out * of a shared context. We can't just go straight to the commit record, * though - we need to synchronise with previous and future commits so @@ -542,8 +523,15 @@ xlog_cil_push( * Hence we need to add this context to the committing context list so * that higher sequences will wait for us to write out a commit record * before they do. + * + * xfs_log_force_lsn requires us to mirror the new sequence into the cil + * structure atomically with the addition of this sequence to the + * committing list. This also ensures that we can do unlocked checks + * against the current sequence in log forces without risking + * deferencing a freed context pointer. */ spin_lock(&cil->xc_push_lock); + cil->xc_current_sequence = new_ctx->sequence; list_add(&ctx->committing, &cil->xc_committing); spin_unlock(&cil->xc_push_lock); up_write(&cil->xc_ctx_lock); @@ -583,8 +571,18 @@ restart: spin_lock(&cil->xc_push_lock); list_for_each_entry(new_ctx, &cil->xc_committing, committing) { /* + * Avoid getting stuck in this loop because we were woken by the + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ + if (XLOG_FORCED_SHUTDOWN(log)) { + spin_unlock(&cil->xc_push_lock); + goto out_abort_free_ticket; + } + + /* * Higher sequences will wait for this one so skip them. - * Don't wait for own own sequence, either. + * Don't wait for our own sequence, either. */ if (new_ctx->sequence >= ctx->sequence) continue; @@ -634,7 +632,7 @@ out_abort_free_ticket: xfs_log_ticket_put(tic); out_abort: xlog_cil_committed(ctx, XFS_LI_ABORTED); - return XFS_ERROR(EIO); + return -EIO; } static void @@ -681,8 +679,14 @@ xlog_cil_push_background( } +/* + * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence + * number that is passed. When it returns, the work will be queued for + * @push_seq, but it won't be completed. The caller is expected to do any + * waiting for push_seq to complete if it is required. + */ static void -xlog_cil_push_foreground( +xlog_cil_push_now( struct xlog *log, xfs_lsn_t push_seq) { @@ -707,10 +711,8 @@ xlog_cil_push_foreground( } cil->xc_push_seq = push_seq; + queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); spin_unlock(&cil->xc_push_lock); - - /* do the push now */ - xlog_cil_push(log); } bool @@ -740,7 +742,7 @@ xlog_cil_empty( * background commit, returns without it held once background commits are * allowed again. */ -int +void xfs_log_commit_cil( struct xfs_mount *mp, struct xfs_trans *tp, @@ -786,7 +788,6 @@ xfs_log_commit_cil( xlog_cil_push_background(log); up_read(&cil->xc_ctx_lock); - return 0; } /* @@ -815,7 +816,8 @@ xlog_cil_force_lsn( * xlog_cil_push() handles racing pushes for the same sequence, * so no need to deal with it here. */ - xlog_cil_push_foreground(log, sequence); +restart: + xlog_cil_push_now(log, sequence); /* * See if we can find a previous sequence still committing. @@ -823,9 +825,15 @@ xlog_cil_force_lsn( * before allowing the force of push_seq to go ahead. Hence block * on commits for those as well. */ -restart: spin_lock(&cil->xc_push_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { + /* + * Avoid getting stuck in this loop because we were woken by the + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ + if (XLOG_FORCED_SHUTDOWN(log)) + goto out_shutdown; if (ctx->sequence > sequence) continue; if (!ctx->commit_lsn) { @@ -841,8 +849,39 @@ restart: /* found it! */ commit_lsn = ctx->commit_lsn; } + + /* + * The call to xlog_cil_push_now() executes the push in the background. + * Hence by the time we have got here it our sequence may not have been + * pushed yet. This is true if the current sequence still matches the + * push sequence after the above wait loop and the CIL still contains + * dirty objects. + * + * When the push occurs, it will empty the CIL and atomically increment + * the currect sequence past the push sequence and move it into the + * committing list. Of course, if the CIL is clean at the time of the + * push, it won't have pushed the CIL at all, so in that case we should + * try the push for this sequence again from the start just in case. + */ + if (sequence == cil->xc_current_sequence && + !list_empty(&cil->xc_cil)) { + spin_unlock(&cil->xc_push_lock); + goto restart; + } + spin_unlock(&cil->xc_push_lock); return commit_lsn; + + /* + * We detected a shutdown in progress. We need to trigger the log force + * to pass through it's iclog state machine error handling, even though + * we are already in a shutdown state. Hence we can't return + * NULLCOMMITLSN here as that has special meaning to log forces (i.e. + * LSN is already stable), so we return a zero LSN instead. + */ +out_shutdown: + spin_unlock(&cil->xc_push_lock); + return 0; } /* @@ -887,12 +926,12 @@ xlog_cil_init( cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); if (!cil) - return ENOMEM; + return -ENOMEM; ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); if (!ctx) { kmem_free(cil); - return ENOMEM; + return -ENOMEM; } INIT_WORK(&cil->xc_push_work, xlog_cil_push_work); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 9bc403a9e54f..db7cbdeb2b42 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -405,6 +405,8 @@ struct xlog { struct xlog_grant_head l_reserve_head; struct xlog_grant_head l_write_head; + struct xfs_kobj l_kobj; + /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG char *l_iclog_bak[XLOG_MAX_ICLOGS]; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b6b669df40f3..1fd5787add99 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -179,7 +179,7 @@ xlog_bread_noalign( xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); - return EFSCORRUPTED; + return -EFSCORRUPTED; } blk_no = round_down(blk_no, log->l_sectBBsize); @@ -193,7 +193,10 @@ xlog_bread_noalign( bp->b_io_length = nbblks; bp->b_error = 0; - xfsbdstrat(log->l_mp, bp); + if (XFS_FORCED_SHUTDOWN(log->l_mp)) + return -EIO; + + xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) xfs_buf_ioerror_alert(bp, __func__); @@ -265,7 +268,7 @@ xlog_bwrite( xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); - return EFSCORRUPTED; + return -EFSCORRUPTED; } blk_no = round_down(blk_no, log->l_sectBBsize); @@ -327,14 +330,14 @@ xlog_header_check_recover( xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_recover(1)", XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { xfs_warn(mp, "dirty log entry has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_recover(2)", XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -361,7 +364,7 @@ xlog_header_check_mount( xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_mount", XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -459,7 +462,7 @@ xlog_find_verify_cycle( while (!(bp = xlog_get_bp(log, bufblks))) { bufblks >>= 1; if (bufblks < log->l_sectBBsize) - return ENOMEM; + return -ENOMEM; } for (i = start_blk; i < start_blk + nbblks; i += bufblks) { @@ -521,7 +524,7 @@ xlog_find_verify_log_record( if (!(bp = xlog_get_bp(log, num_blks))) { if (!(bp = xlog_get_bp(log, 1))) - return ENOMEM; + return -ENOMEM; smallmem = 1; } else { error = xlog_bread(log, start_blk, num_blks, bp, &offset); @@ -536,7 +539,7 @@ xlog_find_verify_log_record( xfs_warn(log->l_mp, "Log inconsistent (didn't find previous header)"); ASSERT(0); - error = XFS_ERROR(EIO); + error = -EIO; goto out; } @@ -561,7 +564,7 @@ xlog_find_verify_log_record( * will be called again for the end of the physical log. */ if (i == -1) { - error = -1; + error = 1; goto out; } @@ -625,7 +628,12 @@ xlog_find_head( int error, log_bbnum = log->l_logBBsize; /* Is the end of the log device zeroed? */ - if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { + error = xlog_find_zeroed(log, &first_blk); + if (error < 0) { + xfs_warn(log->l_mp, "empty log check failed"); + return error; + } + if (error == 1) { *return_head_blk = first_blk; /* Is the whole lot zeroed? */ @@ -638,15 +646,12 @@ xlog_find_head( } return 0; - } else if (error) { - xfs_warn(log->l_mp, "empty log check failed"); - return error; } first_blk = 0; /* get cycle # of 1st block */ bp = xlog_get_bp(log, 1); if (!bp) - return ENOMEM; + return -ENOMEM; error = xlog_bread(log, 0, 1, bp, &offset); if (error) @@ -815,29 +820,29 @@ validate_head: start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ /* start ptr at last block ptr before head_blk */ - if ((error = xlog_find_verify_log_record(log, start_blk, - &head_blk, 0)) == -1) { - error = XFS_ERROR(EIO); - goto bp_err; - } else if (error) + error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); + if (error == 1) + error = -EIO; + if (error) goto bp_err; } else { start_blk = 0; ASSERT(head_blk <= INT_MAX); - if ((error = xlog_find_verify_log_record(log, start_blk, - &head_blk, 0)) == -1) { + error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); + if (error < 0) + goto bp_err; + if (error == 1) { /* We hit the beginning of the log during our search */ start_blk = log_bbnum - (num_scan_bblks - head_blk); new_blk = log_bbnum; ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0); ASSERT(head_blk <= INT_MAX); - if ((error = xlog_find_verify_log_record(log, - start_blk, &new_blk, - (int)head_blk)) == -1) { - error = XFS_ERROR(EIO); - goto bp_err; - } else if (error) + error = xlog_find_verify_log_record(log, start_blk, + &new_blk, (int)head_blk); + if (error == 1) + error = -EIO; + if (error) goto bp_err; if (new_blk != log_bbnum) head_blk = new_blk; @@ -908,7 +913,7 @@ xlog_find_tail( bp = xlog_get_bp(log, 1); if (!bp) - return ENOMEM; + return -ENOMEM; if (*head_blk == 0) { /* special case */ error = xlog_bread(log, 0, 1, bp, &offset); if (error) @@ -958,7 +963,7 @@ xlog_find_tail( xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); xlog_put_bp(bp); ASSERT(0); - return XFS_ERROR(EIO); + return -EIO; } /* find blk_no of tail of log */ @@ -1089,8 +1094,8 @@ done: * * Return: * 0 => the log is completely written to - * -1 => use *blk_no as the first block of the log - * >0 => error has occurred + * 1 => use *blk_no as the first block of the log + * <0 => error has occurred */ STATIC int xlog_find_zeroed( @@ -1109,7 +1114,7 @@ xlog_find_zeroed( /* check totally zeroed log */ bp = xlog_get_bp(log, 1); if (!bp) - return ENOMEM; + return -ENOMEM; error = xlog_bread(log, 0, 1, bp, &offset); if (error) goto bp_err; @@ -1118,7 +1123,7 @@ xlog_find_zeroed( if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; xlog_put_bp(bp); - return -1; + return 1; } /* check partially zeroed log */ @@ -1138,7 +1143,7 @@ xlog_find_zeroed( */ xfs_warn(log->l_mp, "Log inconsistent or not a log (last==0, first!=1)"); - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto bp_err; } @@ -1176,19 +1181,18 @@ xlog_find_zeroed( * Potentially backup over partial log record write. We don't need * to search the end of the log because we know it is zero. */ - if ((error = xlog_find_verify_log_record(log, start_blk, - &last_blk, 0)) == -1) { - error = XFS_ERROR(EIO); - goto bp_err; - } else if (error) - goto bp_err; + error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0); + if (error == 1) + error = -EIO; + if (error) + goto bp_err; *blk_no = last_blk; bp_err: xlog_put_bp(bp); if (error) return error; - return -1; + return 1; } /* @@ -1248,7 +1252,7 @@ xlog_write_log_records( while (!(bp = xlog_get_bp(log, bufblks))) { bufblks >>= 1; if (bufblks < sectbb) - return ENOMEM; + return -ENOMEM; } /* We may need to do a read at the start to fill in part of @@ -1351,7 +1355,7 @@ xlog_clear_stale_blocks( if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } tail_distance = tail_block + (log->l_logBBsize - head_block); } else { @@ -1363,7 +1367,7 @@ xlog_clear_stale_blocks( if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } tail_distance = tail_block - head_block; } @@ -1548,7 +1552,7 @@ xlog_recover_add_to_trans( xfs_warn(log->l_mp, "%s: bad header magic number", __func__); ASSERT(0); - return XFS_ERROR(EIO); + return -EIO; } if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); @@ -1578,7 +1582,7 @@ xlog_recover_add_to_trans( in_f->ilf_size); ASSERT(0); kmem_free(ptr); - return XFS_ERROR(EIO); + return -EIO; } item->ri_total = in_f->ilf_size; @@ -1651,6 +1655,7 @@ xlog_recover_reorder_trans( int pass) { xlog_recover_item_t *item, *n; + int error = 0; LIST_HEAD(sort_list); LIST_HEAD(cancel_list); LIST_HEAD(buffer_list); @@ -1692,9 +1697,17 @@ xlog_recover_reorder_trans( "%s: unrecognized type of log operation", __func__); ASSERT(0); - return XFS_ERROR(EIO); + /* + * return the remaining items back to the transaction + * item list so they can be freed in caller. + */ + if (!list_empty(&sort_list)) + list_splice_init(&sort_list, &trans->r_itemq); + error = -EIO; + goto out; } } +out: ASSERT(list_empty(&sort_list)); if (!list_empty(&buffer_list)) list_splice(&buffer_list, &trans->r_itemq); @@ -1704,7 +1717,7 @@ xlog_recover_reorder_trans( list_splice_tail(&inode_buffer_list, &trans->r_itemq); if (!list_empty(&cancel_list)) list_splice_tail(&cancel_list, &trans->r_itemq); - return 0; + return error; } /* @@ -1931,7 +1944,7 @@ xlog_recover_do_inode_buffer( item, bp); XFS_ERROR_REPORT("xlog_recover_do_inode_buf", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, @@ -2113,6 +2126,17 @@ xlog_recover_validate_buf_type( __uint16_t magic16; __uint16_t magicda; + /* + * We can only do post recovery validation on items on CRC enabled + * fielsystems as we need to know when the buffer was written to be able + * to determine if we should have replayed the item. If we replay old + * metadata over a newer buffer, then it will enter a temporarily + * inconsistent state resulting in verification failures. Hence for now + * just avoid the verification stage for non-crc filesystems + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); magic16 = be16_to_cpu(*(__be16*)bp->b_addr); magicda = be16_to_cpu(info->magic); @@ -2126,7 +2150,9 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_allocbt_buf_ops; break; case XFS_IBT_CRC_MAGIC: + case XFS_FIBT_CRC_MAGIC: case XFS_IBT_MAGIC: + case XFS_FIBT_MAGIC: bp->b_ops = &xfs_inobt_buf_ops; break; case XFS_BMAP_CRC_MAGIC: @@ -2148,8 +2174,6 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_agf_buf_ops; break; case XFS_BLFT_AGFL_BUF: - if (!xfs_sb_version_hascrc(&mp->m_sb)) - break; if (magic32 != XFS_AGFL_MAGIC) { xfs_warn(mp, "Bad AGFL block magic!"); ASSERT(0); @@ -2182,10 +2206,6 @@ xlog_recover_validate_buf_type( #endif break; case XFS_BLFT_DINO_BUF: - /* - * we get here with inode allocation buffers, not buffers that - * track unlinked list changes. - */ if (magic16 != XFS_DINODE_MAGIC) { xfs_warn(mp, "Bad INODE block magic!"); ASSERT(0); @@ -2265,8 +2285,6 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_attr3_leaf_buf_ops; break; case XFS_BLFT_ATTR_RMT_BUF: - if (!xfs_sb_version_hascrc(&mp->m_sb)) - break; if (magic32 != XFS_ATTR3_RMT_MAGIC) { xfs_warn(mp, "Bad attr remote magic!"); ASSERT(0); @@ -2373,16 +2391,7 @@ xlog_recover_do_reg_buffer( /* Shouldn't be any more regions */ ASSERT(i == item->ri_total); - /* - * We can only do post recovery validation on items on CRC enabled - * fielsystems as we need to know when the buffer was written to be able - * to determine if we should have replayed the item. If we replay old - * metadata over a newer buffer, then it will enter a temporarily - * inconsistent state resulting in verification failures. Hence for now - * just avoid the verification stage for non-crc filesystems - */ - if (xfs_sb_version_hascrc(&mp->m_sb)) - xlog_recover_validate_buf_type(mp, bp, buf_f); + xlog_recover_validate_buf_type(mp, bp, buf_f); } /* @@ -2390,8 +2399,11 @@ xlog_recover_do_reg_buffer( * Simple algorithm: if we have found a QUOTAOFF log item of the same type * (ie. USR or GRP), then just toss this buffer away; don't recover it. * Else, treat it as a regular buffer and do recovery. + * + * Return false if the buffer was tossed and true if we recovered the buffer to + * indicate to the caller if the buffer needs writing. */ -STATIC void +STATIC bool xlog_recover_do_dquot_buffer( struct xfs_mount *mp, struct xlog *log, @@ -2406,9 +2418,8 @@ xlog_recover_do_dquot_buffer( /* * Filesystems are required to send in quota flags at mount time. */ - if (mp->m_qflags == 0) { - return; - } + if (!mp->m_qflags) + return false; type = 0; if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) @@ -2421,9 +2432,10 @@ xlog_recover_do_dquot_buffer( * This type of quotas was turned off, so ignore this buffer */ if (log->l_quotaoffs_flag & type) - return; + return false; xlog_recover_do_reg_buffer(mp, item, bp, buf_f); + return true; } /* @@ -2482,7 +2494,7 @@ xlog_recover_buffer_pass2( bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, buf_flags, NULL); if (!bp) - return XFS_ERROR(ENOMEM); + return -ENOMEM; error = bp->b_error; if (error) { xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); @@ -2490,23 +2502,44 @@ xlog_recover_buffer_pass2( } /* - * recover the buffer only if we get an LSN from it and it's less than + * Recover the buffer only if we get an LSN from it and it's less than * the lsn of the transaction we are replaying. + * + * Note that we have to be extremely careful of readahead here. + * Readahead does not attach verfiers to the buffers so if we don't + * actually do any replay after readahead because of the LSN we found + * in the buffer if more recent than that current transaction then we + * need to attach the verifier directly. Failure to do so can lead to + * future recovery actions (e.g. EFI and unlinked list recovery) can + * operate on the buffers and they won't get the verifier attached. This + * can lead to blocks on disk having the correct content but a stale + * CRC. + * + * It is safe to assume these clean buffers are currently up to date. + * If the buffer is dirtied by a later transaction being replayed, then + * the verifier will be reset to match whatever recover turns that + * buffer into. */ lsn = xlog_recover_get_buf_lsn(mp, bp); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + xlog_recover_validate_buf_type(mp, bp, buf_f); goto out_release; + } if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); + if (error) + goto out_release; } else if (buf_f->blf_flags & (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { - xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); + bool dirty; + + dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); + if (!dirty) + goto out_release; } else { xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } - if (error) - goto out_release; /* * Perform delayed write on the buffer. Asynchronous writes will be @@ -2514,19 +2547,19 @@ xlog_recover_buffer_pass2( * * Also make sure that only inode buffers with good sizes stay in * the buffer cache. The kernel moves inodes in buffers of 1 block - * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode + * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode * buffers in the log can be a different size if the log was generated * by an older kernel using unclustered inode buffers or a newer kernel * running with a different inode cluster size. Regardless, if the - * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) - * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep + * the inode buffer size isn't MAX(blocksize, mp->m_inode_cluster_size) + * for *our* value of mp->m_inode_cluster_size, then we need to keep * the buffer out of the buffer cache so that the buffer won't * overlap with future reads of those inodes. */ if (XFS_DINODE_MAGIC == be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, - (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { + (__uint32_t)log->l_mp->m_inode_cluster_size))) { xfs_buf_stale(bp); error = xfs_bwrite(bp); } else { @@ -2584,7 +2617,7 @@ xfs_recover_inode_owner_change( ip = xfs_inode_alloc(mp, in_f->ilf_ino); if (!ip) - return ENOMEM; + return -ENOMEM; /* instantiate the inode */ xfs_dinode_from_disk(&ip->i_d, dip); @@ -2662,7 +2695,7 @@ xlog_recover_inode_pass2( bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, &xfs_inode_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error; } error = bp->b_error; @@ -2683,7 +2716,7 @@ xlog_recover_inode_pass2( __func__, dip, bp, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } dicp = item->ri_buf[1].i_addr; @@ -2693,7 +2726,7 @@ xlog_recover_inode_pass2( __func__, item, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } @@ -2750,7 +2783,7 @@ xlog_recover_inode_pass2( "%s: Bad regular inode log record, rec ptr 0x%p, " "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } } else if (unlikely(S_ISDIR(dicp->di_mode))) { @@ -2763,7 +2796,7 @@ xlog_recover_inode_pass2( "%s: Bad dir inode log record, rec ptr 0x%p, " "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } } @@ -2776,7 +2809,7 @@ xlog_recover_inode_pass2( __func__, item, dip, bp, in_f->ilf_ino, dicp->di_nextents + dicp->di_anextents, dicp->di_nblocks); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { @@ -2786,7 +2819,7 @@ xlog_recover_inode_pass2( "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } isize = xfs_icdinode_size(dicp->di_version); @@ -2796,7 +2829,7 @@ xlog_recover_inode_pass2( xfs_alert(mp, "%s: Bad inode log record length %d, rec ptr 0x%p", __func__, item->ri_buf[1].i_len, item); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } @@ -2884,7 +2917,7 @@ xlog_recover_inode_pass2( default: xfs_warn(log->l_mp, "%s: Invalid flag", __func__); ASSERT(0); - error = EIO; + error = -EIO; goto out_release; } } @@ -2905,7 +2938,7 @@ out_release: error: if (need_free) kmem_free(in_f); - return XFS_ERROR(error); + return error; } /* @@ -2932,7 +2965,7 @@ xlog_recover_quotaoff_pass1( if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) log->l_quotaoffs_flag |= XFS_DQ_GROUP; - return (0); + return 0; } /* @@ -2957,17 +2990,17 @@ xlog_recover_dquot_pass2( * Filesystems are required to send in quota flags at mount time. */ if (mp->m_qflags == 0) - return (0); + return 0; recddq = item->ri_buf[1].i_addr; if (recddq == NULL) { xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); - return XFS_ERROR(EIO); + return -EIO; } if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { xfs_alert(log->l_mp, "dquot too small (%d) in %s.", item->ri_buf[1].i_len, __func__); - return XFS_ERROR(EIO); + return -EIO; } /* @@ -2976,7 +3009,7 @@ xlog_recover_dquot_pass2( type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); ASSERT(type); if (log->l_quotaoffs_flag & type) - return (0); + return 0; /* * At this point we know that quota was _not_ turned off. @@ -2993,12 +3026,19 @@ xlog_recover_dquot_pass2( error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, "xlog_recover_dquot_pass2 (log copy)"); if (error) - return XFS_ERROR(EIO); + return -EIO; ASSERT(dq_f->qlf_len == 1); + /* + * At this point we are assuming that the dquots have been allocated + * and hence the buffer has valid dquots stamped in it. It should, + * therefore, pass verifier validation. If the dquot is bad, then the + * we'll return an error here, so we don't need to specifically check + * the dquot in the buffer after the verifier has run. + */ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, - NULL); + &xfs_dquot_buf_ops); if (error) return error; @@ -3006,18 +3046,6 @@ xlog_recover_dquot_pass2( ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); /* - * At least the magic num portion should be on disk because this - * was among a chunk of dquots created earlier, and we did some - * minimal initialization then. - */ - error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, - "xlog_recover_dquot_pass2"); - if (error) { - xfs_buf_relse(bp); - return XFS_ERROR(EIO); - } - - /* * If the dquot has an LSN in it, recover the dquot only if it's less * than the lsn of the transaction we are replaying. */ @@ -3133,7 +3161,7 @@ xlog_recover_efd_pass2( } lip = xfs_trans_ail_cursor_next(ailp, &cur); } - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); return 0; @@ -3164,47 +3192,47 @@ xlog_recover_do_icreate_pass2( icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; if (icl->icl_type != XFS_LI_ICREATE) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); - return EINVAL; + return -EINVAL; } if (icl->icl_size != 1) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); - return EINVAL; + return -EINVAL; } agno = be32_to_cpu(icl->icl_ag); if (agno >= mp->m_sb.sb_agcount) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); - return EINVAL; + return -EINVAL; } agbno = be32_to_cpu(icl->icl_agbno); if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); - return EINVAL; + return -EINVAL; } isize = be32_to_cpu(icl->icl_isize); if (isize != mp->m_sb.sb_inodesize) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); - return EINVAL; + return -EINVAL; } count = be32_to_cpu(icl->icl_count); if (!count) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); - return EINVAL; + return -EINVAL; } length = be32_to_cpu(icl->icl_length); if (!length || length >= mp->m_sb.sb_agblocks) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); - return EINVAL; + return -EINVAL; } /* existing allocation is fixed value */ - ASSERT(count == XFS_IALLOC_INODES(mp)); - ASSERT(length == XFS_IALLOC_BLOCKS(mp)); - if (count != XFS_IALLOC_INODES(mp) || - length != XFS_IALLOC_BLOCKS(mp)) { + ASSERT(count == mp->m_ialloc_inos); + ASSERT(length == mp->m_ialloc_blks); + if (count != mp->m_ialloc_inos || + length != mp->m_ialloc_blks) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); - return EINVAL; + return -EINVAL; } /* @@ -3375,7 +3403,7 @@ xlog_recover_commit_pass1( xfs_warn(log->l_mp, "%s: invalid item type (%d)", __func__, ITEM_TYPE(item)); ASSERT(0); - return XFS_ERROR(EIO); + return -EIO; } } @@ -3411,7 +3439,7 @@ xlog_recover_commit_pass2( xfs_warn(log->l_mp, "%s: invalid item type (%d)", __func__, ITEM_TYPE(item)); ASSERT(0); - return XFS_ERROR(EIO); + return -EIO; } } @@ -3508,8 +3536,7 @@ out: STATIC int xlog_recover_unmount_trans( - struct xlog *log, - struct xlog_recover *trans) + struct xlog *log) { /* Do nothing now */ xfs_warn(log->l_mp, "%s: Unmount LR", __func__); @@ -3547,7 +3574,7 @@ xlog_recover_process_data( /* check the log format matches our own - else we can't recover */ if (xlog_header_check_recover(log->l_mp, rhead)) - return (XFS_ERROR(EIO)); + return -EIO; while ((dp < lp) && num_logops) { ASSERT(dp + sizeof(xlog_op_header_t) <= lp); @@ -3558,7 +3585,7 @@ xlog_recover_process_data( xfs_warn(log->l_mp, "%s: bad clientid 0x%x", __func__, ohead->oh_clientid); ASSERT(0); - return (XFS_ERROR(EIO)); + return -EIO; } tid = be32_to_cpu(ohead->oh_tid); hash = XLOG_RHASH(tid); @@ -3572,7 +3599,7 @@ xlog_recover_process_data( xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, be32_to_cpu(ohead->oh_len)); WARN_ON(1); - return (XFS_ERROR(EIO)); + return -EIO; } flags = ohead->oh_flags & ~XLOG_END_TRANS; if (flags & XLOG_WAS_CONT_TRANS) @@ -3583,7 +3610,7 @@ xlog_recover_process_data( trans, pass); break; case XLOG_UNMOUNT_TRANS: - error = xlog_recover_unmount_trans(log, trans); + error = xlog_recover_unmount_trans(log); break; case XLOG_WAS_CONT_TRANS: error = xlog_recover_add_to_cont_trans(log, @@ -3594,7 +3621,7 @@ xlog_recover_process_data( xfs_warn(log->l_mp, "%s: bad transaction", __func__); ASSERT(0); - error = XFS_ERROR(EIO); + error = -EIO; break; case 0: case XLOG_CONTINUE_TRANS: @@ -3605,11 +3632,13 @@ xlog_recover_process_data( xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); ASSERT(0); - error = XFS_ERROR(EIO); + error = -EIO; break; } - if (error) + if (error) { + xlog_recover_free_trans(trans); return error; + } } dp += be32_to_cpu(ohead->oh_len); num_logops--; @@ -3654,7 +3683,7 @@ xlog_recover_process_efi( */ set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); xfs_efi_release(efip, efip->efi_format.efi_nextents); - return XFS_ERROR(EIO); + return -EIO; } } @@ -3743,7 +3772,7 @@ xlog_recover_process_efis( lip = xfs_trans_ail_cursor_next(ailp, &cur); } out: - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); return error; } @@ -3954,7 +3983,7 @@ xlog_unpack_data_crc( * CRC protection by punting an error back up the stack. */ if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) - return EFSCORRUPTED; + return -EFSCORRUPTED; } return 0; @@ -4003,14 +4032,14 @@ xlog_valid_rec_header( if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { XFS_ERROR_REPORT("xlog_valid_rec_header(1)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely( (!rhead->h_version || (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", __func__, be32_to_cpu(rhead->h_version)); - return XFS_ERROR(EIO); + return -EIO; } /* LR body must have data or it wouldn't have been written */ @@ -4018,12 +4047,12 @@ xlog_valid_rec_header( if (unlikely( hlen <= 0 || hlen > INT_MAX )) { XFS_ERROR_REPORT("xlog_valid_rec_header(2)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { XFS_ERROR_REPORT("xlog_valid_rec_header(3)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -4066,7 +4095,7 @@ xlog_do_recovery_pass( */ hbp = xlog_get_bp(log, 1); if (!hbp) - return ENOMEM; + return -ENOMEM; error = xlog_bread(log, tail_blk, 1, hbp, &offset); if (error) @@ -4095,11 +4124,11 @@ xlog_do_recovery_pass( } if (!hbp) - return ENOMEM; + return -ENOMEM; dbp = xlog_get_bp(log, BTOBB(h_size)); if (!dbp) { xlog_put_bp(hbp); - return ENOMEM; + return -ENOMEM; } memset(rhash, 0, sizeof(rhash)); @@ -4373,7 +4402,7 @@ xlog_do_recover( * If IO errors happened during recovery, bail out. */ if (XFS_FORCED_SHUTDOWN(log->l_mp)) { - return (EIO); + return -EIO; } /* @@ -4397,7 +4426,13 @@ xlog_do_recover( XFS_BUF_READ(bp); XFS_BUF_UNASYNC(bp); bp->b_ops = &xfs_sb_buf_ops; - xfsbdstrat(log->l_mp, bp); + + if (XFS_FORCED_SHUTDOWN(log->l_mp)) { + xfs_buf_relse(bp); + return -EIO; + } + + xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, __func__); @@ -4471,7 +4506,7 @@ xlog_recover( "Please recover the log on a kernel that supports the unknown features.", (log->l_mp->m_sb.sb_features_log_incompat & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); - return EINVAL; + return -EINVAL; } xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 02df7b408a26..fbf0384a466f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -42,6 +42,7 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_dinode.h" +#include "xfs_sysfs.h" #ifdef HAVE_PERCPU_SB @@ -60,6 +61,8 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; static uuid_t *xfs_uuid_table; +extern struct kset *xfs_kset; + /* * See if the UUID is unique among mounted XFS filesystems. * Mount fails if UUID is nil or a FS with the same UUID is already mounted. @@ -76,7 +79,7 @@ xfs_uuid_mount( if (uuid_is_nil(uuid)) { xfs_warn(mp, "Filesystem has nil UUID - can't mount"); - return XFS_ERROR(EINVAL); + return -EINVAL; } mutex_lock(&xfs_uuid_table_mutex); @@ -104,7 +107,7 @@ xfs_uuid_mount( out_duplicate: mutex_unlock(&xfs_uuid_table_mutex); xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid); - return XFS_ERROR(EINVAL); + return -EINVAL; } STATIC void @@ -173,13 +176,9 @@ xfs_sb_validate_fsb_count( ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); ASSERT(sbp->sb_blocklog >= BBSHIFT); -#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ + /* Limited by ULONG_MAX of page cache index */ if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) - return EFBIG; -#else /* Limited by UINT_MAX of sectors */ - if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) - return EFBIG; -#endif + return -EFBIG; return 0; } @@ -250,9 +249,9 @@ xfs_initialize_perag( mp->m_flags &= ~XFS_MOUNT_32BITINODES; if (mp->m_flags & XFS_MOUNT_32BITINODES) - index = xfs_set_inode32(mp); + index = xfs_set_inode32(mp, agcount); else - index = xfs_set_inode64(mp); + index = xfs_set_inode64(mp, agcount); if (maxagi) *maxagi = index; @@ -282,39 +281,59 @@ xfs_readsb( struct xfs_sb *sbp = &mp->m_sb; int error; int loud = !(flags & XFS_MFSI_QUIET); + const struct xfs_buf_ops *buf_ops; ASSERT(mp->m_sb_bp == NULL); ASSERT(mp->m_ddev_targp != NULL); /* + * For the initial read, we must guess at the sector + * size based on the block device. It's enough to + * get the sb_sectsize out of the superblock and + * then reread with the proper length. + * We don't verify it yet, because it may not be complete. + */ + sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); + buf_ops = NULL; + + /* * Allocate a (locked) buffer to hold the superblock. * This will be kept around at all times to optimize * access to the superblock. */ - sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - reread: bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), 0, - loud ? &xfs_sb_buf_ops - : &xfs_sb_quiet_buf_ops); + BTOBB(sector_size), 0, buf_ops); if (!bp) { if (loud) xfs_warn(mp, "SB buffer read failed"); - return EIO; + return -EIO; } if (bp->b_error) { error = bp->b_error; if (loud) xfs_warn(mp, "SB validate failed with error %d.", error); + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; goto release_buf; } /* * Initialize the mount structure from the superblock. */ - xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); - xfs_sb_quota_from_disk(&mp->m_sb); + xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); + + /* + * If we haven't validated the superblock, do so now before we try + * to check the sector size and reread the superblock appropriately. + */ + if (sbp->sb_magicnum != XFS_SB_MAGIC) { + if (loud) + xfs_warn(mp, "Invalid superblock magic number"); + error = -EINVAL; + goto release_buf; + } /* * We must be able to do sector-sized and sector-aligned IO. @@ -323,17 +342,18 @@ reread: if (loud) xfs_warn(mp, "device supports %u byte sectors (not %u)", sector_size, sbp->sb_sectsize); - error = ENOSYS; + error = -ENOSYS; goto release_buf; } - /* - * If device sector size is smaller than the superblock size, - * re-read the superblock so the buffer is correctly sized. - */ - if (sector_size < sbp->sb_sectsize) { + if (buf_ops == NULL) { + /* + * Re-read the superblock so the buffer is correctly sized, + * and properly verified. + */ xfs_buf_relse(bp); sector_size = sbp->sb_sectsize; + buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops; goto reread; } @@ -370,7 +390,7 @@ xfs_update_alignment(xfs_mount_t *mp) xfs_warn(mp, "alignment check failed: sunit/swidth vs. blocksize(%d)", sbp->sb_blocksize); - return XFS_ERROR(EINVAL); + return -EINVAL; } else { /* * Convert the stripe unit and width to FSBs. @@ -380,14 +400,14 @@ xfs_update_alignment(xfs_mount_t *mp) xfs_warn(mp, "alignment check failed: sunit/swidth vs. agsize(%d)", sbp->sb_agblocks); - return XFS_ERROR(EINVAL); + return -EINVAL; } else if (mp->m_dalign) { mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { xfs_warn(mp, "alignment check failed: sunit(%d) less than bsize(%d)", mp->m_dalign, sbp->sb_blocksize); - return XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -407,7 +427,7 @@ xfs_update_alignment(xfs_mount_t *mp) } else { xfs_warn(mp, "cannot change alignment: superblock does not support data alignment"); - return XFS_ERROR(EINVAL); + return -EINVAL; } } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && xfs_sb_version_hasdalign(&mp->m_sb)) { @@ -534,14 +554,14 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { xfs_warn(mp, "filesystem size mismatch detected"); - return XFS_ERROR(EFBIG); + return -EFBIG; } bp = xfs_buf_read_uncached(mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), XFS_FSS_TO_BB(mp, 1), 0, NULL); if (!bp) { xfs_warn(mp, "last sector read failed"); - return EIO; + return -EIO; } xfs_buf_relse(bp); @@ -549,14 +569,14 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { xfs_warn(mp, "log size mismatch detected"); - return XFS_ERROR(EFBIG); + return -EFBIG; } bp = xfs_buf_read_uncached(mp->m_logdev_targp, d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, 1), 0, NULL); if (!bp) { xfs_warn(mp, "log device read failed"); - return EIO; + return -EIO; } xfs_buf_relse(bp); } @@ -686,6 +706,12 @@ xfs_mountfs( mp->m_update_flags |= XFS_SB_VERSIONNUM; } + /* always use v2 inodes by default now */ + if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { + mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; + mp->m_update_flags |= XFS_SB_VERSIONNUM; + } + /* * Check if sb_agblocks is aligned at stripe boundary * If sb_agblocks is NOT aligned turn off m_dalign since @@ -703,10 +729,15 @@ xfs_mountfs( xfs_set_maxicount(mp); - error = xfs_uuid_mount(mp); + mp->m_kobj.kobject.kset = xfs_kset; + error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); if (error) goto out; + error = xfs_uuid_mount(mp); + if (error) + goto out_remove_sysfs; + /* * Set the minimum read and write sizes */ @@ -732,8 +763,6 @@ xfs_mountfs( new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) mp->m_inode_cluster_size = new_size; - xfs_info(mp, "Using inode cluster size of %d bytes", - mp->m_inode_cluster_size); } /* @@ -765,12 +794,11 @@ xfs_mountfs( mp->m_dmevmask = 0; /* not persistent; set after each mount */ - xfs_dir_mount(mp); - - /* - * Initialize the attribute manager's entries. - */ - mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100; + error = xfs_da_mount(mp); + if (error) { + xfs_warn(mp, "Failed dir/attr init: %d", error); + goto out_remove_uuid; + } /* * Initialize the precomputed transaction reservations values. @@ -785,13 +813,13 @@ xfs_mountfs( error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); if (error) { xfs_warn(mp, "Failed per-ag init: %d", error); - goto out_remove_uuid; + goto out_free_dir; } if (!sbp->sb_logblocks) { xfs_warn(mp, "no log defined"); XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto out_free_perag; } @@ -830,7 +858,7 @@ xfs_mountfs( !mp->m_sb.sb_inprogress) { error = xfs_initialize_perag_data(mp, sbp->sb_agcount); if (error) - goto out_fail_wait; + goto out_log_dealloc; } /* @@ -851,7 +879,7 @@ xfs_mountfs( xfs_iunlock(rip, XFS_ILOCK_EXCL); XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto out_rele_rip; } mp->m_rootip = rip; /* save it */ @@ -902,7 +930,7 @@ xfs_mountfs( xfs_notice(mp, "resetting quota flags"); error = xfs_mount_reset_sbqflags(mp); if (error) - return error; + goto out_rtunmount; } } @@ -960,8 +988,12 @@ xfs_mountfs( xfs_wait_buftarg(mp->m_ddev_targp); out_free_perag: xfs_free_perag(mp); + out_free_dir: + xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); + out_remove_sysfs: + xfs_sysfs_del(&mp->m_kobj); out: return error; } @@ -1037,12 +1069,15 @@ xfs_unmountfs( "Freespace may not be correct on next mount."); xfs_log_unmount(mp); + xfs_da_unmount(mp); xfs_uuid_unmount(mp); #if defined(DEBUG) xfs_errortag_clearall(mp, 0); #endif xfs_free_perag(mp); + + xfs_sysfs_del(&mp->m_kobj); } int @@ -1124,7 +1159,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_icount = lcounter; return 0; @@ -1133,7 +1168,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_ifree = lcounter; return 0; @@ -1163,7 +1198,7 @@ xfs_mod_incore_sb_unlocked( * blocks if were allowed to. */ if (!rsvd) - return XFS_ERROR(ENOSPC); + return -ENOSPC; lcounter = (long long)mp->m_resblks_avail + delta; if (lcounter >= 0) { @@ -1174,7 +1209,7 @@ xfs_mod_incore_sb_unlocked( "Filesystem \"%s\": reserve blocks depleted! " "Consider increasing reserve pool size.", mp->m_fsname); - return XFS_ERROR(ENOSPC); + return -ENOSPC; } mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); @@ -1183,7 +1218,7 @@ xfs_mod_incore_sb_unlocked( lcounter = (long long)mp->m_sb.sb_frextents; lcounter += delta; if (lcounter < 0) { - return XFS_ERROR(ENOSPC); + return -ENOSPC; } mp->m_sb.sb_frextents = lcounter; return 0; @@ -1192,7 +1227,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_dblocks = lcounter; return 0; @@ -1201,7 +1236,7 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_agcount = scounter; return 0; @@ -1210,7 +1245,7 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_imax_pct = scounter; return 0; @@ -1219,7 +1254,7 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rextsize = scounter; return 0; @@ -1228,7 +1263,7 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rbmblocks = scounter; return 0; @@ -1237,7 +1272,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rblocks = lcounter; return 0; @@ -1246,7 +1281,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rextents = lcounter; return 0; @@ -1255,13 +1290,13 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rextslog = scounter; return 0; default: ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -1424,7 +1459,7 @@ xfs_dev_is_read_only( (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { xfs_notice(mp, "%s required on read-only device.", message); xfs_notice(mp, "write access unavailable, cannot proceed."); - return EROFS; + return -EROFS; } return 0; } @@ -1967,7 +2002,7 @@ slow_path: * (e.g. lots of space just got freed). After that * we are done. */ - if (ret != ENOSPC) + if (ret != -ENOSPC) xfs_icsb_balance_counter(mp, field, 0); xfs_icsb_unlock(mp); return ret; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a466c5e5826e..b0447c86e7e2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -27,6 +27,7 @@ struct xfs_nameops; struct xfs_ail; struct xfs_quotainfo; struct xfs_dir_ops; +struct xfs_da_geometry; #ifdef HAVE_PERCPU_SB @@ -96,6 +97,8 @@ typedef struct xfs_mount { uint m_readio_blocks; /* min read size blocks */ uint m_writeio_log; /* min write size log bytes */ uint m_writeio_blocks; /* min write size blocks */ + struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ + struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ struct xlog *m_log; /* log specific stuff */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ @@ -131,8 +134,6 @@ typedef struct xfs_mount { int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ __uint64_t m_flags; /* global mount flags */ - uint m_dir_node_ents; /* #entries in a dir danode */ - uint m_attr_node_ents; /* #entries in attr danode */ int m_ialloc_inos; /* inodes in inode allocation */ int m_ialloc_blks; /* blocks in inode allocation */ int m_inoalign_mask;/* mask sb_inoalignmt if used */ @@ -145,17 +146,10 @@ typedef struct xfs_mount { int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ int m_sinoalign; /* stripe unit inode alignment */ - int m_attr_magicpct;/* 37% of the blocksize */ - int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ - int m_dirblksize; /* directory block sz--bytes */ - int m_dirblkfsbs; /* directory block sz--fsbs */ - xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ - xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ - xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ uint m_chsize; /* size of next field */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB @@ -172,6 +166,7 @@ typedef struct xfs_mount { on the next remount,rw */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ + struct xfs_kobj m_kobj; struct workqueue_struct *m_data_workqueue; struct workqueue_struct *m_unwritten_workqueue; diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 4aff56395732..1eb6f3df698c 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -100,14 +100,20 @@ * likely result in a loop in one of the lists. That's a sure-fire recipe for * an infinite loop in the code. */ -typedef struct xfs_mru_cache_elem -{ - struct list_head list_node; - unsigned long key; - void *value; -} xfs_mru_cache_elem_t; +struct xfs_mru_cache { + struct radix_tree_root store; /* Core storage data structure. */ + struct list_head *lists; /* Array of lists, one per grp. */ + struct list_head reap_list; /* Elements overdue for reaping. */ + spinlock_t lock; /* Lock to protect this struct. */ + unsigned int grp_count; /* Number of discrete groups. */ + unsigned int grp_time; /* Time period spanned by grps. */ + unsigned int lru_grp; /* Group containing time zero. */ + unsigned long time_zero; /* Time first element was added. */ + xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ + struct delayed_work work; /* Workqueue data for reaping. */ + unsigned int queued; /* work has been queued */ +}; -static kmem_zone_t *xfs_mru_elem_zone; static struct workqueue_struct *xfs_mru_reap_wq; /* @@ -129,12 +135,12 @@ static struct workqueue_struct *xfs_mru_reap_wq; */ STATIC unsigned long _xfs_mru_cache_migrate( - xfs_mru_cache_t *mru, - unsigned long now) + struct xfs_mru_cache *mru, + unsigned long now) { - unsigned int grp; - unsigned int migrated = 0; - struct list_head *lru_list; + unsigned int grp; + unsigned int migrated = 0; + struct list_head *lru_list; /* Nothing to do if the data store is empty. */ if (!mru->time_zero) @@ -193,11 +199,11 @@ _xfs_mru_cache_migrate( */ STATIC void _xfs_mru_cache_list_insert( - xfs_mru_cache_t *mru, - xfs_mru_cache_elem_t *elem) + struct xfs_mru_cache *mru, + struct xfs_mru_cache_elem *elem) { - unsigned int grp = 0; - unsigned long now = jiffies; + unsigned int grp = 0; + unsigned long now = jiffies; /* * If the data store is empty, initialise time zero, leave grp set to @@ -231,10 +237,10 @@ _xfs_mru_cache_list_insert( */ STATIC void _xfs_mru_cache_clear_reap_list( - xfs_mru_cache_t *mru) __releases(mru->lock) __acquires(mru->lock) - + struct xfs_mru_cache *mru) + __releases(mru->lock) __acquires(mru->lock) { - xfs_mru_cache_elem_t *elem, *next; + struct xfs_mru_cache_elem *elem, *next; struct list_head tmp; INIT_LIST_HEAD(&tmp); @@ -252,15 +258,8 @@ _xfs_mru_cache_clear_reap_list( spin_unlock(&mru->lock); list_for_each_entry_safe(elem, next, &tmp, list_node) { - - /* Remove the element from the reap list. */ list_del_init(&elem->list_node); - - /* Call the client's free function with the key and value pointer. */ - mru->free_func(elem->key, elem->value); - - /* Free the element structure. */ - kmem_zone_free(xfs_mru_elem_zone, elem); + mru->free_func(elem); } spin_lock(&mru->lock); @@ -277,7 +276,8 @@ STATIC void _xfs_mru_cache_reap( struct work_struct *work) { - xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); + struct xfs_mru_cache *mru = + container_of(work, struct xfs_mru_cache, work.work); unsigned long now, next; ASSERT(mru && mru->lists); @@ -304,28 +304,16 @@ _xfs_mru_cache_reap( int xfs_mru_cache_init(void) { - xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), - "xfs_mru_cache_elem"); - if (!xfs_mru_elem_zone) - goto out; - xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); if (!xfs_mru_reap_wq) - goto out_destroy_mru_elem_zone; - + return -ENOMEM; return 0; - - out_destroy_mru_elem_zone: - kmem_zone_destroy(xfs_mru_elem_zone); - out: - return -ENOMEM; } void xfs_mru_cache_uninit(void) { destroy_workqueue(xfs_mru_reap_wq); - kmem_zone_destroy(xfs_mru_elem_zone); } /* @@ -336,33 +324,33 @@ xfs_mru_cache_uninit(void) */ int xfs_mru_cache_create( - xfs_mru_cache_t **mrup, + struct xfs_mru_cache **mrup, unsigned int lifetime_ms, unsigned int grp_count, xfs_mru_cache_free_func_t free_func) { - xfs_mru_cache_t *mru = NULL; - int err = 0, grp; - unsigned int grp_time; + struct xfs_mru_cache *mru = NULL; + int err = 0, grp; + unsigned int grp_time; if (mrup) *mrup = NULL; if (!mrup || !grp_count || !lifetime_ms || !free_func) - return EINVAL; + return -EINVAL; if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count)) - return EINVAL; + return -EINVAL; if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP))) - return ENOMEM; + return -ENOMEM; /* An extra list is needed to avoid reaping up to a grp_time early. */ mru->grp_count = grp_count + 1; mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); if (!mru->lists) { - err = ENOMEM; + err = -ENOMEM; goto exit; } @@ -400,7 +388,7 @@ exit: */ static void xfs_mru_cache_flush( - xfs_mru_cache_t *mru) + struct xfs_mru_cache *mru) { if (!mru || !mru->lists) return; @@ -420,7 +408,7 @@ xfs_mru_cache_flush( void xfs_mru_cache_destroy( - xfs_mru_cache_t *mru) + struct xfs_mru_cache *mru) { if (!mru || !mru->lists) return; @@ -438,38 +426,30 @@ xfs_mru_cache_destroy( */ int xfs_mru_cache_insert( - xfs_mru_cache_t *mru, - unsigned long key, - void *value) + struct xfs_mru_cache *mru, + unsigned long key, + struct xfs_mru_cache_elem *elem) { - xfs_mru_cache_elem_t *elem; + int error; ASSERT(mru && mru->lists); if (!mru || !mru->lists) - return EINVAL; + return -EINVAL; - elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP); - if (!elem) - return ENOMEM; - - if (radix_tree_preload(GFP_KERNEL)) { - kmem_zone_free(xfs_mru_elem_zone, elem); - return ENOMEM; - } + if (radix_tree_preload(GFP_KERNEL)) + return -ENOMEM; INIT_LIST_HEAD(&elem->list_node); elem->key = key; - elem->value = value; spin_lock(&mru->lock); - - radix_tree_insert(&mru->store, key, elem); + error = radix_tree_insert(&mru->store, key, elem); radix_tree_preload_end(); - _xfs_mru_cache_list_insert(mru, elem); - + if (!error) + _xfs_mru_cache_list_insert(mru, elem); spin_unlock(&mru->lock); - return 0; + return error; } /* @@ -478,13 +458,12 @@ xfs_mru_cache_insert( * the client data pointer for the removed element is returned, otherwise this * function will return a NULL pointer. */ -void * +struct xfs_mru_cache_elem * xfs_mru_cache_remove( - xfs_mru_cache_t *mru, - unsigned long key) + struct xfs_mru_cache *mru, + unsigned long key) { - xfs_mru_cache_elem_t *elem; - void *value = NULL; + struct xfs_mru_cache_elem *elem; ASSERT(mru && mru->lists); if (!mru || !mru->lists) @@ -492,17 +471,11 @@ xfs_mru_cache_remove( spin_lock(&mru->lock); elem = radix_tree_delete(&mru->store, key); - if (elem) { - value = elem->value; + if (elem) list_del(&elem->list_node); - } - spin_unlock(&mru->lock); - if (elem) - kmem_zone_free(xfs_mru_elem_zone, elem); - - return value; + return elem; } /* @@ -511,13 +484,14 @@ xfs_mru_cache_remove( */ void xfs_mru_cache_delete( - xfs_mru_cache_t *mru, - unsigned long key) + struct xfs_mru_cache *mru, + unsigned long key) { - void *value = xfs_mru_cache_remove(mru, key); + struct xfs_mru_cache_elem *elem; - if (value) - mru->free_func(key, value); + elem = xfs_mru_cache_remove(mru, key); + if (elem) + mru->free_func(elem); } /* @@ -540,12 +514,12 @@ xfs_mru_cache_delete( * status, we need to help it get it right by annotating the path that does * not release the lock. */ -void * +struct xfs_mru_cache_elem * xfs_mru_cache_lookup( - xfs_mru_cache_t *mru, - unsigned long key) + struct xfs_mru_cache *mru, + unsigned long key) { - xfs_mru_cache_elem_t *elem; + struct xfs_mru_cache_elem *elem; ASSERT(mru && mru->lists); if (!mru || !mru->lists) @@ -560,7 +534,7 @@ xfs_mru_cache_lookup( } else spin_unlock(&mru->lock); - return elem ? elem->value : NULL; + return elem; } /* @@ -570,7 +544,8 @@ xfs_mru_cache_lookup( */ void xfs_mru_cache_done( - xfs_mru_cache_t *mru) __releases(mru->lock) + struct xfs_mru_cache *mru) + __releases(mru->lock) { spin_unlock(&mru->lock); } diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index 36dd3ec8b4eb..fb5245ba5ff7 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -18,24 +18,15 @@ #ifndef __XFS_MRU_CACHE_H__ #define __XFS_MRU_CACHE_H__ +struct xfs_mru_cache; -/* Function pointer type for callback to free a client's data pointer. */ -typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*); +struct xfs_mru_cache_elem { + struct list_head list_node; + unsigned long key; +}; -typedef struct xfs_mru_cache -{ - struct radix_tree_root store; /* Core storage data structure. */ - struct list_head *lists; /* Array of lists, one per grp. */ - struct list_head reap_list; /* Elements overdue for reaping. */ - spinlock_t lock; /* Lock to protect this struct. */ - unsigned int grp_count; /* Number of discrete groups. */ - unsigned int grp_time; /* Time period spanned by grps. */ - unsigned int lru_grp; /* Group containing time zero. */ - unsigned long time_zero; /* Time first element was added. */ - xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ - struct delayed_work work; /* Workqueue data for reaping. */ - unsigned int queued; /* work has been queued */ -} xfs_mru_cache_t; +/* Function pointer type for callback to free a client's data pointer. */ +typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem); int xfs_mru_cache_init(void); void xfs_mru_cache_uninit(void); @@ -44,10 +35,12 @@ int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, xfs_mru_cache_free_func_t free_func); void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, - void *value); -void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); + struct xfs_mru_cache_elem *elem); +struct xfs_mru_cache_elem * +xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); -void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); +struct xfs_mru_cache_elem * +xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); void xfs_mru_cache_done(struct xfs_mru_cache *mru); #endif /* __XFS_MRU_CACHE_H__ */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 14a4996cfec6..10232102b4a6 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -98,18 +98,18 @@ restart: next_index = be32_to_cpu(dqp->q_core.d_id) + 1; error = execute(batch[i], data); - if (error == EAGAIN) { + if (error == -EAGAIN) { skipped++; continue; } - if (error && last_error != EFSCORRUPTED) + if (error && last_error != -EFSCORRUPTED) last_error = error; } mutex_unlock(&qi->qi_tree_lock); /* bail out if the filesystem is corrupted. */ - if (last_error == EFSCORRUPTED) { + if (last_error == -EFSCORRUPTED) { skipped = 0; break; } @@ -134,28 +134,11 @@ xfs_qm_dqpurge( { struct xfs_mount *mp = dqp->q_mount; struct xfs_quotainfo *qi = mp->m_quotainfo; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; xfs_dqlock(dqp); if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { xfs_dqunlock(dqp); - return EAGAIN; - } - - /* - * If this quota has a hint attached, prepare for releasing it now. - */ - gdqp = dqp->q_gdquot; - if (gdqp) { - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - - pdqp = dqp->q_pdquot; - if (pdqp) { - xfs_dqlock(pdqp); - dqp->q_pdquot = NULL; + return -EAGAIN; } dqp->dq_flags |= XFS_DQ_FREEING; @@ -206,11 +189,6 @@ xfs_qm_dqpurge( XFS_STATS_DEC(xs_qm_dquot_unused); xfs_qm_dqdestroy(dqp); - - if (gdqp) - xfs_qm_dqput(gdqp); - if (pdqp) - xfs_qm_dqput(pdqp); return 0; } @@ -243,100 +221,6 @@ xfs_qm_unmount( } } - -/* - * This is called from xfs_mountfs to start quotas and initialize all - * necessary data structures like quotainfo. This is also responsible for - * running a quotacheck as necessary. We are guaranteed that the superblock - * is consistently read in at this point. - * - * If we fail here, the mount will continue with quota turned off. We don't - * need to inidicate success or failure at all. - */ -void -xfs_qm_mount_quotas( - xfs_mount_t *mp) -{ - int error = 0; - uint sbf; - - /* - * If quotas on realtime volumes is not supported, we disable - * quotas immediately. - */ - if (mp->m_sb.sb_rextents) { - xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); - mp->m_qflags = 0; - goto write_changes; - } - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * Allocate the quotainfo structure inside the mount struct, and - * create quotainode(s), and change/rev superblock if necessary. - */ - error = xfs_qm_init_quotainfo(mp); - if (error) { - /* - * We must turn off quotas. - */ - ASSERT(mp->m_quotainfo == NULL); - mp->m_qflags = 0; - goto write_changes; - } - /* - * If any of the quotas are not consistent, do a quotacheck. - */ - if (XFS_QM_NEED_QUOTACHECK(mp)) { - error = xfs_qm_quotacheck(mp); - if (error) { - /* Quotacheck failed and disabled quotas. */ - return; - } - } - /* - * If one type of quotas is off, then it will lose its - * quotachecked status, since we won't be doing accounting for - * that type anymore. - */ - if (!XFS_IS_UQUOTA_ON(mp)) - mp->m_qflags &= ~XFS_UQUOTA_CHKD; - if (!XFS_IS_GQUOTA_ON(mp)) - mp->m_qflags &= ~XFS_GQUOTA_CHKD; - if (!XFS_IS_PQUOTA_ON(mp)) - mp->m_qflags &= ~XFS_PQUOTA_CHKD; - - write_changes: - /* - * We actually don't have to acquire the m_sb_lock at all. - * This can only be called from mount, and that's single threaded. XXX - */ - spin_lock(&mp->m_sb_lock); - sbf = mp->m_sb.sb_qflags; - mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; - spin_unlock(&mp->m_sb_lock); - - if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { - if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { - /* - * We could only have been turning quotas off. - * We aren't in very good shape actually because - * the incore structures are convinced that quotas are - * off, but the on disk superblock doesn't know that ! - */ - ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); - xfs_alert(mp, "%s: Superblock update failed!", - __func__); - } - } - - if (error) { - xfs_warn(mp, "Failed to initialize disk quotas."); - return; - } -} - /* * Called from the vfsops layer. */ @@ -380,7 +264,6 @@ xfs_qm_dqattach_one( xfs_dqid_t id, uint type, uint doalloc, - xfs_dquot_t *udqhint, /* hint */ xfs_dquot_t **IO_idqpp) { xfs_dquot_t *dqp; @@ -390,9 +273,9 @@ xfs_qm_dqattach_one( error = 0; /* - * See if we already have it in the inode itself. IO_idqpp is - * &i_udquot or &i_gdquot. This made the code look weird, but - * made the logic a lot simpler. + * See if we already have it in the inode itself. IO_idqpp is &i_udquot + * or &i_gdquot. This made the code look weird, but made the logic a lot + * simpler. */ dqp = *IO_idqpp; if (dqp) { @@ -401,49 +284,10 @@ xfs_qm_dqattach_one( } /* - * udqhint is the i_udquot field in inode, and is non-NULL only - * when the type arg is group/project. Its purpose is to save a - * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside - * the user dquot. - */ - if (udqhint) { - ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); - xfs_dqlock(udqhint); - - /* - * No need to take dqlock to look at the id. - * - * The ID can't change until it gets reclaimed, and it won't - * be reclaimed as long as we have a ref from inode and we - * hold the ilock. - */ - if (type == XFS_DQ_GROUP) - dqp = udqhint->q_gdquot; - else - dqp = udqhint->q_pdquot; - if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { - ASSERT(*IO_idqpp == NULL); - - *IO_idqpp = xfs_qm_dqhold(dqp); - xfs_dqunlock(udqhint); - return 0; - } - - /* - * We can't hold a dquot lock when we call the dqget code. - * We'll deadlock in no time, because of (not conforming to) - * lock ordering - the inodelock comes before any dquot lock, - * and we may drop and reacquire the ilock in xfs_qm_dqget(). - */ - xfs_dqunlock(udqhint); - } - - /* - * Find the dquot from somewhere. This bumps the - * reference count of dquot and returns it locked. - * This can return ENOENT if dquot didn't exist on - * disk and we didn't ask it to allocate; - * ESRCH if quotas got turned off suddenly. + * Find the dquot from somewhere. This bumps the reference count of + * dquot and returns it locked. This can return ENOENT if dquot didn't + * exist on disk and we didn't ask it to allocate; ESRCH if quotas got + * turned off suddenly. */ error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc | XFS_QMOPT_DOWARN, &dqp); @@ -461,48 +305,6 @@ xfs_qm_dqattach_one( return 0; } - -/* - * Given a udquot and group/project type, attach the group/project - * dquot pointer to the udquot as a hint for future lookups. - */ -STATIC void -xfs_qm_dqattach_hint( - struct xfs_inode *ip, - int type) -{ - struct xfs_dquot **dqhintp; - struct xfs_dquot *dqp; - struct xfs_dquot *udq = ip->i_udquot; - - ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); - - xfs_dqlock(udq); - - if (type == XFS_DQ_GROUP) { - dqp = ip->i_gdquot; - dqhintp = &udq->q_gdquot; - } else { - dqp = ip->i_pdquot; - dqhintp = &udq->q_pdquot; - } - - if (*dqhintp) { - struct xfs_dquot *tmp; - - if (*dqhintp == dqp) - goto done; - - tmp = *dqhintp; - *dqhintp = NULL; - xfs_qm_dqrele(tmp); - } - - *dqhintp = xfs_qm_dqhold(dqp); -done: - xfs_dqunlock(udq); -} - static bool xfs_qm_need_dqattach( struct xfs_inode *ip) @@ -533,7 +335,6 @@ xfs_qm_dqattach_locked( uint flags) { xfs_mount_t *mp = ip->i_mount; - uint nquotas = 0; int error = 0; if (!xfs_qm_need_dqattach(ip)) @@ -541,77 +342,39 @@ xfs_qm_dqattach_locked( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_UQUOTA_ON(mp)) { + if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) { error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, flags & XFS_QMOPT_DQALLOC, - NULL, &ip->i_udquot); + &ip->i_udquot); if (error) goto done; - nquotas++; + ASSERT(ip->i_udquot); } - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) { error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_gdquot); - /* - * Don't worry about the udquot that we may have - * attached above. It'll get detached, if not already. - */ + &ip->i_gdquot); if (error) goto done; - nquotas++; + ASSERT(ip->i_gdquot); } - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_PQUOTA_ON(mp)) { + if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) { error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_pdquot); - /* - * Don't worry about the udquot that we may have - * attached above. It'll get detached, if not already. - */ + &ip->i_pdquot); if (error) goto done; - nquotas++; + ASSERT(ip->i_pdquot); } +done: /* - * Attach this group/project quota to the user quota as a hint. - * This WON'T, in general, result in a thrash. + * Don't worry about the dquots that we may have attached before any + * error - they'll get detached later if it has not already been done. */ - if (nquotas > 1 && ip->i_udquot) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp)); - ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp)); - - /* - * We do not have i_udquot locked at this point, but this check - * is OK since we don't depend on the i_gdquot to be accurate - * 100% all the time. It is just a hint, and this will - * succeed in general. - */ - if (ip->i_udquot->q_gdquot != ip->i_gdquot) - xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP); - - if (ip->i_udquot->q_pdquot != ip->i_pdquot) - xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ); - } - - done: -#ifdef DEBUG - if (!error) { - if (XFS_IS_UQUOTA_ON(mp)) - ASSERT(ip->i_udquot); - if (XFS_IS_GQUOTA_ON(mp)) - ASSERT(ip->i_gdquot); - if (XFS_IS_PQUOTA_ON(mp)) - ASSERT(ip->i_pdquot); - } ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -#endif return error; } @@ -814,22 +577,17 @@ xfs_qm_init_quotainfo( qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); - if ((error = list_lru_init(&qinf->qi_lru))) { - kmem_free(qinf); - mp->m_quotainfo = NULL; - return error; - } + error = list_lru_init(&qinf->qi_lru); + if (error) + goto out_free_qinf; /* * See if quotainodes are setup, and if not, allocate them, * and change the superblock accordingly. */ - if ((error = xfs_qm_init_quotainos(mp))) { - list_lru_destroy(&qinf->qi_lru); - kmem_free(qinf); - mp->m_quotainfo = NULL; - return error; - } + error = xfs_qm_init_quotainos(mp); + if (error) + goto out_free_lru; INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); @@ -841,8 +599,7 @@ xfs_qm_init_quotainfo( /* Precalc some constants */ qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp, - qinf->qi_dqchunklen); + qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen); mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); @@ -889,7 +646,7 @@ xfs_qm_init_quotainfo( qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); - + xfs_qm_dqdestroy(dqp); } else { qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; @@ -906,6 +663,13 @@ xfs_qm_init_quotainfo( qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&qinf->qi_shrinker); return 0; + +out_free_lru: + list_lru_destroy(&qinf->qi_lru); +out_free_qinf: + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; } @@ -1137,7 +901,7 @@ xfs_qm_dqiter_bufs( * will leave a trace in the log indicating corruption has * been detected. */ - if (error == EFSCORRUPTED) { + if (error == -EFSCORRUPTED) { error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, bno), mp->m_quotainfo->qi_dqchunklen, 0, &bp, @@ -1147,6 +911,12 @@ xfs_qm_dqiter_bufs( if (error) break; + /* + * A corrupt buffer might not have a verifier attached, so + * make sure we have the correct one attached before writeback + * occurs. + */ + bp->b_ops = &xfs_dquot_buf_ops; xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); @@ -1193,16 +963,18 @@ xfs_qm_dqiterate( lblkno = 0; maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); do { + uint lock_mode; + nmaps = XFS_DQITER_MAP_SIZE; /* * We aren't changing the inode itself. Just changing * some of its data. No new blocks are added here, and * the inode is never added to the transaction. */ - xfs_ilock(qip, XFS_ILOCK_SHARED); + lock_mode = xfs_ilock_data_map_shared(qip); error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno, map, &nmaps, 0); - xfs_iunlock(qip, XFS_ILOCK_SHARED); + xfs_iunlock(qip, lock_mode); if (error) break; @@ -1230,7 +1002,7 @@ xfs_qm_dqiterate( xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, rablkno), mp->m_quotainfo->qi_dqchunklen, - NULL); + &xfs_dquot_buf_ops); rablkno++; } } @@ -1278,8 +1050,8 @@ xfs_qm_quotacheck_dqadjust( /* * Shouldn't be able to turn off quotas here. */ - ASSERT(error != ESRCH); - ASSERT(error != ENOENT); + ASSERT(error != -ESRCH); + ASSERT(error != -ENOENT); return error; } @@ -1366,7 +1138,7 @@ xfs_qm_dqusage_adjust( */ if (xfs_is_quota_inode(&mp->m_sb, ino)) { *res = BULKSTAT_RV_NOTHING; - return XFS_ERROR(EINVAL); + return -EINVAL; } /* @@ -1470,7 +1242,7 @@ out_unlock: * Walk thru all the filesystem inodes and construct a consistent view * of the disk quota world. If the quotacheck fails, disable quotas. */ -int +STATIC int xfs_qm_quotacheck( xfs_mount_t *mp) { @@ -1603,7 +1375,100 @@ xfs_qm_quotacheck( } } else xfs_notice(mp, "Quotacheck: Done."); - return (error); + return error; +} + +/* + * This is called from xfs_mountfs to start quotas and initialize all + * necessary data structures like quotainfo. This is also responsible for + * running a quotacheck as necessary. We are guaranteed that the superblock + * is consistently read in at this point. + * + * If we fail here, the mount will continue with quota turned off. We don't + * need to inidicate success or failure at all. + */ +void +xfs_qm_mount_quotas( + struct xfs_mount *mp) +{ + int error = 0; + uint sbf; + + /* + * If quotas on realtime volumes is not supported, we disable + * quotas immediately. + */ + if (mp->m_sb.sb_rextents) { + xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); + mp->m_qflags = 0; + goto write_changes; + } + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * Allocate the quotainfo structure inside the mount struct, and + * create quotainode(s), and change/rev superblock if necessary. + */ + error = xfs_qm_init_quotainfo(mp); + if (error) { + /* + * We must turn off quotas. + */ + ASSERT(mp->m_quotainfo == NULL); + mp->m_qflags = 0; + goto write_changes; + } + /* + * If any of the quotas are not consistent, do a quotacheck. + */ + if (XFS_QM_NEED_QUOTACHECK(mp)) { + error = xfs_qm_quotacheck(mp); + if (error) { + /* Quotacheck failed and disabled quotas. */ + return; + } + } + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + if (!XFS_IS_UQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_UQUOTA_CHKD; + if (!XFS_IS_GQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_GQUOTA_CHKD; + if (!XFS_IS_PQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_PQUOTA_CHKD; + + write_changes: + /* + * We actually don't have to acquire the m_sb_lock at all. + * This can only be called from mount, and that's single threaded. XXX + */ + spin_lock(&mp->m_sb_lock); + sbf = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { + if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { + /* + * We could only have been turning quotas off. + * We aren't in very good shape actually because + * the incore structures are convinced that quotas are + * off, but the on disk superblock doesn't know that ! + */ + ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); + xfs_alert(mp, "%s: Superblock update failed!", + __func__); + } + } + + if (error) { + xfs_warn(mp, "Failed to initialize disk quotas."); + return; + } } /* @@ -1633,7 +1498,7 @@ xfs_qm_init_quotainos( error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &uip); if (error) - return XFS_ERROR(error); + return error; } if (XFS_IS_GQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { @@ -1703,7 +1568,7 @@ error_rele: IRELE(gip); if (pip) IRELE(pip); - return XFS_ERROR(error); + return error; } STATIC void @@ -1819,7 +1684,7 @@ xfs_qm_vop_dqalloc( XFS_QMOPT_DOWARN, &uq); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); return error; } /* @@ -1846,7 +1711,7 @@ xfs_qm_vop_dqalloc( XFS_QMOPT_DOWARN, &gq); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); goto error_rele; } xfs_dqunlock(gq); @@ -1866,7 +1731,7 @@ xfs_qm_vop_dqalloc( XFS_QMOPT_DOWARN, &pq); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); goto error_rele; } xfs_dqunlock(pq); @@ -2035,7 +1900,7 @@ xfs_qm_vop_chown_reserve( -((xfs_qcnt_t)delblks), 0, blkflags); } - return (0); + return 0; } int @@ -2082,24 +1947,21 @@ xfs_qm_vop_create_dqattach( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - if (udqp) { + if (udqp && XFS_IS_UQUOTA_ON(mp)) { ASSERT(ip->i_udquot == NULL); - ASSERT(XFS_IS_UQUOTA_ON(mp)); ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); ip->i_udquot = xfs_qm_dqhold(udqp); xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } - if (gdqp) { + if (gdqp && XFS_IS_GQUOTA_ON(mp)) { ASSERT(ip->i_gdquot == NULL); - ASSERT(XFS_IS_GQUOTA_ON(mp)); ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); ip->i_gdquot = xfs_qm_dqhold(gdqp); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); } - if (pdqp) { + if (pdqp && XFS_IS_PQUOTA_ON(mp)) { ASSERT(ip->i_pdquot == NULL); - ASSERT(XFS_IS_PQUOTA_ON(mp)); ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); ip->i_pdquot = xfs_qm_dqhold(pdqp); diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index a788b66a5cb1..3a07a937e232 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -20,13 +20,29 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" -#include "xfs_quota_priv.h" struct xfs_inode; extern struct kmem_zone *xfs_qm_dqtrxzone; /* + * Number of bmaps that we ask from bmapi when doing a quotacheck. + * We make this restriction to keep the memory usage to a minimum. + */ +#define XFS_DQITER_MAP_SIZE 10 + +#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ + !dqp->q_core.d_blk_hardlimit && \ + !dqp->q_core.d_blk_softlimit && \ + !dqp->q_core.d_rtb_hardlimit && \ + !dqp->q_core.d_rtb_softlimit && \ + !dqp->q_core.d_ino_hardlimit && \ + !dqp->q_core.d_ino_softlimit && \ + !dqp->q_core.d_bcount && \ + !dqp->q_core.d_rtbcount && \ + !dqp->q_core.d_icount) + +/* * This defines the unit of allocation of dquots. * Currently, it is just one file system block, and a 4K blk contains 30 * (136 * 30 = 4080) dquots. It's probably not worth trying to make @@ -141,7 +157,6 @@ struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); -extern int xfs_qm_quotacheck(struct xfs_mount *); extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); /* dquot stuff */ diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index e9be63abd8d2..2c61e61b0205 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -117,7 +117,7 @@ xfs_qm_newmount( (uquotaondisk ? " usrquota" : ""), (gquotaondisk ? " grpquota" : ""), (pquotaondisk ? " prjquota" : "")); - return XFS_ERROR(EPERM); + return -EPERM; } if (XFS_IS_QUOTA_ON(mp) || quotaondisk) { diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 437c9198031a..80f2d77d929a 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -64,10 +64,10 @@ xfs_qm_scall_quotaoff( /* * No file system can have quotas enabled on disk but not in core. * Note that quota utilities (like quotaoff) _expect_ - * errno == EEXIST here. + * errno == -EEXIST here. */ if ((mp->m_qflags & flags) == 0) - return XFS_ERROR(EEXIST); + return -EEXIST; error = 0; flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); @@ -94,7 +94,7 @@ xfs_qm_scall_quotaoff( /* XXX what to do if error ? Revert back to old vals incore ? */ error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); - return (error); + return error; } dqtype = 0; @@ -198,7 +198,7 @@ xfs_qm_scall_quotaoff( if (mp->m_qflags == 0) { mutex_unlock(&q->qi_quotaofflock); xfs_qm_destroy_quotainfo(mp); - return (0); + return 0; } /* @@ -278,22 +278,29 @@ xfs_qm_scall_trunc_qfiles( xfs_mount_t *mp, uint flags) { - int error = 0, error2 = 0; + int error = -EINVAL; - if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { + if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 || + (flags & ~XFS_DQ_ALLTYPES)) { xfs_debug(mp, "%s: flags=%x m_qflags=%x", __func__, flags, mp->m_qflags); - return XFS_ERROR(EINVAL); + return -EINVAL; } - if (flags & XFS_DQ_USER) + if (flags & XFS_DQ_USER) { error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); - if (flags & XFS_DQ_GROUP) - error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); + if (error) + return error; + } + if (flags & XFS_DQ_GROUP) { + error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); + if (error) + return error; + } if (flags & XFS_DQ_PROJ) - error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); + error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); - return error ? error : error2; + return error; } /* @@ -321,7 +328,7 @@ xfs_qm_scall_quotaon( if (flags == 0) { xfs_debug(mp, "%s: zero flags, m_qflags=%x", __func__, mp->m_qflags); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* No fs can turn on quotas with a delayed effect */ @@ -344,13 +351,13 @@ xfs_qm_scall_quotaon( xfs_debug(mp, "%s: Can't enforce without acct, flags=%x sbflags=%x", __func__, flags, mp->m_sb.sb_qflags); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* * If everything's up to-date incore, then don't waste time. */ if ((mp->m_qflags & flags) == flags) - return XFS_ERROR(EEXIST); + return -EEXIST; /* * Change sb_qflags on disk but not incore mp->qflags @@ -365,11 +372,11 @@ xfs_qm_scall_quotaon( * There's nothing to change if it's the same. */ if ((qf & flags) == flags && sbflags == 0) - return XFS_ERROR(EEXIST); + return -EEXIST; sbflags |= XFS_SB_QFLAGS; if ((error = xfs_qm_write_sb_changes(mp, sbflags))) - return (error); + return error; /* * If we aren't trying to switch on quota enforcement, we are done. */ @@ -380,10 +387,10 @@ xfs_qm_scall_quotaon( ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != (mp->m_qflags & XFS_GQUOTA_ACCT)) || (flags & XFS_ALL_QUOTA_ENFD) == 0) - return (0); + return 0; if (! XFS_IS_QUOTA_RUNNING(mp)) - return XFS_ERROR(ESRCH); + return -ESRCH; /* * Switch on quota enforcement in core. @@ -392,7 +399,7 @@ xfs_qm_scall_quotaon( mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); - return (0); + return 0; } @@ -419,7 +426,7 @@ xfs_qm_scall_getqstat( if (!xfs_sb_version_hasquota(&mp->m_sb)) { out->qs_uquota.qfs_ino = NULLFSINO; out->qs_gquota.qfs_ino = NULLFSINO; - return (0); + return 0; } out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & @@ -507,7 +514,7 @@ xfs_qm_scall_getqstatv( out->qs_uquota.qfs_ino = NULLFSINO; out->qs_gquota.qfs_ino = NULLFSINO; out->qs_pquota.qfs_ino = NULLFSINO; - return (0); + return 0; } out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & @@ -588,7 +595,7 @@ xfs_qm_scall_setqlim( xfs_qcnt_t hard, soft; if (newlim->d_fieldmask & ~XFS_DQ_MASK) - return EINVAL; + return -EINVAL; if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) return 0; @@ -608,7 +615,7 @@ xfs_qm_scall_setqlim( */ error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); goto out_unlock; } xfs_dqunlock(dqp); @@ -751,7 +758,7 @@ xfs_qm_log_quotaoff_end( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0); if (error) { xfs_trans_cancel(tp, 0); - return (error); + return error; } qoffi = xfs_trans_get_qoff_item(tp, startqoff, @@ -765,7 +772,7 @@ xfs_qm_log_quotaoff_end( */ xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); - return (error); + return error; } @@ -815,7 +822,7 @@ error0: spin_unlock(&mp->m_sb_lock); } *qoffstartp = qoffi; - return (error); + return error; } @@ -843,7 +850,7 @@ xfs_qm_scall_getquota( * our utility programs are concerned. */ if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { - error = XFS_ERROR(ENOENT); + error = -ENOENT; goto out_put; } @@ -946,14 +953,13 @@ xfs_qm_export_flags( uflags |= FS_QUOTA_GDQ_ENFD; if (flags & XFS_PQUOTA_ENFD) uflags |= FS_QUOTA_PDQ_ENFD; - return (uflags); + return uflags; } STATIC int xfs_dqrele_inode( struct xfs_inode *ip, - struct xfs_perag *pag, int flags, void *args) { diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h deleted file mode 100644 index 6d86219d93da..000000000000 --- a/fs/xfs/xfs_quota_priv.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_QUOTA_PRIV_H__ -#define __XFS_QUOTA_PRIV_H__ - -/* - * Number of bmaps that we ask from bmapi when doing a quotacheck. - * We make this restriction to keep the memory usage to a minimum. - */ -#define XFS_DQITER_MAP_SIZE 10 - -#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ - !dqp->q_core.d_blk_hardlimit && \ - !dqp->q_core.d_blk_softlimit && \ - !dqp->q_core.d_rtb_hardlimit && \ - !dqp->q_core.d_rtb_softlimit && \ - !dqp->q_core.d_ino_hardlimit && \ - !dqp->q_core.d_ino_softlimit && \ - !dqp->q_core.d_bcount && \ - !dqp->q_core.d_rtbcount && \ - !dqp->q_core.d_icount) - -#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ - (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ - (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) - -#endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index af33cafe69b6..b238027df987 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -51,7 +51,7 @@ xfs_fs_get_xstate( if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return -xfs_qm_scall_getqstat(mp, fqs); + return xfs_qm_scall_getqstat(mp, fqs); } STATIC int @@ -63,7 +63,7 @@ xfs_fs_get_xstatev( if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return -xfs_qm_scall_getqstatv(mp, fqs); + return xfs_qm_scall_getqstatv(mp, fqs); } STATIC int @@ -95,21 +95,41 @@ xfs_fs_set_xstate( switch (op) { case Q_XQUOTAON: - return -xfs_qm_scall_quotaon(mp, flags); + return xfs_qm_scall_quotaon(mp, flags); case Q_XQUOTAOFF: if (!XFS_IS_QUOTA_ON(mp)) return -EINVAL; - return -xfs_qm_scall_quotaoff(mp, flags); - case Q_XQUOTARM: - if (XFS_IS_QUOTA_ON(mp)) - return -EINVAL; - return -xfs_qm_scall_trunc_qfiles(mp, flags); + return xfs_qm_scall_quotaoff(mp, flags); } return -EINVAL; } STATIC int +xfs_fs_rm_xquota( + struct super_block *sb, + unsigned int uflags) +{ + struct xfs_mount *mp = XFS_M(sb); + unsigned int flags = 0; + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + + if (XFS_IS_QUOTA_ON(mp)) + return -EINVAL; + + if (uflags & FS_USER_QUOTA) + flags |= XFS_DQ_USER; + if (uflags & FS_GROUP_QUOTA) + flags |= XFS_DQ_GROUP; + if (uflags & FS_PROJ_QUOTA) + flags |= XFS_DQ_PROJ; + + return xfs_qm_scall_trunc_qfiles(mp, flags); +} + +STATIC int xfs_fs_get_dqblk( struct super_block *sb, struct kqid qid, @@ -122,7 +142,7 @@ xfs_fs_get_dqblk( if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), + return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), xfs_quota_type(qid.type), fdq); } @@ -141,7 +161,7 @@ xfs_fs_set_dqblk( if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), + return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), xfs_quota_type(qid.type), fdq); } @@ -149,6 +169,7 @@ const struct quotactl_ops xfs_quotactl_operations = { .get_xstatev = xfs_fs_get_xstatev, .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, + .rm_xquota = xfs_fs_rm_xquota, .get_dqblk = xfs_fs_get_dqblk, .set_dqblk = xfs_fs_set_dqblk, }; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a6a76b2b6a85..909e143b87ae 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -842,7 +842,7 @@ xfs_growfs_rt_alloc( /* * Reserve space & log for one extent added to the file. */ - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, resblks, 0); if (error) goto error_cancel; @@ -863,7 +863,7 @@ xfs_growfs_rt_alloc( XFS_BMAPI_METADATA, &firstblock, resblks, &map, &nmap, &flist); if (!error && nmap < 1) - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; if (error) goto error_cancel; /* @@ -903,7 +903,7 @@ xfs_growfs_rt_alloc( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); if (bp == NULL) { - error = XFS_ERROR(EIO); + error = -EIO; error_cancel: xfs_trans_cancel(tp, cancelflags); goto error; @@ -944,9 +944,9 @@ xfs_growfs_rt( xfs_buf_t *bp; /* temporary buffer */ int error; /* error return value */ xfs_mount_t *nmp; /* new (fake) mount structure */ - xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ + xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ - xfs_drtbno_t nrextents; /* new number of realtime extents */ + xfs_rtblock_t nrextents; /* new number of realtime extents */ uint8_t nrextslog; /* new log2 of sb_rextents */ xfs_extlen_t nrsumblocks; /* new number of summary blocks */ uint nrsumlevels; /* new rt summary levels */ @@ -962,11 +962,11 @@ xfs_growfs_rt( * Initial error checking. */ if (!capable(CAP_SYS_ADMIN)) - return XFS_ERROR(EPERM); + return -EPERM; if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || (nrblocks = in->newblocks) <= sbp->sb_rblocks || (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) - return XFS_ERROR(EINVAL); + return -EINVAL; if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks))) return error; /* @@ -976,7 +976,7 @@ xfs_growfs_rt( XFS_FSB_TO_BB(mp, nrblocks - 1), XFS_FSB_TO_BB(mp, 1), 0, NULL); if (!bp) - return EIO; + return -EIO; if (bp->b_error) { error = bp->b_error; xfs_buf_relse(bp); @@ -1001,7 +1001,7 @@ xfs_growfs_rt( * since we'll log basically the whole summary file at once. */ if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * Get the old block counts for bitmap and summary inodes. * These can't change since other growfs callers are locked out. @@ -1208,7 +1208,7 @@ xfs_rtallocate_extent( len, &sumbp, &sb, prod, &r); break; default: - error = EIO; + error = -EIO; ASSERT(0); } if (error) @@ -1247,7 +1247,7 @@ xfs_rtmount_init( if (mp->m_rtdev_targp == NULL) { xfs_warn(mp, "Filesystem has a realtime volume, use rtdev=device option"); - return XFS_ERROR(ENODEV); + return -ENODEV; } mp->m_rsumlevels = sbp->sb_rextslog + 1; mp->m_rsumsize = @@ -1263,7 +1263,7 @@ xfs_rtmount_init( xfs_warn(mp, "realtime mount -- %llu != %llu", (unsigned long long) XFS_BB_TO_FSB(mp, d), (unsigned long long) mp->m_sb.sb_rblocks); - return XFS_ERROR(EFBIG); + return -EFBIG; } bp = xfs_buf_read_uncached(mp->m_rtdev_targp, d - XFS_FSB_TO_BB(mp, 1), @@ -1272,7 +1272,7 @@ xfs_rtmount_init( xfs_warn(mp, "realtime device size check failed"); if (bp) xfs_buf_relse(bp); - return EIO; + return -EIO; } xfs_buf_relse(bp); return 0; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 752b63d10300..c642795324af 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -132,7 +132,7 @@ xfs_rtmount_init( return 0; xfs_warn(mp, "Not built with CONFIG_XFS_RT"); - return ENOSYS; + return -ENOSYS; } # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) # define xfs_rtunmount_inodes(m) diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index ce372b7d5644..f2240383d4bb 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) { "abtc2", XFSSTAT_END_ABTC_V2 }, { "bmbt2", XFSSTAT_END_BMBT_V2 }, { "ibt2", XFSSTAT_END_IBT_V2 }, + { "fibt2", XFSSTAT_END_FIBT_V2 }, /* we print both series of quota information together */ { "qm", XFSSTAT_END_QM }, }; diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index c03ad38ceaeb..c8f238b8299a 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -183,7 +183,23 @@ struct xfsstats { __uint32_t xs_ibt_2_alloc; __uint32_t xs_ibt_2_free; __uint32_t xs_ibt_2_moves; -#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) +#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15) + __uint32_t xs_fibt_2_lookup; + __uint32_t xs_fibt_2_compare; + __uint32_t xs_fibt_2_insrec; + __uint32_t xs_fibt_2_delrec; + __uint32_t xs_fibt_2_newroot; + __uint32_t xs_fibt_2_killroot; + __uint32_t xs_fibt_2_increment; + __uint32_t xs_fibt_2_decrement; + __uint32_t xs_fibt_2_lshift; + __uint32_t xs_fibt_2_rshift; + __uint32_t xs_fibt_2_split; + __uint32_t xs_fibt_2_join; + __uint32_t xs_fibt_2_alloc; + __uint32_t xs_fibt_2_free; + __uint32_t xs_fibt_2_moves; +#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6) __uint32_t xs_qm_dqreclaims; __uint32_t xs_qm_dqreclaim_misses; __uint32_t xs_qm_dquot_dups; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f317488263dd..b194652033cd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -61,6 +61,7 @@ static const struct super_operations xfs_super_operations; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; +struct kset *xfs_kset; #define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ #define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ @@ -185,7 +186,7 @@ xfs_parseargs( */ mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); if (!mp->m_fsname) - return ENOMEM; + return -ENOMEM; mp->m_fsname_len = strlen(mp->m_fsname) + 1; /* @@ -204,9 +205,6 @@ xfs_parseargs( */ mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; -#if !XFS_BIG_INUMS - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; -#endif /* * These can be overridden by the mount option parsing. @@ -227,57 +225,57 @@ xfs_parseargs( if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (kstrtoint(value, 10, &mp->m_logbufs)) - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (suffix_kstrtoint(value, 10, &mp->m_logbsize)) - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); if (!mp->m_logname) - return ENOMEM; + return -ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { xfs_warn(mp, "%s option not allowed on this system", this_char); - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); if (!mp->m_rtname) - return ENOMEM; + return -ENOMEM; } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (kstrtoint(value, 10, &iosize)) - return EINVAL; + return -EINVAL; iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (suffix_kstrtoint(value, 10, &iosize)) - return EINVAL; + return -EINVAL; iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_GRPID) || !strcmp(this_char, MNTOPT_BSDGROUPS)) { @@ -297,27 +295,22 @@ xfs_parseargs( if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (kstrtoint(value, 10, &dsunit)) - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (kstrtoint(value, 10, &dswidth)) - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { mp->m_flags |= XFS_MOUNT_SMALL_INUMS; } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; -#if !XFS_BIG_INUMS - xfs_warn(mp, "%s option not allowed on this system", - this_char); - return EINVAL; -#endif } else if (!strcmp(this_char, MNTOPT_NOUUID)) { mp->m_flags |= XFS_MOUNT_NOUUID; } else if (!strcmp(this_char, MNTOPT_BARRIER)) { @@ -390,7 +383,7 @@ xfs_parseargs( "irixsgid is now a sysctl(2) variable, option is deprecated."); } else { xfs_warn(mp, "unknown mount option [%s].", this_char); - return EINVAL; + return -EINVAL; } } @@ -400,32 +393,32 @@ xfs_parseargs( if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && !(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_warn(mp, "no-recovery mounts must be read-only."); - return EINVAL; + return -EINVAL; } if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { xfs_warn(mp, "sunit and swidth options incompatible with the noalign option"); - return EINVAL; + return -EINVAL; } #ifndef CONFIG_XFS_QUOTA if (XFS_IS_QUOTA_RUNNING(mp)) { xfs_warn(mp, "quota support not available in this kernel."); - return EINVAL; + return -EINVAL; } #endif if ((dsunit && !dswidth) || (!dsunit && dswidth)) { xfs_warn(mp, "sunit and swidth must be specified together"); - return EINVAL; + return -EINVAL; } if (dsunit && (dswidth % dsunit != 0)) { xfs_warn(mp, "stripe width (%d) must be a multiple of the stripe unit (%d)", dswidth, dsunit); - return EINVAL; + return -EINVAL; } done: @@ -446,7 +439,7 @@ done: mp->m_logbufs > XLOG_MAX_ICLOGS)) { xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return XFS_ERROR(EINVAL); + return -EINVAL; } if (mp->m_logbsize != -1 && mp->m_logbsize != 0 && @@ -456,7 +449,7 @@ done: xfs_warn(mp, "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", mp->m_logbsize); - return XFS_ERROR(EINVAL); + return -EINVAL; } if (iosizelog) { @@ -465,7 +458,7 @@ done: xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", iosizelog, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; @@ -597,15 +590,20 @@ xfs_max_file_offset( return (((__uint64_t)pagefactor) << bitshift) - 1; } +/* + * xfs_set_inode32() and xfs_set_inode64() are passed an agcount + * because in the growfs case, mp->m_sb.sb_agcount is not updated + * yet to the potentially higher ag count. + */ xfs_agnumber_t -xfs_set_inode32(struct xfs_mount *mp) +xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount) { xfs_agnumber_t index = 0; xfs_agnumber_t maxagi = 0; xfs_sb_t *sbp = &mp->m_sb; xfs_agnumber_t max_metadata; - xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0); - xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino); + xfs_agino_t agino; + xfs_ino_t ino; xfs_perag_t *pag; /* Calculate how much should be reserved for inodes to meet @@ -620,10 +618,12 @@ xfs_set_inode32(struct xfs_mount *mp) do_div(icount, sbp->sb_agblocks); max_metadata = icount; } else { - max_metadata = sbp->sb_agcount; + max_metadata = agcount; } - for (index = 0; index < sbp->sb_agcount; index++) { + agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); + + for (index = 0; index < agcount; index++) { ino = XFS_AGINO_TO_INO(mp, index, agino); if (ino > XFS_MAXINUMBER_32) { @@ -648,11 +648,11 @@ xfs_set_inode32(struct xfs_mount *mp) } xfs_agnumber_t -xfs_set_inode64(struct xfs_mount *mp) +xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount) { xfs_agnumber_t index = 0; - for (index = 0; index < mp->m_sb.sb_agcount; index++) { + for (index = 0; index < agcount; index++) { struct xfs_perag *pag; pag = xfs_perag_get(mp, index); @@ -686,7 +686,7 @@ xfs_blkdev_get( xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); } - return -error; + return error; } STATIC void @@ -756,7 +756,7 @@ xfs_open_devices( if (rtdev == ddev || rtdev == logdev) { xfs_warn(mp, "Cannot mount filesystem with identical rtdev and ddev/logdev."); - error = EINVAL; + error = -EINVAL; goto out_close_rtdev; } } @@ -764,21 +764,19 @@ xfs_open_devices( /* * Setup xfs_mount buffer target pointers */ - error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); + error = -ENOMEM; + mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev); if (!mp->m_ddev_targp) goto out_close_rtdev; if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, - mp->m_fsname); + mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev); if (!mp->m_rtdev_targp) goto out_free_ddev_targ; } if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, - mp->m_fsname); + mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev); if (!mp->m_logdev_targp) goto out_free_rtdev_targ; } else { @@ -811,8 +809,7 @@ xfs_setup_devices( { int error; - error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, - mp->m_sb.sb_sectsize); + error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize); if (error) return error; @@ -822,14 +819,12 @@ xfs_setup_devices( if (xfs_sb_version_hassector(&mp->m_sb)) log_sector_size = mp->m_sb.sb_logsectsize; error = xfs_setsize_buftarg(mp->m_logdev_targp, - mp->m_sb.sb_blocksize, log_sector_size); if (error) return error; } if (mp->m_rtdev_targp) { error = xfs_setsize_buftarg(mp->m_rtdev_targp, - mp->m_sb.sb_blocksize, mp->m_sb.sb_sectsize); if (error) return error; @@ -913,7 +908,7 @@ xfs_flush_inodes( struct super_block *sb = mp->m_super; if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb, jiffies); + sync_inodes_sb(sb); up_read(&sb->s_umount); } } @@ -996,7 +991,7 @@ xfs_fs_evict_inode( trace_xfs_evict_inode(ip); - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); @@ -1193,10 +1188,12 @@ xfs_fs_remount( char *options) { struct xfs_mount *mp = XFS_M(sb); + xfs_sb_t *sbp = &mp->m_sb; substring_t args[MAX_OPT_ARGS]; char *p; int error; + sync_filesystem(sb); while ((p = strsep(&options, ",")) != NULL) { int token; @@ -1212,10 +1209,10 @@ xfs_fs_remount( mp->m_flags &= ~XFS_MOUNT_BARRIER; break; case Opt_inode64: - mp->m_maxagi = xfs_set_inode64(mp); + mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount); break; case Opt_inode32: - mp->m_maxagi = xfs_set_inode32(mp); + mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount); break; default: /* @@ -1299,7 +1296,7 @@ xfs_fs_freeze( xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return -xfs_fs_log_dummy(mp); + return xfs_fs_log_dummy(mp); } STATIC int @@ -1318,7 +1315,7 @@ xfs_fs_show_options( struct seq_file *m, struct dentry *root) { - return -xfs_showargs(XFS_M(root->d_sb), m); + return xfs_showargs(XFS_M(root->d_sb), m); } /* @@ -1340,14 +1337,14 @@ xfs_finish_flags( mp->m_logbsize < mp->m_sb.sb_logsunit) { xfs_warn(mp, "logbuf size must be greater than or equal to log stripe size"); - return XFS_ERROR(EINVAL); + return -EINVAL; } } else { /* Fail a mount if the logbuf is larger than 32K */ if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { xfs_warn(mp, "logbuf size for version 1 logs must be 16K or 32K"); - return XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -1359,7 +1356,7 @@ xfs_finish_flags( xfs_warn(mp, "Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.", MNTOPT_NOATTR2, MNTOPT_ATTR2); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* @@ -1376,7 +1373,7 @@ xfs_finish_flags( if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { xfs_warn(mp, "cannot mount a read-only filesystem as read-write"); - return XFS_ERROR(EROFS); + return -EROFS; } if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && @@ -1384,7 +1381,7 @@ xfs_finish_flags( !xfs_sb_version_has_pquotino(&mp->m_sb)) { xfs_warn(mp, "Super block does not support project and group quota together"); - return XFS_ERROR(EINVAL); + return -EINVAL; } return 0; @@ -1398,7 +1395,7 @@ xfs_fs_fill_super( { struct inode *root; struct xfs_mount *mp = NULL; - int flags = 0, error = ENOMEM; + int flags = 0, error = -ENOMEM; mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) @@ -1478,12 +1475,12 @@ xfs_fs_fill_super( root = igrab(VFS_I(mp->m_rootip)); if (!root) { - error = ENOENT; + error = -ENOENT; goto out_unmount; } sb->s_root = d_make_root(root); if (!sb->s_root) { - error = ENOMEM; + error = -ENOMEM; goto out_unmount; } @@ -1503,7 +1500,7 @@ out_destroy_workqueues: xfs_free_fsname(mp); kfree(mp); out: - return -error; + return error; out_unmount: xfs_filestream_unmount(mp); @@ -1753,13 +1750,9 @@ init_xfs_fs(void) if (error) goto out_destroy_wq; - error = xfs_filestream_init(); - if (error) - goto out_mru_cache_uninit; - error = xfs_buf_init(); if (error) - goto out_filestream_uninit; + goto out_mru_cache_uninit; error = xfs_init_procfs(); if (error) @@ -1769,9 +1762,15 @@ init_xfs_fs(void) if (error) goto out_cleanup_procfs; + xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); + if (!xfs_kset) { + error = -ENOMEM; + goto out_sysctl_unregister;; + } + error = xfs_qm_init(); if (error) - goto out_sysctl_unregister; + goto out_kset_unregister; error = register_filesystem(&xfs_fs_type); if (error) @@ -1780,14 +1779,14 @@ init_xfs_fs(void) out_qm_exit: xfs_qm_exit(); + out_kset_unregister: + kset_unregister(xfs_kset); out_sysctl_unregister: xfs_sysctl_unregister(); out_cleanup_procfs: xfs_cleanup_procfs(); out_buf_terminate: xfs_buf_terminate(); - out_filestream_uninit: - xfs_filestream_uninit(); out_mru_cache_uninit: xfs_mru_cache_uninit(); out_destroy_wq: @@ -1803,10 +1802,10 @@ exit_xfs_fs(void) { xfs_qm_exit(); unregister_filesystem(&xfs_fs_type); + kset_unregister(xfs_kset); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); - xfs_filestream_uninit(); xfs_mru_cache_uninit(); xfs_destroy_workqueues(); xfs_destroy_zones(); diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index bbe3d15a7904..2b830c2f322e 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -44,16 +44,6 @@ extern void xfs_qm_exit(void); # define XFS_REALTIME_STRING #endif -#if XFS_BIG_BLKNOS -# if XFS_BIG_INUMS -# define XFS_BIGFS_STRING "large block/inode numbers, " -# else -# define XFS_BIGFS_STRING "large block numbers, " -# endif -#else -# define XFS_BIGFS_STRING -#endif - #ifdef DEBUG # define XFS_DBG_STRING "debug" #else @@ -64,7 +54,6 @@ extern void xfs_qm_exit(void); #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ - XFS_BIGFS_STRING \ XFS_DBG_STRING /* DBG must be last */ struct xfs_inode; @@ -76,8 +65,8 @@ extern __uint64_t xfs_max_file_offset(unsigned int); extern void xfs_flush_inodes(struct xfs_mount *mp); extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); -extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *); -extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *); +extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *, xfs_agnumber_t agcount); +extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *, xfs_agnumber_t agcount); extern const struct export_operations xfs_export_operations; extern const struct xattr_handler *xfs_xattr_handlers[]; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 14e58f2c96bd..6a944a2cd36f 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -27,6 +27,7 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_inode.h" #include "xfs_ialloc.h" @@ -75,11 +76,15 @@ xfs_readlink_bmap( bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, &xfs_symlink_buf_ops); if (!bp) - return XFS_ERROR(ENOMEM); + return -ENOMEM; error = bp->b_error; if (error) { xfs_buf_ioerror_alert(bp, __func__); xfs_buf_relse(bp); + + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; goto out; } byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); @@ -88,9 +93,9 @@ xfs_readlink_bmap( cur_chunk = bp->b_addr; if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset, + if (!xfs_symlink_hdr_ok(ip->i_ino, offset, byte_cnt, bp)) { - error = EFSCORRUPTED; + error = -EFSCORRUPTED; xfs_alert(mp, "symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)", offset, byte_cnt, ip->i_ino); @@ -130,7 +135,7 @@ xfs_readlink( trace_xfs_readlink(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -143,7 +148,7 @@ xfs_readlink( __func__, (unsigned long long) ip->i_ino, (long long) pathlen); ASSERT(0); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto out; } @@ -198,20 +203,17 @@ xfs_symlink( trace_xfs_symlink(dp, link_name); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; /* * Check component lengths of the target path name. */ pathlen = strlen(target_path); if (pathlen >= MAXPATHLEN) /* total string too long */ - return XFS_ERROR(ENAMETOOLONG); + return -ENAMETOOLONG; udqp = gdqp = NULL; - if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - prid = xfs_get_projid(dp); - else - prid = XFS_PROJID_DEFAULT; + prid = xfs_get_initial_prid(dp); /* * Make sure that we have allocated dquot(s) on disk. @@ -236,7 +238,7 @@ xfs_symlink( fs_blocks = xfs_symlink_blocks(mp, pathlen); resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0); - if (error == ENOSPC && fs_blocks == 0) { + if (error == -ENOSPC && fs_blocks == 0) { resblks = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); } @@ -252,7 +254,7 @@ xfs_symlink( * Check whether the directory allows new symlinks or not. */ if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { - error = XFS_ERROR(EPERM); + error = -EPERM; goto error_return; } @@ -282,7 +284,7 @@ xfs_symlink( error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, resblks > 0, &ip, NULL); if (error) { - if (error == ENOSPC) + if (error == -ENOSPC) goto error_return; goto error1; } @@ -346,7 +348,7 @@ xfs_symlink( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error2; } bp->b_ops = &xfs_symlink_buf_ops; @@ -487,7 +489,7 @@ xfs_inactive_symlink_rmt( XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error_bmap_cancel; } xfs_trans_binval(tp, bp); @@ -560,7 +562,7 @@ xfs_inactive_symlink( trace_xfs_inactive_symlink(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -578,7 +580,7 @@ xfs_inactive_symlink( __func__, (unsigned long long)ip->i_ino, pathlen); xfs_iunlock(ip, XFS_ILOCK_EXCL); ASSERT(0); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (ip->i_df.if_flags & XFS_IFINLINE) { diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c new file mode 100644 index 000000000000..9835139ce1ec --- /dev/null +++ b/fs/xfs/xfs_sysfs.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2014 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_sysfs.h" +#include "xfs_log_format.h" +#include "xfs_log.h" +#include "xfs_log_priv.h" + +struct xfs_sysfs_attr { + struct attribute attr; + ssize_t (*show)(char *buf, void *data); + ssize_t (*store)(const char *buf, size_t count, void *data); +}; + +static inline struct xfs_sysfs_attr * +to_attr(struct attribute *attr) +{ + return container_of(attr, struct xfs_sysfs_attr, attr); +} + +#define XFS_SYSFS_ATTR_RW(name) \ + static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name) +#define XFS_SYSFS_ATTR_RO(name) \ + static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name) + +#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr + +/* + * xfs_mount kobject. This currently has no attributes and thus no need for show + * and store helpers. The mp kobject serves as the per-mount parent object that + * is identified by the fsname under sysfs. + */ + +struct kobj_type xfs_mp_ktype = { + .release = xfs_sysfs_release, +}; + +/* xlog */ + +STATIC ssize_t +log_head_lsn_show( + char *buf, + void *data) +{ + struct xlog *log = data; + int cycle; + int block; + + spin_lock(&log->l_icloglock); + cycle = log->l_curr_cycle; + block = log->l_curr_block; + spin_unlock(&log->l_icloglock); + + return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); +} +XFS_SYSFS_ATTR_RO(log_head_lsn); + +STATIC ssize_t +log_tail_lsn_show( + char *buf, + void *data) +{ + struct xlog *log = data; + int cycle; + int block; + + xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); + return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); +} +XFS_SYSFS_ATTR_RO(log_tail_lsn); + +STATIC ssize_t +reserve_grant_head_show( + char *buf, + void *data) +{ + struct xlog *log = data; + int cycle; + int bytes; + + xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); + return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); +} +XFS_SYSFS_ATTR_RO(reserve_grant_head); + +STATIC ssize_t +write_grant_head_show( + char *buf, + void *data) +{ + struct xlog *log = data; + int cycle; + int bytes; + + xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); + return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); +} +XFS_SYSFS_ATTR_RO(write_grant_head); + +static struct attribute *xfs_log_attrs[] = { + ATTR_LIST(log_head_lsn), + ATTR_LIST(log_tail_lsn), + ATTR_LIST(reserve_grant_head), + ATTR_LIST(write_grant_head), + NULL, +}; + +static inline struct xlog * +to_xlog(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + return container_of(kobj, struct xlog, l_kobj); +} + +STATIC ssize_t +xfs_log_show( + struct kobject *kobject, + struct attribute *attr, + char *buf) +{ + struct xlog *log = to_xlog(kobject); + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->show ? xfs_attr->show(buf, log) : 0; +} + +STATIC ssize_t +xfs_log_store( + struct kobject *kobject, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xlog *log = to_xlog(kobject); + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0; +} + +static struct sysfs_ops xfs_log_ops = { + .show = xfs_log_show, + .store = xfs_log_store, +}; + +struct kobj_type xfs_log_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_log_ops, + .default_attrs = xfs_log_attrs, +}; diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h new file mode 100644 index 000000000000..54a2091183c0 --- /dev/null +++ b/fs/xfs/xfs_sysfs.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2014 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __XFS_SYSFS_H__ +#define __XFS_SYSFS_H__ + +extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ +extern struct kobj_type xfs_log_ktype; /* xlog */ + +static inline struct xfs_kobj * +to_kobj(struct kobject *kobject) +{ + return container_of(kobject, struct xfs_kobj, kobject); +} + +static inline void +xfs_sysfs_release(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + complete(&kobj->complete); +} + +static inline int +xfs_sysfs_init( + struct xfs_kobj *kobj, + struct kobj_type *ktype, + struct xfs_kobj *parent_kobj, + const char *name) +{ + init_completion(&kobj->complete); + return kobject_init_and_add(&kobj->kobject, ktype, + &parent_kobj->kobject, "%s", name); +} + +static inline void +xfs_sysfs_del( + struct xfs_kobj *kobj) +{ + kobject_del(&kobj->kobject); + kobject_put(&kobj->kobject); + wait_for_completion(&kobj->complete); +} + +#endif /* __XFS_SYSFS_H__ */ diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index dee3279c095e..1e85bcd0e418 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -46,6 +46,7 @@ #include "xfs_log_recover.h" #include "xfs_inode_item.h" #include "xfs_bmap_btree.h" +#include "xfs_filestream.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 425dfa45b9a0..152f82782630 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -538,6 +538,64 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); +DECLARE_EVENT_CLASS(xfs_filestream_class, + TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), + TP_ARGS(ip, agno), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_agnumber_t, agno) + __field(int, streams) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->agno = agno; + __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); + ), + TP_printk("dev %d:%d ino 0x%llx agno %u streams %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->agno, + __entry->streams) +) +#define DEFINE_FILESTREAM_EVENT(name) \ +DEFINE_EVENT(xfs_filestream_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \ + TP_ARGS(ip, agno)) +DEFINE_FILESTREAM_EVENT(xfs_filestream_free); +DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); +DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); + +TRACE_EVENT(xfs_filestream_pick, + TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno, + xfs_extlen_t free, int nscan), + TP_ARGS(ip, agno, free, nscan), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_agnumber_t, agno) + __field(int, streams) + __field(xfs_extlen_t, free) + __field(int, nscan) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->agno = agno; + __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); + __entry->free = free; + __entry->nscan = nscan; + ), + TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->agno, + __entry->streams, + __entry->free, + __entry->nscan) +); + DECLARE_EVENT_CLASS(xfs_lock_class, TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, unsigned long caller_ip), @@ -603,6 +661,8 @@ DEFINE_INODE_EVENT(xfs_readlink); DEFINE_INODE_EVENT(xfs_inactive_symlink); DEFINE_INODE_EVENT(xfs_alloc_file_space); DEFINE_INODE_EVENT(xfs_free_file_space); +DEFINE_INODE_EVENT(xfs_zero_file_space); +DEFINE_INODE_EVENT(xfs_collapse_file_space); DEFINE_INODE_EVENT(xfs_readdir); #ifdef CONFIG_XFS_POSIX_ACL DEFINE_INODE_EVENT(xfs_get_acl); @@ -1058,7 +1118,6 @@ DEFINE_RW_EVENT(xfs_file_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_splice_read); -DEFINE_RW_EVENT(xfs_file_splice_write); DECLARE_EVENT_CLASS(xfs_page_class, TP_PROTO(struct inode *inode, struct page *page, unsigned long off, diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c812c5c060de..30e8e3410955 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -190,7 +190,7 @@ xfs_trans_reserve( -((int64_t)blocks), rsvd); if (error != 0) { current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - return (XFS_ERROR(ENOSPC)); + return -ENOSPC; } tp->t_blk_res += blocks; } @@ -241,7 +241,7 @@ xfs_trans_reserve( error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, -((int64_t)rtextents), rsvd); if (error) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto undo_log; } tp->t_rtx_res += rtextents; @@ -827,7 +827,7 @@ xfs_trans_committed_bulk( xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); spin_lock(&ailp->xa_lock); - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); } @@ -874,7 +874,7 @@ xfs_trans_commit( goto out_unreserve; if (XFS_FORCED_SHUTDOWN(mp)) { - error = XFS_ERROR(EIO); + error = -EIO; goto out_unreserve; } @@ -887,12 +887,7 @@ xfs_trans_commit( xfs_trans_apply_sb_deltas(tp); xfs_trans_apply_dquot_deltas(tp); - error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); - if (error == ENOMEM) { - xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); - error = XFS_ERROR(EIO); - goto out_unreserve; - } + xfs_log_commit_cil(mp, tp, &commit_lsn, flags); current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free(tp); @@ -902,10 +897,7 @@ xfs_trans_commit( * log out now and wait for it. */ if (sync) { - if (!error) { - error = _xfs_log_force_lsn(mp, commit_lsn, - XFS_LOG_SYNC, NULL); - } + error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); XFS_STATS_INC(xs_trans_sync); } else { XFS_STATS_INC(xs_trans_async); @@ -925,7 +917,7 @@ out_unreserve: if (tp->t_ticket) { commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); if (commit_lsn == -1 && !error) - error = XFS_ERROR(EIO); + error = -EIO; } current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); @@ -1032,7 +1024,7 @@ xfs_trans_roll( */ error = xfs_trans_commit(trans, 0); if (error) - return (error); + return error; trans = *tpp; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 9b96d35e483d..b5bc1ab3c4da 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -64,7 +64,7 @@ typedef struct xfs_log_item { struct xfs_item_ops { void (*iop_size)(xfs_log_item_t *, int *, int *); - void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); + void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *); void (*iop_pin)(xfs_log_item_t *); void (*iop_unpin)(xfs_log_item_t *, int remove); uint (*iop_push)(struct xfs_log_item *, struct list_head *); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index a7287354e535..859482f53b5a 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -173,7 +173,6 @@ xfs_trans_ail_cursor_next( */ void xfs_trans_ail_cursor_done( - struct xfs_ail *ailp, struct xfs_ail_cursor *cur) { cur->item = NULL; @@ -368,7 +367,7 @@ xfsaild_push( * If the AIL is empty or our push has reached the end we are * done now. */ - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); goto out_done; } @@ -453,7 +452,7 @@ xfsaild_push( break; lsn = lip->li_lsn; } - xfs_trans_ail_cursor_done(ailp, &cur); + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list)) @@ -763,7 +762,7 @@ xfs_trans_ail_init( ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); if (!ailp) - return ENOMEM; + return -ENOMEM; ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); @@ -782,7 +781,7 @@ xfs_trans_ail_init( out_free_ailp: kmem_free(ailp); - return ENOMEM; + return -ENOMEM; } void diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index c035d11b7734..96c898e7ac9a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -166,7 +166,7 @@ xfs_trans_get_buf_map( ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; trace_xfs_trans_get_buf_recur(bip); - return (bp); + return bp; } bp = xfs_buf_get_map(target, map, nmaps, flags); @@ -178,7 +178,7 @@ xfs_trans_get_buf_map( _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_get_buf(bp->b_fspriv); - return (bp); + return bp; } /* @@ -201,9 +201,8 @@ xfs_trans_getsb(xfs_trans_t *tp, * Default to just trying to lock the superblock buffer * if tp is NULL. */ - if (tp == NULL) { - return (xfs_getsb(mp, flags)); - } + if (tp == NULL) + return xfs_getsb(mp, flags); /* * If the superblock buffer already has this transaction @@ -218,7 +217,7 @@ xfs_trans_getsb(xfs_trans_t *tp, ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; trace_xfs_trans_getsb_recur(bip); - return (bp); + return bp; } bp = xfs_getsb(mp, flags); @@ -227,7 +226,7 @@ xfs_trans_getsb(xfs_trans_t *tp, _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_getsb(bp->b_fspriv); - return (bp); + return bp; } #ifdef DEBUG @@ -267,7 +266,7 @@ xfs_trans_read_buf_map( bp = xfs_buf_read_map(target, map, nmaps, flags, ops); if (!bp) return (flags & XBF_TRYLOCK) ? - EAGAIN : XFS_ERROR(ENOMEM); + -EAGAIN : -ENOMEM; if (bp->b_error) { error = bp->b_error; @@ -275,6 +274,10 @@ xfs_trans_read_buf_map( XFS_BUF_UNDONE(bp); xfs_buf_stale(bp); xfs_buf_relse(bp); + + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; return error; } #ifdef DEBUG @@ -283,7 +286,7 @@ xfs_trans_read_buf_map( if (((xfs_req_num++) % xfs_error_mod) == 0) { xfs_buf_relse(bp); xfs_debug(mp, "Returning error!"); - return XFS_ERROR(EIO); + return -EIO; } } } @@ -314,7 +317,18 @@ xfs_trans_read_buf_map( ASSERT(bp->b_iodone == NULL); XFS_BUF_READ(bp); bp->b_ops = ops; - xfsbdstrat(tp->t_mountp, bp); + + /* + * XXX(hch): clean up the error handling here to be less + * of a mess.. + */ + if (XFS_FORCED_SHUTDOWN(mp)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + xfs_bioerror_relse(bp); + } else { + xfs_buf_iorequest(bp); + } + error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, __func__); @@ -327,6 +341,9 @@ xfs_trans_read_buf_map( if (tp->t_flags & XFS_TRANS_DIRTY) xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; return error; } } @@ -337,7 +354,7 @@ xfs_trans_read_buf_map( if (XFS_FORCED_SHUTDOWN(mp)) { trace_xfs_trans_read_buf_shut(bp, _RET_IP_); *bpp = NULL; - return XFS_ERROR(EIO); + return -EIO; } @@ -354,7 +371,7 @@ xfs_trans_read_buf_map( if (bp == NULL) { *bpp = NULL; return (flags & XBF_TRYLOCK) ? - 0 : XFS_ERROR(ENOMEM); + 0 : -ENOMEM; } if (bp->b_error) { error = bp->b_error; @@ -364,6 +381,10 @@ xfs_trans_read_buf_map( if (tp->t_flags & XFS_TRANS_DIRTY) xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); xfs_buf_relse(bp); + + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; return error; } #ifdef DEBUG @@ -374,7 +395,7 @@ xfs_trans_read_buf_map( SHUTDOWN_META_IO_ERROR); xfs_buf_relse(bp); xfs_debug(mp, "Returning trans error!"); - return XFS_ERROR(EIO); + return -EIO; } } } @@ -392,7 +413,7 @@ shutdown_abort: trace_xfs_trans_read_buf_shut(bp, _RET_IP_); xfs_buf_relse(bp); *bpp = NULL; - return XFS_ERROR(EIO); + return -EIO; } /* diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index cd2a10e15d3a..846e061c2e98 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -295,8 +295,8 @@ xfs_trans_mod_dquot( /* * Given an array of dqtrx structures, lock all the dquots associated and join * them to the transaction, provided they have been modified. We know that the - * highest number of dquots of one type - usr, grp OR prj - involved in a - * transaction is 2 so we don't need to make this very generic. + * highest number of dquots of one type - usr, grp and prj - involved in a + * transaction is 3 so we don't need to make this very generic. */ STATIC void xfs_trans_dqlockedjoin( @@ -722,8 +722,8 @@ xfs_trans_dqresv( error_return: xfs_dqunlock(dqp); if (flags & XFS_QMOPT_ENOSPC) - return ENOSPC; - return EDQUOT; + return -ENOSPC; + return -EDQUOT; } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 12e86af9d9b9..bd1281862ad7 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -133,8 +133,7 @@ struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp, xfs_lsn_t lsn); struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); -void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, - struct xfs_ail_cursor *cur); +void xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur); #if BITS_PER_LONG != 64 static inline void diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 82bbc34d54a3..b79dc66b2ecd 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -38,43 +38,18 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ -/* - * These types are 64 bits on disk but are either 32 or 64 bits in memory. - * Disk based types: - */ -typedef __uint64_t xfs_dfsbno_t; /* blockno in filesystem (agno|agbno) */ -typedef __uint64_t xfs_drfsbno_t; /* blockno in filesystem (raw) */ -typedef __uint64_t xfs_drtbno_t; /* extent (block) in realtime area */ -typedef __uint64_t xfs_dfiloff_t; /* block number in a file */ -typedef __uint64_t xfs_dfilblks_t; /* number of blocks in a file */ - -/* - * Memory based types are conditional. - */ -#if XFS_BIG_BLKNOS typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ -typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ -#else -typedef __uint32_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ -typedef __uint32_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ -typedef __uint32_t xfs_rtblock_t; /* extent (block) in realtime area */ -typedef __int32_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ -#endif typedef __uint64_t xfs_fileoff_t; /* block number in a file */ -typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ +typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ +typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ /* * Null values for the types. */ -#define NULLDFSBNO ((xfs_dfsbno_t)-1) -#define NULLDRFSBNO ((xfs_drfsbno_t)-1) -#define NULLDRTBNO ((xfs_drtbno_t)-1) -#define NULLDFILOFF ((xfs_dfiloff_t)-1) - #define NULLFSBLOCK ((xfs_fsblock_t)-1) #define NULLRFSBLOCK ((xfs_rfsblock_t)-1) #define NULLRTBLOCK ((xfs_rtblock_t)-1) @@ -134,7 +109,7 @@ typedef enum { typedef enum { XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, - XFS_BTNUM_MAX + XFS_BTNUM_FINOi, XFS_BTNUM_MAX } xfs_btnum_t; struct xfs_name { diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h deleted file mode 100644 index 3e8e797c6d11..000000000000 --- a/fs/xfs/xfs_vnode.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VNODE_H__ -#define __XFS_VNODE_H__ - -#include "xfs_fs.h" - -struct file; -struct xfs_inode; -struct attrlist_cursor_kern; - -/* - * Flags for read/write calls - same values as IRIX - */ -#define IO_ISDIRECT 0x00004 /* bypass page cache */ -#define IO_INVIS 0x00020 /* don't update inode timestamps */ - -#define XFS_IO_FLAGS \ - { IO_ISDIRECT, "DIRECT" }, \ - { IO_INVIS, "INVIS"} - -/* - * Flush/Invalidate options for vop_toss/flush/flushinval_pages. - */ -#define FI_NONE 0 /* none */ -#define FI_REMAPF 1 /* Do a remapf prior to the operation */ -#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. - Prevent VM access to the pages until - the operation completes. */ - -/* - * Some useful predicates. - */ -#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) -#define VN_CACHED(vp) (vp->i_mapping->nrpages) -#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ - PAGECACHE_TAG_DIRTY) - - -#endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 9d479073ba41..93455b998041 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -49,7 +49,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name, value = NULL; } - error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); + error = xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); if (error) return error; return asize; @@ -71,8 +71,8 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, xflags |= ATTR_REPLACE; if (!value) - return -xfs_attr_remove(ip, (unsigned char *)name, xflags); - return -xfs_attr_set(ip, (unsigned char *)name, + return xfs_attr_remove(ip, (unsigned char *)name, xflags); + return xfs_attr_set(ip, (unsigned char *)name, (void *)value, size, xflags); } @@ -102,8 +102,8 @@ const struct xattr_handler *xfs_xattr_handlers[] = { &xfs_xattr_trusted_handler, &xfs_xattr_security_handler, #ifdef CONFIG_XFS_POSIX_ACL - &xfs_xattr_acl_access_handler, - &xfs_xattr_acl_default_handler, + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, #endif NULL }; |