aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/xfs.txt4
-rw-r--r--fs/inode.c209
-rw-r--r--fs/xfs/Makefile6
-rw-r--r--fs/xfs/linux-2.6/sv.h22
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c66
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c87
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h30
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c189
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c223
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.h82
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c849
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h214
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c122
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h13
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h65
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c884
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h15
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c762
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h55
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h77
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c145
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h72
-rw-r--r--fs/xfs/quota/xfs_dquot.c39
-rw-r--r--fs/xfs/quota/xfs_dquot.h4
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c45
-rw-r--r--fs/xfs/quota/xfs_qm.c57
-rw-r--r--fs/xfs/quota/xfs_qm.h3
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c5
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c151
-rw-r--r--fs/xfs/support/debug.c39
-rw-r--r--fs/xfs/support/debug.h2
-rw-r--r--fs/xfs/support/ktrace.c9
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_ag.h15
-rw-r--r--fs/xfs/xfs_alloc.c264
-rw-r--r--fs/xfs/xfs_alloc.h27
-rw-r--r--fs/xfs/xfs_alloc_btree.c2387
-rw-r--r--fs/xfs/xfs_alloc_btree.h107
-rw-r--r--fs/xfs/xfs_arch.h39
-rw-r--r--fs/xfs/xfs_bit.h3
-rw-r--r--fs/xfs/xfs_bmap.c410
-rw-r--r--fs/xfs/xfs_bmap.h72
-rw-r--r--fs/xfs/xfs_bmap_btree.c2617
-rw-r--r--fs/xfs/xfs_bmap_btree.h171
-rw-r--r--fs/xfs/xfs_btree.c3596
-rw-r--r--fs/xfs/xfs_btree.h392
-rw-r--r--fs/xfs/xfs_btree_trace.c249
-rw-r--r--fs/xfs/xfs_btree_trace.h116
-rw-r--r--fs/xfs/xfs_buf_item.c45
-rw-r--r--fs/xfs/xfs_clnt.h105
-rw-r--r--fs/xfs/xfs_da_btree.h24
-rw-r--r--fs/xfs/xfs_dfrag.c8
-rw-r--r--fs/xfs/xfs_dfrag.h2
-rw-r--r--fs/xfs/xfs_dinode.h148
-rw-r--r--fs/xfs/xfs_dir2_sf.h7
-rw-r--r--fs/xfs/xfs_dmops.c5
-rw-r--r--fs/xfs/xfs_error.c15
-rw-r--r--fs/xfs/xfs_error.h12
-rw-r--r--fs/xfs/xfs_extfree_item.c45
-rw-r--r--fs/xfs/xfs_fs.h22
-rw-r--r--fs/xfs/xfs_fsops.c30
-rw-r--r--fs/xfs/xfs_ialloc.c449
-rw-r--r--fs/xfs/xfs_ialloc.h31
-rw-r--r--fs/xfs/xfs_ialloc_btree.c2193
-rw-r--r--fs/xfs/xfs_ialloc_btree.h111
-rw-r--r--fs/xfs/xfs_iget.c735
-rw-r--r--fs/xfs/xfs_imap.h40
-rw-r--r--fs/xfs/xfs_inode.c587
-rw-r--r--fs/xfs/xfs_inode.h375
-rw-r--r--fs/xfs/xfs_inode_item.c45
-rw-r--r--fs/xfs/xfs_inode_item.h41
-rw-r--r--fs/xfs/xfs_iomap.c28
-rw-r--r--fs/xfs/xfs_itable.c102
-rw-r--r--fs/xfs/xfs_itable.h14
-rw-r--r--fs/xfs/xfs_log.c81
-rw-r--r--fs/xfs/xfs_log.h4
-rw-r--r--fs/xfs/xfs_log_priv.h48
-rw-r--r--fs/xfs/xfs_log_recover.c416
-rw-r--r--fs/xfs/xfs_mount.c81
-rw-r--r--fs/xfs/xfs_mount.h73
-rw-r--r--fs/xfs/xfs_qmops.c5
-rw-r--r--fs/xfs/xfs_quota.h8
-rw-r--r--fs/xfs/xfs_rename.c61
-rw-r--r--fs/xfs/xfs_rtalloc.c41
-rw-r--r--fs/xfs/xfs_rw.c2
-rw-r--r--fs/xfs/xfs_sb.h167
-rw-r--r--fs/xfs/xfs_trans.c22
-rw-r--r--fs/xfs/xfs_trans.h322
-rw-r--r--fs/xfs/xfs_trans_ail.c362
-rw-r--r--fs/xfs/xfs_trans_buf.c7
-rw-r--r--fs/xfs/xfs_trans_inode.c30
-rw-r--r--fs/xfs/xfs_trans_item.c10
-rw-r--r--fs/xfs/xfs_trans_priv.h98
-rw-r--r--fs/xfs/xfs_utils.c12
-rw-r--r--fs/xfs/xfs_vfsops.c757
-rw-r--r--fs/xfs/xfs_vfsops.h16
-rw-r--r--fs/xfs/xfs_vnodeops.c354
-rw-r--r--fs/xfs/xfs_vnodeops.h10
-rw-r--r--include/linux/fs.h10
-rw-r--r--kernel/sysctl_check.c1
112 files changed, 10496 insertions, 12850 deletions
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 0a1668ba2600..9878f50d6ed6 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -229,10 +229,6 @@ The following sysctls are available for the XFS filesystem:
ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl
is set.
- fs.xfs.restrict_chown (Min: 0 Default: 1 Max: 1)
- Controls whether unprivileged users can use chown to "give away"
- a file to another user.
-
fs.xfs.inherit_sync (Min: 0 Default: 1 Max: 1)
Setting this to "1" will cause the "sync" flag set
by the xfs_io(8) chattr command on a directory to be
diff --git a/fs/inode.c b/fs/inode.c
index 0487ddba1397..098a2443196f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode)
wake_up_bit(&inode->i_state, __I_LOCK);
}
-static struct inode *alloc_inode(struct super_block *sb)
+/**
+ * inode_init_always - perform inode structure intialisation
+ * @sb - superblock inode belongs to.
+ * @inode - inode to initialise
+ *
+ * These are initializations that need to be done on every inode
+ * allocation as the fields are not initialised by slab allocation.
+ */
+struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct address_space_operations empty_aops;
static struct inode_operations empty_iops;
static const struct file_operations empty_fops;
- struct inode *inode;
-
- if (sb->s_op->alloc_inode)
- inode = sb->s_op->alloc_inode(sb);
- else
- inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);
- if (inode) {
- struct address_space * const mapping = &inode->i_data;
-
- inode->i_sb = sb;
- inode->i_blkbits = sb->s_blocksize_bits;
- inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
- inode->i_op = &empty_iops;
- inode->i_fop = &empty_fops;
- inode->i_nlink = 1;
- atomic_set(&inode->i_writecount, 0);
- inode->i_size = 0;
- inode->i_blocks = 0;
- inode->i_bytes = 0;
- inode->i_generation = 0;
+ struct address_space * const mapping = &inode->i_data;
+
+ inode->i_sb = sb;
+ inode->i_blkbits = sb->s_blocksize_bits;
+ inode->i_flags = 0;
+ atomic_set(&inode->i_count, 1);
+ inode->i_op = &empty_iops;
+ inode->i_fop = &empty_fops;
+ inode->i_nlink = 1;
+ atomic_set(&inode->i_writecount, 0);
+ inode->i_size = 0;
+ inode->i_blocks = 0;
+ inode->i_bytes = 0;
+ inode->i_generation = 0;
#ifdef CONFIG_QUOTA
- memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
+ memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
- inode->i_pipe = NULL;
- inode->i_bdev = NULL;
- inode->i_cdev = NULL;
- inode->i_rdev = 0;
- inode->dirtied_when = 0;
- if (security_inode_alloc(inode)) {
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, (inode));
- return NULL;
- }
+ inode->i_pipe = NULL;
+ inode->i_bdev = NULL;
+ inode->i_cdev = NULL;
+ inode->i_rdev = 0;
+ inode->dirtied_when = 0;
+ if (security_inode_alloc(inode)) {
+ if (inode->i_sb->s_op->destroy_inode)
+ inode->i_sb->s_op->destroy_inode(inode);
+ else
+ kmem_cache_free(inode_cachep, (inode));
+ return NULL;
+ }
- spin_lock_init(&inode->i_lock);
- lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+ spin_lock_init(&inode->i_lock);
+ lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
- mutex_init(&inode->i_mutex);
- lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
+ mutex_init(&inode->i_mutex);
+ lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
- init_rwsem(&inode->i_alloc_sem);
- lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+ init_rwsem(&inode->i_alloc_sem);
+ lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
- mapping->a_ops = &empty_aops;
- mapping->host = inode;
- mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
- mapping->assoc_mapping = NULL;
- mapping->backing_dev_info = &default_backing_dev_info;
- mapping->writeback_index = 0;
+ mapping->a_ops = &empty_aops;
+ mapping->host = inode;
+ mapping->flags = 0;
+ mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
+ mapping->assoc_mapping = NULL;
+ mapping->backing_dev_info = &default_backing_dev_info;
+ mapping->writeback_index = 0;
- /*
- * If the block_device provides a backing_dev_info for client
- * inodes then use that. Otherwise the inode share the bdev's
- * backing_dev_info.
- */
- if (sb->s_bdev) {
- struct backing_dev_info *bdi;
+ /*
+ * If the block_device provides a backing_dev_info for client
+ * inodes then use that. Otherwise the inode share the bdev's
+ * backing_dev_info.
+ */
+ if (sb->s_bdev) {
+ struct backing_dev_info *bdi;
- bdi = sb->s_bdev->bd_inode_backing_dev_info;
- if (!bdi)
- bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
- mapping->backing_dev_info = bdi;
- }
- inode->i_private = NULL;
- inode->i_mapping = mapping;
+ bdi = sb->s_bdev->bd_inode_backing_dev_info;
+ if (!bdi)
+ bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
+ mapping->backing_dev_info = bdi;
}
+ inode->i_private = NULL;
+ inode->i_mapping = mapping;
+
return inode;
}
+EXPORT_SYMBOL(inode_init_always);
+
+static struct inode *alloc_inode(struct super_block *sb)
+{
+ struct inode *inode;
+
+ if (sb->s_op->alloc_inode)
+ inode = sb->s_op->alloc_inode(sb);
+ else
+ inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
+
+ if (inode)
+ return inode_init_always(sb, inode);
+ return NULL;
+}
void destroy_inode(struct inode *inode)
{
@@ -196,6 +212,7 @@ void destroy_inode(struct inode *inode)
else
kmem_cache_free(inode_cachep, (inode));
}
+EXPORT_SYMBOL(destroy_inode);
/*
@@ -534,6 +551,49 @@ repeat:
return node ? inode : NULL;
}
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
+{
+ unsigned long tmp;
+
+ tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+ L1_CACHE_BYTES;
+ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
+ return tmp & I_HASHMASK;
+}
+
+static inline void
+__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
+ struct inode *inode)
+{
+ inodes_stat.nr_inodes++;
+ list_add(&inode->i_list, &inode_in_use);
+ list_add(&inode->i_sb_list, &sb->s_inodes);
+ if (head)
+ hlist_add_head(&inode->i_hash, head);
+}
+
+/**
+ * inode_add_to_lists - add a new inode to relevant lists
+ * @sb - superblock inode belongs to.
+ * @inode - inode to mark in use
+ *
+ * When an inode is allocated it needs to be accounted for, added to the in use
+ * list, the owning superblock and the inode hash. This needs to be done under
+ * the inode_lock, so export a function to do this rather than the inode lock
+ * itself. We calculate the hash list to add to here so it is all internal
+ * which requires the caller to have already set up the inode number in the
+ * inode to add.
+ */
+void inode_add_to_lists(struct super_block *sb, struct inode *inode)
+{
+ struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+
+ spin_lock(&inode_lock);
+ __inode_add_to_lists(sb, head, inode);
+ spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL_GPL(inode_add_to_lists);
+
/**
* new_inode - obtain an inode
* @sb: superblock
@@ -561,9 +621,7 @@ struct inode *new_inode(struct super_block *sb)
inode = alloc_inode(sb);
if (inode) {
spin_lock(&inode_lock);
- inodes_stat.nr_inodes++;
- list_add(&inode->i_list, &inode_in_use);
- list_add(&inode->i_sb_list, &sb->s_inodes);
+ __inode_add_to_lists(sb, NULL, inode);
inode->i_ino = ++last_ino;
inode->i_state = 0;
spin_unlock(&inode_lock);
@@ -622,10 +680,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h
if (set(inode, data))
goto set_failed;
- inodes_stat.nr_inodes++;
- list_add(&inode->i_list, &inode_in_use);
- list_add(&inode->i_sb_list, &sb->s_inodes);
- hlist_add_head(&inode->i_hash, head);
+ __inode_add_to_lists(sb, head, inode);
inode->i_state = I_LOCK|I_NEW;
spin_unlock(&inode_lock);
@@ -671,10 +726,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
old = find_inode_fast(sb, head, ino);
if (!old) {
inode->i_ino = ino;
- inodes_stat.nr_inodes++;
- list_add(&inode->i_list, &inode_in_use);
- list_add(&inode->i_sb_list, &sb->s_inodes);
- hlist_add_head(&inode->i_hash, head);
+ __inode_add_to_lists(sb, head, inode);
inode->i_state = I_LOCK|I_NEW;
spin_unlock(&inode_lock);
@@ -698,16 +750,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
return inode;
}
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
-{
- unsigned long tmp;
-
- tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
- L1_CACHE_BYTES;
- tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
- return tmp & I_HASHMASK;
-}
-
/**
* iunique - get a unique inode number
* @sb: superblock
@@ -1292,6 +1334,7 @@ int inode_wait(void *word)
schedule();
return 0;
}
+EXPORT_SYMBOL(inode_wait);
/*
* If we try to find an inode in the inode hash while it is being
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 737c9a425361..c3dc491fff89 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -85,13 +85,13 @@ xfs-y += xfs_alloc.o \
xfs_trans_inode.o \
xfs_trans_item.o \
xfs_utils.o \
- xfs_vfsops.o \
xfs_vnodeops.o \
xfs_rw.o \
xfs_dmops.o \
xfs_qmops.o
-xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o
+xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
+ xfs_dir2_trace.o
# Objects in linux/
xfs-y += $(addprefix $(XFS_LINUX)/, \
@@ -106,7 +106,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
xfs_iops.o \
xfs_lrw.o \
xfs_super.o \
- xfs_vnode.o \
+ xfs_sync.o \
xfs_xattr.o)
# Objects in support/
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
index 351a8f454bd1..4dfc7c370819 100644
--- a/fs/xfs/linux-2.6/sv.h
+++ b/fs/xfs/linux-2.6/sv.h
@@ -32,23 +32,15 @@ typedef struct sv_s {
wait_queue_head_t waiters;
} sv_t;
-#define SV_FIFO 0x0 /* sv_t is FIFO type */
-#define SV_LIFO 0x2 /* sv_t is LIFO type */
-#define SV_PRIO 0x4 /* sv_t is PRIO type */
-#define SV_KEYED 0x6 /* sv_t is KEYED type */
-#define SV_DEFAULT SV_FIFO
-
-
-static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
- unsigned long timeout)
+static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
{
DECLARE_WAITQUEUE(wait, current);
add_wait_queue_exclusive(&sv->waiters, &wait);
- __set_current_state(state);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock(lock);
- schedule_timeout(timeout);
+ schedule();
remove_wait_queue(&sv->waiters, &wait);
}
@@ -58,13 +50,7 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
#define sv_destroy(sv) \
/*NOTHING*/
#define sv_wait(sv, pri, lock, s) \
- _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
-#define sv_wait_sig(sv, pri, lock, s) \
- _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
-#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
- _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
-#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
- _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
+ _sv_wait(sv, lock)
#define sv_signal(sv) \
wake_up(&(sv)->waiters)
#define sv_broadcast(sv) \
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a44d68eb50b5..de3a198f771e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -42,6 +42,40 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
+
+/*
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC 37
+#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
+static wait_queue_head_t xfs_ioend_wq[NVSYNC];
+
+void __init
+xfs_ioend_init(void)
+{
+ int i;
+
+ for (i = 0; i < NVSYNC; i++)
+ init_waitqueue_head(&xfs_ioend_wq[i]);
+}
+
+void
+xfs_ioend_wait(
+ xfs_inode_t *ip)
+{
+ wait_queue_head_t *wq = to_ioend_wq(ip);
+
+ wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
+}
+
+STATIC void
+xfs_ioend_wake(
+ xfs_inode_t *ip)
+{
+ if (atomic_dec_and_test(&ip->i_iocount))
+ wake_up(to_ioend_wq(ip));
+}
+
STATIC void
xfs_count_page_state(
struct page *page,
@@ -146,16 +180,25 @@ xfs_destroy_ioend(
xfs_ioend_t *ioend)
{
struct buffer_head *bh, *next;
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
for (bh = ioend->io_buffer_head; bh; bh = next) {
next = bh->b_private;
bh->b_end_io(bh, !ioend->io_error);
}
- if (unlikely(ioend->io_error)) {
- vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error,
- __FILE__,__LINE__);
+
+ /*
+ * Volume managers supporting multiple paths can send back ENODEV
+ * when the final path disappears. In this case continuing to fill
+ * the page cache with dirty data which cannot be written out is
+ * evil, so prevent that.
+ */
+ if (unlikely(ioend->io_error == -ENODEV)) {
+ xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
+ __FILE__, __LINE__);
}
- vn_iowake(XFS_I(ioend->io_inode));
+
+ xfs_ioend_wake(ip);
mempool_free(ioend, xfs_ioend_pool);
}
@@ -191,7 +234,7 @@ xfs_setfilesize(
ip->i_d.di_size = isize;
ip->i_update_core = 1;
ip->i_update_size = 1;
- mark_inode_dirty_sync(ioend->io_inode);
+ xfs_mark_inode_dirty_sync(ip);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -317,14 +360,9 @@ xfs_map_blocks(
xfs_iomap_t *mapp,
int flags)
{
- xfs_inode_t *ip = XFS_I(inode);
- int error, nmaps = 1;
-
- error = xfs_iomap(ip, offset, count,
- flags, mapp, &nmaps);
- if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
- xfs_iflags_set(ip, XFS_IMODIFIED);
- return -error;
+ int nmaps = 1;
+
+ return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
}
STATIC_INLINE int
@@ -512,7 +550,7 @@ xfs_cancel_ioend(
unlock_buffer(bh);
} while ((bh = next_bh) != NULL);
- vn_iowake(XFS_I(ioend->io_inode));
+ xfs_ioend_wake(XFS_I(ioend->io_inode));
mempool_free(ioend, xfs_ioend_pool);
} while ((ioend = next) != NULL);
}
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 3ba0631a3818..7b26f5ff9692 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -43,4 +43,7 @@ typedef struct xfs_ioend {
extern const struct address_space_operations xfs_address_space_operations;
extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+extern void xfs_ioend_init(void);
+extern void xfs_ioend_wait(struct xfs_inode *);
+
#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 36d5fcd3f593..cb329edc925b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -630,6 +630,29 @@ xfs_buf_get_flags(
return NULL;
}
+STATIC int
+_xfs_buf_read(
+ xfs_buf_t *bp,
+ xfs_buf_flags_t flags)
+{
+ int status;
+
+ XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
+
+ ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+ ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+
+ bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+ bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+
+ status = xfs_buf_iorequest(bp);
+ if (!status && !(flags & XBF_ASYNC))
+ status = xfs_buf_iowait(bp);
+ return status;
+}
+
xfs_buf_t *
xfs_buf_read_flags(
xfs_buftarg_t *target,
@@ -646,7 +669,7 @@ xfs_buf_read_flags(
if (!XFS_BUF_ISDONE(bp)) {
XB_TRACE(bp, "read", (unsigned long)flags);
XFS_STATS_INC(xb_get_read);
- xfs_buf_iostart(bp, flags);
+ _xfs_buf_read(bp, flags);
} else if (flags & XBF_ASYNC) {
XB_TRACE(bp, "read_async", (unsigned long)flags);
/*
@@ -1048,50 +1071,39 @@ xfs_buf_ioerror(
XB_TRACE(bp, "ioerror", (unsigned long)error);
}
-/*
- * Initiate I/O on a buffer, based on the flags supplied.
- * The b_iodone routine in the buffer supplied will only be called
- * when all of the subsidiary I/O requests, if any, have been completed.
- */
int
-xfs_buf_iostart(
- xfs_buf_t *bp,
- xfs_buf_flags_t flags)
+xfs_bawrite(
+ void *mp,
+ struct xfs_buf *bp)
{
- int status = 0;
+ XB_TRACE(bp, "bawrite", 0);
- XB_TRACE(bp, "iostart", (unsigned long)flags);
+ ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
- if (flags & XBF_DELWRI) {
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
- bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
- xfs_buf_delwri_queue(bp, 1);
- return 0;
- }
+ xfs_buf_delwri_dequeue(bp);
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
- bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+ bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
+ bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
+
+ bp->b_mount = mp;
+ bp->b_strat = xfs_bdstrat_cb;
+ return xfs_bdstrat_cb(bp);
+}
- BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
+void
+xfs_bdwrite(
+ void *mp,
+ struct xfs_buf *bp)
+{
+ XB_TRACE(bp, "bdwrite", 0);
- /* For writes allow an alternate strategy routine to precede
- * the actual I/O request (which may not be issued at all in
- * a shutdown situation, for example).
- */
- status = (flags & XBF_WRITE) ?
- xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
+ bp->b_strat = xfs_bdstrat_cb;
+ bp->b_mount = mp;
- /* Wait for I/O if we are not an async request.
- * Note: async I/O request completion will release the buffer,
- * and that can already be done by this point. So using the
- * buffer pointer from here on, after async I/O, is invalid.
- */
- if (!status && !(flags & XBF_ASYNC))
- status = xfs_buf_iowait(bp);
+ bp->b_flags &= ~XBF_READ;
+ bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
- return status;
+ xfs_buf_delwri_queue(bp, 1);
}
STATIC_INLINE void
@@ -1114,8 +1126,7 @@ xfs_buf_bio_end_io(
unsigned int blocksize = bp->b_target->bt_bsize;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- bp->b_error = EIO;
+ xfs_buf_ioerror(bp, -error);
do {
struct page *page = bvec->bv_page;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 456519a088c7..288ae7c4c800 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -168,7 +168,7 @@ typedef struct xfs_buf {
struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
void *b_fspriv2;
- void *b_fspriv3;
+ struct xfs_mount *b_mount;
unsigned short b_error; /* error code on I/O */
unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset in first page */
@@ -214,9 +214,10 @@ extern void xfs_buf_lock(xfs_buf_t *);
extern void xfs_buf_unlock(xfs_buf_t *);
/* Buffer Read and Write Routines */
+extern int xfs_bawrite(void *mp, xfs_buf_t *bp);
+extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
extern void xfs_buf_ioend(xfs_buf_t *, int);
extern void xfs_buf_ioerror(xfs_buf_t *, int);
-extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
extern int xfs_buf_iorequest(xfs_buf_t *);
extern int xfs_buf_iowait(xfs_buf_t *);
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
@@ -311,10 +312,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
-#define XFS_BUF_SHUT(bp) do { } while (0)
-#define XFS_BUF_UNSHUT(bp) do { } while (0)
-#define XFS_BUF_ISSHUT(bp) (0)
-
#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
@@ -334,8 +331,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
-#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)
-#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))
#define XFS_BUF_SET_START(bp) do { } while (0)
#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
@@ -366,14 +361,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
#define XFS_BUF_TARGET(bp) ((bp)->b_target)
#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)
-static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
-{
- bp->b_fspriv3 = mp;
- bp->b_strat = xfs_bdstrat_cb;
- xfs_buf_delwri_dequeue(bp);
- return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
-}
-
static inline void xfs_buf_relse(xfs_buf_t *bp)
{
if (!bp->b_relse)
@@ -414,17 +401,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
return error;
}
-/*
- * No error can be returned from xfs_buf_iostart for delwri
- * buffers as they are queued and no I/O is issued.
- */
-static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
-{
- bp->b_strat = xfs_bdstrat_cb;
- bp->b_fspriv3 = mp;
- (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
-}
-
#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
#define xfs_iowait(bp) xfs_buf_iowait(bp)
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 8c022cd0ad67..55bddf3b6091 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -25,12 +25,4 @@
*/
typedef const struct cred cred_t;
-extern cred_t *sys_cred;
-
-/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
-static inline int capable_cred(cred_t *cr, int cid)
-{
- return (cr == sys_cred) ? 1 : capable(cid);
-}
-
#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 7f7abec25e14..595751f78350 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -29,7 +29,6 @@
#include "xfs_vnodeops.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
-#include "xfs_vfsops.h"
/*
* Note that we only accept fileids which are long enough rather than allow
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 3fee790f138b..e14c4e3aea0c 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -36,89 +36,54 @@
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_ioctl32.h"
#include "xfs_vnodeops.h"
+#include "xfs_da_btree.h"
+#include "xfs_ioctl.h"
#include <linux/dcache.h>
#include <linux/smp_lock.h>
static struct vm_operations_struct xfs_file_vm_ops;
-STATIC_INLINE ssize_t
-__xfs_file_read(
+STATIC ssize_t
+xfs_file_aio_read(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
- int ioflags,
loff_t pos)
{
struct file *file = iocb->ki_filp;
+ int ioflags = IO_ISAIO;
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
+ if (file->f_mode & FMODE_NOCMTIME)
+ ioflags |= IO_INVIS;
return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
nr_segs, &iocb->ki_pos, ioflags);
}
STATIC ssize_t
-xfs_file_aio_read(
- struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos)
-{
- return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
-}
-
-STATIC ssize_t
-xfs_file_aio_read_invis(
- struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos)
-{
- return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
-}
-
-STATIC_INLINE ssize_t
-__xfs_file_write(
+xfs_file_aio_write(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
- int ioflags,
loff_t pos)
{
- struct file *file = iocb->ki_filp;
+ struct file *file = iocb->ki_filp;
+ int ioflags = IO_ISAIO;
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
+ if (file->f_mode & FMODE_NOCMTIME)
+ ioflags |= IO_INVIS;
return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
&iocb->ki_pos, ioflags);
}
STATIC ssize_t
-xfs_file_aio_write(
- struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos)
-{
- return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
-}
-
-STATIC ssize_t
-xfs_file_aio_write_invis(
- struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos)
-{
- return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
-}
-
-STATIC ssize_t
xfs_file_splice_read(
struct file *infilp,
loff_t *ppos,
@@ -126,20 +91,13 @@ xfs_file_splice_read(
size_t len,
unsigned int flags)
{
- return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
- infilp, ppos, pipe, len, flags, 0);
-}
+ int ioflags = 0;
+
+ if (infilp->f_mode & FMODE_NOCMTIME)
+ ioflags |= IO_INVIS;
-STATIC ssize_t
-xfs_file_splice_read_invis(
- struct file *infilp,
- loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t len,
- unsigned int flags)
-{
return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
- infilp, ppos, pipe, len, flags, IO_INVIS);
+ infilp, ppos, pipe, len, flags, ioflags);
}
STATIC ssize_t
@@ -150,30 +108,49 @@ xfs_file_splice_write(
size_t len,
unsigned int flags)
{
- return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
- pipe, outfilp, ppos, len, flags, 0);
-}
+ int ioflags = 0;
+
+ if (outfilp->f_mode & FMODE_NOCMTIME)
+ ioflags |= IO_INVIS;
-STATIC ssize_t
-xfs_file_splice_write_invis(
- struct pipe_inode_info *pipe,
- struct file *outfilp,
- loff_t *ppos,
- size_t len,
- unsigned int flags)
-{
return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
- pipe, outfilp, ppos, len, flags, IO_INVIS);
+ pipe, outfilp, ppos, len, flags, ioflags);
}
STATIC int
xfs_file_open(
struct inode *inode,
- struct file *filp)
+ struct file *file)
{
- if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+ if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
return -EFBIG;
- return -xfs_open(XFS_I(inode));
+ if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
+ return -EIO;
+ return 0;
+}
+
+STATIC int
+xfs_dir_open(
+ struct inode *inode,
+ struct file *file)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ int mode;
+ int error;
+
+ error = xfs_file_open(inode, file);
+ if (error)
+ return error;
+
+ /*
+ * If there are any blocks, read-ahead block 0 as we're almost
+ * certain to have the next operation be a read there.
+ */
+ mode = xfs_ilock_map_shared(ip);
+ if (ip->i_d.di_nextents > 0)
+ xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+ xfs_iunlock(ip, mode);
+ return 0;
}
STATIC int
@@ -227,7 +204,7 @@ xfs_file_readdir(
* point we can change the ->readdir prototype to include the
* buffer size.
*/
- bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size);
+ bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size);
error = xfs_readdir(ip, dirent, bufsize,
(xfs_off_t *)&filp->f_pos, filldir);
@@ -248,48 +225,6 @@ xfs_file_mmap(
return 0;
}
-STATIC long
-xfs_file_ioctl(
- struct file *filp,
- unsigned int cmd,
- unsigned long p)
-{
- int error;
- struct inode *inode = filp->f_path.dentry->d_inode;
-
- error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
- xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
-
- /* NOTE: some of the ioctl's return positive #'s as a
- * byte count indicating success, such as
- * readlink_by_handle. So we don't "sign flip"
- * like most other routines. This means true
- * errors need to be returned as a negative value.
- */
- return error;
-}
-
-STATIC long
-xfs_file_ioctl_invis(
- struct file *filp,
- unsigned int cmd,
- unsigned long p)
-{
- int error;
- struct inode *inode = filp->f_path.dentry->d_inode;
-
- error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
- xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
-
- /* NOTE: some of the ioctl's return positive #'s as a
- * byte count indicating success, such as
- * readlink_by_handle. So we don't "sign flip"
- * like most other routines. This means true
- * errors need to be returned as a negative value.
- */
- return error;
-}
-
/*
* mmap()d file has taken write protection fault and is being made
* writable. We can set the page state up correctly for a writable
@@ -325,26 +260,8 @@ const struct file_operations xfs_file_operations = {
#endif
};
-const struct file_operations xfs_invis_file_operations = {
- .llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = xfs_file_aio_read_invis,
- .aio_write = xfs_file_aio_write_invis,
- .splice_read = xfs_file_splice_read_invis,
- .splice_write = xfs_file_splice_write_invis,
- .unlocked_ioctl = xfs_file_ioctl_invis,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = xfs_file_compat_invis_ioctl,
-#endif
- .mmap = xfs_file_mmap,
- .open = xfs_file_open,
- .release = xfs_file_release,
- .fsync = xfs_file_fsync,
-};
-
-
const struct file_operations xfs_dir_file_operations = {
+ .open = xfs_dir_open,
.read = generic_read_dir,
.readdir = xfs_file_readdir,
.llseek = generic_file_llseek,
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 36caa6d957df..5aeb77776961 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -24,6 +24,10 @@ int fs_noerr(void) { return 0; }
int fs_nosys(void) { return ENOSYS; }
void fs_noval(void) { return; }
+/*
+ * note: all filemap functions return negative error codes. These
+ * need to be inverted before returning to the xfs core functions.
+ */
void
xfs_tosspages(
xfs_inode_t *ip,
@@ -53,7 +57,7 @@ xfs_flushinval_pages(
if (!ret)
truncate_inode_pages(mapping, first);
}
- return ret;
+ return -ret;
}
int
@@ -72,10 +76,23 @@ xfs_flush_pages(
xfs_iflags_clear(ip, XFS_ITRUNCATED);
ret = filemap_fdatawrite(mapping);
if (flags & XFS_B_ASYNC)
- return ret;
+ return -ret;
ret2 = filemap_fdatawait(mapping);
if (!ret)
ret = ret2;
}
- return ret;
+ return -ret;
+}
+
+int
+xfs_wait_on_pages(
+ xfs_inode_t *ip,
+ xfs_off_t first,
+ xfs_off_t last)
+{
+ struct address_space *mapping = VFS_I(ip)->i_mapping;
+
+ if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+ return -filemap_fdatawait(mapping);
+ return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ef90e64641e6..2ae8b1ccb02e 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -26,7 +26,6 @@
*/
xfs_param_t xfs_params = {
/* MIN DFLT MAX */
- .restrict_chown = { 0, 1, 1 },
.sgid_inherit = { 0, 0, 1 },
.symlink_mode = { 0, 0, 1 },
.panic_mask = { 0, 0, 255 },
@@ -43,10 +42,3 @@ xfs_param_t xfs_params = {
.inherit_nodfrg = { 0, 1, 1 },
.fstrm_timer = { 1, 30*100, 3600*100},
};
-
-/*
- * Global system credential structure.
- */
-static cred_t sys_cred_val;
-cred_t *sys_cred = &sys_cred_val;
-
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
index 6eda8a3eb6f1..69f71caf061c 100644
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -19,6 +19,5 @@
#define __XFS_GLOBALS_H__
extern uint64_t xfs_panic_mask; /* set to cause more panics */
-extern cred_t *sys_cred;
#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 281cbd5a25cf..67205f6198ba 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -68,26 +68,22 @@
* XFS_IOC_PATH_TO_HANDLE
* returns full handle for a path
*/
-STATIC int
+int
xfs_find_handle(
unsigned int cmd,
- void __user *arg)
+ xfs_fsop_handlereq_t *hreq)
{
int hsize;
xfs_handle_t handle;
- xfs_fsop_handlereq_t hreq;
struct inode *inode;
- if (copy_from_user(&hreq, arg, sizeof(hreq)))
- return -XFS_ERROR(EFAULT);
-
memset((char *)&handle, 0, sizeof(handle));
switch (cmd) {
case XFS_IOC_PATH_TO_FSHANDLE:
case XFS_IOC_PATH_TO_HANDLE: {
struct path path;
- int error = user_lpath((const char __user *)hreq.path, &path);
+ int error = user_lpath((const char __user *)hreq->path, &path);
if (error)
return error;
@@ -101,7 +97,7 @@ xfs_find_handle(
case XFS_IOC_FD_TO_HANDLE: {
struct file *file;
- file = fget(hreq.fd);
+ file = fget(hreq->fd);
if (!file)
return -EBADF;
@@ -158,8 +154,8 @@ xfs_find_handle(
}
/* now copy our handle into the user buffer & write out the size */
- if (copy_to_user(hreq.ohandle, &handle, hsize) ||
- copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) {
+ if (copy_to_user(hreq->ohandle, &handle, hsize) ||
+ copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) {
iput(inode);
return -XFS_ERROR(EFAULT);
}
@@ -249,10 +245,10 @@ xfs_vget_fsop_handlereq(
return 0;
}
-STATIC int
+int
xfs_open_by_handle(
xfs_mount_t *mp,
- void __user *arg,
+ xfs_fsop_handlereq_t *hreq,
struct file *parfilp,
struct inode *parinode)
{
@@ -263,14 +259,11 @@ xfs_open_by_handle(
struct file *filp;
struct inode *inode;
struct dentry *dentry;
- xfs_fsop_handlereq_t hreq;
if (!capable(CAP_SYS_ADMIN))
return -XFS_ERROR(EPERM);
- if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
- error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode);
+ error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode);
if (error)
return -error;
@@ -281,10 +274,10 @@ xfs_open_by_handle(
}
#if BITS_PER_LONG != 32
- hreq.oflags |= O_LARGEFILE;
+ hreq->oflags |= O_LARGEFILE;
#endif
/* Put open permission in namei format. */
- permflag = hreq.oflags;
+ permflag = hreq->oflags;
if ((permflag+1) & O_ACCMODE)
permflag++;
if (permflag & O_TRUNC)
@@ -322,15 +315,16 @@ xfs_open_by_handle(
mntget(parfilp->f_path.mnt);
/* Create file pointer. */
- filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags, cred);
+ filp = dentry_open(dentry, parfilp->f_path.mnt, hreq->oflags, cred);
if (IS_ERR(filp)) {
put_unused_fd(new_fd);
return -XFS_ERROR(-PTR_ERR(filp));
}
+
if (inode->i_mode & S_IFREG) {
/* invisible operation should not change atime */
filp->f_flags |= O_NOATIME;
- filp->f_op = &xfs_invis_file_operations;
+ filp->f_mode |= FMODE_NOCMTIME;
}
fd_install(new_fd, filp);
@@ -363,24 +357,21 @@ do_readlink(
}
-STATIC int
+int
xfs_readlink_by_handle(
xfs_mount_t *mp,
- void __user *arg,
+ xfs_fsop_handlereq_t *hreq,
struct inode *parinode)
{
struct inode *inode;
- xfs_fsop_handlereq_t hreq;
__u32 olen;
void *link;
int error;
if (!capable(CAP_SYS_ADMIN))
return -XFS_ERROR(EPERM);
- if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
- error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode);
+ error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode);
if (error)
return -error;
@@ -390,7 +381,7 @@ xfs_readlink_by_handle(
goto out_iput;
}
- if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) {
+ if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
error = -XFS_ERROR(EFAULT);
goto out_iput;
}
@@ -402,7 +393,7 @@ xfs_readlink_by_handle(
error = -xfs_readlink(XFS_I(inode), link);
if (error)
goto out_kfree;
- error = do_readlink(hreq.ohandle, olen, link);
+ error = do_readlink(hreq->ohandle, olen, link);
if (error)
goto out_kfree;
@@ -501,7 +492,7 @@ xfs_attrlist_by_handle(
return -error;
}
-STATIC int
+int
xfs_attrmulti_attr_get(
struct inode *inode,
char *name,
@@ -530,7 +521,7 @@ xfs_attrmulti_attr_get(
return error;
}
-STATIC int
+int
xfs_attrmulti_attr_set(
struct inode *inode,
char *name,
@@ -560,7 +551,7 @@ xfs_attrmulti_attr_set(
return error;
}
-STATIC int
+int
xfs_attrmulti_attr_remove(
struct inode *inode,
char *name,
@@ -662,19 +653,26 @@ xfs_attrmulti_by_handle(
return -error;
}
-STATIC int
+int
xfs_ioc_space(
struct xfs_inode *ip,
struct inode *inode,
struct file *filp,
int ioflags,
unsigned int cmd,
- void __user *arg)
+ xfs_flock64_t *bf)
{
- xfs_flock64_t bf;
int attr_flags = 0;
int error;
+ /*
+ * Only allow the sys admin to reserve space unless
+ * unwritten extents are enabled.
+ */
+ if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
+ !capable(CAP_SYS_ADMIN))
+ return -XFS_ERROR(EPERM);
+
if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
return -XFS_ERROR(EPERM);
@@ -684,16 +682,12 @@ xfs_ioc_space(
if (!S_ISREG(inode->i_mode))
return -XFS_ERROR(EINVAL);
- if (copy_from_user(&bf, arg, sizeof(bf)))
- return -XFS_ERROR(EFAULT);
-
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
attr_flags |= XFS_ATTR_NONBLOCK;
if (ioflags & IO_INVIS)
attr_flags |= XFS_ATTR_DMI;
- error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
- NULL, attr_flags);
+ error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
return -error;
}
@@ -1105,10 +1099,6 @@ xfs_ioctl_setattr(
/*
* Change file ownership. Must be the owner or privileged.
- * If the system was configured with the "restricted_chown"
- * option, the owner is not permitted to give away the file,
- * and can change the group id only to a group of which he
- * or she is a member.
*/
if (mask & FSX_PROJID) {
/*
@@ -1137,7 +1127,7 @@ xfs_ioctl_setattr(
* the superblock version number since projids didn't
* exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
*/
- if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+ if (ip->i_d.di_version == 1)
xfs_bump_ino_vers2(tp, ip);
}
@@ -1256,43 +1246,67 @@ xfs_ioc_setxflags(
}
STATIC int
+xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
+{
+ struct getbmap __user *base = *ap;
+
+ /* copy only getbmap portion (not getbmapx) */
+ if (copy_to_user(base, bmv, sizeof(struct getbmap)))
+ return XFS_ERROR(EFAULT);
+
+ *ap += sizeof(struct getbmap);
+ return 0;
+}
+
+STATIC int
xfs_ioc_getbmap(
struct xfs_inode *ip,
int ioflags,
unsigned int cmd,
void __user *arg)
{
- struct getbmap bm;
- int iflags;
+ struct getbmapx bmx;
int error;
- if (copy_from_user(&bm, arg, sizeof(bm)))
+ if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
return -XFS_ERROR(EFAULT);
- if (bm.bmv_count < 2)
+ if (bmx.bmv_count < 2)
return -XFS_ERROR(EINVAL);
- iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
+ bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
if (ioflags & IO_INVIS)
- iflags |= BMV_IF_NO_DMAPI_READ;
+ bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
- error = xfs_getbmap(ip, &bm, (struct getbmap __user *)arg+1, iflags);
+ error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
+ (struct getbmap *)arg+1);
if (error)
return -error;
- if (copy_to_user(arg, &bm, sizeof(bm)))
+ /* copy back header - only size of getbmap */
+ if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
return -XFS_ERROR(EFAULT);
return 0;
}
STATIC int
+xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
+{
+ struct getbmapx __user *base = *ap;
+
+ if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
+ return XFS_ERROR(EFAULT);
+
+ *ap += sizeof(struct getbmapx);
+ return 0;
+}
+
+STATIC int
xfs_ioc_getbmapx(
struct xfs_inode *ip,
void __user *arg)
{
struct getbmapx bmx;
- struct getbmap bm;
- int iflags;
int error;
if (copy_from_user(&bmx, arg, sizeof(bmx)))
@@ -1301,46 +1315,46 @@ xfs_ioc_getbmapx(
if (bmx.bmv_count < 2)
return -XFS_ERROR(EINVAL);
- /*
- * Map input getbmapx structure to a getbmap
- * structure for xfs_getbmap.
- */
- GETBMAP_CONVERT(bmx, bm);
-
- iflags = bmx.bmv_iflags;
-
- if (iflags & (~BMV_IF_VALID))
+ if (bmx.bmv_iflags & (~BMV_IF_VALID))
return -XFS_ERROR(EINVAL);
- iflags |= BMV_IF_EXTENDED;
-
- error = xfs_getbmap(ip, &bm, (struct getbmapx __user *)arg+1, iflags);
+ error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
+ (struct getbmapx *)arg+1);
if (error)
return -error;
- GETBMAP_CONVERT(bm, bmx);
-
- if (copy_to_user(arg, &bmx, sizeof(bmx)))
+ /* copy back header */
+ if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
return -XFS_ERROR(EFAULT);
return 0;
}
-int
-xfs_ioctl(
- xfs_inode_t *ip,
+/*
+ * Note: some of the ioctl's return positive numbers as a
+ * byte count indicating success, such as readlink_by_handle.
+ * So we don't "sign flip" like most other routines. This means
+ * true errors need to be returned as a negative value.
+ */
+long
+xfs_file_ioctl(
struct file *filp,
- int ioflags,
unsigned int cmd,
- void __user *arg)
+ unsigned long p)
{
struct inode *inode = filp->f_path.dentry->d_inode;
- xfs_mount_t *mp = ip->i_mount;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ void __user *arg = (void __user *)p;
+ int ioflags = 0;
int error;
- xfs_itrace_entry(XFS_I(inode));
- switch (cmd) {
+ if (filp->f_mode & FMODE_NOCMTIME)
+ ioflags |= IO_INVIS;
+ xfs_itrace_entry(ip);
+
+ switch (cmd) {
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
case XFS_IOC_RESVSP:
@@ -1348,17 +1362,13 @@ xfs_ioctl(
case XFS_IOC_ALLOCSP64:
case XFS_IOC_FREESP64:
case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64:
- /*
- * Only allow the sys admin to reserve space unless
- * unwritten extents are enabled.
- */
- if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
+ case XFS_IOC_UNRESVSP64: {
+ xfs_flock64_t bf;
+ if (copy_from_user(&bf, arg, sizeof(bf)))
+ return -XFS_ERROR(EFAULT);
+ return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+ }
case XFS_IOC_DIOINFO: {
struct dioattr da;
xfs_buftarg_t *target =
@@ -1418,18 +1428,30 @@ xfs_ioctl(
case XFS_IOC_FD_TO_HANDLE:
case XFS_IOC_PATH_TO_HANDLE:
- case XFS_IOC_PATH_TO_FSHANDLE:
- return xfs_find_handle(cmd, arg);
+ case XFS_IOC_PATH_TO_FSHANDLE: {
+ xfs_fsop_handlereq_t hreq;
- case XFS_IOC_OPEN_BY_HANDLE:
- return xfs_open_by_handle(mp, arg, filp, inode);
+ if (copy_from_user(&hreq, arg, sizeof(hreq)))
+ return -XFS_ERROR(EFAULT);
+ return xfs_find_handle(cmd, &hreq);
+ }
+ case XFS_IOC_OPEN_BY_HANDLE: {
+ xfs_fsop_handlereq_t hreq;
+ if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+ return xfs_open_by_handle(mp, &hreq, filp, inode);
+ }
case XFS_IOC_FSSETDM_BY_HANDLE:
return xfs_fssetdm_by_handle(mp, arg, inode);
- case XFS_IOC_READLINK_BY_HANDLE:
- return xfs_readlink_by_handle(mp, arg, inode);
+ case XFS_IOC_READLINK_BY_HANDLE: {
+ xfs_fsop_handlereq_t hreq;
+ if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+ return xfs_readlink_by_handle(mp, &hreq, inode);
+ }
case XFS_IOC_ATTRLIST_BY_HANDLE:
return xfs_attrlist_by_handle(mp, arg, inode);
@@ -1437,7 +1459,11 @@ xfs_ioctl(
return xfs_attrmulti_by_handle(mp, arg, filp, inode);
case XFS_IOC_SWAPEXT: {
- error = xfs_swapext((struct xfs_swapext __user *)arg);
+ struct xfs_swapext sxp;
+
+ if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
+ return -XFS_ERROR(EFAULT);
+ error = xfs_swapext(&sxp);
return -error;
}
@@ -1493,9 +1519,6 @@ xfs_ioctl(
case XFS_IOC_FSGROWFSDATA: {
xfs_growfs_data_t in;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (copy_from_user(&in, arg, sizeof(in)))
return -XFS_ERROR(EFAULT);
@@ -1506,9 +1529,6 @@ xfs_ioctl(
case XFS_IOC_FSGROWFSLOG: {
xfs_growfs_log_t in;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (copy_from_user(&in, arg, sizeof(in)))
return -XFS_ERROR(EFAULT);
@@ -1519,9 +1539,6 @@ xfs_ioctl(
case XFS_IOC_FSGROWFSRT: {
xfs_growfs_rt_t in;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (copy_from_user(&in, arg, sizeof(in)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
new file mode 100644
index 000000000000..8c16bf2d7e03
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_IOCTL_H__
+#define __XFS_IOCTL_H__
+
+extern int
+xfs_ioc_space(
+ struct xfs_inode *ip,
+ struct inode *inode,
+ struct file *filp,
+ int ioflags,
+ unsigned int cmd,
+ xfs_flock64_t *bf);
+
+extern int
+xfs_find_handle(
+ unsigned int cmd,
+ xfs_fsop_handlereq_t *hreq);
+
+extern int
+xfs_open_by_handle(
+ xfs_mount_t *mp,
+ xfs_fsop_handlereq_t *hreq,
+ struct file *parfilp,
+ struct inode *parinode);
+
+extern int
+xfs_readlink_by_handle(
+ xfs_mount_t *mp,
+ xfs_fsop_handlereq_t *hreq,
+ struct inode *parinode);
+
+extern int
+xfs_attrmulti_attr_get(
+ struct inode *inode,
+ char *name,
+ char __user *ubuf,
+ __uint32_t *len,
+ __uint32_t flags);
+
+extern int
+ xfs_attrmulti_attr_set(
+ struct inode *inode,
+ char *name,
+ const char __user *ubuf,
+ __uint32_t len,
+ __uint32_t flags);
+
+extern int
+xfs_attrmulti_attr_remove(
+ struct inode *inode,
+ char *name,
+ __uint32_t flags);
+
+extern long
+xfs_file_ioctl(
+ struct file *filp,
+ unsigned int cmd,
+ unsigned long p);
+
+extern long
+xfs_file_compat_ioctl(
+ struct file *file,
+ unsigned int cmd,
+ unsigned long arg);
+
+#endif
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index a4b254eb43b2..0504cece9f66 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -16,11 +16,7 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/compat.h>
-#include <linux/init.h>
#include <linux/ioctl.h>
-#include <linux/syscalls.h>
-#include <linux/types.h>
-#include <linux/fs.h>
#include <asm/uaccess.h>
#include "xfs.h"
#include "xfs_fs.h"
@@ -36,7 +32,6 @@
#include "xfs_bmap_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_dir2_sf.h"
-#include "xfs_vfs.h"
#include "xfs_vnode.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
@@ -44,221 +39,219 @@
#include "xfs_error.h"
#include "xfs_dfrag.h"
#include "xfs_vnodeops.h"
+#include "xfs_fsops.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_attr.h"
+#include "xfs_ioctl.h"
#include "xfs_ioctl32.h"
#define _NATIVE_IOC(cmd, type) \
_IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
-#define BROKEN_X86_ALIGNMENT
-#define _PACKED __attribute__((packed))
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct xfs_flock64_32 {
- __s16 l_type;
- __s16 l_whence;
- __s64 l_start __attribute__((packed));
- /* len == 0 means until end of file */
- __s64 l_len __attribute__((packed));
- __s32 l_sysid;
- __u32 l_pid;
- __s32 l_pad[4]; /* reserve area */
-} xfs_flock64_32_t;
-
-#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32)
-#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32)
-#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32)
-#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32)
-#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32)
-#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32)
-#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32)
-#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32)
-
-/* just account for different alignment */
-STATIC unsigned long
-xfs_ioctl32_flock(
- unsigned long arg)
+#ifdef BROKEN_X86_ALIGNMENT
+STATIC int
+xfs_compat_flock64_copyin(
+ xfs_flock64_t *bf,
+ compat_xfs_flock64_t __user *arg32)
{
- xfs_flock64_32_t __user *p32 = (void __user *)arg;
- xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p));
-
- if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
- copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) ||
- copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) ||
- copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) ||
- copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) ||
- copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) ||
- copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
- return -EFAULT;
-
- return (unsigned long)p;
+ if (get_user(bf->l_type, &arg32->l_type) ||
+ get_user(bf->l_whence, &arg32->l_whence) ||
+ get_user(bf->l_start, &arg32->l_start) ||
+ get_user(bf->l_len, &arg32->l_len) ||
+ get_user(bf->l_sysid, &arg32->l_sysid) ||
+ get_user(bf->l_pid, &arg32->l_pid) ||
+ copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
}
-typedef struct compat_xfs_fsop_geom_v1 {
- __u32 blocksize; /* filesystem (data) block size */
- __u32 rtextsize; /* realtime extent size */
- __u32 agblocks; /* fsblocks in an AG */
- __u32 agcount; /* number of allocation groups */
- __u32 logblocks; /* fsblocks in the log */
- __u32 sectsize; /* (data) sector size, bytes */
- __u32 inodesize; /* inode size in bytes */
- __u32 imaxpct; /* max allowed inode space(%) */
- __u64 datablocks; /* fsblocks in data subvolume */
- __u64 rtblocks; /* fsblocks in realtime subvol */
- __u64 rtextents; /* rt extents in realtime subvol*/
- __u64 logstart; /* starting fsblock of the log */
- unsigned char uuid[16]; /* unique id of the filesystem */
- __u32 sunit; /* stripe unit, fsblocks */
- __u32 swidth; /* stripe width, fsblocks */
- __s32 version; /* structure version */
- __u32 flags; /* superblock version flags */
- __u32 logsectsize; /* log sector size, bytes */
- __u32 rtsectsize; /* realtime sector size, bytes */
- __u32 dirblocksize; /* directory block size, bytes */
-} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
-
-#define XFS_IOC_FSGEOMETRY_V1_32 \
- _IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
-
-STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
+STATIC int
+xfs_compat_ioc_fsgeometry_v1(
+ struct xfs_mount *mp,
+ compat_xfs_fsop_geom_v1_t __user *arg32)
{
- compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg;
- xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p));
+ xfs_fsop_geom_t fsgeo;
+ int error;
- if (copy_in_user(p, p32, sizeof(*p32)))
- return -EFAULT;
- return (unsigned long)p;
+ error = xfs_fs_geometry(mp, &fsgeo, 3);
+ if (error)
+ return -error;
+ /* The 32-bit variant simply has some padding at the end */
+ if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
}
-typedef struct compat_xfs_inogrp {
- __u64 xi_startino; /* starting inode number */
- __s32 xi_alloccount; /* # bits set in allocmask */
- __u64 xi_allocmask; /* mask of allocated inodes */
-} __attribute__((packed)) compat_xfs_inogrp_t;
-
-STATIC int xfs_inumbers_fmt_compat(
- void __user *ubuffer,
- const xfs_inogrp_t *buffer,
- long count,
- long *written)
+STATIC int
+xfs_compat_growfs_data_copyin(
+ struct xfs_growfs_data *in,
+ compat_xfs_growfs_data_t __user *arg32)
{
- compat_xfs_inogrp_t __user *p32 = ubuffer;
- long i;
+ if (get_user(in->newblocks, &arg32->newblocks) ||
+ get_user(in->imaxpct, &arg32->imaxpct))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+}
+
+STATIC int
+xfs_compat_growfs_rt_copyin(
+ struct xfs_growfs_rt *in,
+ compat_xfs_growfs_rt_t __user *arg32)
+{
+ if (get_user(in->newblocks, &arg32->newblocks) ||
+ get_user(in->extsize, &arg32->extsize))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt_compat(
+ void __user *ubuffer,
+ const xfs_inogrp_t *buffer,
+ long count,
+ long *written)
+{
+ compat_xfs_inogrp_t __user *p32 = ubuffer;
+ long i;
for (i = 0; i < count; i++) {
if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
- return -EFAULT;
+ return -XFS_ERROR(EFAULT);
}
*written = count * sizeof(*p32);
return 0;
}
#else
-
#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
-#define _PACKED
+#endif /* BROKEN_X86_ALIGNMENT */
-#endif
+STATIC int
+xfs_ioctl32_bstime_copyin(
+ xfs_bstime_t *bstime,
+ compat_xfs_bstime_t __user *bstime32)
+{
+ compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */
-/* XFS_IOC_FSBULKSTAT and friends */
+ if (get_user(sec32, &bstime32->tv_sec) ||
+ get_user(bstime->tv_nsec, &bstime32->tv_nsec))
+ return -XFS_ERROR(EFAULT);
+ bstime->tv_sec = sec32;
+ return 0;
+}
+
+/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
+STATIC int
+xfs_ioctl32_bstat_copyin(
+ xfs_bstat_t *bstat,
+ compat_xfs_bstat_t __user *bstat32)
+{
+ if (get_user(bstat->bs_ino, &bstat32->bs_ino) ||
+ get_user(bstat->bs_mode, &bstat32->bs_mode) ||
+ get_user(bstat->bs_nlink, &bstat32->bs_nlink) ||
+ get_user(bstat->bs_uid, &bstat32->bs_uid) ||
+ get_user(bstat->bs_gid, &bstat32->bs_gid) ||
+ get_user(bstat->bs_rdev, &bstat32->bs_rdev) ||
+ get_user(bstat->bs_blksize, &bstat32->bs_blksize) ||
+ get_user(bstat->bs_size, &bstat32->bs_size) ||
+ xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
+ xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
+ xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
+ get_user(bstat->bs_blocks, &bstat32->bs_size) ||
+ get_user(bstat->bs_xflags, &bstat32->bs_size) ||
+ get_user(bstat->bs_extsize, &bstat32->bs_extsize) ||
+ get_user(bstat->bs_extents, &bstat32->bs_extents) ||
+ get_user(bstat->bs_gen, &bstat32->bs_gen) ||
+ get_user(bstat->bs_projid, &bstat32->bs_projid) ||
+ get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
+ get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
+ get_user(bstat->bs_aextents, &bstat32->bs_aextents))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+}
-typedef struct compat_xfs_bstime {
- __s32 tv_sec; /* seconds */
- __s32 tv_nsec; /* and nanoseconds */
-} compat_xfs_bstime_t;
+/* XFS_IOC_FSBULKSTAT and friends */
-STATIC int xfs_bstime_store_compat(
- compat_xfs_bstime_t __user *p32,
- const xfs_bstime_t *p)
+STATIC int
+xfs_bstime_store_compat(
+ compat_xfs_bstime_t __user *p32,
+ const xfs_bstime_t *p)
{
- __s32 sec32;
+ __s32 sec32;
sec32 = p->tv_sec;
if (put_user(sec32, &p32->tv_sec) ||
put_user(p->tv_nsec, &p32->tv_nsec))
- return -EFAULT;
+ return -XFS_ERROR(EFAULT);
return 0;
}
-typedef struct compat_xfs_bstat {
- __u64 bs_ino; /* inode number */
- __u16 bs_mode; /* type and mode */
- __u16 bs_nlink; /* number of links */
- __u32 bs_uid; /* user id */
- __u32 bs_gid; /* group id */
- __u32 bs_rdev; /* device value */
- __s32 bs_blksize; /* block size */
- __s64 bs_size; /* file size */
- compat_xfs_bstime_t bs_atime; /* access time */
- compat_xfs_bstime_t bs_mtime; /* modify time */
- compat_xfs_bstime_t bs_ctime; /* inode change time */
- int64_t bs_blocks; /* number of blocks */
- __u32 bs_xflags; /* extended flags */
- __s32 bs_extsize; /* extent size */
- __s32 bs_extents; /* number of extents */
- __u32 bs_gen; /* generation count */
- __u16 bs_projid; /* project id */
- unsigned char bs_pad[14]; /* pad space, unused */
- __u32 bs_dmevmask; /* DMIG event mask */
- __u16 bs_dmstate; /* DMIG state info */
- __u16 bs_aextents; /* attribute number of extents */
-} _PACKED compat_xfs_bstat_t;
-
-STATIC int xfs_bulkstat_one_fmt_compat(
+/* Return 0 on success or positive error (to xfs_bulkstat()) */
+STATIC int
+xfs_bulkstat_one_fmt_compat(
void __user *ubuffer,
+ int ubsize,
+ int *ubused,
const xfs_bstat_t *buffer)
{
- compat_xfs_bstat_t __user *p32 = ubuffer;
-
- if (put_user(buffer->bs_ino, &p32->bs_ino) ||
- put_user(buffer->bs_mode, &p32->bs_mode) ||
- put_user(buffer->bs_nlink, &p32->bs_nlink) ||
- put_user(buffer->bs_uid, &p32->bs_uid) ||
- put_user(buffer->bs_gid, &p32->bs_gid) ||
- put_user(buffer->bs_rdev, &p32->bs_rdev) ||
- put_user(buffer->bs_blksize, &p32->bs_blksize) ||
- put_user(buffer->bs_size, &p32->bs_size) ||
+ compat_xfs_bstat_t __user *p32 = ubuffer;
+
+ if (ubsize < sizeof(*p32))
+ return XFS_ERROR(ENOMEM);
+
+ if (put_user(buffer->bs_ino, &p32->bs_ino) ||
+ put_user(buffer->bs_mode, &p32->bs_mode) ||
+ put_user(buffer->bs_nlink, &p32->bs_nlink) ||
+ put_user(buffer->bs_uid, &p32->bs_uid) ||
+ put_user(buffer->bs_gid, &p32->bs_gid) ||
+ put_user(buffer->bs_rdev, &p32->bs_rdev) ||
+ put_user(buffer->bs_blksize, &p32->bs_blksize) ||
+ put_user(buffer->bs_size, &p32->bs_size) ||
xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
- put_user(buffer->bs_blocks, &p32->bs_blocks) ||
- put_user(buffer->bs_xflags, &p32->bs_xflags) ||
- put_user(buffer->bs_extsize, &p32->bs_extsize) ||
- put_user(buffer->bs_extents, &p32->bs_extents) ||
- put_user(buffer->bs_gen, &p32->bs_gen) ||
- put_user(buffer->bs_projid, &p32->bs_projid) ||
- put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
- put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
+ put_user(buffer->bs_blocks, &p32->bs_blocks) ||
+ put_user(buffer->bs_xflags, &p32->bs_xflags) ||
+ put_user(buffer->bs_extsize, &p32->bs_extsize) ||
+ put_user(buffer->bs_extents, &p32->bs_extents) ||
+ put_user(buffer->bs_gen, &p32->bs_gen) ||
+ put_user(buffer->bs_projid, &p32->bs_projid) ||
+ put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
+ put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
put_user(buffer->bs_aextents, &p32->bs_aextents))
- return -EFAULT;
- return sizeof(*p32);
+ return XFS_ERROR(EFAULT);
+ if (ubused)
+ *ubused = sizeof(*p32);
+ return 0;
}
-
-
-typedef struct compat_xfs_fsop_bulkreq {
- compat_uptr_t lastip; /* last inode # pointer */
- __s32 icount; /* count of entries in buffer */
- compat_uptr_t ubuffer; /* user buffer for inode desc. */
- compat_uptr_t ocount; /* output count pointer */
-} compat_xfs_fsop_bulkreq_t;
-
-#define XFS_IOC_FSBULKSTAT_32 \
- _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
- _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSINUMBERS_32 \
- _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+STATIC int
+xfs_bulkstat_one_compat(
+ xfs_mount_t *mp, /* mount point for filesystem */
+ xfs_ino_t ino, /* inode number to get data for */
+ void __user *buffer, /* buffer to place output in */
+ int ubsize, /* size of buffer */
+ void *private_data, /* my private data */
+ xfs_daddr_t bno, /* starting bno of inode cluster */
+ int *ubused, /* bytes used by me */
+ void *dibuff, /* on-disk inode buffer */
+ int *stat) /* BULKSTAT_RV_... */
+{
+ return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
+ xfs_bulkstat_one_fmt_compat, bno,
+ ubused, dibuff, stat);
+}
/* copied from xfs_ioctl.c */
STATIC int
-xfs_ioc_bulkstat_compat(
- xfs_mount_t *mp,
- unsigned int cmd,
- void __user *arg)
+xfs_compat_ioc_bulkstat(
+ xfs_mount_t *mp,
+ unsigned int cmd,
+ compat_xfs_fsop_bulkreq_t __user *p32)
{
- compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
u32 addr;
xfs_fsop_bulkreq_t bulkreq;
int count; /* # of records returned */
@@ -270,20 +263,20 @@ xfs_ioc_bulkstat_compat(
/* should be called again (unused here, but used in dmapi) */
if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ return -XFS_ERROR(EPERM);
if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
if (get_user(addr, &p32->lastip))
- return -EFAULT;
+ return -XFS_ERROR(EFAULT);
bulkreq.lastip = compat_ptr(addr);
if (get_user(bulkreq.icount, &p32->icount) ||
get_user(addr, &p32->ubuffer))
- return -EFAULT;
+ return -XFS_ERROR(EFAULT);
bulkreq.ubuffer = compat_ptr(addr);
if (get_user(addr, &p32->ocount))
- return -EFAULT;
+ return -XFS_ERROR(EFAULT);
bulkreq.ocount = compat_ptr(addr);
if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
@@ -295,17 +288,22 @@ xfs_ioc_bulkstat_compat(
if (bulkreq.ubuffer == NULL)
return -XFS_ERROR(EINVAL);
- if (cmd == XFS_IOC_FSINUMBERS)
+ if (cmd == XFS_IOC_FSINUMBERS_32) {
error = xfs_inumbers(mp, &inlast, &count,
bulkreq.ubuffer, xfs_inumbers_fmt_compat);
- else {
- /* declare a var to get a warning in case the type changes */
- bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat;
+ } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
+ int res;
+
+ error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
+ sizeof(compat_xfs_bstat_t),
+ NULL, 0, NULL, NULL, &res);
+ } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
error = xfs_bulkstat(mp, &inlast, &count,
- xfs_bulkstat_one, formatter,
+ xfs_bulkstat_one_compat, NULL,
sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
BULKSTAT_FG_QUICK, &done);
- }
+ } else
+ error = XFS_ERROR(EINVAL);
if (error)
return -error;
@@ -321,63 +319,306 @@ xfs_ioc_bulkstat_compat(
return 0;
}
+STATIC int
+xfs_compat_handlereq_copyin(
+ xfs_fsop_handlereq_t *hreq,
+ compat_xfs_fsop_handlereq_t __user *arg32)
+{
+ compat_xfs_fsop_handlereq_t hreq32;
+
+ if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+
+ hreq->fd = hreq32.fd;
+ hreq->path = compat_ptr(hreq32.path);
+ hreq->oflags = hreq32.oflags;
+ hreq->ihandle = compat_ptr(hreq32.ihandle);
+ hreq->ihandlen = hreq32.ihandlen;
+ hreq->ohandle = compat_ptr(hreq32.ohandle);
+ hreq->ohandlen = compat_ptr(hreq32.ohandlen);
+ return 0;
+}
-typedef struct compat_xfs_fsop_handlereq {
- __u32 fd; /* fd for FD_TO_HANDLE */
- compat_uptr_t path; /* user pathname */
- __u32 oflags; /* open flags */
- compat_uptr_t ihandle; /* user supplied handle */
- __u32 ihandlen; /* user supplied length */
- compat_uptr_t ohandle; /* user buffer for handle */
- compat_uptr_t ohandlen; /* user buffer length */
-} compat_xfs_fsop_handlereq_t;
-
-#define XFS_IOC_PATH_TO_FSHANDLE_32 \
- _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_PATH_TO_HANDLE_32 \
- _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_FD_TO_HANDLE_32 \
- _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_OPEN_BY_HANDLE_32 \
- _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_READLINK_BY_HANDLE_32 \
- _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
-
-STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
+/*
+ * Convert userspace handle data into inode.
+ *
+ * We use the fact that all the fsop_handlereq ioctl calls have a data
+ * structure argument whose first component is always a xfs_fsop_handlereq_t,
+ * so we can pass that sub structure into this handy, shared routine.
+ *
+ * If no error, caller must always iput the returned inode.
+ */
+STATIC int
+xfs_vget_fsop_handlereq_compat(
+ xfs_mount_t *mp,
+ struct inode *parinode, /* parent inode pointer */
+ compat_xfs_fsop_handlereq_t *hreq,
+ struct inode **inode)
{
- compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg;
- xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p));
- u32 addr;
-
- if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) ||
- get_user(addr, &p32->path) ||
- put_user(compat_ptr(addr), &p->path) ||
- copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) ||
- get_user(addr, &p32->ihandle) ||
- put_user(compat_ptr(addr), &p->ihandle) ||
- copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) ||
- get_user(addr, &p32->ohandle) ||
- put_user(compat_ptr(addr), &p->ohandle) ||
- get_user(addr, &p32->ohandlen) ||
- put_user(compat_ptr(addr), &p->ohandlen))
- return -EFAULT;
-
- return (unsigned long)p;
+ void __user *hanp;
+ size_t hlen;
+ xfs_fid_t *xfid;
+ xfs_handle_t *handlep;
+ xfs_handle_t handle;
+ xfs_inode_t *ip;
+ xfs_ino_t ino;
+ __u32 igen;
+ int error;
+
+ /*
+ * Only allow handle opens under a directory.
+ */
+ if (!S_ISDIR(parinode->i_mode))
+ return XFS_ERROR(ENOTDIR);
+
+ hanp = compat_ptr(hreq->ihandle);
+ hlen = hreq->ihandlen;
+ handlep = &handle;
+
+ if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
+ return XFS_ERROR(EINVAL);
+ if (copy_from_user(handlep, hanp, hlen))
+ return XFS_ERROR(EFAULT);
+ if (hlen < sizeof(*handlep))
+ memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
+ if (hlen > sizeof(handlep->ha_fsid)) {
+ if (handlep->ha_fid.fid_len !=
+ (hlen - sizeof(handlep->ha_fsid) -
+ sizeof(handlep->ha_fid.fid_len)) ||
+ handlep->ha_fid.fid_pad)
+ return XFS_ERROR(EINVAL);
+ }
+
+ /*
+ * Crack the handle, obtain the inode # & generation #
+ */
+ xfid = (struct xfs_fid *)&handlep->ha_fid;
+ if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) {
+ ino = xfid->fid_ino;
+ igen = xfid->fid_gen;
+ } else {
+ return XFS_ERROR(EINVAL);
+ }
+
+ /*
+ * Get the XFS inode, building a Linux inode to go with it.
+ */
+ error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
+ if (error)
+ return error;
+ if (ip == NULL)
+ return XFS_ERROR(EIO);
+ if (ip->i_d.di_gen != igen) {
+ xfs_iput_new(ip, XFS_ILOCK_SHARED);
+ return XFS_ERROR(ENOENT);
+ }
+
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ *inode = VFS_I(ip);
+ return 0;
}
+STATIC int
+xfs_compat_attrlist_by_handle(
+ xfs_mount_t *mp,
+ void __user *arg,
+ struct inode *parinode)
+{
+ int error;
+ attrlist_cursor_kern_t *cursor;
+ compat_xfs_fsop_attrlist_handlereq_t al_hreq;
+ struct inode *inode;
+ char *kbuf;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&al_hreq, arg,
+ sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+ if (al_hreq.buflen > XATTR_LIST_MAX)
+ return -XFS_ERROR(EINVAL);
+
+ /*
+ * Reject flags, only allow namespaces.
+ */
+ if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+ return -XFS_ERROR(EINVAL);
+
+ error = xfs_vget_fsop_handlereq_compat(mp, parinode, &al_hreq.hreq,
+ &inode);
+ if (error)
+ goto out;
+
+ kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+ if (!kbuf)
+ goto out_vn_rele;
+
+ cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+ error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen,
+ al_hreq.flags, cursor);
+ if (error)
+ goto out_kfree;
+
+ if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
+ error = -EFAULT;
+
+ out_kfree:
+ kfree(kbuf);
+ out_vn_rele:
+ iput(inode);
+ out:
+ return -error;
+}
-STATIC long
-xfs_compat_ioctl(
- int mode,
- struct file *file,
- unsigned cmd,
- unsigned long arg)
+STATIC int
+xfs_compat_attrmulti_by_handle(
+ xfs_mount_t *mp,
+ void __user *arg,
+ struct inode *parinode)
+{
+ int error;
+ compat_xfs_attr_multiop_t *ops;
+ compat_xfs_fsop_attrmulti_handlereq_t am_hreq;
+ struct inode *inode;
+ unsigned int i, size;
+ char *attr_name;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&am_hreq, arg,
+ sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_vget_fsop_handlereq_compat(mp, parinode, &am_hreq.hreq,
+ &inode);
+ if (error)
+ goto out;
+
+ error = E2BIG;
+ size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
+ if (!size || size > 16 * PAGE_SIZE)
+ goto out_vn_rele;
+
+ error = ENOMEM;
+ ops = kmalloc(size, GFP_KERNEL);
+ if (!ops)
+ goto out_vn_rele;
+
+ error = EFAULT;
+ if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
+ goto out_kfree_ops;
+
+ attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+ if (!attr_name)
+ goto out_kfree_ops;
+
+
+ error = 0;
+ for (i = 0; i < am_hreq.opcount; i++) {
+ ops[i].am_error = strncpy_from_user(attr_name,
+ compat_ptr(ops[i].am_attrname),
+ MAXNAMELEN);
+ if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+ error = -ERANGE;
+ if (ops[i].am_error < 0)
+ break;
+
+ switch (ops[i].am_opcode) {
+ case ATTR_OP_GET:
+ ops[i].am_error = xfs_attrmulti_attr_get(inode,
+ attr_name,
+ compat_ptr(ops[i].am_attrvalue),
+ &ops[i].am_length, ops[i].am_flags);
+ break;
+ case ATTR_OP_SET:
+ ops[i].am_error = xfs_attrmulti_attr_set(inode,
+ attr_name,
+ compat_ptr(ops[i].am_attrvalue),
+ ops[i].am_length, ops[i].am_flags);
+ break;
+ case ATTR_OP_REMOVE:
+ ops[i].am_error = xfs_attrmulti_attr_remove(inode,
+ attr_name, ops[i].am_flags);
+ break;
+ default:
+ ops[i].am_error = EINVAL;
+ }
+ }
+
+ if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
+ error = XFS_ERROR(EFAULT);
+
+ kfree(attr_name);
+ out_kfree_ops:
+ kfree(ops);
+ out_vn_rele:
+ iput(inode);
+ out:
+ return -error;
+}
+
+STATIC int
+xfs_compat_fssetdm_by_handle(
+ xfs_mount_t *mp,
+ void __user *arg,
+ struct inode *parinode)
+{
+ int error;
+ struct fsdmidata fsd;
+ compat_xfs_fsop_setdm_handlereq_t dmhreq;
+ struct inode *inode;
+
+ if (!capable(CAP_MKNOD))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&dmhreq, arg,
+ sizeof(compat_xfs_fsop_setdm_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_vget_fsop_handlereq_compat(mp, parinode, &dmhreq.hreq,
+ &inode);
+ if (error)
+ return -error;
+
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+ error = -XFS_ERROR(EPERM);
+ goto out;
+ }
+
+ if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
+ error = -XFS_ERROR(EFAULT);
+ goto out;
+ }
+
+ error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask,
+ fsd.fsd_dmstate);
+
+out:
+ iput(inode);
+ return error;
+}
+
+long
+xfs_file_compat_ioctl(
+ struct file *filp,
+ unsigned cmd,
+ unsigned long p)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- int error;
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ void __user *arg = (void __user *)p;
+ int ioflags = 0;
+ int error;
+
+ if (filp->f_mode & FMODE_NOCMTIME)
+ ioflags |= IO_INVIS;
+
+ xfs_itrace_entry(ip);
switch (cmd) {
+ /* No size or alignment issues on any arch */
case XFS_IOC_DIOINFO:
case XFS_IOC_FSGEOMETRY:
case XFS_IOC_FSGETXATTR:
@@ -387,48 +628,18 @@ xfs_compat_ioctl(
case XFS_IOC_GETBMAP:
case XFS_IOC_GETBMAPA:
case XFS_IOC_GETBMAPX:
-/* not handled
- case XFS_IOC_FSSETDM_BY_HANDLE:
- case XFS_IOC_ATTRLIST_BY_HANDLE:
- case XFS_IOC_ATTRMULTI_BY_HANDLE:
-*/
case XFS_IOC_FSCOUNTS:
case XFS_IOC_SET_RESBLKS:
case XFS_IOC_GET_RESBLKS:
- case XFS_IOC_FSGROWFSDATA:
case XFS_IOC_FSGROWFSLOG:
- case XFS_IOC_FSGROWFSRT:
case XFS_IOC_FREEZE:
case XFS_IOC_THAW:
case XFS_IOC_GOINGDOWN:
case XFS_IOC_ERROR_INJECTION:
case XFS_IOC_ERROR_CLEARALL:
- break;
-
- case XFS_IOC32_GETXFLAGS:
- case XFS_IOC32_SETXFLAGS:
- case XFS_IOC32_GETVERSION:
- cmd = _NATIVE_IOC(cmd, long);
- break;
-#ifdef BROKEN_X86_ALIGNMENT
- /* xfs_flock_t has wrong u32 vs u64 alignment */
- case XFS_IOC_ALLOCSP_32:
- case XFS_IOC_FREESP_32:
- case XFS_IOC_ALLOCSP64_32:
- case XFS_IOC_FREESP64_32:
- case XFS_IOC_RESVSP_32:
- case XFS_IOC_UNRESVSP_32:
- case XFS_IOC_RESVSP64_32:
- case XFS_IOC_UNRESVSP64_32:
- arg = xfs_ioctl32_flock(arg);
- cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
- break;
- case XFS_IOC_FSGEOMETRY_V1_32:
- arg = xfs_ioctl32_geom_v1(arg);
- cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
- break;
-
-#else /* These are handled fine if no alignment issues */
+ return xfs_file_ioctl(filp, cmd, p);
+#ifndef BROKEN_X86_ALIGNMENT
+ /* These are handled fine if no alignment issues */
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
case XFS_IOC_RESVSP:
@@ -438,51 +649,97 @@ xfs_compat_ioctl(
case XFS_IOC_RESVSP64:
case XFS_IOC_UNRESVSP64:
case XFS_IOC_FSGEOMETRY_V1:
- break;
+ case XFS_IOC_FSGROWFSDATA:
+ case XFS_IOC_FSGROWFSRT:
+ return xfs_file_ioctl(filp, cmd, p);
+#else
+ case XFS_IOC_ALLOCSP_32:
+ case XFS_IOC_FREESP_32:
+ case XFS_IOC_ALLOCSP64_32:
+ case XFS_IOC_FREESP64_32:
+ case XFS_IOC_RESVSP_32:
+ case XFS_IOC_UNRESVSP_32:
+ case XFS_IOC_RESVSP64_32:
+ case XFS_IOC_UNRESVSP64_32: {
+ struct xfs_flock64 bf;
- /* xfs_bstat_t still has wrong u32 vs u64 alignment */
- case XFS_IOC_SWAPEXT:
- break;
+ if (xfs_compat_flock64_copyin(&bf, arg))
+ return -XFS_ERROR(EFAULT);
+ cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+ return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+ }
+ case XFS_IOC_FSGEOMETRY_V1_32:
+ return xfs_compat_ioc_fsgeometry_v1(mp, arg);
+ case XFS_IOC_FSGROWFSDATA_32: {
+ struct xfs_growfs_data in;
+
+ if (xfs_compat_growfs_data_copyin(&in, arg))
+ return -XFS_ERROR(EFAULT);
+ error = xfs_growfs_data(mp, &in);
+ return -error;
+ }
+ case XFS_IOC_FSGROWFSRT_32: {
+ struct xfs_growfs_rt in;
+ if (xfs_compat_growfs_rt_copyin(&in, arg))
+ return -XFS_ERROR(EFAULT);
+ error = xfs_growfs_rt(mp, &in);
+ return -error;
+ }
#endif
+ /* long changes size, but xfs only copiese out 32 bits */
+ case XFS_IOC_GETXFLAGS_32:
+ case XFS_IOC_SETXFLAGS_32:
+ case XFS_IOC_GETVERSION_32:
+ cmd = _NATIVE_IOC(cmd, long);
+ return xfs_file_ioctl(filp, cmd, p);
+ case XFS_IOC_SWAPEXT: {
+ struct xfs_swapext sxp;
+ struct compat_xfs_swapext __user *sxu = arg;
+
+ /* Bulk copy in up to the sx_stat field, then copy bstat */
+ if (copy_from_user(&sxp, sxu,
+ offsetof(struct xfs_swapext, sx_stat)) ||
+ xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
+ return -XFS_ERROR(EFAULT);
+ error = xfs_swapext(&sxp);
+ return -error;
+ }
case XFS_IOC_FSBULKSTAT_32:
case XFS_IOC_FSBULKSTAT_SINGLE_32:
case XFS_IOC_FSINUMBERS_32:
- cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
- return xfs_ioc_bulkstat_compat(XFS_I(inode)->i_mount,
- cmd, (void __user*)arg);
+ return xfs_compat_ioc_bulkstat(mp, cmd, arg);
case XFS_IOC_FD_TO_HANDLE_32:
case XFS_IOC_PATH_TO_HANDLE_32:
- case XFS_IOC_PATH_TO_FSHANDLE_32:
- case XFS_IOC_OPEN_BY_HANDLE_32:
- case XFS_IOC_READLINK_BY_HANDLE_32:
- arg = xfs_ioctl32_fshandle(arg);
+ case XFS_IOC_PATH_TO_FSHANDLE_32: {
+ struct xfs_fsop_handlereq hreq;
+
+ if (xfs_compat_handlereq_copyin(&hreq, arg))
+ return -XFS_ERROR(EFAULT);
cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
- break;
- default:
- return -ENOIOCTLCMD;
+ return xfs_find_handle(cmd, &hreq);
}
+ case XFS_IOC_OPEN_BY_HANDLE_32: {
+ struct xfs_fsop_handlereq hreq;
- error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg);
- xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
-
- return error;
-}
-
-long
-xfs_file_compat_ioctl(
- struct file *file,
- unsigned cmd,
- unsigned long arg)
-{
- return xfs_compat_ioctl(0, file, cmd, arg);
-}
+ if (xfs_compat_handlereq_copyin(&hreq, arg))
+ return -XFS_ERROR(EFAULT);
+ return xfs_open_by_handle(mp, &hreq, filp, inode);
+ }
+ case XFS_IOC_READLINK_BY_HANDLE_32: {
+ struct xfs_fsop_handlereq hreq;
-long
-xfs_file_compat_invis_ioctl(
- struct file *file,
- unsigned cmd,
- unsigned long arg)
-{
- return xfs_compat_ioctl(IO_INVIS, file, cmd, arg);
+ if (xfs_compat_handlereq_copyin(&hreq, arg))
+ return -XFS_ERROR(EFAULT);
+ return xfs_readlink_by_handle(mp, &hreq, inode);
+ }
+ case XFS_IOC_ATTRLIST_BY_HANDLE_32:
+ return xfs_compat_attrlist_by_handle(mp, arg, inode);
+ case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
+ return xfs_compat_attrmulti_by_handle(mp, arg, inode);
+ case XFS_IOC_FSSETDM_BY_HANDLE_32:
+ return xfs_compat_fssetdm_by_handle(mp, arg, inode);
+ default:
+ return -XFS_ERROR(ENOIOCTLCMD);
+ }
}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 02de6e62ee37..1024c4f8ba0d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -18,7 +18,217 @@
#ifndef __XFS_IOCTL32_H__
#define __XFS_IOCTL32_H__
-extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long);
-extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long);
+#include <linux/compat.h>
+
+/*
+ * on 32-bit arches, ioctl argument structures may have different sizes
+ * and/or alignment. We define compat structures which match the
+ * 32-bit sizes/alignments here, and their associated ioctl numbers.
+ *
+ * xfs_ioctl32.c contains routines to copy these structures in and out.
+ */
+
+/* stock kernel-level ioctls we support */
+#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS
+#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS
+#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION
+
+/*
+ * On intel, even if sizes match, alignment and/or padding may differ.
+ */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#define BROKEN_X86_ALIGNMENT
+#define __compat_packed __attribute__((packed))
+#else
+#define __compat_packed
+#endif
+
+typedef struct compat_xfs_bstime {
+ compat_time_t tv_sec; /* seconds */
+ __s32 tv_nsec; /* and nanoseconds */
+} compat_xfs_bstime_t;
+
+typedef struct compat_xfs_bstat {
+ __u64 bs_ino; /* inode number */
+ __u16 bs_mode; /* type and mode */
+ __u16 bs_nlink; /* number of links */
+ __u32 bs_uid; /* user id */
+ __u32 bs_gid; /* group id */
+ __u32 bs_rdev; /* device value */
+ __s32 bs_blksize; /* block size */
+ __s64 bs_size; /* file size */
+ compat_xfs_bstime_t bs_atime; /* access time */
+ compat_xfs_bstime_t bs_mtime; /* modify time */
+ compat_xfs_bstime_t bs_ctime; /* inode change time */
+ int64_t bs_blocks; /* number of blocks */
+ __u32 bs_xflags; /* extended flags */
+ __s32 bs_extsize; /* extent size */
+ __s32 bs_extents; /* number of extents */
+ __u32 bs_gen; /* generation count */
+ __u16 bs_projid; /* project id */
+ unsigned char bs_pad[14]; /* pad space, unused */
+ __u32 bs_dmevmask; /* DMIG event mask */
+ __u16 bs_dmstate; /* DMIG state info */
+ __u16 bs_aextents; /* attribute number of extents */
+} __compat_packed compat_xfs_bstat_t;
+
+typedef struct compat_xfs_fsop_bulkreq {
+ compat_uptr_t lastip; /* last inode # pointer */
+ __s32 icount; /* count of entries in buffer */
+ compat_uptr_t ubuffer; /* user buffer for inode desc. */
+ compat_uptr_t ocount; /* output count pointer */
+} compat_xfs_fsop_bulkreq_t;
+
+#define XFS_IOC_FSBULKSTAT_32 \
+ _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+ _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+ _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+
+typedef struct compat_xfs_fsop_handlereq {
+ __u32 fd; /* fd for FD_TO_HANDLE */
+ compat_uptr_t path; /* user pathname */
+ __u32 oflags; /* open flags */
+ compat_uptr_t ihandle; /* user supplied handle */
+ __u32 ihandlen; /* user supplied length */
+ compat_uptr_t ohandle; /* user buffer for handle */
+ compat_uptr_t ohandlen; /* user buffer length */
+} compat_xfs_fsop_handlereq_t;
+
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+ _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+ _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+ _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+ _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+ _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+
+/* The bstat field in the swapext struct needs translation */
+typedef struct compat_xfs_swapext {
+ __int64_t sx_version; /* version */
+ __int64_t sx_fdtarget; /* fd of target file */
+ __int64_t sx_fdtmp; /* fd of tmp file */
+ xfs_off_t sx_offset; /* offset into file */
+ xfs_off_t sx_length; /* leng from offset */
+ char sx_pad[16]; /* pad space, unused */
+ compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */
+} __compat_packed compat_xfs_swapext_t;
+
+#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext)
+
+typedef struct compat_xfs_fsop_attrlist_handlereq {
+ struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+ struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */
+ __u32 flags; /* which namespace to use */
+ __u32 buflen; /* length of buffer supplied */
+ compat_uptr_t buffer; /* returned names */
+} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
+
+/* Note: actually this is read/write */
+#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
+ _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
+
+/* am_opcodes defined in xfs_fs.h */
+typedef struct compat_xfs_attr_multiop {
+ __u32 am_opcode;
+ __s32 am_error;
+ compat_uptr_t am_attrname;
+ compat_uptr_t am_attrvalue;
+ __u32 am_length;
+ __u32 am_flags;
+} compat_xfs_attr_multiop_t;
+
+typedef struct compat_xfs_fsop_attrmulti_handlereq {
+ struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+ __u32 opcount;/* count of following multiop */
+ /* ptr to compat_xfs_attr_multiop */
+ compat_uptr_t ops; /* attr_multi data */
+} compat_xfs_fsop_attrmulti_handlereq_t;
+
+#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
+ _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
+
+typedef struct compat_xfs_fsop_setdm_handlereq {
+ struct compat_xfs_fsop_handlereq hreq; /* handle information */
+ /* ptr to struct fsdmidata */
+ compat_uptr_t data; /* DMAPI data */
+} compat_xfs_fsop_setdm_handlereq_t;
+
+#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
+ _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
+
+#ifdef BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct compat_xfs_flock64 {
+ __s16 l_type;
+ __s16 l_whence;
+ __s64 l_start __attribute__((packed));
+ /* len == 0 means until end of file */
+ __s64 l_len __attribute__((packed));
+ __s32 l_sysid;
+ __u32 l_pid;
+ __s32 l_pad[4]; /* reserve area */
+} compat_xfs_flock64_t;
+
+#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64)
+#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
+
+typedef struct compat_xfs_fsop_geom_v1 {
+ __u32 blocksize; /* filesystem (data) block size */
+ __u32 rtextsize; /* realtime extent size */
+ __u32 agblocks; /* fsblocks in an AG */
+ __u32 agcount; /* number of allocation groups */
+ __u32 logblocks; /* fsblocks in the log */
+ __u32 sectsize; /* (data) sector size, bytes */
+ __u32 inodesize; /* inode size in bytes */
+ __u32 imaxpct; /* max allowed inode space(%) */
+ __u64 datablocks; /* fsblocks in data subvolume */
+ __u64 rtblocks; /* fsblocks in realtime subvol */
+ __u64 rtextents; /* rt extents in realtime subvol*/
+ __u64 logstart; /* starting fsblock of the log */
+ unsigned char uuid[16]; /* unique id of the filesystem */
+ __u32 sunit; /* stripe unit, fsblocks */
+ __u32 swidth; /* stripe width, fsblocks */
+ __s32 version; /* structure version */
+ __u32 flags; /* superblock version flags */
+ __u32 logsectsize; /* log sector size, bytes */
+ __u32 rtsectsize; /* realtime sector size, bytes */
+ __u32 dirblocksize; /* directory block size, bytes */
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+
+#define XFS_IOC_FSGEOMETRY_V1_32 \
+ _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
+
+typedef struct compat_xfs_inogrp {
+ __u64 xi_startino; /* starting inode number */
+ __s32 xi_alloccount; /* # bits set in allocmask */
+ __u64 xi_allocmask; /* mask of allocated inodes */
+} __attribute__((packed)) compat_xfs_inogrp_t;
+
+/* These growfs input structures have padding on the end, so must translate */
+typedef struct compat_xfs_growfs_data {
+ __u64 newblocks; /* new data subvol size, fsblocks */
+ __u32 imaxpct; /* new inode space percentage limit */
+} __attribute__((packed)) compat_xfs_growfs_data_t;
+
+typedef struct compat_xfs_growfs_rt {
+ __u64 newblocks; /* new realtime size, fsblocks */
+ __u32 extsize; /* new realtime extent size, fsblocks */
+} __attribute__((packed)) compat_xfs_growfs_rt_t;
+
+#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
+#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt)
+
+#endif /* BROKEN_X86_ALIGNMENT */
#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 095d271f3434..7aa53fefc67f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -53,6 +53,7 @@
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/falloc.h>
+#include <linux/fiemap.h>
/*
* Bring the atime in the XFS inode uptodate.
@@ -64,14 +65,14 @@ xfs_synchronize_atime(
{
struct inode *inode = VFS_I(ip);
- if (inode) {
+ if (!(inode->i_state & I_CLEAR)) {
ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
}
}
/*
- * If the linux inode exists, mark it dirty.
+ * If the linux inode is valid, mark it dirty.
* Used when commiting a dirty inode into a transaction so that
* the inode will get written back by the linux code
*/
@@ -81,7 +82,7 @@ xfs_mark_inode_dirty_sync(
{
struct inode *inode = VFS_I(ip);
- if (inode)
+ if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
mark_inode_dirty_sync(inode);
}
@@ -128,7 +129,7 @@ xfs_ichgtime(
if (sync_it) {
SYNCHRONIZE();
ip->i_update_core = 1;
- mark_inode_dirty_sync(inode);
+ xfs_mark_inode_dirty_sync(ip);
}
}
@@ -158,8 +159,6 @@ xfs_init_security(
}
error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
- if (!error)
- xfs_iflags_set(ip, XFS_IMODIFIED);
kfree(name);
kfree(value);
@@ -260,7 +259,6 @@ xfs_vn_mknod(
error = _ACL_INHERIT(inode, mode, default_acl);
if (unlikely(error))
goto out_cleanup_inode;
- xfs_iflags_set(ip, XFS_IMODIFIED);
_ACL_FREE(default_acl);
}
@@ -366,21 +364,17 @@ xfs_vn_link(
struct inode *dir,
struct dentry *dentry)
{
- struct inode *inode; /* inode of guy being linked to */
+ struct inode *inode = old_dentry->d_inode;
struct xfs_name name;
int error;
- inode = old_dentry->d_inode;
xfs_dentry_to_name(&name, dentry);
- igrab(inode);
error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
- if (unlikely(error)) {
- iput(inode);
+ if (unlikely(error))
return -error;
- }
- xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
+ atomic_inc(&inode->i_count);
d_instantiate(dentry, inode);
return 0;
}
@@ -601,7 +595,7 @@ xfs_vn_setattr(
struct dentry *dentry,
struct iattr *iattr)
{
- return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
+ return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
}
/*
@@ -642,7 +636,7 @@ xfs_vn_fallocate(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
- 0, NULL, XFS_ATTR_NOLOCK);
+ 0, XFS_ATTR_NOLOCK);
if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode))
new_size = offset + len;
@@ -653,7 +647,7 @@ xfs_vn_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
+ error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -661,6 +655,88 @@ out_error:
return error;
}
+#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+
+/*
+ * Call fiemap helper to fill in user data.
+ * Returns positive errors to xfs_getbmap.
+ */
+STATIC int
+xfs_fiemap_format(
+ void **arg,
+ struct getbmapx *bmv,
+ int *full)
+{
+ int error;
+ struct fiemap_extent_info *fieinfo = *arg;
+ u32 fiemap_flags = 0;
+ u64 logical, physical, length;
+
+ /* Do nothing for a hole */
+ if (bmv->bmv_block == -1LL)
+ return 0;
+
+ logical = BBTOB(bmv->bmv_offset);
+ physical = BBTOB(bmv->bmv_block);
+ length = BBTOB(bmv->bmv_length);
+
+ if (bmv->bmv_oflags & BMV_OF_PREALLOC)
+ fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
+ else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
+ fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
+ physical = 0; /* no block yet */
+ }
+ if (bmv->bmv_oflags & BMV_OF_LAST)
+ fiemap_flags |= FIEMAP_EXTENT_LAST;
+
+ error = fiemap_fill_next_extent(fieinfo, logical, physical,
+ length, fiemap_flags);
+ if (error > 0) {
+ error = 0;
+ *full = 1; /* user array now full */
+ }
+
+ return -error;
+}
+
+STATIC int
+xfs_vn_fiemap(
+ struct inode *inode,
+ struct fiemap_extent_info *fieinfo,
+ u64 start,
+ u64 length)
+{
+ xfs_inode_t *ip = XFS_I(inode);
+ struct getbmapx bm;
+ int error;
+
+ error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
+ if (error)
+ return error;
+
+ /* Set up bmap header for xfs internal routine */
+ bm.bmv_offset = BTOBB(start);
+ /* Special case for whole file */
+ if (length == FIEMAP_MAX_OFFSET)
+ bm.bmv_length = -1LL;
+ else
+ bm.bmv_length = BTOBB(length);
+
+ /* our formatter will tell xfs_getbmap when to stop. */
+ bm.bmv_count = MAXEXTNUM;
+ bm.bmv_iflags = BMV_IF_PREALLOC;
+ if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
+ bm.bmv_iflags |= BMV_IF_ATTRFORK;
+ if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
+ bm.bmv_iflags |= BMV_IF_DELALLOC;
+
+ error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
+ if (error)
+ return -error;
+
+ return 0;
+}
+
static const struct inode_operations xfs_inode_operations = {
.permission = xfs_vn_permission,
.truncate = xfs_vn_truncate,
@@ -671,6 +747,7 @@ static const struct inode_operations xfs_inode_operations = {
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.fallocate = xfs_vn_fallocate,
+ .fiemap = xfs_vn_fiemap,
};
static const struct inode_operations xfs_dir_inode_operations = {
@@ -766,12 +843,20 @@ xfs_diflags_to_iflags(
* When reading existing inodes from disk this is called directly
* from xfs_iget, when creating a new inode it is called from
* xfs_ialloc after setting up the inode.
+ *
+ * We are always called with an uninitialised linux inode here.
+ * We need to initialise the necessary fields and take a reference
+ * on it.
*/
void
xfs_setup_inode(
struct xfs_inode *ip)
{
- struct inode *inode = ip->i_vnode;
+ struct inode *inode = &ip->i_vnode;
+
+ inode->i_ino = ip->i_ino;
+ inode->i_state = I_NEW|I_LOCK;
+ inode_add_to_lists(ip->i_mount->m_super, inode);
inode->i_mode = ip->i_d.di_mode;
inode->i_nlink = ip->i_d.di_nlink;
@@ -799,7 +884,6 @@ xfs_setup_inode(
inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
xfs_diflags_to_iflags(inode, ip);
- xfs_iflags_clear(ip, XFS_IMODIFIED);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 8b1a1e31dc21..ef41c92ce66e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -22,7 +22,6 @@ struct xfs_inode;
extern const struct file_operations xfs_file_operations;
extern const struct file_operations xfs_dir_file_operations;
-extern const struct file_operations xfs_invis_file_operations;
extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index cc0f7b3a9795..507492d6dccd 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -21,18 +21,12 @@
#include <linux/types.h>
/*
- * Some types are conditional depending on the target system.
* XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well
- * as requiring XFS_BIG_BLKNOS to be set.
+ * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
*/
#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
# define XFS_BIG_BLKNOS 1
-# if BITS_PER_LONG == 64
-# define XFS_BIG_INUMS 1
-# else
-# define XFS_BIG_INUMS 0
-# endif
+# define XFS_BIG_INUMS 1
#else
# define XFS_BIG_BLKNOS 0
# define XFS_BIG_INUMS 0
@@ -77,6 +71,7 @@
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/ctype.h>
+#include <linux/writeback.h>
#include <asm/page.h>
#include <asm/div64.h>
@@ -85,7 +80,6 @@
#include <asm/byteorder.h>
#include <asm/unaligned.h>
-#include <xfs_vfs.h>
#include <xfs_cred.h>
#include <xfs_vnode.h>
#include <xfs_stats.h>
@@ -107,7 +101,6 @@
#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
#endif
-#define restricted_chown xfs_params.restrict_chown.val
#define irix_sgid_inherit xfs_params.sgid_inherit.val
#define irix_symlink_mode xfs_params.symlink_mode.val
#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1957e5357d04..7e90daa0d1d1 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -51,7 +51,6 @@
#include "xfs_vnodeops.h"
#include <linux/capability.h>
-#include <linux/mount.h>
#include <linux/writeback.h>
@@ -243,7 +242,7 @@ xfs_read(
if (unlikely(ioflags & IO_ISDIRECT)) {
if (inode->i_mapping->nrpages)
- ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
+ ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
mutex_unlock(&inode->i_mutex);
if (ret) {
@@ -668,15 +667,8 @@ start:
if (new_size > xip->i_size)
xip->i_new_size = new_size;
- /*
- * We're not supposed to change timestamps in readonly-mounted
- * filesystems. Throw it away if anyone asks us.
- */
- if (likely(!(ioflags & IO_INVIS) &&
- !mnt_want_write(file->f_path.mnt))) {
+ if (likely(!(ioflags & IO_INVIS)))
xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- mnt_drop_write(file->f_path.mnt);
- }
/*
* If the offset is beyond the size of the file, we have a couple
@@ -715,7 +707,6 @@ start:
}
}
-retry:
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
@@ -771,6 +762,17 @@ retry:
if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
ret = wait_on_sync_kiocb(iocb);
+ isize = i_size_read(inode);
+ if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+ *offset = isize;
+
+ if (*offset > xip->i_size) {
+ xfs_ilock(xip, XFS_ILOCK_EXCL);
+ if (*offset > xip->i_size)
+ xip->i_size = *offset;
+ xfs_iunlock(xip, XFS_ILOCK_EXCL);
+ }
+
if (ret == -ENOSPC &&
DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
xfs_iunlock(xip, iolock);
@@ -784,20 +786,7 @@ retry:
xfs_ilock(xip, iolock);
if (error)
goto out_unlock_internal;
- pos = xip->i_size;
- ret = 0;
- goto retry;
- }
-
- isize = i_size_read(inode);
- if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
- *offset = isize;
-
- if (*offset > xip->i_size) {
- xfs_ilock(xip, XFS_ILOCK_EXCL);
- if (*offset > xip->i_size)
- xip->i_size = *offset;
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
+ goto start;
}
error = -ret;
@@ -855,13 +844,7 @@ retry:
int
xfs_bdstrat_cb(struct xfs_buf *bp)
{
- xfs_mount_t *mp;
-
- mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
- if (!XFS_FORCED_SHUTDOWN(mp)) {
- xfs_buf_iorequest(bp);
- return 0;
- } else {
+ if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
/*
* Metadata write that didn't get logged but
@@ -874,6 +857,9 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
else
return (xfs_bioerror(bp));
}
+
+ xfs_buf_iorequest(bp);
+ return 0;
}
/*
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 3d5b67c075c7..c3526d445f6a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -53,11 +53,15 @@ xfs_read_xfsstats(
{ "icluster", XFSSTAT_END_INODE_CLUSTER },
{ "vnodes", XFSSTAT_END_VNODE_OPS },
{ "buf", XFSSTAT_END_BUF },
+ { "abtb2", XFSSTAT_END_ABTB_V2 },
+ { "abtc2", XFSSTAT_END_ABTC_V2 },
+ { "bmbt2", XFSSTAT_END_BMBT_V2 },
+ { "ibt2", XFSSTAT_END_IBT_V2 },
};
/* Loop over all stats groups */
for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) {
- len += sprintf(buffer + len, xstats[i].desc);
+ len += sprintf(buffer + len, "%s", xstats[i].desc);
/* inner loop does each group */
while (j < xstats[i].endpoint) {
val = 0;
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index e83820febc9f..736854b1ca1a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -118,6 +118,71 @@ struct xfsstats {
__uint32_t xb_page_retries;
__uint32_t xb_page_found;
__uint32_t xb_get_read;
+/* Version 2 btree counters */
+#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15)
+ __uint32_t xs_abtb_2_lookup;
+ __uint32_t xs_abtb_2_compare;
+ __uint32_t xs_abtb_2_insrec;
+ __uint32_t xs_abtb_2_delrec;
+ __uint32_t xs_abtb_2_newroot;
+ __uint32_t xs_abtb_2_killroot;
+ __uint32_t xs_abtb_2_increment;
+ __uint32_t xs_abtb_2_decrement;
+ __uint32_t xs_abtb_2_lshift;
+ __uint32_t xs_abtb_2_rshift;
+ __uint32_t xs_abtb_2_split;
+ __uint32_t xs_abtb_2_join;
+ __uint32_t xs_abtb_2_alloc;
+ __uint32_t xs_abtb_2_free;
+ __uint32_t xs_abtb_2_moves;
+#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15)
+ __uint32_t xs_abtc_2_lookup;
+ __uint32_t xs_abtc_2_compare;
+ __uint32_t xs_abtc_2_insrec;
+ __uint32_t xs_abtc_2_delrec;
+ __uint32_t xs_abtc_2_newroot;
+ __uint32_t xs_abtc_2_killroot;
+ __uint32_t xs_abtc_2_increment;
+ __uint32_t xs_abtc_2_decrement;
+ __uint32_t xs_abtc_2_lshift;
+ __uint32_t xs_abtc_2_rshift;
+ __uint32_t xs_abtc_2_split;
+ __uint32_t xs_abtc_2_join;
+ __uint32_t xs_abtc_2_alloc;
+ __uint32_t xs_abtc_2_free;
+ __uint32_t xs_abtc_2_moves;
+#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15)
+ __uint32_t xs_bmbt_2_lookup;
+ __uint32_t xs_bmbt_2_compare;
+ __uint32_t xs_bmbt_2_insrec;
+ __uint32_t xs_bmbt_2_delrec;
+ __uint32_t xs_bmbt_2_newroot;
+ __uint32_t xs_bmbt_2_killroot;
+ __uint32_t xs_bmbt_2_increment;
+ __uint32_t xs_bmbt_2_decrement;
+ __uint32_t xs_bmbt_2_lshift;
+ __uint32_t xs_bmbt_2_rshift;
+ __uint32_t xs_bmbt_2_split;
+ __uint32_t xs_bmbt_2_join;
+ __uint32_t xs_bmbt_2_alloc;
+ __uint32_t xs_bmbt_2_free;
+ __uint32_t xs_bmbt_2_moves;
+#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15)
+ __uint32_t xs_ibt_2_lookup;
+ __uint32_t xs_ibt_2_compare;
+ __uint32_t xs_ibt_2_insrec;
+ __uint32_t xs_ibt_2_delrec;
+ __uint32_t xs_ibt_2_newroot;
+ __uint32_t xs_ibt_2_killroot;
+ __uint32_t xs_ibt_2_increment;
+ __uint32_t xs_ibt_2_decrement;
+ __uint32_t xs_ibt_2_lshift;
+ __uint32_t xs_ibt_2_rshift;
+ __uint32_t xs_ibt_2_split;
+ __uint32_t xs_ibt_2_join;
+ __uint32_t xs_ibt_2_alloc;
+ __uint32_t xs_ibt_2_free;
+ __uint32_t xs_ibt_2_moves;
/* Extra precision counters */
__uint64_t xs_xstrat_bytes;
__uint64_t xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 37ebe36056eb..36f6cc703ef2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -18,7 +18,6 @@
#include "xfs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
-#include "xfs_clnt.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
@@ -36,6 +35,7 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
@@ -48,7 +48,6 @@
#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_vnodeops.h"
-#include "xfs_vfsops.h"
#include "xfs_version.h"
#include "xfs_log_priv.h"
#include "xfs_trans_priv.h"
@@ -58,6 +57,7 @@
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
+#include "xfs_sync.h"
#include <linux/namei.h>
#include <linux/init.h>
@@ -70,36 +70,9 @@
static struct quotactl_ops xfs_quotactl_operations;
static struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_vnode_zone;
static kmem_zone_t *xfs_ioend_zone;
mempool_t *xfs_ioend_pool;
-STATIC struct xfs_mount_args *
-xfs_args_allocate(
- struct super_block *sb,
- int silent)
-{
- struct xfs_mount_args *args;
-
- args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
- if (!args)
- return NULL;
-
- args->logbufs = args->logbufsize = -1;
- strncpy(args->fsname, sb->s_id, MAXNAMELEN);
-
- /* Copy the already-parsed mount(2) flags we're interested in */
- if (sb->s_flags & MS_DIRSYNC)
- args->flags |= XFSMNT_DIRSYNC;
- if (sb->s_flags & MS_SYNCHRONOUS)
- args->flags |= XFSMNT_WSYNC;
- if (silent)
- args->flags |= XFSMNT_QUIET;
- args->flags |= XFSMNT_32BITINODES;
-
- return args;
-}
-
#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
#define MNTOPT_LOGDEV "logdev" /* log device */
@@ -188,26 +161,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
}
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure. The caller takes care of them.
+ */
STATIC int
xfs_parseargs(
struct xfs_mount *mp,
char *options,
- struct xfs_mount_args *args,
- int update)
+ char **mtpt)
{
+ struct super_block *sb = mp->m_super;
char *this_char, *value, *eov;
- int dsunit, dswidth, vol_dsunit, vol_dswidth;
- int iosize;
+ int dsunit = 0;
+ int dswidth = 0;
+ int iosize = 0;
int dmapi_implies_ikeep = 1;
+ uchar_t iosizelog = 0;
+
+ /*
+ * Copy binary VFS mount flags we are interested in.
+ */
+ if (sb->s_flags & MS_RDONLY)
+ mp->m_flags |= XFS_MOUNT_RDONLY;
+ if (sb->s_flags & MS_DIRSYNC)
+ mp->m_flags |= XFS_MOUNT_DIRSYNC;
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ mp->m_flags |= XFS_MOUNT_WSYNC;
+
+ /*
+ * Set some default flags that could be cleared by the mount option
+ * parsing.
+ */
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+ mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+ mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- args->flags |= XFSMNT_BARRIER;
- args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+ /*
+ * These can be overridden by the mount option parsing.
+ */
+ mp->m_logbufs = -1;
+ mp->m_logbsize = -1;
if (!options)
goto done;
- iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
-
while ((this_char = strsep(&options, ",")) != NULL) {
if (!*this_char)
continue;
@@ -221,7 +222,7 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- args->logbufs = simple_strtoul(value, &eov, 10);
+ mp->m_logbufs = simple_strtoul(value, &eov, 10);
} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -229,7 +230,7 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- args->logbufsize = suffix_strtoul(value, &eov, 10);
+ mp->m_logbsize = suffix_strtoul(value, &eov, 10);
} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -237,7 +238,9 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- strncpy(args->logname, value, MAXNAMELEN);
+ mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+ if (!mp->m_logname)
+ return ENOMEM;
} else if (!strcmp(this_char, MNTOPT_MTPT)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -245,7 +248,9 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- strncpy(args->mtpt, value, MAXNAMELEN);
+ *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+ if (!*mtpt)
+ return ENOMEM;
} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -253,7 +258,9 @@ xfs_parseargs(
this_char);
return EINVAL;
}
- strncpy(args->rtname, value, MAXNAMELEN);
+ mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+ if (!mp->m_rtname)
+ return ENOMEM;
} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -262,8 +269,7 @@ xfs_parseargs(
return EINVAL;
}
iosize = simple_strtoul(value, &eov, 10);
- args->flags |= XFSMNT_IOSIZE;
- args->iosizelog = (uint8_t) iosize;
+ iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -272,8 +278,7 @@ xfs_parseargs(
return EINVAL;
}
iosize = suffix_strtoul(value, &eov, 10);
- args->flags |= XFSMNT_IOSIZE;
- args->iosizelog = ffs(iosize) - 1;
+ iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_GRPID) ||
!strcmp(this_char, MNTOPT_BSDGROUPS)) {
mp->m_flags |= XFS_MOUNT_GRPID;
@@ -281,23 +286,25 @@ xfs_parseargs(
!strcmp(this_char, MNTOPT_SYSVGROUPS)) {
mp->m_flags &= ~XFS_MOUNT_GRPID;
} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
- args->flags |= XFSMNT_WSYNC;
+ mp->m_flags |= XFS_MOUNT_WSYNC;
} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
- args->flags |= XFSMNT_OSYNCISOSYNC;
+ mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
- args->flags |= XFSMNT_NORECOVERY;
+ mp->m_flags |= XFS_MOUNT_NORECOVERY;
} else if (!strcmp(this_char, MNTOPT_INO64)) {
- args->flags |= XFSMNT_INO64;
-#if !XFS_BIG_INUMS
+#if XFS_BIG_INUMS
+ mp->m_flags |= XFS_MOUNT_INO64;
+ mp->m_inoadd = XFS_INO64_OFFSET;
+#else
cmn_err(CE_WARN,
"XFS: %s option not allowed on this system",
this_char);
return EINVAL;
#endif
} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
- args->flags |= XFSMNT_NOALIGN;
+ mp->m_flags |= XFS_MOUNT_NOALIGN;
} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
- args->flags |= XFSMNT_SWALLOC;
+ mp->m_flags |= XFS_MOUNT_SWALLOC;
} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -315,7 +322,7 @@ xfs_parseargs(
}
dswidth = simple_strtoul(value, &eov, 10);
} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
- args->flags &= ~XFSMNT_32BITINODES;
+ mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
#if !XFS_BIG_INUMS
cmn_err(CE_WARN,
"XFS: %s option not allowed on this system",
@@ -323,56 +330,61 @@ xfs_parseargs(
return EINVAL;
#endif
} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
- args->flags |= XFSMNT_NOUUID;
+ mp->m_flags |= XFS_MOUNT_NOUUID;
} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
- args->flags |= XFSMNT_BARRIER;
+ mp->m_flags |= XFS_MOUNT_BARRIER;
} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
- args->flags &= ~XFSMNT_BARRIER;
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
- args->flags |= XFSMNT_IKEEP;
+ mp->m_flags |= XFS_MOUNT_IKEEP;
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
dmapi_implies_ikeep = 0;
- args->flags &= ~XFSMNT_IKEEP;
+ mp->m_flags &= ~XFS_MOUNT_IKEEP;
} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
- args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
+ mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
- args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+ mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
- args->flags |= XFSMNT_ATTR2;
+ mp->m_flags |= XFS_MOUNT_ATTR2;
} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
- args->flags &= ~XFSMNT_ATTR2;
- args->flags |= XFSMNT_NOATTR2;
+ mp->m_flags &= ~XFS_MOUNT_ATTR2;
+ mp->m_flags |= XFS_MOUNT_NOATTR2;
} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
- args->flags2 |= XFSMNT2_FILESTREAMS;
+ mp->m_flags |= XFS_MOUNT_FILESTREAMS;
} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
- args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
- args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
+ mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+ XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+ XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+ XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
!strcmp(this_char, MNTOPT_UQUOTA) ||
!strcmp(this_char, MNTOPT_USRQUOTA)) {
- args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+ XFS_UQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
!strcmp(this_char, MNTOPT_UQUOTANOENF)) {
- args->flags |= XFSMNT_UQUOTA;
- args->flags &= ~XFSMNT_UQUOTAENF;
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_UQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
!strcmp(this_char, MNTOPT_PRJQUOTA)) {
- args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+ mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+ XFS_OQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
- args->flags |= XFSMNT_PQUOTA;
- args->flags &= ~XFSMNT_PQUOTAENF;
+ mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_OQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
!strcmp(this_char, MNTOPT_GRPQUOTA)) {
- args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+ XFS_OQUOTA_ENFD);
} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
- args->flags |= XFSMNT_GQUOTA;
- args->flags &= ~XFSMNT_GQUOTAENF;
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_OQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_DMAPI)) {
- args->flags |= XFSMNT_DMAPI;
+ mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, MNTOPT_XDSM)) {
- args->flags |= XFSMNT_DMAPI;
+ mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, MNTOPT_DMI)) {
- args->flags |= XFSMNT_DMAPI;
+ mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, "ihashsize")) {
cmn_err(CE_WARN,
"XFS: ihashsize no longer used, option is deprecated.");
@@ -390,27 +402,29 @@ xfs_parseargs(
}
}
- if (args->flags & XFSMNT_NORECOVERY) {
- if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) {
- cmn_err(CE_WARN,
- "XFS: no-recovery mounts must be read-only.");
- return EINVAL;
- }
+ /*
+ * no recovery flag requires a read-only mount
+ */
+ if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+ !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+ cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only.");
+ return EINVAL;
}
- if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
+ if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
cmn_err(CE_WARN,
"XFS: sunit and swidth options incompatible with the noalign option");
return EINVAL;
}
- if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+ if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+ (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
cmn_err(CE_WARN,
"XFS: cannot mount with both project and group quota");
return EINVAL;
}
- if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') {
+ if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
printk("XFS: %s option needs the mount point option as well\n",
MNTOPT_DMAPI);
return EINVAL;
@@ -438,27 +452,66 @@ xfs_parseargs(
* Note that if "ikeep" or "noikeep" mount options are
* supplied, then they are honored.
*/
- if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep)
- args->flags |= XFSMNT_IKEEP;
+ if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
+ mp->m_flags |= XFS_MOUNT_IKEEP;
- if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+done:
+ if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+ /*
+ * At this point the superblock has not been read
+ * in, therefore we do not know the block size.
+ * Before the mount call ends we will convert
+ * these to FSBs.
+ */
if (dsunit) {
- args->sunit = dsunit;
- args->flags |= XFSMNT_RETERR;
- } else {
- args->sunit = vol_dsunit;
+ mp->m_dalign = dsunit;
+ mp->m_flags |= XFS_MOUNT_RETERR;
}
- dswidth ? (args->swidth = dswidth) :
- (args->swidth = vol_dswidth);
- } else {
- args->sunit = args->swidth = 0;
+
+ if (dswidth)
+ mp->m_swidth = dswidth;
+ }
+
+ if (mp->m_logbufs != -1 &&
+ mp->m_logbufs != 0 &&
+ (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+ mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+ cmn_err(CE_WARN,
+ "XFS: invalid logbufs value: %d [not %d-%d]",
+ mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+ return XFS_ERROR(EINVAL);
+ }
+ if (mp->m_logbsize != -1 &&
+ mp->m_logbsize != 0 &&
+ (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+ mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+ !is_power_of_2(mp->m_logbsize))) {
+ cmn_err(CE_WARN,
+ "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+ mp->m_logbsize);
+ return XFS_ERROR(EINVAL);
+ }
+
+ mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+ if (!mp->m_fsname)
+ return ENOMEM;
+ mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+ if (iosizelog) {
+ if (iosizelog > XFS_MAX_IO_LOG ||
+ iosizelog < XFS_MIN_IO_LOG) {
+ cmn_err(CE_WARN,
+ "XFS: invalid log iosize: %d [not %d-%d]",
+ iosizelog, XFS_MIN_IO_LOG,
+ XFS_MAX_IO_LOG);
+ return XFS_ERROR(EINVAL);
+ }
+
+ mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+ mp->m_readio_log = iosizelog;
+ mp->m_writeio_log = iosizelog;
}
-done:
- if (args->flags & XFSMNT_32BITINODES)
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- if (args->flags2)
- args->flags |= XFSMNT_FLAGS2;
return 0;
}
@@ -704,8 +757,7 @@ xfs_close_devices(
*/
STATIC int
xfs_open_devices(
- struct xfs_mount *mp,
- struct xfs_mount_args *args)
+ struct xfs_mount *mp)
{
struct block_device *ddev = mp->m_super->s_bdev;
struct block_device *logdev = NULL, *rtdev = NULL;
@@ -714,14 +766,14 @@ xfs_open_devices(
/*
* Open real time and log devices - order is important.
*/
- if (args->logname[0]) {
- error = xfs_blkdev_get(mp, args->logname, &logdev);
+ if (mp->m_logname) {
+ error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
if (error)
goto out;
}
- if (args->rtname[0]) {
- error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+ if (mp->m_rtname) {
+ error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
if (error)
goto out_close_logdev;
@@ -813,18 +865,18 @@ xfs_setup_devices(
*/
void
xfsaild_wakeup(
- xfs_mount_t *mp,
+ struct xfs_ail *ailp,
xfs_lsn_t threshold_lsn)
{
- mp->m_ail.xa_target = threshold_lsn;
- wake_up_process(mp->m_ail.xa_task);
+ ailp->xa_target = threshold_lsn;
+ wake_up_process(ailp->xa_task);
}
int
xfsaild(
void *data)
{
- xfs_mount_t *mp = (xfs_mount_t *)data;
+ struct xfs_ail *ailp = data;
xfs_lsn_t last_pushed_lsn = 0;
long tout = 0;
@@ -836,11 +888,11 @@ xfsaild(
/* swsusp */
try_to_freeze();
- ASSERT(mp->m_log);
- if (XFS_FORCED_SHUTDOWN(mp))
+ ASSERT(ailp->xa_mount->m_log);
+ if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
continue;
- tout = xfsaild_push(mp, &last_pushed_lsn);
+ tout = xfsaild_push(ailp, &last_pushed_lsn);
}
return 0;
@@ -848,43 +900,82 @@ xfsaild(
int
xfsaild_start(
- xfs_mount_t *mp)
+ struct xfs_ail *ailp)
{
- mp->m_ail.xa_target = 0;
- mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild");
- if (IS_ERR(mp->m_ail.xa_task))
- return -PTR_ERR(mp->m_ail.xa_task);
+ ailp->xa_target = 0;
+ ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
+ if (IS_ERR(ailp->xa_task))
+ return -PTR_ERR(ailp->xa_task);
return 0;
}
void
xfsaild_stop(
- xfs_mount_t *mp)
+ struct xfs_ail *ailp)
{
- kthread_stop(mp->m_ail.xa_task);
+ kthread_stop(ailp->xa_task);
}
-
+/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
struct super_block *sb)
{
- return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
+ BUG();
+ return NULL;
}
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
STATIC void
xfs_fs_destroy_inode(
- struct inode *inode)
+ struct inode *inode)
{
- kmem_zone_free(xfs_vnode_zone, inode);
+ xfs_inode_t *ip = XFS_I(inode);
+
+ XFS_STATS_INC(vn_reclaim);
+ if (xfs_reclaim(ip))
+ panic("%s: cannot reclaim 0x%p\n", __func__, inode);
}
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly intialise
+ * fields in the xfs inode that left in the initialise state
+ * when freeing the inode.
+ */
STATIC void
xfs_fs_inode_init_once(
- void *vnode)
+ void *inode)
{
- inode_init_once((struct inode *)vnode);
+ struct xfs_inode *ip = inode;
+
+ memset(ip, 0, sizeof(struct xfs_inode));
+
+ /* vfs inode */
+ inode_init_once(VFS_I(ip));
+
+ /* xfs inode */
+ atomic_set(&ip->i_iocount, 0);
+ atomic_set(&ip->i_pincount, 0);
+ spin_lock_init(&ip->i_flags_lock);
+ init_waitqueue_head(&ip->i_ipin_wait);
+ /*
+ * Because we want to use a counting completion, complete
+ * the flush completion once to allow a single access to
+ * the flush completion without blocking.
+ */
+ init_completion(&ip->i_flush);
+ complete(&ip->i_flush);
+
+ mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+ "xfsino", ip->i_ino);
+ mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
}
/*
@@ -898,21 +989,26 @@ xfs_fs_write_inode(
struct inode *inode,
int sync)
{
+ struct xfs_inode *ip = XFS_I(inode);
int error = 0;
int flags = 0;
- xfs_itrace_entry(XFS_I(inode));
+ xfs_itrace_entry(ip);
if (sync) {
- filemap_fdatawait(inode->i_mapping);
+ error = xfs_wait_on_pages(ip, 0, -1);
+ if (error)
+ goto out_error;
flags |= FLUSH_SYNC;
}
- error = xfs_inode_flush(XFS_I(inode), flags);
+ error = xfs_inode_flush(ip, flags);
+
+out_error:
/*
* if we failed to write out the inode then mark
* it dirty again so we'll try again later.
*/
if (error)
- mark_inode_dirty_sync(inode);
+ xfs_mark_inode_dirty_sync(ip);
return -error;
}
@@ -923,164 +1019,12 @@ xfs_fs_clear_inode(
{
xfs_inode_t *ip = XFS_I(inode);
- /*
- * ip can be null when xfs_iget_core calls xfs_idestroy if we
- * find an inode with di_mode == 0 but without IGET_CREATE set.
- */
- if (ip) {
- xfs_itrace_entry(ip);
- XFS_STATS_INC(vn_rele);
- XFS_STATS_INC(vn_remove);
- XFS_STATS_INC(vn_reclaim);
- XFS_STATS_DEC(vn_active);
-
- xfs_inactive(ip);
- xfs_iflags_clear(ip, XFS_IMODIFIED);
- if (xfs_reclaim(ip))
- panic("%s: cannot reclaim 0x%p\n", __func__, inode);
- }
-
- ASSERT(XFS_I(inode) == NULL);
-}
+ xfs_itrace_entry(ip);
+ XFS_STATS_INC(vn_rele);
+ XFS_STATS_INC(vn_remove);
+ XFS_STATS_DEC(vn_active);
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
- struct xfs_mount *mp,
- void *data,
- void (*syncer)(struct xfs_mount *, void *))
-{
- struct bhv_vfs_sync_work *work;
-
- work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
- INIT_LIST_HEAD(&work->w_list);
- work->w_syncer = syncer;
- work->w_data = data;
- work->w_mount = mp;
- spin_lock(&mp->m_sync_lock);
- list_add_tail(&work->w_list, &mp->m_sync_list);
- spin_unlock(&mp->m_sync_lock);
- wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inode_work(
- struct xfs_mount *mp,
- void *arg)
-{
- struct inode *inode = arg;
- filemap_flush(inode->i_mapping);
- iput(inode);
-}
-
-void
-xfs_flush_inode(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- igrab(inode);
- xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
- delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
- struct xfs_mount *mp,
- void *arg)
-{
- struct inode *inode = arg;
- sync_blockdev(mp->m_super->s_bdev);
- iput(inode);
-}
-
-void
-xfs_flush_device(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- igrab(inode);
- xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
- delay(msecs_to_jiffies(500));
- xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
-}
-
-STATIC void
-xfs_sync_worker(
- struct xfs_mount *mp,
- void *unused)
-{
- int error;
-
- if (!(mp->m_flags & XFS_MOUNT_RDONLY))
- error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
- mp->m_sync_seq++;
- wake_up(&mp->m_wait_single_sync_task);
-}
-
-STATIC int
-xfssyncd(
- void *arg)
-{
- struct xfs_mount *mp = arg;
- long timeleft;
- bhv_vfs_sync_work_t *work, *n;
- LIST_HEAD (tmp);
-
- set_freezable();
- timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
- for (;;) {
- timeleft = schedule_timeout_interruptible(timeleft);
- /* swsusp */
- try_to_freeze();
- if (kthread_should_stop() && list_empty(&mp->m_sync_list))
- break;
-
- spin_lock(&mp->m_sync_lock);
- /*
- * We can get woken by laptop mode, to do a sync -
- * that's the (only!) case where the list would be
- * empty with time remaining.
- */
- if (!timeleft || list_empty(&mp->m_sync_list)) {
- if (!timeleft)
- timeleft = xfs_syncd_centisecs *
- msecs_to_jiffies(10);
- INIT_LIST_HEAD(&mp->m_sync_work.w_list);
- list_add_tail(&mp->m_sync_work.w_list,
- &mp->m_sync_list);
- }
- list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
- list_move(&work->w_list, &tmp);
- spin_unlock(&mp->m_sync_lock);
-
- list_for_each_entry_safe(work, n, &tmp, w_list) {
- (*work->w_syncer)(mp, work->w_data);
- list_del(&work->w_list);
- if (work == &mp->m_sync_work)
- continue;
- kmem_free(work);
- }
- }
-
- return 0;
+ xfs_inactive(ip);
}
STATIC void
@@ -1099,11 +1043,9 @@ xfs_fs_put_super(
struct xfs_mount *mp = XFS_M(sb);
struct xfs_inode *rip = mp->m_rootip;
int unmount_event_flags = 0;
- int error;
- kthread_stop(mp->m_sync_task);
-
- xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
+ xfs_syncd_stop(mp);
+ xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
#ifdef HAVE_DMAPI
if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1128,18 +1070,6 @@ xfs_fs_put_super(
xfs_filestream_unmount(mp);
XFS_bflush(mp->m_ddev_targp);
- error = xfs_unmount_flush(mp, 0);
- WARN_ON(error);
-
- /*
- * If we're forcing a shutdown, typically because of a media error,
- * we want to make sure we invalidate dirty pages that belong to
- * referenced vnodes as well.
- */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
- ASSERT(error != EFSCORRUPTED);
- }
if (mp->m_flags & XFS_MOUNT_DMAPI) {
XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
@@ -1161,7 +1091,7 @@ xfs_fs_write_super(
struct super_block *sb)
{
if (!(sb->s_flags & MS_RDONLY))
- xfs_sync(XFS_M(sb), SYNC_FSDATA);
+ xfs_sync_fsdata(XFS_M(sb), 0);
sb->s_dirt = 0;
}
@@ -1172,7 +1102,6 @@ xfs_fs_sync_super(
{
struct xfs_mount *mp = XFS_M(sb);
int error;
- int flags;
/*
* Treat a sync operation like a freeze. This is to work
@@ -1186,20 +1115,10 @@ xfs_fs_sync_super(
* dirty the Linux inode until after the transaction I/O
* completes.
*/
- if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
- /*
- * First stage of freeze - no more writers will make progress
- * now we are here, so we flush delwri and delalloc buffers
- * here, then wait for all I/O to complete. Data is frozen at
- * that point. Metadata is not frozen, transactions can still
- * occur here so don't bother flushing the buftarg (i.e
- * SYNC_QUIESCE) because it'll just get dirty again.
- */
- flags = SYNC_DATA_QUIESCE;
- } else
- flags = SYNC_FSDATA;
-
- error = xfs_sync(mp, flags);
+ if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE))
+ error = xfs_quiesce_data(mp);
+ else
+ error = xfs_sync_fsdata(mp, 0);
sb->s_dirt = 0;
if (unlikely(laptop_mode)) {
@@ -1337,9 +1256,8 @@ xfs_fs_remount(
/* rw -> ro */
if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
- xfs_filestream_flush(mp);
- xfs_sync(mp, SYNC_DATA_QUIESCE);
- xfs_attr_quiesce(mp);
+ xfs_quiesce_data(mp);
+ xfs_quiesce_attr(mp);
mp->m_flags |= XFS_MOUNT_RDONLY;
}
@@ -1348,7 +1266,7 @@ xfs_fs_remount(
/*
* Second stage of a freeze. The data is already frozen so we only
- * need to take care of themetadata. Once that's done write a dummy
+ * need to take care of the metadata. Once that's done write a dummy
* record to dirty the log in case of a crash while frozen.
*/
STATIC void
@@ -1357,7 +1275,7 @@ xfs_fs_lockfs(
{
struct xfs_mount *mp = XFS_M(sb);
- xfs_attr_quiesce(mp);
+ xfs_quiesce_attr(mp);
xfs_fs_log_dummy(mp);
}
@@ -1422,175 +1340,28 @@ xfs_fs_setxquota(
/*
* This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
- struct xfs_mount_args *ap,
- struct xfs_mount *mp)
-{
- int error;
-
- /* Values are in BBs */
- if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
- /*
- * At this point the superblock has not been read
- * in, therefore we do not know the block size.
- * Before the mount call ends we will convert
- * these to FSBs.
- */
- mp->m_dalign = ap->sunit;
- mp->m_swidth = ap->swidth;
- }
-
- if (ap->logbufs != -1 &&
- ap->logbufs != 0 &&
- (ap->logbufs < XLOG_MIN_ICLOGS ||
- ap->logbufs > XLOG_MAX_ICLOGS)) {
- cmn_err(CE_WARN,
- "XFS: invalid logbufs value: %d [not %d-%d]",
- ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return XFS_ERROR(EINVAL);
- }
- mp->m_logbufs = ap->logbufs;
- if (ap->logbufsize != -1 &&
- ap->logbufsize != 0 &&
- (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
- ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
- !is_power_of_2(ap->logbufsize))) {
- cmn_err(CE_WARN,
- "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
- ap->logbufsize);
- return XFS_ERROR(EINVAL);
- }
-
- error = ENOMEM;
-
- mp->m_logbsize = ap->logbufsize;
- mp->m_fsname_len = strlen(ap->fsname) + 1;
-
- mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
- if (!mp->m_fsname)
- goto out;
-
- if (ap->rtname[0]) {
- mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
- if (!mp->m_rtname)
- goto out_free_fsname;
-
- }
-
- if (ap->logname[0]) {
- mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
- if (!mp->m_logname)
- goto out_free_rtname;
- }
-
- if (ap->flags & XFSMNT_WSYNC)
- mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
- if (ap->flags & XFSMNT_INO64) {
- mp->m_flags |= XFS_MOUNT_INO64;
- mp->m_inoadd = XFS_INO64_OFFSET;
- }
-#endif
- if (ap->flags & XFSMNT_RETERR)
- mp->m_flags |= XFS_MOUNT_RETERR;
- if (ap->flags & XFSMNT_NOALIGN)
- mp->m_flags |= XFS_MOUNT_NOALIGN;
- if (ap->flags & XFSMNT_SWALLOC)
- mp->m_flags |= XFS_MOUNT_SWALLOC;
- if (ap->flags & XFSMNT_OSYNCISOSYNC)
- mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
- if (ap->flags & XFSMNT_32BITINODES)
- mp->m_flags |= XFS_MOUNT_32BITINODES;
-
- if (ap->flags & XFSMNT_IOSIZE) {
- if (ap->iosizelog > XFS_MAX_IO_LOG ||
- ap->iosizelog < XFS_MIN_IO_LOG) {
- cmn_err(CE_WARN,
- "XFS: invalid log iosize: %d [not %d-%d]",
- ap->iosizelog, XFS_MIN_IO_LOG,
- XFS_MAX_IO_LOG);
- return XFS_ERROR(EINVAL);
- }
-
- mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
- mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
- }
-
- if (ap->flags & XFSMNT_IKEEP)
- mp->m_flags |= XFS_MOUNT_IKEEP;
- if (ap->flags & XFSMNT_DIRSYNC)
- mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (ap->flags & XFSMNT_ATTR2)
- mp->m_flags |= XFS_MOUNT_ATTR2;
- if (ap->flags & XFSMNT_NOATTR2)
- mp->m_flags |= XFS_MOUNT_NOATTR2;
-
- if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
- /*
- * no recovery flag requires a read-only mount
- */
- if (ap->flags & XFSMNT_NORECOVERY) {
- if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
- cmn_err(CE_WARN,
- "XFS: tried to mount a FS read-write without recovery!");
- return XFS_ERROR(EINVAL);
- }
- mp->m_flags |= XFS_MOUNT_NORECOVERY;
- }
-
- if (ap->flags & XFSMNT_NOUUID)
- mp->m_flags |= XFS_MOUNT_NOUUID;
- if (ap->flags & XFSMNT_BARRIER)
- mp->m_flags |= XFS_MOUNT_BARRIER;
- else
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
-
- if (ap->flags2 & XFSMNT2_FILESTREAMS)
- mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-
- if (ap->flags & XFSMNT_DMAPI)
- mp->m_flags |= XFS_MOUNT_DMAPI;
- return 0;
-
-
- out_free_rtname:
- kfree(mp->m_rtname);
- out_free_fsname:
- kfree(mp->m_fsname);
- out:
- return error;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
* Note: the superblock _has_ now been read in.
*/
STATIC int
xfs_finish_flags(
- struct xfs_mount_args *ap,
struct xfs_mount *mp)
{
int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
/* Fail a mount where the logbuf is smaller then the log stripe */
if (xfs_sb_version_haslogv2(&mp->m_sb)) {
- if ((ap->logbufsize <= 0) &&
- (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+ if (mp->m_logbsize <= 0 &&
+ mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
mp->m_logbsize = mp->m_sb.sb_logsunit;
- } else if (ap->logbufsize > 0 &&
- ap->logbufsize < mp->m_sb.sb_logsunit) {
+ } else if (mp->m_logbsize > 0 &&
+ mp->m_logbsize < mp->m_sb.sb_logsunit) {
cmn_err(CE_WARN,
"XFS: logbuf size must be greater than or equal to log stripe size");
return XFS_ERROR(EINVAL);
}
} else {
/* Fail a mount if the logbuf is larger than 32K */
- if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+ if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
cmn_err(CE_WARN,
"XFS: logbuf size for version 1 logs must be 16K or 32K");
return XFS_ERROR(EINVAL);
@@ -1602,7 +1373,7 @@ xfs_finish_flags(
* told by noattr2 to turn it off
*/
if (xfs_sb_version_hasattr2(&mp->m_sb) &&
- !(ap->flags & XFSMNT_NOATTR2))
+ !(mp->m_flags & XFS_MOUNT_NOATTR2))
mp->m_flags |= XFS_MOUNT_ATTR2;
/*
@@ -1614,48 +1385,6 @@ xfs_finish_flags(
return XFS_ERROR(EROFS);
}
- /*
- * check for shared mount.
- */
- if (ap->flags & XFSMNT_SHARED) {
- if (!xfs_sb_version_hasshared(&mp->m_sb))
- return XFS_ERROR(EINVAL);
-
- /*
- * For IRIX 6.5, shared mounts must have the shared
- * version bit set, have the persistent readonly
- * field set, must be version 0 and can only be mounted
- * read-only.
- */
- if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
- (mp->m_sb.sb_shared_vn != 0))
- return XFS_ERROR(EINVAL);
-
- mp->m_flags |= XFS_MOUNT_SHARED;
-
- /*
- * Shared XFS V0 can't deal with DMI. Return EINVAL.
- */
- if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
- return XFS_ERROR(EINVAL);
- }
-
- if (ap->flags & XFSMNT_UQUOTA) {
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_UQUOTAENF)
- mp->m_qflags |= XFS_UQUOTA_ENFD;
- }
-
- if (ap->flags & XFSMNT_GQUOTA) {
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_GQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- } else if (ap->flags & XFSMNT_PQUOTA) {
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_PQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- }
-
return 0;
}
@@ -1667,19 +1396,14 @@ xfs_fs_fill_super(
{
struct inode *root;
struct xfs_mount *mp = NULL;
- struct xfs_mount_args *args;
int flags = 0, error = ENOMEM;
-
- args = xfs_args_allocate(sb, silent);
- if (!args)
- return -ENOMEM;
+ char *mtpt = NULL;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
- goto out_free_args;
+ goto out;
spin_lock_init(&mp->m_sb_lock);
- mutex_init(&mp->m_ilock);
mutex_init(&mp->m_growlock);
atomic_set(&mp->m_active_trans, 0);
INIT_LIST_HEAD(&mp->m_sync_list);
@@ -1689,12 +1413,9 @@ xfs_fs_fill_super(
mp->m_super = sb;
sb->s_fs_info = mp;
- if (sb->s_flags & MS_RDONLY)
- mp->m_flags |= XFS_MOUNT_RDONLY;
-
- error = xfs_parseargs(mp, (char *)data, args, 0);
+ error = xfs_parseargs(mp, (char *)data, &mtpt);
if (error)
- goto out_free_mp;
+ goto out_free_fsname;
sb_min_blocksize(sb, BBSIZE);
sb->s_xattr = xfs_xattr_handlers;
@@ -1702,33 +1423,28 @@ xfs_fs_fill_super(
sb->s_qcop = &xfs_quotactl_operations;
sb->s_op = &xfs_super_operations;
- error = xfs_dmops_get(mp, args);
+ error = xfs_dmops_get(mp);
if (error)
- goto out_free_mp;
- error = xfs_qmops_get(mp, args);
+ goto out_free_fsname;
+ error = xfs_qmops_get(mp);
if (error)
goto out_put_dmops;
- if (args->flags & XFSMNT_QUIET)
+ if (silent)
flags |= XFS_MFSI_QUIET;
- error = xfs_open_devices(mp, args);
+ error = xfs_open_devices(mp);
if (error)
goto out_put_qmops;
if (xfs_icsb_init_counters(mp))
mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
- /*
- * Setup flags based on mount(2) options and then the superblock
- */
- error = xfs_start_flags(args, mp);
- if (error)
- goto out_free_fsname;
error = xfs_readsb(mp, flags);
if (error)
- goto out_free_fsname;
- error = xfs_finish_flags(args, mp);
+ goto out_destroy_counters;
+
+ error = xfs_finish_flags(mp);
if (error)
goto out_free_sb;
@@ -1747,7 +1463,7 @@ xfs_fs_fill_super(
if (error)
goto out_filestream_unmount;
- XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
+ XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
sb->s_dirt = 1;
sb->s_magic = XFS_SB_MAGIC;
@@ -1772,35 +1488,31 @@ xfs_fs_fill_super(
goto fail_vnrele;
}
- mp->m_sync_work.w_syncer = xfs_sync_worker;
- mp->m_sync_work.w_mount = mp;
- mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
- if (IS_ERR(mp->m_sync_task)) {
- error = -PTR_ERR(mp->m_sync_task);
+ error = xfs_syncd_init(mp);
+ if (error)
goto fail_vnrele;
- }
- xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
+ kfree(mtpt);
- kfree(args);
+ xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
return 0;
out_filestream_unmount:
xfs_filestream_unmount(mp);
out_free_sb:
xfs_freesb(mp);
- out_free_fsname:
- xfs_free_fsname(mp);
+ out_destroy_counters:
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
out_put_qmops:
xfs_qmops_put(mp);
out_put_dmops:
xfs_dmops_put(mp);
- out_free_mp:
+ out_free_fsname:
+ xfs_free_fsname(mp);
+ kfree(mtpt);
kfree(mp);
- out_free_args:
- kfree(args);
+ out:
return -error;
fail_vnrele:
@@ -1820,8 +1532,6 @@ xfs_fs_fill_super(
xfs_filestream_unmount(mp);
XFS_bflush(mp->m_ddev_targp);
- error = xfs_unmount_flush(mp, 0);
- WARN_ON(error);
xfs_unmountfs(mp);
goto out_free_sb;
@@ -1882,10 +1592,19 @@ xfs_alloc_trace_bufs(void)
if (!xfs_bmap_trace_buf)
goto out_free_alloc_trace;
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
+ xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
+ KM_MAYFAIL);
+ if (!xfs_allocbt_trace_buf)
+ goto out_free_bmap_trace;
+
+ xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_inobt_trace_buf)
+ goto out_free_allocbt_trace;
+
xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
if (!xfs_bmbt_trace_buf)
- goto out_free_bmap_trace;
+ goto out_free_inobt_trace;
#endif
#ifdef XFS_ATTR_TRACE
xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
@@ -1907,8 +1626,12 @@ xfs_alloc_trace_bufs(void)
ktrace_free(xfs_attr_trace_buf);
out_free_bmbt_trace:
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
ktrace_free(xfs_bmbt_trace_buf);
+ out_free_inobt_trace:
+ ktrace_free(xfs_inobt_trace_buf);
+ out_free_allocbt_trace:
+ ktrace_free(xfs_allocbt_trace_buf);
out_free_bmap_trace:
#endif
#ifdef XFS_BMAP_TRACE
@@ -1931,8 +1654,10 @@ xfs_free_trace_bufs(void)
#ifdef XFS_ATTR_TRACE
ktrace_free(xfs_attr_trace_buf);
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
ktrace_free(xfs_bmbt_trace_buf);
+ ktrace_free(xfs_inobt_trace_buf);
+ ktrace_free(xfs_allocbt_trace_buf);
#endif
#ifdef XFS_BMAP_TRACE
ktrace_free(xfs_bmap_trace_buf);
@@ -1945,16 +1670,10 @@ xfs_free_trace_bufs(void)
STATIC int __init
xfs_init_zones(void)
{
- xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
- KM_ZONE_SPREAD,
- xfs_fs_inode_init_once);
- if (!xfs_vnode_zone)
- goto out;
xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
if (!xfs_ioend_zone)
- goto out_destroy_vnode_zone;
+ goto out;
xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
xfs_ioend_zone);
@@ -1970,6 +1689,7 @@ xfs_init_zones(void)
"xfs_bmap_free_item");
if (!xfs_bmap_free_item_zone)
goto out_destroy_log_ticket_zone;
+
xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
"xfs_btree_cur");
if (!xfs_btree_cur_zone)
@@ -2017,8 +1737,8 @@ xfs_init_zones(void)
xfs_inode_zone =
kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
- KM_ZONE_SPREAD, NULL);
+ KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
+ xfs_fs_inode_init_once);
if (!xfs_inode_zone)
goto out_destroy_efi_zone;
@@ -2066,8 +1786,6 @@ xfs_init_zones(void)
mempool_destroy(xfs_ioend_pool);
out_destroy_ioend_zone:
kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
- kmem_zone_destroy(xfs_vnode_zone);
out:
return -ENOMEM;
}
@@ -2092,7 +1810,6 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_log_ticket_zone);
mempool_destroy(xfs_ioend_pool);
kmem_zone_destroy(xfs_ioend_zone);
- kmem_zone_destroy(xfs_vnode_zone);
}
@@ -2100,13 +1817,12 @@ STATIC int __init
init_xfs_fs(void)
{
int error;
- static char message[] __initdata = KERN_INFO \
- XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
- printk(message);
+ printk(KERN_INFO XFS_VERSION_STRING " with "
+ XFS_BUILD_OPTIONS " enabled\n");
ktrace_init(64);
- vn_init();
+ xfs_ioend_init();
xfs_dir_startup();
error = xfs_init_zones();
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index fe2ef4e6a0f9..d5d776d4cd67 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -20,24 +20,12 @@
#include <linux/exportfs.h>
-#ifdef CONFIG_XFS_DMAPI
-# define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops)
-# define vfs_initdmapi() dmapi_init()
-# define vfs_exitdmapi() dmapi_uninit()
-#else
-# define vfs_insertdmapi(vfs) do { } while (0)
-# define vfs_initdmapi() do { } while (0)
-# define vfs_exitdmapi() do { } while (0)
-#endif
-
#ifdef CONFIG_XFS_QUOTA
-# define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops)
extern void xfs_qm_init(void);
extern void xfs_qm_exit(void);
# define vfs_initquota() xfs_qm_init()
# define vfs_exitquota() xfs_qm_exit()
#else
-# define vfs_insertquota(vfs) do { } while (0)
# define vfs_initquota() do { } while (0)
# define vfs_exitquota() do { } while (0)
#endif
@@ -101,9 +89,6 @@ struct block_device;
extern __uint64_t xfs_max_file_offset(unsigned int);
-extern void xfs_flush_inode(struct xfs_inode *);
-extern void xfs_flush_device(struct xfs_inode *);
-
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
extern const struct export_operations xfs_export_operations;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
new file mode 100644
index 000000000000..2ed035354c26
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -0,0 +1,762 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_inode.h"
+#include "xfs_dinode.h"
+#include "xfs_error.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+#include "xfs_utils.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
+#include "xfs_rw.h"
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+/*
+ * Sync all the inodes in the given AG according to the
+ * direction given by the flags.
+ */
+STATIC int
+xfs_sync_inodes_ag(
+ xfs_mount_t *mp,
+ int ag,
+ int flags)
+{
+ xfs_perag_t *pag = &mp->m_perag[ag];
+ int nr_found;
+ uint32_t first_index = 0;
+ int error = 0;
+ int last_error = 0;
+ int fflag = XFS_B_ASYNC;
+
+ if (flags & SYNC_DELWRI)
+ fflag = XFS_B_DELWRI;
+ if (flags & SYNC_WAIT)
+ fflag = 0; /* synchronous overrides all */
+
+ do {
+ struct inode *inode;
+ xfs_inode_t *ip = NULL;
+ int lock_flags = XFS_ILOCK_SHARED;
+
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+ (void**)&ip, first_index, 1);
+
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /* nothing to sync during shutdown */
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ read_unlock(&pag->pag_ici_lock);
+ return 0;
+ }
+
+ /*
+ * If we can't get a reference on the inode, it must be
+ * in reclaim. Leave it for the reclaim code to flush.
+ */
+ inode = VFS_I(ip);
+ if (!igrab(inode)) {
+ read_unlock(&pag->pag_ici_lock);
+ continue;
+ }
+ read_unlock(&pag->pag_ici_lock);
+
+ /* avoid new or bad inodes */
+ if (is_bad_inode(inode) ||
+ xfs_iflags_test(ip, XFS_INEW)) {
+ IRELE(ip);
+ continue;
+ }
+
+ /*
+ * If we have to flush data or wait for I/O completion
+ * we need to hold the iolock.
+ */
+ if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ lock_flags |= XFS_IOLOCK_SHARED;
+ error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
+ if (flags & SYNC_IOWAIT)
+ xfs_ioend_wait(ip);
+ }
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+ if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
+ if (flags & SYNC_WAIT) {
+ xfs_iflock(ip);
+ if (!xfs_inode_clean(ip))
+ error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
+ else
+ xfs_ifunlock(ip);
+ } else if (xfs_iflock_nowait(ip)) {
+ if (!xfs_inode_clean(ip))
+ error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
+ else
+ xfs_ifunlock(ip);
+ }
+ }
+ xfs_iput(ip, lock_flags);
+
+ if (error)
+ last_error = error;
+ /*
+ * bail out if the filesystem is corrupted.
+ */
+ if (error == EFSCORRUPTED)
+ return XFS_ERROR(error);
+
+ } while (nr_found);
+
+ return last_error;
+}
+
+int
+xfs_sync_inodes(
+ xfs_mount_t *mp,
+ int flags)
+{
+ int error;
+ int last_error;
+ int i;
+ int lflags = XFS_LOG_FORCE;
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return 0;
+ error = 0;
+ last_error = 0;
+
+ if (flags & SYNC_WAIT)
+ lflags |= XFS_LOG_SYNC;
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ if (!mp->m_perag[i].pag_ici_init)
+ continue;
+ error = xfs_sync_inodes_ag(mp, i, flags);
+ if (error)
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ if (flags & SYNC_DELWRI)
+ xfs_log_force(mp, 0, lflags);
+
+ return XFS_ERROR(last_error);
+}
+
+STATIC int
+xfs_commit_dummy_trans(
+ struct xfs_mount *mp,
+ uint log_flags)
+{
+ struct xfs_inode *ip = mp->m_rootip;
+ struct xfs_trans *tp;
+ int error;
+
+ /*
+ * Put a dummy transaction in the log to tell recovery
+ * that all others are OK.
+ */
+ tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
+ error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ihold(tp, ip);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ /* XXX(hch): ignoring the error here.. */
+ error = xfs_trans_commit(tp, 0);
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ xfs_log_force(mp, 0, log_flags);
+ return 0;
+}
+
+int
+xfs_sync_fsdata(
+ struct xfs_mount *mp,
+ int flags)
+{
+ struct xfs_buf *bp;
+ struct xfs_buf_log_item *bip;
+ int error = 0;
+
+ /*
+ * If this is xfssyncd() then only sync the superblock if we can
+ * lock it without sleeping and it is not pinned.
+ */
+ if (flags & SYNC_BDFLUSH) {
+ ASSERT(!(flags & SYNC_WAIT));
+
+ bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
+ if (!bp)
+ goto out;
+
+ bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
+ if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
+ goto out_brelse;
+ } else {
+ bp = xfs_getsb(mp, 0);
+
+ /*
+ * If the buffer is pinned then push on the log so we won't
+ * get stuck waiting in the write for someone, maybe
+ * ourselves, to flush the log.
+ *
+ * Even though we just pushed the log above, we did not have
+ * the superblock buffer locked at that point so it can
+ * become pinned in between there and here.
+ */
+ if (XFS_BUF_ISPINNED(bp))
+ xfs_log_force(mp, 0, XFS_LOG_FORCE);
+ }
+
+
+ if (flags & SYNC_WAIT)
+ XFS_BUF_UNASYNC(bp);
+ else
+ XFS_BUF_ASYNC(bp);
+
+ return xfs_bwrite(mp, bp);
+
+ out_brelse:
+ xfs_buf_relse(bp);
+ out:
+ return error;
+}
+
+/*
+ * When remounting a filesystem read-only or freezing the filesystem, we have
+ * two phases to execute. This first phase is syncing the data before we
+ * quiesce the filesystem, and the second is flushing all the inodes out after
+ * we've waited for all the transactions created by the first phase to
+ * complete. The second phase ensures that the inodes are written to their
+ * location on disk rather than just existing in transactions in the log. This
+ * means after a quiesce there is no log replay required to write the inodes to
+ * disk (this is the main difference between a sync and a quiesce).
+ */
+/*
+ * First stage of freeze - no writers will make progress now we are here,
+ * so we flush delwri and delalloc buffers here, then wait for all I/O to
+ * complete. Data is frozen at that point. Metadata is not frozen,
+ * transactions can still occur here so don't bother flushing the buftarg
+ * because it'll just get dirty again.
+ */
+int
+xfs_quiesce_data(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ /* push non-blocking */
+ xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
+ XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+ xfs_filestream_flush(mp);
+
+ /* push and block */
+ xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
+ XFS_QM_DQSYNC(mp, SYNC_WAIT);
+
+ /* write superblock and hoover up shutdown errors */
+ error = xfs_sync_fsdata(mp, 0);
+
+ /* flush data-only devices */
+ if (mp->m_rtdev_targp)
+ XFS_bflush(mp->m_rtdev_targp);
+
+ return error;
+}
+
+STATIC void
+xfs_quiesce_fs(
+ struct xfs_mount *mp)
+{
+ int count = 0, pincount;
+
+ xfs_flush_buftarg(mp->m_ddev_targp, 0);
+ xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+
+ /*
+ * This loop must run at least twice. The first instance of the loop
+ * will flush most meta data but that will generate more meta data
+ * (typically directory updates). Which then must be flushed and
+ * logged before we can write the unmount record.
+ */
+ do {
+ xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
+ pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+ if (!pincount) {
+ delay(50);
+ count++;
+ }
+ } while (count < 2);
+}
+
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceding.
+ */
+void
+xfs_quiesce_attr(
+ struct xfs_mount *mp)
+{
+ int error = 0;
+
+ /* wait for all modifications to complete */
+ while (atomic_read(&mp->m_active_trans) > 0)
+ delay(100);
+
+ /* flush inodes and push all remaining buffers out to disk */
+ xfs_quiesce_fs(mp);
+
+ ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+
+ /* Push the superblock and write an unmount record */
+ error = xfs_log_sbcount(mp, 1);
+ if (error)
+ xfs_fs_cmn_err(CE_WARN, mp,
+ "xfs_attr_quiesce: failed to log sb changes. "
+ "Frozen image may not be consistent.");
+ xfs_log_unmount_write(mp);
+ xfs_unmountfs_writesb(mp);
+}
+
+/*
+ * Enqueue a work item to be picked up by the vfs xfssyncd thread.
+ * Doing this has two advantages:
+ * - It saves on stack space, which is tight in certain situations
+ * - It can be used (with care) as a mechanism to avoid deadlocks.
+ * Flushing while allocating in a full filesystem requires both.
+ */
+STATIC void
+xfs_syncd_queue_work(
+ struct xfs_mount *mp,
+ void *data,
+ void (*syncer)(struct xfs_mount *, void *))
+{
+ struct bhv_vfs_sync_work *work;
+
+ work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
+ INIT_LIST_HEAD(&work->w_list);
+ work->w_syncer = syncer;
+ work->w_data = data;
+ work->w_mount = mp;
+ spin_lock(&mp->m_sync_lock);
+ list_add_tail(&work->w_list, &mp->m_sync_list);
+ spin_unlock(&mp->m_sync_lock);
+ wake_up_process(mp->m_sync_task);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room...
+ */
+STATIC void
+xfs_flush_inode_work(
+ struct xfs_mount *mp,
+ void *arg)
+{
+ struct inode *inode = arg;
+ filemap_flush(inode->i_mapping);
+ iput(inode);
+}
+
+void
+xfs_flush_inode(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ igrab(inode);
+ xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
+ delay(msecs_to_jiffies(500));
+}
+
+/*
+ * This is the "bigger hammer" version of xfs_flush_inode_work...
+ * (IOW, "If at first you don't succeed, use a Bigger Hammer").
+ */
+STATIC void
+xfs_flush_device_work(
+ struct xfs_mount *mp,
+ void *arg)
+{
+ struct inode *inode = arg;
+ sync_blockdev(mp->m_super->s_bdev);
+ iput(inode);
+}
+
+void
+xfs_flush_device(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ igrab(inode);
+ xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
+ delay(msecs_to_jiffies(500));
+ xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+}
+
+/*
+ * Every sync period we need to unpin all items, reclaim inodes, sync
+ * quota and write out the superblock. We might need to cover the log
+ * to indicate it is idle.
+ */
+STATIC void
+xfs_sync_worker(
+ struct xfs_mount *mp,
+ void *unused)
+{
+ int error;
+
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
+ xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+ /* dgc: errors ignored here */
+ error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+ error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
+ if (xfs_log_need_covered(mp))
+ error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
+ }
+ mp->m_sync_seq++;
+ wake_up(&mp->m_wait_single_sync_task);
+}
+
+STATIC int
+xfssyncd(
+ void *arg)
+{
+ struct xfs_mount *mp = arg;
+ long timeleft;
+ bhv_vfs_sync_work_t *work, *n;
+ LIST_HEAD (tmp);
+
+ set_freezable();
+ timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
+ for (;;) {
+ timeleft = schedule_timeout_interruptible(timeleft);
+ /* swsusp */
+ try_to_freeze();
+ if (kthread_should_stop() && list_empty(&mp->m_sync_list))
+ break;
+
+ spin_lock(&mp->m_sync_lock);
+ /*
+ * We can get woken by laptop mode, to do a sync -
+ * that's the (only!) case where the list would be
+ * empty with time remaining.
+ */
+ if (!timeleft || list_empty(&mp->m_sync_list)) {
+ if (!timeleft)
+ timeleft = xfs_syncd_centisecs *
+ msecs_to_jiffies(10);
+ INIT_LIST_HEAD(&mp->m_sync_work.w_list);
+ list_add_tail(&mp->m_sync_work.w_list,
+ &mp->m_sync_list);
+ }
+ list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
+ list_move(&work->w_list, &tmp);
+ spin_unlock(&mp->m_sync_lock);
+
+ list_for_each_entry_safe(work, n, &tmp, w_list) {
+ (*work->w_syncer)(mp, work->w_data);
+ list_del(&work->w_list);
+ if (work == &mp->m_sync_work)
+ continue;
+ kmem_free(work);
+ }
+ }
+
+ return 0;
+}
+
+int
+xfs_syncd_init(
+ struct xfs_mount *mp)
+{
+ mp->m_sync_work.w_syncer = xfs_sync_worker;
+ mp->m_sync_work.w_mount = mp;
+ mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+ if (IS_ERR(mp->m_sync_task))
+ return -PTR_ERR(mp->m_sync_task);
+ return 0;
+}
+
+void
+xfs_syncd_stop(
+ struct xfs_mount *mp)
+{
+ kthread_stop(mp->m_sync_task);
+}
+
+int
+xfs_reclaim_inode(
+ xfs_inode_t *ip,
+ int locked,
+ int sync_mode)
+{
+ xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
+
+ /* The hash lock here protects a thread in xfs_iget_core from
+ * racing with us on linking the inode back with a vnode.
+ * Once we have the XFS_IRECLAIM flag set it will not touch
+ * us.
+ */
+ write_lock(&pag->pag_ici_lock);
+ spin_lock(&ip->i_flags_lock);
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
+ !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+ spin_unlock(&ip->i_flags_lock);
+ write_unlock(&pag->pag_ici_lock);
+ if (locked) {
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+ return 1;
+ }
+ __xfs_iflags_set(ip, XFS_IRECLAIM);
+ spin_unlock(&ip->i_flags_lock);
+ write_unlock(&pag->pag_ici_lock);
+ xfs_put_perag(ip->i_mount, pag);
+
+ /*
+ * If the inode is still dirty, then flush it out. If the inode
+ * is not in the AIL, then it will be OK to flush it delwri as
+ * long as xfs_iflush() does not keep any references to the inode.
+ * We leave that decision up to xfs_iflush() since it has the
+ * knowledge of whether it's OK to simply do a delwri flush of
+ * the inode or whether we need to wait until the inode is
+ * pulled from the AIL.
+ * We get the flush lock regardless, though, just to make sure
+ * we don't free it while it is being flushed.
+ */
+ if (!locked) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_iflock(ip);
+ }
+
+ /*
+ * In the case of a forced shutdown we rely on xfs_iflush() to
+ * wait for the inode to be unpinned before returning an error.
+ */
+ if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
+ /* synchronize with xfs_iflush_done */
+ xfs_iflock(ip);
+ xfs_ifunlock(ip);
+ }
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_ireclaim(ip);
+ return 0;
+}
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+ xfs_inode_t *ip)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+
+ read_lock(&pag->pag_ici_lock);
+ spin_lock(&ip->i_flags_lock);
+ radix_tree_tag_set(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+ __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+ spin_unlock(&ip->i_flags_lock);
+ read_unlock(&pag->pag_ici_lock);
+ xfs_put_perag(mp, pag);
+}
+
+void
+__xfs_inode_clear_reclaim_tag(
+ xfs_mount_t *mp,
+ xfs_perag_t *pag,
+ xfs_inode_t *ip)
+{
+ radix_tree_tag_clear(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+}
+
+void
+xfs_inode_clear_reclaim_tag(
+ xfs_inode_t *ip)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+
+ read_lock(&pag->pag_ici_lock);
+ spin_lock(&ip->i_flags_lock);
+ __xfs_inode_clear_reclaim_tag(mp, pag, ip);
+ spin_unlock(&ip->i_flags_lock);
+ read_unlock(&pag->pag_ici_lock);
+ xfs_put_perag(mp, pag);
+}
+
+
+STATIC void
+xfs_reclaim_inodes_ag(
+ xfs_mount_t *mp,
+ int ag,
+ int noblock,
+ int mode)
+{
+ xfs_inode_t *ip = NULL;
+ xfs_perag_t *pag = &mp->m_perag[ag];
+ int nr_found;
+ uint32_t first_index;
+ int skipped;
+
+restart:
+ first_index = 0;
+ skipped = 0;
+ do {
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+ (void**)&ip, first_index, 1,
+ XFS_ICI_RECLAIM_TAG);
+
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /* ignore if already under reclaim */
+ if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
+ read_unlock(&pag->pag_ici_lock);
+ continue;
+ }
+
+ if (noblock) {
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+ read_unlock(&pag->pag_ici_lock);
+ continue;
+ }
+ if (xfs_ipincount(ip) ||
+ !xfs_iflock_nowait(ip)) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ read_unlock(&pag->pag_ici_lock);
+ continue;
+ }
+ }
+ read_unlock(&pag->pag_ici_lock);
+
+ /*
+ * hmmm - this is an inode already in reclaim. Do
+ * we even bother catching it here?
+ */
+ if (xfs_reclaim_inode(ip, noblock, mode))
+ skipped++;
+ } while (nr_found);
+
+ if (skipped) {
+ delay(1);
+ goto restart;
+ }
+ return;
+
+}
+
+int
+xfs_reclaim_inodes(
+ xfs_mount_t *mp,
+ int noblock,
+ int mode)
+{
+ int i;
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ if (!mp->m_perag[i].pag_ici_init)
+ continue;
+ xfs_reclaim_inodes_ag(mp, i, noblock, mode);
+ }
+ return 0;
+}
+
+
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
new file mode 100644
index 000000000000..5f6de1efe1f6
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef XFS_SYNC_H
+#define XFS_SYNC_H 1
+
+struct xfs_mount;
+
+typedef struct bhv_vfs_sync_work {
+ struct list_head w_list;
+ struct xfs_mount *w_mount;
+ void *w_data; /* syncer routine argument */
+ void (*w_syncer)(struct xfs_mount *, void *);
+} bhv_vfs_sync_work_t;
+
+#define SYNC_ATTR 0x0001 /* sync attributes */
+#define SYNC_DELWRI 0x0002 /* look at delayed writes */
+#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
+#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
+#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
+
+int xfs_syncd_init(struct xfs_mount *mp);
+void xfs_syncd_stop(struct xfs_mount *mp);
+
+int xfs_sync_inodes(struct xfs_mount *mp, int flags);
+int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
+
+int xfs_quiesce_data(struct xfs_mount *mp);
+void xfs_quiesce_attr(struct xfs_mount *mp);
+
+void xfs_flush_inode(struct xfs_inode *ip);
+void xfs_flush_device(struct xfs_inode *ip);
+
+int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
+int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
+
+void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
+ struct xfs_inode *ip);
+#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7dacb5bbde3f..916c0ffb6083 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -56,17 +56,6 @@ xfs_stats_clear_proc_handler(
static ctl_table xfs_table[] = {
{
- .ctl_name = XFS_RESTRICT_CHOWN,
- .procname = "restrict_chown",
- .data = &xfs_params.restrict_chown.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &xfs_params.restrict_chown.min,
- .extra2 = &xfs_params.restrict_chown.max
- },
- {
.ctl_name = XFS_SGID_INHERIT,
.procname = "irix_sgid_inherit",
.data = &xfs_params.sgid_inherit.val,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 4aadb8056c37..b9937d450f8e 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val {
} xfs_sysctl_val_t;
typedef struct xfs_param {
- xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
* not a member of parent dir GID. */
xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
@@ -68,7 +67,7 @@ typedef struct xfs_param {
enum {
/* XFS_REFCACHE_SIZE = 1 */
/* XFS_REFCACHE_PURGE = 2 */
- XFS_RESTRICT_CHOWN = 3,
+ /* XFS_RESTRICT_CHOWN = 3 */
XFS_SGID_INHERIT = 4,
XFS_SYMLINK_MODE = 5,
XFS_PANIC_MASK = 6,
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
deleted file mode 100644
index 7e60c7776b1c..000000000000
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_VFS_H__
-#define __XFS_VFS_H__
-
-#include <linux/vfs.h>
-#include "xfs_fs.h"
-
-struct inode;
-
-struct fid;
-struct cred;
-struct seq_file;
-struct super_block;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_mount_args;
-
-typedef struct kstatfs bhv_statvfs_t;
-
-typedef struct bhv_vfs_sync_work {
- struct list_head w_list;
- struct xfs_mount *w_mount;
- void *w_data; /* syncer routine argument */
- void (*w_syncer)(struct xfs_mount *, void *);
-} bhv_vfs_sync_work_t;
-
-#define SYNC_ATTR 0x0001 /* sync attributes */
-#define SYNC_CLOSE 0x0002 /* close file system down */
-#define SYNC_DELWRI 0x0004 /* look at delayed writes */
-#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
-#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
-#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
-#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
-#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
-#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
-
-/*
- * When remounting a filesystem read-only or freezing the filesystem,
- * we have two phases to execute. This first phase is syncing the data
- * before we quiesce the fielsystem, and the second is flushing all the
- * inodes out after we've waited for all the transactions created by
- * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
- * to ensure that the inodes are written to their location on disk
- * rather than just existing in transactions in the log. This means
- * after a quiesce there is no log replay required to write the inodes
- * to disk (this is the main difference between a sync and a quiesce).
- */
-#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
-#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
-
-#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
-#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
-#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
-#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */
-#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
-#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
-
-#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen)
-#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l))
-
-#endif /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
deleted file mode 100644
index b52528bbbfff..000000000000
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-
-/*
- * And this gunk is needed for xfs_mount.h"
- */
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_dmapi.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-
-
-/*
- * Dedicated vnode inactive/reclaim sync wait queues.
- * Prime number of hash buckets since address is used as the key.
- */
-#define NVSYNC 37
-#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
-static wait_queue_head_t vsync[NVSYNC];
-
-void __init
-vn_init(void)
-{
- int i;
-
- for (i = 0; i < NVSYNC; i++)
- init_waitqueue_head(&vsync[i]);
-}
-
-void
-vn_iowait(
- xfs_inode_t *ip)
-{
- wait_queue_head_t *wq = vptosync(ip);
-
- wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
-}
-
-void
-vn_iowake(
- xfs_inode_t *ip)
-{
- if (atomic_dec_and_test(&ip->i_iocount))
- wake_up(vptosync(ip));
-}
-
-/*
- * Volume managers supporting multiple paths can send back ENODEV when the
- * final path disappears. In this case continuing to fill the page cache
- * with dirty data which cannot be written out is evil, so prevent that.
- */
-void
-vn_ioerror(
- xfs_inode_t *ip,
- int error,
- char *f,
- int l)
-{
- if (unlikely(error == -ENODEV))
- xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
-}
-
-#ifdef XFS_INODE_TRACE
-
-/*
- * Reference count of Linux inode if present, -1 if the xfs_inode
- * has no associated Linux inode.
- */
-static inline int xfs_icount(struct xfs_inode *ip)
-{
- struct inode *vp = VFS_I(ip);
-
- if (vp)
- return vn_count(vp);
- return -1;
-}
-
-#define KTRACE_ENTER(ip, vk, s, line, ra) \
- ktrace_enter( (ip)->i_trace, \
-/* 0 */ (void *)(__psint_t)(vk), \
-/* 1 */ (void *)(s), \
-/* 2 */ (void *)(__psint_t) line, \
-/* 3 */ (void *)(__psint_t)xfs_icount(ip), \
-/* 4 */ (void *)(ra), \
-/* 5 */ NULL, \
-/* 6 */ (void *)(__psint_t)current_cpu(), \
-/* 7 */ (void *)(__psint_t)current_pid(), \
-/* 8 */ (void *)__return_address, \
-/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-
-/*
- * Vnode tracing code.
- */
-void
-_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
-{
- KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
-}
-
-void
-_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
-{
- KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
-}
-
-void
-xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
-{
- KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
-}
-
-void
-_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
-{
- KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
-}
-
-void
-xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
-{
- KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
-}
-#endif /* XFS_INODE_TRACE */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 683ce16210ff..f65983a230d3 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -18,7 +18,10 @@
#ifndef __XFS_VNODE_H__
#define __XFS_VNODE_H__
+#include "xfs_fs.h"
+
struct file;
+struct xfs_inode;
struct xfs_iomap;
struct attrlist_cursor_kern;
@@ -51,40 +54,6 @@ struct attrlist_cursor_kern;
Prevent VM access to the pages until
the operation completes. */
-
-extern void vn_init(void);
-
-/*
- * Yeah, these don't take vnode anymore at all, all this should be
- * cleaned up at some point.
- */
-extern void vn_iowait(struct xfs_inode *ip);
-extern void vn_iowake(struct xfs_inode *ip);
-extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l);
-
-static inline int vn_count(struct inode *vp)
-{
- return atomic_read(&vp->i_count);
-}
-
-#define IHOLD(ip) \
-do { \
- ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
- atomic_inc(&(VFS_I(ip)->i_count)); \
- xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
-} while (0)
-
-#define IRELE(ip) \
-do { \
- xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
- iput(VFS_I(ip)); \
-} while (0)
-
-static inline struct inode *vn_grab(struct inode *vp)
-{
- return igrab(vp);
-}
-
/*
* Dealing with bad inodes
*/
@@ -121,39 +90,4 @@ static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
PAGECACHE_TAG_DIRTY)
-/*
- * Tracking vnode activity.
- */
-#if defined(XFS_INODE_TRACE)
-
-#define INODE_TRACE_SIZE 16 /* number of trace entries */
-#define INODE_KTRACE_ENTRY 1
-#define INODE_KTRACE_EXIT 2
-#define INODE_KTRACE_HOLD 3
-#define INODE_KTRACE_REF 4
-#define INODE_KTRACE_RELE 5
-
-extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
-extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
-extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
-extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
-extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
-#define xfs_itrace_entry(ip) \
- _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
-#define xfs_itrace_exit(ip) \
- _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
-#define xfs_itrace_exit_tag(ip, tag) \
- _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
-#define xfs_itrace_ref(ip) \
- _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
-
-#else
-#define xfs_itrace_entry(a)
-#define xfs_itrace_exit(a)
-#define xfs_itrace_exit_tag(a, b)
-#define xfs_itrace_hold(a, b, c, d)
-#define xfs_itrace_ref(a)
-#define xfs_itrace_rele(a, b, c, d)
-#endif
-
#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index f2705f2fd43c..591ca6602bfb 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
if (brandnewdquot) {
dqp->dq_flnext = dqp->dq_flprev = dqp;
mutex_init(&dqp->q_qlock);
- sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
+ init_waitqueue_head(&dqp->q_pinwait);
/*
* Because we want to use a counting completion, complete
@@ -131,7 +131,7 @@ xfs_qm_dqinit(
dqp->q_res_bcount = 0;
dqp->q_res_icount = 0;
dqp->q_res_rtbcount = 0;
- dqp->q_pincount = 0;
+ atomic_set(&dqp->q_pincount, 0);
dqp->q_hash = NULL;
ASSERT(dqp->dq_flnext == dqp->dq_flprev);
@@ -1221,16 +1221,14 @@ xfs_qm_dqflush(
xfs_dqtrace_entry(dqp, "DQFLUSH");
/*
- * If not dirty, nada.
+ * If not dirty, or it's pinned and we are not supposed to
+ * block, nada.
*/
- if (!XFS_DQ_IS_DIRTY(dqp)) {
+ if (!XFS_DQ_IS_DIRTY(dqp) ||
+ (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) {
xfs_dqfunlock(dqp);
- return (0);
+ return 0;
}
-
- /*
- * Cant flush a pinned dquot. Wait for it.
- */
xfs_qm_dqunpin_wait(dqp);
/*
@@ -1274,10 +1272,8 @@ xfs_qm_dqflush(
dqp->dq_flags &= ~(XFS_DQ_DIRTY);
mp = dqp->q_mount;
- /* lsn is 64 bits */
- spin_lock(&mp->m_ail_lock);
- dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
- spin_unlock(&mp->m_ail_lock);
+ xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
+ &dqp->q_logitem.qli_item.li_lsn);
/*
* Attach an iodone routine so that we can remove this dquot from the
@@ -1323,8 +1319,10 @@ xfs_qm_dqflush_done(
xfs_dq_logitem_t *qip)
{
xfs_dquot_t *dqp;
+ struct xfs_ail *ailp;
dqp = qip->qli_dquot;
+ ailp = qip->qli_item.li_ailp;
/*
* We only want to pull the item from the AIL if its
@@ -1337,15 +1335,12 @@ xfs_qm_dqflush_done(
if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
qip->qli_item.li_lsn == qip->qli_flush_lsn) {
- spin_lock(&dqp->q_mount->m_ail_lock);
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ spin_lock(&ailp->xa_lock);
if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
- xfs_trans_delete_ail(dqp->q_mount,
- (xfs_log_item_t*)qip);
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
else
- spin_unlock(&dqp->q_mount->m_ail_lock);
+ spin_unlock(&ailp->xa_lock);
}
/*
@@ -1375,7 +1370,7 @@ xfs_dqunlock(
mutex_unlock(&(dqp->q_qlock));
if (dqp->q_logitem.qli_dquot == dqp) {
/* Once was dqp->q_mount, but might just have been cleared */
- xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp,
+ xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
(xfs_log_item_t*)&(dqp->q_logitem));
}
}
@@ -1489,7 +1484,7 @@ xfs_qm_dqpurge(
"xfs_qm_dqpurge: dquot %p flush failed", dqp);
xfs_dqflock(dqp);
}
- ASSERT(dqp->q_pincount == 0);
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
!(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 8958d0faf8d3..7e455337e2ba 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -83,8 +83,8 @@ typedef struct xfs_dquot {
xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
mutex_t q_qlock; /* quota lock */
struct completion q_flush; /* flush completion queue */
- uint q_pincount; /* pin count for this dquot */
- sv_t q_pinwait; /* sync var for pinning */
+ atomic_t q_pincount; /* dquot pin count */
+ wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
#ifdef XFS_DQUOT_TRACE
struct ktrace *q_trace; /* trace header structure */
#endif
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index f028644caa5e..1728f6a7c4f5 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -88,25 +88,22 @@ xfs_qm_dquot_logitem_format(
/*
* Increment the pin count of the given dquot.
- * This value is protected by pinlock spinlock in the xQM structure.
*/
STATIC void
xfs_qm_dquot_logitem_pin(
xfs_dq_logitem_t *logitem)
{
- xfs_dquot_t *dqp;
+ xfs_dquot_t *dqp = logitem->qli_dquot;
- dqp = logitem->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- dqp->q_pincount++;
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
+ atomic_inc(&dqp->q_pincount);
}
/*
* Decrement the pin count of the given dquot, and wake up
* anyone in xfs_dqwait_unpin() if the count goes to 0. The
- * dquot must have been previously pinned with a call to xfs_dqpin().
+ * dquot must have been previously pinned with a call to
+ * xfs_qm_dquot_logitem_pin().
*/
/* ARGSUSED */
STATIC void
@@ -114,16 +111,11 @@ xfs_qm_dquot_logitem_unpin(
xfs_dq_logitem_t *logitem,
int stale)
{
- xfs_dquot_t *dqp;
+ xfs_dquot_t *dqp = logitem->qli_dquot;
- dqp = logitem->qli_dquot;
- ASSERT(dqp->q_pincount > 0);
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- dqp->q_pincount--;
- if (dqp->q_pincount == 0) {
- sv_broadcast(&dqp->q_pinwait);
- }
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
+ ASSERT(atomic_read(&dqp->q_pincount) > 0);
+ if (atomic_dec_and_test(&dqp->q_pincount))
+ wake_up(&dqp->q_pinwait);
}
/* ARGSUSED */
@@ -193,21 +185,14 @@ xfs_qm_dqunpin_wait(
xfs_dquot_t *dqp)
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- if (dqp->q_pincount == 0) {
+ if (atomic_read(&dqp->q_pincount) == 0)
return;
- }
/*
* Give the log a push so we don't wait here too long.
*/
xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- if (dqp->q_pincount == 0) {
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- return;
- }
- sv_wait(&(dqp->q_pinwait), PINOD,
- &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
+ wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}
/*
@@ -310,7 +295,7 @@ xfs_qm_dquot_logitem_trylock(
uint retval;
dqp = qip->qli_dquot;
- if (dqp->q_pincount > 0)
+ if (atomic_read(&dqp->q_pincount) > 0)
return (XFS_ITEM_PINNED);
if (! xfs_qm_dqlock_nowait(dqp))
@@ -568,14 +553,16 @@ xfs_qm_qoffend_logitem_committed(
xfs_lsn_t lsn)
{
xfs_qoff_logitem_t *qfs;
+ struct xfs_ail *ailp;
qfs = qfe->qql_start_lip;
- spin_lock(&qfs->qql_item.li_mountp->m_ail_lock);
+ ailp = qfs->qql_item.li_ailp;
+ spin_lock(&ailp->xa_lock);
/*
* Delete the qoff-start logitem from the AIL.
- * xfs_trans_delete_ail() drops the AIL lock.
+ * xfs_trans_ail_delete() drops the AIL lock.
*/
- xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
+ xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
kmem_free(qfs);
kmem_free(qfe);
return (xfs_lsn_t)-1;
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index df0ffef9775a..6b13960cf318 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -20,7 +20,6 @@
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
-#include "xfs_clnt.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
@@ -396,13 +395,10 @@ xfs_qm_mount_quotas(
/*
* Called from the vfsops layer.
*/
-int
+void
xfs_qm_unmount_quotas(
xfs_mount_t *mp)
{
- xfs_inode_t *uqp, *gqp;
- int error = 0;
-
/*
* Release the dquots that root inode, et al might be holding,
* before we flush quotas and blow away the quotainfo structure.
@@ -415,43 +411,18 @@ xfs_qm_unmount_quotas(
xfs_qm_dqdetach(mp->m_rsumip);
/*
- * Flush out the quota inodes.
+ * Release the quota inodes.
*/
- uqp = gqp = NULL;
if (mp->m_quotainfo) {
- if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) {
- xfs_ilock(uqp, XFS_ILOCK_EXCL);
- xfs_iflock(uqp);
- error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
- xfs_iunlock(uqp, XFS_ILOCK_EXCL);
- if (unlikely(error == EFSCORRUPTED)) {
- XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
- XFS_ERRLEVEL_LOW, mp);
- goto out;
- }
+ if (mp->m_quotainfo->qi_uquotaip) {
+ IRELE(mp->m_quotainfo->qi_uquotaip);
+ mp->m_quotainfo->qi_uquotaip = NULL;
}
- if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) {
- xfs_ilock(gqp, XFS_ILOCK_EXCL);
- xfs_iflock(gqp);
- error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
- xfs_iunlock(gqp, XFS_ILOCK_EXCL);
- if (unlikely(error == EFSCORRUPTED)) {
- XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
- XFS_ERRLEVEL_LOW, mp);
- goto out;
- }
+ if (mp->m_quotainfo->qi_gquotaip) {
+ IRELE(mp->m_quotainfo->qi_gquotaip);
+ mp->m_quotainfo->qi_gquotaip = NULL;
}
}
- if (uqp) {
- IRELE(uqp);
- mp->m_quotainfo->qi_uquotaip = NULL;
- }
- if (gqp) {
- IRELE(gqp);
- mp->m_quotainfo->qi_gquotaip = NULL;
- }
-out:
- return XFS_ERROR(error);
}
/*
@@ -987,14 +958,10 @@ xfs_qm_dqdetach(
}
/*
- * This is called by VFS_SYNC and flags arg determines the caller,
- * and its motives, as done in xfs_sync.
- *
- * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
- * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
- * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
+ * This is called to sync quotas. We can be told to use non-blocking
+ * semantics by either the SYNC_BDFLUSH flag or the absence of the
+ * SYNC_WAIT flag.
*/
-
int
xfs_qm_sync(
xfs_mount_t *mp,
@@ -1137,7 +1104,6 @@ xfs_qm_init_quotainfo(
return error;
}
- spin_lock_init(&qinf->qi_pinlock);
xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
qinf->qi_dqreclaims = 0;
@@ -1234,7 +1200,6 @@ xfs_qm_destroy_quotainfo(
*/
xfs_qm_rele_quotafs_ref(mp);
- spinlock_destroy(&qi->qi_pinlock);
xfs_qm_list_destroy(&qi->qi_dqlist);
if (qi->qi_uquotaip) {
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 44f25349e478..ddf09166387c 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -106,7 +106,6 @@ typedef struct xfs_qm {
typedef struct xfs_quotainfo {
xfs_inode_t *qi_uquotaip; /* user quota inode */
xfs_inode_t *qi_gquotaip; /* group quota inode */
- spinlock_t qi_pinlock; /* dquot pinning lock */
xfs_dqlist_t qi_dqlist; /* all dquots in filesys */
int qi_dqreclaims; /* a change here indicates
a removal in the dqlist */
@@ -168,7 +167,7 @@ extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
extern void xfs_qm_mount_quotas(xfs_mount_t *);
extern int xfs_qm_quotacheck(xfs_mount_t *);
extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
-extern int xfs_qm_unmount_quotas(xfs_mount_t *);
+extern void xfs_qm_unmount_quotas(xfs_mount_t *);
extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
extern int xfs_qm_sync(xfs_mount_t *, int);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index eea2e60b456b..bc6c5cca3e12 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -20,7 +20,6 @@
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
-#include "xfs_clnt.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
@@ -51,7 +50,7 @@
STATIC void
xfs_fill_statvfs_from_dquot(
- bhv_statvfs_t *statp,
+ struct kstatfs *statp,
xfs_disk_dquot_t *dp)
{
__uint64_t limit;
@@ -88,7 +87,7 @@ xfs_fill_statvfs_from_dquot(
STATIC void
xfs_qm_statvfs(
xfs_inode_t *ip,
- bhv_statvfs_t *statp)
+ struct kstatfs *statp)
{
xfs_mount_t *mp = ip->i_mount;
xfs_dquot_t *dqp;
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 1a3b803dfa55..68139b38aede 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -127,7 +127,7 @@ xfs_qm_quotactl(
break;
case Q_XQUOTASYNC:
- return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL));
+ return xfs_sync_inodes(mp, SYNC_DELWRI);
default:
break;
@@ -1022,101 +1022,104 @@ xfs_qm_export_flags(
/*
- * Go thru all the inodes in the file system, releasing their dquots.
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
+ * Release all the dquots on the inodes in an AG.
*/
-void
-xfs_qm_dqrele_all_inodes(
- struct xfs_mount *mp,
- uint flags)
+STATIC void
+xfs_qm_dqrele_inodes_ag(
+ xfs_mount_t *mp,
+ int ag,
+ uint flags)
{
- xfs_inode_t *ip, *topino;
- uint ireclaims;
- struct inode *vp;
- boolean_t vnode_refd;
+ xfs_inode_t *ip = NULL;
+ xfs_perag_t *pag = &mp->m_perag[ag];
+ int first_index = 0;
+ int nr_found;
- ASSERT(mp->m_quotainfo);
-
- XFS_MOUNT_ILOCK(mp);
-again:
- ip = mp->m_inodes;
- if (ip == NULL) {
- XFS_MOUNT_IUNLOCK(mp);
- return;
- }
do {
- /* Skip markers inserted by xfs_sync */
- if (ip->i_mount == NULL) {
- ip = ip->i_mnext;
- continue;
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+ (void**)&ip, first_index, 1);
+
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
}
- /* Root inode, rbmip and rsumip have associated blocks */
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
+ read_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /* skip quota inodes */
if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
ASSERT(ip->i_udquot == NULL);
ASSERT(ip->i_gdquot == NULL);
- ip = ip->i_mnext;
+ read_unlock(&pag->pag_ici_lock);
continue;
}
- vp = VFS_I(ip);
- if (!vp) {
- ASSERT(ip->i_udquot == NULL);
- ASSERT(ip->i_gdquot == NULL);
- ip = ip->i_mnext;
+
+ /*
+ * If we can't get a reference on the inode, it must be
+ * in reclaim. Leave it for the reclaim code to flush.
+ */
+ if (!igrab(VFS_I(ip))) {
+ read_unlock(&pag->pag_ici_lock);
continue;
}
- vnode_refd = B_FALSE;
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
- ireclaims = mp->m_ireclaims;
- topino = mp->m_inodes;
- vp = vn_grab(vp);
- if (!vp)
- goto again;
-
- XFS_MOUNT_IUNLOCK(mp);
- /* XXX restart limit ? */
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- vnode_refd = B_TRUE;
- } else {
- ireclaims = mp->m_ireclaims;
- topino = mp->m_inodes;
- XFS_MOUNT_IUNLOCK(mp);
+ read_unlock(&pag->pag_ici_lock);
+
+ /* avoid new inodes though we shouldn't find any here */
+ if (xfs_iflags_test(ip, XFS_INEW)) {
+ IRELE(ip);
+ continue;
}
- /*
- * We don't keep the mountlock across the dqrele() call,
- * since it can take a while..
- */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
xfs_qm_dqrele(ip->i_udquot);
ip->i_udquot = NULL;
}
- if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+ if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
+ ip->i_gdquot) {
xfs_qm_dqrele(ip->i_gdquot);
ip->i_gdquot = NULL;
}
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /*
- * Wait until we've dropped the ilock and mountlock to
- * do the vn_rele. Or be condemned to an eternity in the
- * inactive code in hell.
- */
- if (vnode_refd)
- IRELE(ip);
- XFS_MOUNT_ILOCK(mp);
- /*
- * If an inode was inserted or removed, we gotta
- * start over again.
- */
- if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
- /* XXX use a sentinel */
- goto again;
- }
- ip = ip->i_mnext;
- } while (ip != mp->m_inodes);
+ xfs_iput(ip, XFS_ILOCK_EXCL);
+
+ } while (nr_found);
+}
+
+/*
+ * Go thru all the inodes in the file system, releasing their dquots.
+ * Note that the mount structure gets modified to indicate that quotas are off
+ * AFTER this, in the case of quotaoff. This also gets called from
+ * xfs_rootumount.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+ struct xfs_mount *mp,
+ uint flags)
+{
+ int i;
- XFS_MOUNT_IUNLOCK(mp);
+ ASSERT(mp->m_quotainfo);
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ if (!mp->m_perag[i].pag_ici_init)
+ continue;
+ xfs_qm_dqrele_inodes_ag(mp, i, flags);
+ }
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index c27abef7b84f..ae5482965424 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -18,6 +18,13 @@
#include <xfs.h>
#include "debug.h"
+/* xfs_mount.h drags a lot of crap in, sorry.. */
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+
static char message[1024]; /* keep it off the stack */
static DEFINE_SPINLOCK(xfs_err_lock);
@@ -55,22 +62,42 @@ cmn_err(register int level, char *fmt, ...)
}
void
-icmn_err(register int level, char *fmt, va_list ap)
+xfs_fs_vcmn_err(
+ int level,
+ struct xfs_mount *mp,
+ char *fmt,
+ va_list ap)
{
- ulong flags;
- int len;
+ unsigned long flags;
+ int len = 0;
level &= XFS_ERR_MASK;
- if(level > XFS_MAX_ERR_LEVEL)
+ if (level > XFS_MAX_ERR_LEVEL)
level = XFS_MAX_ERR_LEVEL;
+
spin_lock_irqsave(&xfs_err_lock,flags);
- len = vsnprintf(message, sizeof(message), fmt, ap);
+
+ if (mp) {
+ len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname);
+
+ /*
+ * Skip the printk if we can't print anything useful
+ * due to an over-long device name.
+ */
+ if (len >= sizeof(message))
+ goto out;
+ }
+
+ len = vsnprintf(message + len, sizeof(message) - len, fmt, ap);
if (len >= sizeof(message))
len = sizeof(message) - 1;
if (message[len-1] == '\n')
message[len-1] = 0;
+
printk("%s%s\n", err_level[level], message);
+ out:
spin_unlock_irqrestore(&xfs_err_lock,flags);
+
BUG_ON(level == CE_PANIC);
}
@@ -84,5 +111,5 @@ assfail(char *expr, char *file, int line)
void
xfs_hex_dump(void *p, int length)
{
- print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
+ print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
}
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 75845f950814..6f4fd37c67af 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -27,8 +27,6 @@
#define CE_ALERT 1 /* alert */
#define CE_PANIC 0 /* panic */
-extern void icmn_err(int, char *, va_list)
- __attribute__ ((format (printf, 2, 0)));
extern void cmn_err(int, char *, ...)
__attribute__ ((format (printf, 2, 3)));
extern void assfail(char *expr, char *f, int l);
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index a34ef05489b1..2d494c26717f 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -113,21 +113,16 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
void
ktrace_free(ktrace_t *ktp)
{
- int entries_size;
-
if (ktp == (ktrace_t *)NULL)
return;
/*
* Special treatment for the Vnode trace buffer.
*/
- if (ktp->kt_nentries == ktrace_zentries) {
+ if (ktp->kt_nentries == ktrace_zentries)
kmem_zone_free(ktrace_ent_zone, ktp->kt_entries);
- } else {
- entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
-
+ else
kmem_free(ktp->kt_entries);
- }
kmem_zone_free(ktrace_hdr_zone, ktp);
}
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 540e4c989825..17254b529c54 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -30,7 +30,7 @@
#define XFS_ATTR_TRACE 1
#define XFS_BLI_TRACE 1
#define XFS_BMAP_TRACE 1
-#define XFS_BMBT_TRACE 1
+#define XFS_BTREE_TRACE 1
#define XFS_DIR2_TRACE 1
#define XFS_DQUOT_TRACE 1
#define XFS_ILOCK_TRACE 1
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 91d69338d3b2..a8cdd73999a4 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -758,7 +758,7 @@ xfs_acl_setmode(
if (gap && nomask)
iattr.ia_mode |= gap->ae_perm << 3;
- return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred);
+ return xfs_setattr(XFS_I(vp), &iattr, 0);
}
/*
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 61b292a9fb41..f2e21817a226 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -91,6 +91,8 @@ typedef struct xfs_agf {
#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp))
+extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
/*
* Size of the unlinked inode hash table in the agi.
@@ -142,6 +144,9 @@ typedef struct xfs_agi {
#define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp))
+extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, struct xfs_buf **bpp);
+
/*
* The third a.g. block contains the a.g. freelist, an array
* of block pointers to blocks owned by the allocation btree code.
@@ -192,17 +197,23 @@ typedef struct xfs_perag
xfs_agino_t pagi_freecount; /* number of free inodes */
xfs_agino_t pagi_count; /* number of allocated inodes */
int pagb_count; /* pagb slots in use */
+ xfs_perag_busy_t *pagb_list; /* unstable blocks */
#ifdef __KERNEL__
spinlock_t pagb_lock; /* lock for pagb_list */
-#endif
- xfs_perag_busy_t *pagb_list; /* unstable blocks */
+
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
int pag_ici_init; /* incore inode cache initialised */
rwlock_t pag_ici_lock; /* incore inode lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
+#endif
} xfs_perag_t;
+/*
+ * tags for inode radix tree
+ */
+#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
+
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
(MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 1956f83489f1..028e44e58ea9 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -90,6 +90,92 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
*/
/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_eq(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int /* error */
+xfs_alloc_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int /* error */
+xfs_alloc_update(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len) /* length of extent */
+{
+ union xfs_btree_rec rec;
+
+ rec.alloc.ar_startblock = cpu_to_be32(bno);
+ rec.alloc.ar_blockcount = cpu_to_be32(len);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+STATIC int /* error */
+xfs_alloc_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t *bno, /* output: starting block of extent */
+ xfs_extlen_t *len, /* output: length of extent */
+ int *stat) /* output: success/failure */
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (!error && *stat == 1) {
+ *bno = be32_to_cpu(rec->alloc.ar_startblock);
+ *len = be32_to_cpu(rec->alloc.ar_blockcount);
+ }
+ return error;
+}
+
+/*
* Compute aligned version of the found extent.
* Takes alignment and min length into account.
*/
@@ -294,21 +380,20 @@ xfs_alloc_fixup_trees(
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
+
#ifdef DEBUG
- {
- xfs_alloc_block_t *bnoblock;
- xfs_alloc_block_t *cntblock;
-
- if (bno_cur->bc_nlevels == 1 &&
- cnt_cur->bc_nlevels == 1) {
- bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
- cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
- XFS_WANT_CORRUPTED_RETURN(
- be16_to_cpu(bnoblock->bb_numrecs) ==
- be16_to_cpu(cntblock->bb_numrecs));
- }
+ if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
+ struct xfs_btree_block *bnoblock;
+ struct xfs_btree_block *cntblock;
+
+ bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
+ cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
+
+ XFS_WANT_CORRUPTED_RETURN(
+ bnoblock->bb_numrecs == cntblock->bb_numrecs);
}
#endif
+
/*
* Deal with all four cases: the allocated record is contained
* within the freespace record, so we can have new freespace
@@ -333,7 +418,7 @@ xfs_alloc_fixup_trees(
/*
* Delete the entry from the by-size btree.
*/
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
/*
@@ -343,7 +428,7 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
@@ -351,7 +436,7 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
@@ -362,7 +447,7 @@ xfs_alloc_fixup_trees(
/*
* No remaining freespace, just delete the by-block tree entry.
*/
- if ((error = xfs_alloc_delete(bno_cur, &i)))
+ if ((error = xfs_btree_delete(bno_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
} else {
@@ -379,7 +464,7 @@ xfs_alloc_fixup_trees(
if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 0);
- if ((error = xfs_alloc_insert(bno_cur, &i)))
+ if ((error = xfs_btree_insert(bno_cur, &i)))
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
}
@@ -640,8 +725,8 @@ xfs_alloc_ag_vextent_exact(
/*
* Allocate/initialize a cursor for the by-number freespace btree.
*/
- bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
/*
* Lookup bno and minlen in the btree (minlen is irrelevant, really).
* Look for the closest free block <= bno, it must contain bno
@@ -696,8 +781,8 @@ xfs_alloc_ag_vextent_exact(
* We are allocating agbno for rlen [agbno .. end]
* Allocate/initialize a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
ASSERT(args->agbno + args->len <=
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
@@ -759,8 +844,8 @@ xfs_alloc_ag_vextent_near(
/*
* Get a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
ltlen = 0;
bno_cur_lt = bno_cur_gt = NULL;
/*
@@ -818,7 +903,7 @@ xfs_alloc_ag_vextent_near(
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (ltlen >= args->minlen)
break;
- if ((error = xfs_alloc_increment(cnt_cur, 0, &i)))
+ if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
goto error0;
} while (i);
ASSERT(ltlen >= args->minlen);
@@ -828,7 +913,7 @@ xfs_alloc_ag_vextent_near(
i = cnt_cur->bc_ptrs[0];
for (j = 1, blen = 0, bdiff = 0;
!error && j && (blen < args->maxlen || bdiff > 0);
- error = xfs_alloc_increment(cnt_cur, 0, &j)) {
+ error = xfs_btree_increment(cnt_cur, 0, &j)) {
/*
* For each entry, decide if it's better than
* the previous best entry.
@@ -886,8 +971,8 @@ xfs_alloc_ag_vextent_near(
/*
* Set up a cursor for the by-bno tree.
*/
- bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
- args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
+ args->agbp, args->agno, XFS_BTNUM_BNO);
/*
* Fix up the btree entries.
*/
@@ -914,8 +999,8 @@ xfs_alloc_ag_vextent_near(
/*
* Allocate and initialize the cursor for the leftward search.
*/
- bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
/*
* Lookup <= bno to find the leftward search's starting point.
*/
@@ -938,7 +1023,7 @@ xfs_alloc_ag_vextent_near(
* Increment the cursor, so we will point at the entry just right
* of the leftward entry if any, or to the leftmost entry.
*/
- if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+ if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
/*
@@ -961,7 +1046,7 @@ xfs_alloc_ag_vextent_near(
args->minlen, &ltbnoa, &ltlena);
if (ltlena >= args->minlen)
break;
- if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
goto error0;
if (!i) {
xfs_btree_del_cursor(bno_cur_lt,
@@ -977,7 +1062,7 @@ xfs_alloc_ag_vextent_near(
args->minlen, &gtbnoa, &gtlena);
if (gtlena >= args->minlen)
break;
- if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+ if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
xfs_btree_del_cursor(bno_cur_gt,
@@ -1066,7 +1151,7 @@ xfs_alloc_ag_vextent_near(
/*
* Fell off the right end.
*/
- if ((error = xfs_alloc_increment(
+ if ((error = xfs_btree_increment(
bno_cur_gt, 0, &i)))
goto error0;
if (!i) {
@@ -1162,7 +1247,7 @@ xfs_alloc_ag_vextent_near(
/*
* Fell off the left end.
*/
- if ((error = xfs_alloc_decrement(
+ if ((error = xfs_btree_decrement(
bno_cur_lt, 0, &i)))
goto error0;
if (!i) {
@@ -1267,8 +1352,8 @@ xfs_alloc_ag_vextent_size(
/*
* Allocate and initialize a cursor for the by-size btree.
*/
- cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT, NULL, 0);
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_CNT);
bno_cur = NULL;
/*
* Look for an entry >= maxlen+alignment-1 blocks.
@@ -1321,7 +1406,7 @@ xfs_alloc_ag_vextent_size(
bestflen = flen;
bestfbno = fbno;
for (;;) {
- if ((error = xfs_alloc_decrement(cnt_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
goto error0;
if (i == 0)
break;
@@ -1372,8 +1457,8 @@ xfs_alloc_ag_vextent_size(
/*
* Allocate and initialize a cursor for the by-block tree.
*/
- bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO, NULL, 0);
+ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->agno, XFS_BTNUM_BNO);
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
rbno, rlen, XFSA_FIXUP_CNT_OK)))
goto error0;
@@ -1416,7 +1501,7 @@ xfs_alloc_ag_vextent_small(
xfs_extlen_t flen;
int i;
- if ((error = xfs_alloc_decrement(ccur, 0, &i)))
+ if ((error = xfs_btree_decrement(ccur, 0, &i)))
goto error0;
if (i) {
if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
@@ -1515,8 +1600,7 @@ xfs_free_ag_extent(
/*
* Allocate and initialize a cursor for the by-block btree.
*/
- bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL,
- 0);
+ bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
cnt_cur = NULL;
/*
* Look for a neighboring block on the left (lower block numbers)
@@ -1549,7 +1633,7 @@ xfs_free_ag_extent(
* Look for a neighboring block on the right (higher block numbers)
* that is contiguous with this space.
*/
- if ((error = xfs_alloc_increment(bno_cur, 0, &haveright)))
+ if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
goto error0;
if (haveright) {
/*
@@ -1575,8 +1659,7 @@ xfs_free_ag_extent(
/*
* Now allocate and initialize a cursor for the by-size tree.
*/
- cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL,
- 0);
+ cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
/*
* Have both left and right contiguous neighbors.
* Merge all three into a single free block.
@@ -1588,7 +1671,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
@@ -1597,19 +1680,19 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Delete the old by-block entry for the right block.
*/
- if ((error = xfs_alloc_delete(bno_cur, &i)))
+ if ((error = xfs_btree_delete(bno_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Move the by-block cursor back to the left neighbor.
*/
- if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
#ifdef DEBUG
@@ -1648,14 +1731,14 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
* Back up the by-block cursor to the left neighbor, and
* update its length.
*/
- if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+ if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
nbno = ltbno;
@@ -1674,7 +1757,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_delete(cnt_cur, &i)))
+ if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
/*
@@ -1693,7 +1776,7 @@ xfs_free_ag_extent(
else {
nbno = bno;
nlen = len;
- if ((error = xfs_alloc_insert(bno_cur, &i)))
+ if ((error = xfs_btree_insert(bno_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
}
@@ -1705,7 +1788,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
- if ((error = xfs_alloc_insert(cnt_cur, &i)))
+ if ((error = xfs_btree_insert(cnt_cur, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -2150,51 +2233,83 @@ xfs_alloc_put_freelist(
* Read in the allocation group header (free/alloc section).
*/
int /* error */
-xfs_alloc_read_agf(
- xfs_mount_t *mp, /* mount point structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- int flags, /* XFS_ALLOC_FLAG_... */
- xfs_buf_t **bpp) /* buffer for the ag freelist header */
+xfs_read_agf(
+ struct xfs_mount *mp, /* mount point structure */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_agnumber_t agno, /* allocation group number */
+ int flags, /* XFS_BUF_ */
+ struct xfs_buf **bpp) /* buffer for the ag freelist header */
{
- xfs_agf_t *agf; /* ag freelist header */
+ struct xfs_agf *agf; /* ag freelist header */
int agf_ok; /* set if agf is consistent */
- xfs_buf_t *bp; /* return value */
- xfs_perag_t *pag; /* per allocation group data */
int error;
ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1),
- (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
- &bp);
+ XFS_FSS_TO_BB(mp, 1), flags, bpp);
if (error)
return error;
- ASSERT(!bp || !XFS_BUF_GETERROR(bp));
- if (!bp) {
- *bpp = NULL;
+ if (!*bpp)
return 0;
- }
+
+ ASSERT(!XFS_BUF_GETERROR(*bpp));
+ agf = XFS_BUF_TO_AGF(*bpp);
+
/*
* Validate the magic number of the agf block.
*/
- agf = XFS_BUF_TO_AGF(bp);
agf_ok =
be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
- be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
+ be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
+ be32_to_cpu(agf->agf_seqno) == agno;
+ if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+ agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
+ be32_to_cpu(agf->agf_length);
if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
XFS_RANDOM_ALLOC_READ_AGF))) {
XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
XFS_ERRLEVEL_LOW, mp, agf);
- xfs_trans_brelse(tp, bp);
+ xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
+
+ XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
+ return 0;
+}
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ */
+int /* error */
+xfs_alloc_read_agf(
+ struct xfs_mount *mp, /* mount point structure */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_agnumber_t agno, /* allocation group number */
+ int flags, /* XFS_ALLOC_FLAG_... */
+ struct xfs_buf **bpp) /* buffer for the ag freelist header */
+{
+ struct xfs_agf *agf; /* ag freelist header */
+ struct xfs_perag *pag; /* per allocation group data */
+ int error;
+
+ ASSERT(agno != NULLAGNUMBER);
+
+ error = xfs_read_agf(mp, tp, agno,
+ (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0,
+ bpp);
+ if (error)
+ return error;
+ if (!*bpp)
+ return 0;
+ ASSERT(!XFS_BUF_GETERROR(*bpp));
+
+ agf = XFS_BUF_TO_AGF(*bpp);
pag = &mp->m_perag[agno];
if (!pag->pagf_init) {
pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
@@ -2213,6 +2328,7 @@ xfs_alloc_read_agf(
#ifdef DEBUG
else if (!XFS_FORCED_SHUTDOWN(mp)) {
ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
+ ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
@@ -2221,8 +2337,6 @@ xfs_alloc_read_agf(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
}
#endif
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
- *bpp = bp;
return 0;
}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5aec15d0651e..588172796f7b 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -121,6 +121,19 @@ extern ktrace_t *xfs_alloc_trace_buf;
#define XFS_ALLOC_KTRACE_BUSYSEARCH 6
#endif
+void
+xfs_alloc_mark_busy(xfs_trans_t *tp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len);
+
+void
+xfs_alloc_clear_busy(xfs_trans_t *tp,
+ xfs_agnumber_t ag,
+ int idx);
+
+#endif /* __KERNEL__ */
+
/*
* Compute and fill in value of m_ag_maxlevels.
*/
@@ -196,18 +209,4 @@ xfs_free_extent(
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len); /* length of extent */
-void
-xfs_alloc_mark_busy(xfs_trans_t *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
- xfs_extlen_t len);
-
-void
-xfs_alloc_clear_busy(xfs_trans_t *tp,
- xfs_agnumber_t ag,
- int idx);
-
-
-#endif /* __KERNEL__ */
-
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3ce2645508ae..733cb75a8c5d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -35,2177 +35,464 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
-/*
- * Prototypes for internal functions.
- */
-STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int);
-STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *);
-STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
- xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
-STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
+STATIC struct xfs_btree_cur *
+xfs_allocbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.agbp, cur->bc_private.a.agno,
+ cur->bc_btnum);
+}
-/*
- * Internal functions.
- */
+STATIC void
+xfs_allocbt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int inc)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ int btnum = cur->bc_btnum;
-/*
- * Single level of the xfs_alloc_delete record deletion routine.
- * Delete record pointed to by cur/level.
- * Remove the record from its block then rebalance the tree.
- * Return 0 for error, 1 for done, 2 to go on to the next level.
- */
-STATIC int /* error */
-xfs_alloc_delrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level removing record from */
- int *stat) /* fail/done/go-on */
+ ASSERT(ptr->s != 0);
+
+ agf->agf_roots[btnum] = ptr->s;
+ be32_add_cpu(&agf->agf_levels[btnum], inc);
+ cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
+
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+STATIC int
+xfs_allocbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int length,
+ int *stat)
{
- xfs_agf_t *agf; /* allocation group freelist header */
- xfs_alloc_block_t *block; /* btree block record/key lives in */
- xfs_agblock_t bno; /* btree block number */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* kp points here if block is level 0 */
- xfs_agblock_t lbno; /* left block's block number */
- xfs_buf_t *lbp; /* left block's buffer pointer */
- xfs_alloc_block_t *left; /* left btree block */
- xfs_alloc_key_t *lkp=NULL; /* left block key pointer */
- xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */
- int lrecs=0; /* number of records in left block */
- xfs_alloc_rec_t *lrp; /* left block record pointer */
- xfs_mount_t *mp; /* mount structure */
- int ptr; /* index in btree block for this rec */
- xfs_agblock_t rbno; /* right block's block number */
- xfs_buf_t *rbp; /* right block's buffer pointer */
- xfs_alloc_block_t *right; /* right btree block */
- xfs_alloc_key_t *rkp; /* right block key pointer */
- xfs_alloc_ptr_t *rpp; /* right block address pointer */
- int rrecs=0; /* number of records in right block */
- int numrecs;
- xfs_alloc_rec_t *rrp; /* right block record pointer */
- xfs_btree_cur_t *tcur; /* temporary btree cursor */
+ int error;
+ xfs_agblock_t bno;
- /*
- * Get the index of the entry being deleted, check for nothing there.
- */
- ptr = cur->bc_ptrs[level];
- if (ptr == 0) {
- *stat = 0;
- return 0;
- }
- /*
- * Get the buffer & block containing the record or key/ptr.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ /* Allocate the new block from the freelist. If we can't, give up. */
+ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+ &bno, 1);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
-#endif
- /*
- * Fail if we're off the end of the block.
- */
- numrecs = be16_to_cpu(block->bb_numrecs);
- if (ptr > numrecs) {
+ }
+
+ if (bno == NULLAGBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
- XFS_STATS_INC(xs_abt_delrec);
- /*
- * It's a nonleaf. Excise the key and ptr being deleted, by
- * sliding the entries past them down one.
- * Log the changed areas of the block.
- */
- if (level > 0) {
- lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = ptr; i < numrecs; i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
- return error;
- }
-#endif
- if (ptr < numrecs) {
- memmove(&lkp[ptr - 1], &lkp[ptr],
- (numrecs - ptr) * sizeof(*lkp));
- memmove(&lpp[ptr - 1], &lpp[ptr],
- (numrecs - ptr) * sizeof(*lpp));
- xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1);
- xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1);
- }
- }
- /*
- * It's a leaf. Excise the record being deleted, by sliding the
- * entries past it down one. Log the changed areas of the block.
- */
- else {
- lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
- if (ptr < numrecs) {
- memmove(&lrp[ptr - 1], &lrp[ptr],
- (numrecs - ptr) * sizeof(*lrp));
- xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1);
- }
- /*
- * If it's the first record in the block, we'll need a key
- * structure to pass up to the next level (updkey).
- */
- if (ptr == 1) {
- key.ar_startblock = lrp->ar_startblock;
- key.ar_blockcount = lrp->ar_blockcount;
- lkp = &key;
- }
- }
- /*
- * Decrement and log the number of entries in the block.
- */
- numrecs--;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
- /*
- * See if the longest free extent in the allocation group was
- * changed by this operation. True if it's the by-size btree, and
- * this is the leaf level, and there is no right sibling block,
- * and this was the last record.
- */
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- mp = cur->bc_mp;
- if (level == 0 &&
- cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- ptr > numrecs) {
- ASSERT(ptr == numrecs + 1);
- /*
- * There are still records in the block. Grab the size
- * from the last one.
- */
- if (numrecs) {
- rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur);
- agf->agf_longest = rrp->ar_blockcount;
- }
- /*
- * No free extents left.
- */
- else
- agf->agf_longest = 0;
- mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
- be32_to_cpu(agf->agf_longest);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
- }
- /*
- * Is this the root level? If so, we're almost done.
- */
- if (level == cur->bc_nlevels - 1) {
- /*
- * If this is the root level,
- * and there's only one entry left,
- * and it's NOT the leaf level,
- * then we can get rid of this level.
- */
- if (numrecs == 1 && level > 0) {
- /*
- * lpp is still set to the first pointer in the block.
- * Make it the new root of the btree.
- */
- bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
- agf->agf_roots[cur->bc_btnum] = *lpp;
- be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1);
- mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
- /*
- * Put this buffer/block on the ag's freelist.
- */
- error = xfs_alloc_put_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, NULL, bno, 1);
- if (error)
- return error;
- /*
- * Since blocks move to the free list without the
- * coordination used in xfs_bmap_finish, we can't allow
- * block to be available for reallocation and
- * non-transaction writing (user data) until we know
- * that the transaction that moved it to the free list
- * is permanently on disk. We track the blocks by
- * declaring these blocks as "busy"; the busy list is
- * maintained on a per-ag basis and each transaction
- * records which entries should be removed when the
- * iclog commits to disk. If a busy block is
- * allocated, the iclog is pushed up to the LSN
- * that freed the block.
- */
- xfs_alloc_mark_busy(cur->bc_tp,
- be32_to_cpu(agf->agf_seqno), bno, 1);
+ xfs_trans_agbtree_delta(cur->bc_tp, 1);
+ new->s = cpu_to_be32(bno);
- xfs_trans_agbtree_delta(cur->bc_tp, -1);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_ROOTS | XFS_AGF_LEVELS);
- /*
- * Update the cursor so there's one fewer level.
- */
- xfs_btree_setbuf(cur, level, NULL);
- cur->bc_nlevels--;
- } else if (level > 0 &&
- (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * If we deleted the leftmost entry in the block, update the
- * key values above us in the tree.
- */
- if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
- return error;
- /*
- * If the number of records remaining in the block is at least
- * the minimum, we're done.
- */
- if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * Otherwise, we have to move some records around to keep the
- * tree balanced. Look at the left and right sibling blocks to
- * see if we can re-balance by moving only one record.
- */
- rbno = be32_to_cpu(block->bb_rightsib);
- lbno = be32_to_cpu(block->bb_leftsib);
- bno = NULLAGBLOCK;
- ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
- /*
- * Duplicate the cursor so our btree manipulations here won't
- * disrupt the next level up.
- */
- if ((error = xfs_btree_dup_cursor(cur, &tcur)))
- return error;
- /*
- * If there's a right sibling, see if it's ok to shift an entry
- * out of it.
- */
- if (rbno != NULLAGBLOCK) {
- /*
- * Move the temp cursor to the last entry in the next block.
- * Actually any entry but the first would suffice.
- */
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_increment(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- /*
- * Grab a pointer to the block.
- */
- rbp = tcur->bc_bufs[level];
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- goto error0;
-#endif
- /*
- * Grab the current block number, for future use.
- */
- bno = be32_to_cpu(right->bb_leftsib);
- /*
- * If right block is full enough so that removing one entry
- * won't make it too empty, and left-shifting an entry out
- * of right to us works, we're done.
- */
- if (be16_to_cpu(right->bb_numrecs) - 1 >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if ((error = xfs_alloc_lshift(tcur, level, &i)))
- goto error0;
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur));
- xfs_btree_del_cursor(tcur,
- XFS_BTREE_NOERROR);
- if (level > 0 &&
- (error = xfs_alloc_decrement(cur, level,
- &i)))
- return error;
- *stat = 1;
- return 0;
- }
- }
- /*
- * Otherwise, grab the number of records in right for
- * future reference, and fix up the temp cursor to point
- * to our block again (last record).
- */
- rrecs = be16_to_cpu(right->bb_numrecs);
- if (lbno != NULLAGBLOCK) {
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_decrement(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- }
- }
- /*
- * If there's a left sibling, see if it's ok to shift an entry
- * out of it.
- */
- if (lbno != NULLAGBLOCK) {
- /*
- * Move the temp cursor to the first entry in the
- * previous block.
- */
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_decrement(tcur, level, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_btree_firstrec(tcur, level);
- /*
- * Grab a pointer to the block.
- */
- lbp = tcur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- goto error0;
-#endif
- /*
- * Grab the current block number, for future use.
- */
- bno = be32_to_cpu(left->bb_rightsib);
- /*
- * If left block is full enough so that removing one entry
- * won't make it too empty, and right-shifting an entry out
- * of left to us works, we're done.
- */
- if (be16_to_cpu(left->bb_numrecs) - 1 >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
- if ((error = xfs_alloc_rshift(tcur, level, &i)))
- goto error0;
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_ALLOC_BLOCK_MINRECS(level, cur));
- xfs_btree_del_cursor(tcur,
- XFS_BTREE_NOERROR);
- if (level == 0)
- cur->bc_ptrs[0]++;
- *stat = 1;
- return 0;
- }
- }
- /*
- * Otherwise, grab the number of records in right for
- * future reference.
- */
- lrecs = be16_to_cpu(left->bb_numrecs);
- }
- /*
- * Delete the temp cursor, we're done with it.
- */
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- /*
- * If here, we need to do a join to keep the tree balanced.
- */
- ASSERT(bno != NULLAGBLOCK);
- /*
- * See if we can join with the left neighbor block.
- */
- if (lbno != NULLAGBLOCK &&
- lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * Set "right" to be the starting block,
- * "left" to be the left neighbor.
- */
- rbno = bno;
- right = block;
- rrecs = be16_to_cpu(right->bb_numrecs);
- rbp = bp;
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, lbno, 0, &lbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- lrecs = be16_to_cpu(left->bb_numrecs);
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
- }
- /*
- * If that won't work, see if we can join with the right neighbor block.
- */
- else if (rbno != NULLAGBLOCK &&
- rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * Set "left" to be the starting block,
- * "right" to be the right neighbor.
- */
- lbno = bno;
- left = block;
- lrecs = be16_to_cpu(left->bb_numrecs);
- lbp = bp;
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, rbno, 0, &rbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- rrecs = be16_to_cpu(right->bb_numrecs);
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
- }
- /*
- * Otherwise, we can't fix the imbalance.
- * Just return. This is probably a logic error, but it's not fatal.
- */
- else {
- if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- *stat = 1;
- return 0;
- }
- /*
- * We're now going to join "left" and "right" by moving all the stuff
- * in "right" to "left" and deleting "right".
- */
- if (level > 0) {
- /*
- * It's a non-leaf. Move keys and pointers.
- */
- lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < rrecs; i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
- return error;
- }
-#endif
- memcpy(lkp, rkp, rrecs * sizeof(*lkp));
- memcpy(lpp, rpp, rrecs * sizeof(*lpp));
- xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
- xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
- } else {
- /*
- * It's a leaf. Move records.
- */
- lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memcpy(lrp, rrp, rrecs * sizeof(*lrp));
- xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
- }
- /*
- * If we joined with the left neighbor, set the buffer in the
- * cursor to the left block, and fix up the index.
- */
- if (bp != lbp) {
- xfs_btree_setbuf(cur, level, lbp);
- cur->bc_ptrs[level] += lrecs;
- }
- /*
- * If we joined with the right neighbor and there's a level above
- * us, increment the cursor at that level.
- */
- else if (level + 1 < cur->bc_nlevels &&
- (error = xfs_alloc_increment(cur, level + 1, &i)))
- return error;
- /*
- * Fix up the number of records in the surviving block.
- */
- lrecs += rrecs;
- left->bb_numrecs = cpu_to_be16(lrecs);
- /*
- * Fix up the right block pointer in the surviving block, and log it.
- */
- left->bb_rightsib = right->bb_rightsib;
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- /*
- * If there is a right sibling now, make it point to the
- * remaining block.
- */
- if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
- xfs_alloc_block_t *rrblock;
- xfs_buf_t *rrbp;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+}
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
- &rrbp, XFS_ALLOC_BTREE_REF)))
- return error;
- rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
- if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
- return error;
- rrblock->bb_leftsib = cpu_to_be32(lbno);
- xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
- }
- /*
- * Free the deleting block by putting it on the freelist.
- */
- error = xfs_alloc_put_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, NULL, rbno, 1);
+STATIC int
+xfs_allocbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agblock_t bno;
+ int error;
+
+ bno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(bp));
+ error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
if (error)
return error;
+
/*
- * Since blocks move to the free list without the coordination
- * used in xfs_bmap_finish, we can't allow block to be available
- * for reallocation and non-transaction writing (user data)
- * until we know that the transaction that moved it to the free
- * list is permanently on disk. We track the blocks by declaring
- * these blocks as "busy"; the busy list is maintained on a
- * per-ag basis and each transaction records which entries
- * should be removed when the iclog commits to disk. If a
- * busy block is allocated, the iclog is pushed up to the
+ * Since blocks move to the free list without the coordination used in
+ * xfs_bmap_finish, we can't allow block to be available for
+ * reallocation and non-transaction writing (user data) until we know
+ * that the transaction that moved it to the free list is permanently
+ * on disk. We track the blocks by declaring these blocks as "busy";
+ * the busy list is maintained on a per-ag basis and each transaction
+ * records which entries should be removed when the iclog commits to
+ * disk. If a busy block is allocated, the iclog is pushed up to the
* LSN that freed the block.
*/
xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
-
- /*
- * Adjust the current level's cursor so that we're left referring
- * to the right node, after we're done.
- * If this leaves the ptr value 0 our caller will fix it up.
- */
- if (level > 0)
- cur->bc_ptrs[level]--;
- /*
- * Return value means the next level up has something to do.
- */
- *stat = 2;
return 0;
-
-error0:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
}
/*
- * Insert one record/level. Return information to the caller
- * allowing the next level up to proceed if necessary.
+ * Update the longest extent in the AGF
*/
-STATIC int /* error */
-xfs_alloc_insrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to insert record at */
- xfs_agblock_t *bnop, /* i/o: block number inserted */
- xfs_alloc_rec_t *recp, /* i/o: record data inserted */
- xfs_btree_cur_t **curp, /* output: new cursor replacing cur */
- int *stat) /* output: success/failure */
+STATIC void
+xfs_allocbt_update_lastrec(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_rec *rec,
+ int ptr,
+ int reason)
{
- xfs_agf_t *agf; /* allocation group freelist header */
- xfs_alloc_block_t *block; /* btree block record/key lives in */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* key value being inserted */
- xfs_alloc_key_t *kp; /* pointer to btree keys */
- xfs_agblock_t nbno; /* block number of allocated block */
- xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
- xfs_alloc_key_t nkey; /* new key value, from split */
- xfs_alloc_rec_t nrec; /* new record value, for caller */
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ __be32 len;
int numrecs;
- int optr; /* old ptr value */
- xfs_alloc_ptr_t *pp; /* pointer to btree addresses */
- int ptr; /* index in btree block for this rec */
- xfs_alloc_rec_t *rp; /* pointer to btree records */
- ASSERT(be32_to_cpu(recp->ar_blockcount) > 0);
+ ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
+
+ switch (reason) {
+ case LASTREC_UPDATE:
+ /*
+ * If this is the last leaf block and it's the last record,
+ * then update the size of the longest extent in the AG.
+ */
+ if (ptr != xfs_btree_get_numrecs(block))
+ return;
+ len = rec->alloc.ar_blockcount;
+ break;
+ case LASTREC_INSREC:
+ if (be32_to_cpu(rec->alloc.ar_blockcount) <=
+ be32_to_cpu(agf->agf_longest))
+ return;
+ len = rec->alloc.ar_blockcount;
+ break;
+ case LASTREC_DELREC:
+ numrecs = xfs_btree_get_numrecs(block);
+ if (ptr <= numrecs)
+ return;
+ ASSERT(ptr == numrecs + 1);
- /*
- * GCC doesn't understand the (arguably complex) control flow in
- * this function and complains about uninitialized structure fields
- * without this.
- */
- memset(&nrec, 0, sizeof(nrec));
+ if (numrecs) {
+ xfs_alloc_rec_t *rrp;
- /*
- * If we made it to the root level, allocate a new root block
- * and we're done.
- */
- if (level >= cur->bc_nlevels) {
- XFS_STATS_INC(xs_abt_insrec);
- if ((error = xfs_alloc_newroot(cur, &i)))
- return error;
- *bnop = NULLAGBLOCK;
- *stat = i;
- return 0;
- }
- /*
- * Make a key out of the record data to be inserted, and save it.
- */
- key.ar_startblock = recp->ar_startblock;
- key.ar_blockcount = recp->ar_blockcount;
- optr = ptr = cur->bc_ptrs[level];
- /*
- * If we're off the left edge, return failure.
- */
- if (ptr == 0) {
- *stat = 0;
- return 0;
- }
- XFS_STATS_INC(xs_abt_insrec);
- /*
- * Get pointers to the btree buffer and block.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- numrecs = be16_to_cpu(block->bb_numrecs);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
- /*
- * Check that the new entry is being inserted in the right place.
- */
- if (ptr <= numrecs) {
- if (level == 0) {
- rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- xfs_btree_check_rec(cur->bc_btnum, recp, rp);
+ rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
+ len = rrp->ar_blockcount;
} else {
- kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
- xfs_btree_check_key(cur->bc_btnum, &key, kp);
- }
- }
-#endif
- nbno = NULLAGBLOCK;
- ncur = NULL;
- /*
- * If the block is full, we can't insert the new entry until we
- * make the block un-full.
- */
- if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- /*
- * First, try shifting an entry to the right neighbor.
- */
- if ((error = xfs_alloc_rshift(cur, level, &i)))
- return error;
- if (i) {
- /* nothing */
- }
- /*
- * Next, try shifting an entry to the left neighbor.
- */
- else {
- if ((error = xfs_alloc_lshift(cur, level, &i)))
- return error;
- if (i)
- optr = ptr = cur->bc_ptrs[level];
- else {
- /*
- * Next, try splitting the current block in
- * half. If this works we have to re-set our
- * variables because we could be in a
- * different block now.
- */
- if ((error = xfs_alloc_split(cur, level, &nbno,
- &nkey, &ncur, &i)))
- return error;
- if (i) {
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error =
- xfs_btree_check_sblock(cur,
- block, level, bp)))
- return error;
-#endif
- ptr = cur->bc_ptrs[level];
- nrec.ar_startblock = nkey.ar_startblock;
- nrec.ar_blockcount = nkey.ar_blockcount;
- }
- /*
- * Otherwise the insert fails.
- */
- else {
- *stat = 0;
- return 0;
- }
- }
- }
- }
- /*
- * At this point we know there's room for our new entry in the block
- * we're pointing at.
- */
- numrecs = be16_to_cpu(block->bb_numrecs);
- if (level > 0) {
- /*
- * It's a non-leaf entry. Make a hole for the new data
- * in the key and ptr regions of the block.
- */
- kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = numrecs; i >= ptr; i--) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
- return error;
+ len = 0;
}
-#endif
- memmove(&kp[ptr], &kp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*kp));
- memmove(&pp[ptr], &pp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*pp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
- return error;
-#endif
- /*
- * Now stuff the new data in, bump numrecs and log the new data.
- */
- kp[ptr - 1] = key;
- pp[ptr - 1] = cpu_to_be32(*bnop);
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_alloc_log_keys(cur, bp, ptr, numrecs);
- xfs_alloc_log_ptrs(cur, bp, ptr, numrecs);
-#ifdef DEBUG
- if (ptr < numrecs)
- xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
- kp + ptr);
-#endif
- } else {
- /*
- * It's a leaf entry. Make a hole for the new record.
- */
- rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
- memmove(&rp[ptr], &rp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*rp));
- /*
- * Now stuff the new record in, bump numrecs
- * and log the new data.
- */
- rp[ptr - 1] = *recp;
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_alloc_log_recs(cur, bp, ptr, numrecs);
-#ifdef DEBUG
- if (ptr < numrecs)
- xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
- rp + ptr);
-#endif
- }
- /*
- * Log the new number of records in the btree header.
- */
- xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
- /*
- * If we inserted at the start of a block, update the parents' keys.
- */
- if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
- return error;
- /*
- * Look to see if the longest extent in the allocation group
- * needs to be updated.
- */
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- if (level == 0 &&
- cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
- /*
- * If this is a leaf in the by-size btree and there
- * is no right sibling block and this block is bigger
- * than the previous longest block, update it.
- */
- agf->agf_longest = recp->ar_blockcount;
- cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
- = be32_to_cpu(recp->ar_blockcount);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
+ break;
+ default:
+ ASSERT(0);
+ return;
}
- /*
- * Return the new block number, if any.
- * If there is one, give back a record value and a cursor too.
- */
- *bnop = nbno;
- if (nbno != NULLAGBLOCK) {
- *recp = nrec;
- *curp = ncur;
- }
- *stat = 1;
- return 0;
+
+ agf->agf_longest = len;
+ cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
+ xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
}
-/*
- * Log header fields from a btree block.
- */
-STATIC void
-xfs_alloc_log_block(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *bp, /* buffer containing btree block */
- int fields) /* mask of fields: XFS_BB_... */
+STATIC int
+xfs_allocbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
{
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- static const short offsets[] = { /* table of offsets */
- offsetof(xfs_alloc_block_t, bb_magic),
- offsetof(xfs_alloc_block_t, bb_level),
- offsetof(xfs_alloc_block_t, bb_numrecs),
- offsetof(xfs_alloc_block_t, bb_leftsib),
- offsetof(xfs_alloc_block_t, bb_rightsib),
- sizeof(xfs_alloc_block_t)
- };
+ return cur->bc_mp->m_alloc_mnr[level != 0];
+}
- xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
- xfs_trans_log_buf(tp, bp, first, last);
+STATIC int
+xfs_allocbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_alloc_mxr[level != 0];
}
-/*
- * Log keys from a btree block (nonleaf).
- */
STATIC void
-xfs_alloc_log_keys(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int kfirst, /* index of first key to log */
- int klast) /* index of last key to log */
+xfs_allocbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- xfs_alloc_key_t *kp; /* key pointer in btree block */
- int last; /* last byte offset logged */
+ ASSERT(rec->alloc.ar_startblock != 0);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ key->alloc.ar_startblock = rec->alloc.ar_startblock;
+ key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}
-/*
- * Log block pointer fields from a btree block (nonleaf).
- */
STATIC void
-xfs_alloc_log_ptrs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int pfirst, /* index of first pointer to log */
- int plast) /* index of last pointer to log */
+xfs_allocbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- xfs_alloc_ptr_t *pp; /* block-pointer pointer in btree blk */
+ ASSERT(key->alloc.ar_startblock != 0);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ rec->alloc.ar_startblock = key->alloc.ar_startblock;
+ rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
}
-/*
- * Log records from a btree block (leaf).
- */
STATIC void
-xfs_alloc_log_recs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_buf_t *bp, /* buffer containing btree block */
- int rfirst, /* index of first record to log */
- int rlast) /* index of last record to log */
+xfs_allocbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
{
- xfs_alloc_block_t *block; /* btree block to log from */
- int first; /* first byte offset logged */
- int last; /* last byte offset logged */
- xfs_alloc_rec_t *rp; /* record pointer for btree block */
-
+ ASSERT(cur->bc_rec.a.ar_startblock != 0);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
-#ifdef DEBUG
- {
- xfs_agf_t *agf;
- xfs_alloc_rec_t *p;
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
- ASSERT(be32_to_cpu(p->ar_startblock) +
- be32_to_cpu(p->ar_blockcount) <=
- be32_to_cpu(agf->agf_length));
- }
-#endif
- first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
+ rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}
-/*
- * Lookup the record. The cursor is made to point to it, based on dir.
- * Return 0 if can't find any such record, 1 for success.
- */
-STATIC int /* error */
-xfs_alloc_lookup(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_lookup_t dir, /* <=, ==, or >= */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
{
- xfs_agblock_t agbno; /* a.g. relative btree block number */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_alloc_block_t *block=NULL; /* current btree block */
- int diff; /* difference for the current key */
- int error; /* error return value */
- int keyno=0; /* current key number */
- int level; /* level in the btree */
- xfs_mount_t *mp; /* file system mount point */
-
- XFS_STATS_INC(xs_abt_lookup);
- /*
- * Get the allocation group header, and the root block number.
- */
- mp = cur->bc_mp;
-
- {
- xfs_agf_t *agf; /* a.g. freespace header */
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- agno = be32_to_cpu(agf->agf_seqno);
- agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
- }
- /*
- * Iterate over each level in the btree, starting at the root.
- * For each level above the leaves, find the key we need, based
- * on the lookup record, then follow the corresponding block
- * pointer down to the next level.
- */
- for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
- xfs_buf_t *bp; /* buffer pointer for btree block */
- xfs_daddr_t d; /* disk address of btree block */
-
- /*
- * Get the disk address we're looking for.
- */
- d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- /*
- * If the old buffer at this level is for a different block,
- * throw it away, otherwise just use it.
- */
- bp = cur->bc_bufs[level];
- if (bp && XFS_BUF_ADDR(bp) != d)
- bp = NULL;
- if (!bp) {
- /*
- * Need to get a new buffer. Read it, then
- * set it in the cursor, releasing the old one.
- */
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
- agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
- return error;
- xfs_btree_setbuf(cur, level, bp);
- /*
- * Point to the btree block, now that we have the buffer
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, level,
- bp)))
- return error;
- } else
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- /*
- * If we already had a key match at a higher level, we know
- * we need to use the first entry in this block.
- */
- if (diff == 0)
- keyno = 1;
- /*
- * Otherwise we need to search this block. Do a binary search.
- */
- else {
- int high; /* high entry number */
- xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */
- xfs_alloc_rec_t *krbase=NULL;/* base of records in block */
- int low; /* low entry number */
-
- /*
- * Get a pointer to keys or records.
- */
- if (level > 0)
- kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
- else
- krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
- /*
- * Set low and high entry numbers, 1-based.
- */
- low = 1;
- if (!(high = be16_to_cpu(block->bb_numrecs))) {
- /*
- * If the block is empty, the tree must
- * be an empty leaf.
- */
- ASSERT(level == 0 && cur->bc_nlevels == 1);
- cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
- *stat = 0;
- return 0;
- }
- /*
- * Binary search the block.
- */
- while (low <= high) {
- xfs_extlen_t blockcount; /* key value */
- xfs_agblock_t startblock; /* key value */
-
- XFS_STATS_INC(xs_abt_compare);
- /*
- * keyno is average of low and high.
- */
- keyno = (low + high) >> 1;
- /*
- * Get startblock & blockcount.
- */
- if (level > 0) {
- xfs_alloc_key_t *kkp;
-
- kkp = kkbase + keyno - 1;
- startblock = be32_to_cpu(kkp->ar_startblock);
- blockcount = be32_to_cpu(kkp->ar_blockcount);
- } else {
- xfs_alloc_rec_t *krp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- krp = krbase + keyno - 1;
- startblock = be32_to_cpu(krp->ar_startblock);
- blockcount = be32_to_cpu(krp->ar_blockcount);
- }
- /*
- * Compute difference to get next direction.
- */
- if (cur->bc_btnum == XFS_BTNUM_BNO)
- diff = (int)startblock -
- (int)cur->bc_rec.a.ar_startblock;
- else if (!(diff = (int)blockcount -
- (int)cur->bc_rec.a.ar_blockcount))
- diff = (int)startblock -
- (int)cur->bc_rec.a.ar_startblock;
- /*
- * Less than, move right.
- */
- if (diff < 0)
- low = keyno + 1;
- /*
- * Greater than, move left.
- */
- else if (diff > 0)
- high = keyno - 1;
- /*
- * Equal, we're done.
- */
- else
- break;
- }
- }
- /*
- * If there are more levels, set up for the next level
- * by getting the block number and filling in the cursor.
- */
- if (level > 0) {
- /*
- * If we moved left, need the previous key number,
- * unless there isn't one.
- */
- if (diff > 0 && --keyno < 1)
- keyno = 1;
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, agbno, level)))
- return error;
-#endif
- cur->bc_ptrs[level] = keyno;
- }
- }
- /*
- * Done with the search.
- * See if we need to adjust the results.
- */
- if (dir != XFS_LOOKUP_LE && diff < 0) {
- keyno++;
- /*
- * If ge search and we went off the end of the block, but it's
- * not the last block, we're in the wrong block.
- */
- if (dir == XFS_LOOKUP_GE &&
- keyno > be16_to_cpu(block->bb_numrecs) &&
- be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
- int i;
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
- cur->bc_ptrs[0] = keyno;
- if ((error = xfs_alloc_increment(cur, 0, &i)))
- return error;
- XFS_WANT_CORRUPTED_RETURN(i == 1);
- *stat = 1;
- return 0;
- }
- }
- else if (dir == XFS_LOOKUP_LE && diff > 0)
- keyno--;
- cur->bc_ptrs[0] = keyno;
- /*
- * Return if we succeeded or not.
- */
- if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
- *stat = 0;
- else
- *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
- return 0;
+ ptr->s = agf->agf_roots[cur->bc_btnum];
}
-/*
- * Move 1 record left from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_alloc_lshift(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to shift record on */
- int *stat) /* success/failure */
+STATIC __int64_t
+xfs_allocbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
{
- int error; /* error return value */
-#ifdef DEBUG
- int i; /* loop index */
-#endif
- xfs_alloc_key_t key; /* key value for leaf level upward */
- xfs_buf_t *lbp; /* buffer for left neighbor block */
- xfs_alloc_block_t *left; /* left neighbor btree block */
- int nrec; /* new number of left block entries */
- xfs_buf_t *rbp; /* buffer for right (current) block */
- xfs_alloc_block_t *right; /* right (current) btree block */
- xfs_alloc_key_t *rkp=NULL; /* key pointer for right block */
- xfs_alloc_ptr_t *rpp=NULL; /* address pointer for right block */
- xfs_alloc_rec_t *rrp=NULL; /* record pointer for right block */
+ xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
+ xfs_alloc_key_t *kp = &key->alloc;
+ __int64_t diff;
- /*
- * Set up variables for this block as "right".
- */
- rbp = cur->bc_bufs[level];
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
-#endif
- /*
- * If we've got no left sibling then we can't shift an entry left.
- */
- if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * If the cursor entry is the one that would be moved, don't
- * do it... it's too complicated.
- */
- if (cur->bc_ptrs[level] <= 1) {
- *stat = 0;
- return 0;
- }
- /*
- * Set up the left neighbor as "left".
- */
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
- 0, &lbp, XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
- /*
- * If it's full, it can't take another entry.
- */
- if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- *stat = 0;
- return 0;
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) -
+ rec->ar_startblock;
}
- nrec = be16_to_cpu(left->bb_numrecs) + 1;
- /*
- * If non-leaf, copy a key and a ptr to the left block.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* key pointer for left block */
- xfs_alloc_ptr_t *lpp; /* address pointer for left block */
- lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- *lkp = *rkp;
- xfs_alloc_log_keys(cur, lbp, nrec, nrec);
- lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
- return error;
-#endif
- *lpp = *rpp;
- xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
- xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
- }
- /*
- * If leaf, copy a record to the left block.
- */
- else {
- xfs_alloc_rec_t *lrp; /* record pointer for left block */
+ diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+ if (diff)
+ return diff;
- lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- *lrp = *rrp;
- xfs_alloc_log_recs(cur, lbp, nrec, nrec);
- xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
- }
- /*
- * Bump and log left's numrecs, decrement and log right's numrecs.
- */
- be16_add_cpu(&left->bb_numrecs, 1);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
- be16_add_cpu(&right->bb_numrecs, -1);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
- /*
- * Slide the contents of right down one entry.
- */
- if (level > 0) {
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
- level)))
- return error;
- }
-#endif
- memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- } else {
- memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- key.ar_startblock = rrp->ar_startblock;
- key.ar_blockcount = rrp->ar_blockcount;
- rkp = &key;
- }
- /*
- * Update the parent key values of right.
- */
- if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
- return error;
- /*
- * Slide the cursor value left one.
- */
- cur->bc_ptrs[level]--;
- *stat = 1;
- return 0;
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
-/*
- * Allocate a new root block, fill it in.
- */
-STATIC int /* error */
-xfs_alloc_newroot(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+STATIC int
+xfs_allocbt_kill_root(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int level,
+ union xfs_btree_ptr *newroot)
{
- int error; /* error return value */
- xfs_agblock_t lbno; /* left block number */
- xfs_buf_t *lbp; /* left btree buffer */
- xfs_alloc_block_t *left; /* left btree block */
- xfs_mount_t *mp; /* mount structure */
- xfs_agblock_t nbno; /* new block number */
- xfs_buf_t *nbp; /* new (root) buffer */
- xfs_alloc_block_t *new; /* new (root) btree block */
- int nptr; /* new value for key index, 1 or 2 */
- xfs_agblock_t rbno; /* right block number */
- xfs_buf_t *rbp; /* right btree buffer */
- xfs_alloc_block_t *right; /* right btree block */
-
- mp = cur->bc_mp;
+ int error;
- ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
- /*
- * Get a buffer from the freelist blocks, for the new root.
- */
- error = xfs_alloc_get_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, &nbno, 1);
- if (error)
- return error;
- /*
- * None available, we fail.
- */
- if (nbno == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- xfs_trans_agbtree_delta(cur->bc_tp, 1);
- nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
- 0);
- new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
- /*
- * Set the root data in the a.g. freespace structure.
- */
- {
- xfs_agf_t *agf; /* a.g. freespace header */
- xfs_agnumber_t seqno;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_STATS_INC(cur, killroot);
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
- be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1);
- seqno = be32_to_cpu(agf->agf_seqno);
- mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_ROOTS | XFS_AGF_LEVELS);
- }
/*
- * At the previous root level there are now two blocks: the old
- * root, and the new block generated when it was split.
- * We don't know which one the cursor is pointing at, so we
- * set up variables "left" and "right" for each case.
+ * Update the root pointer, decreasing the level by 1 and then
+ * free the old root.
*/
- lbp = cur->bc_bufs[cur->bc_nlevels - 1];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
+ xfs_allocbt_set_root(cur, newroot, -1);
+ error = xfs_allocbt_free_block(cur, bp);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
-#endif
- if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
- /*
- * Our block is left, pick up the right block.
- */
- lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
- rbno = be32_to_cpu(left->bb_rightsib);
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, rbno, 0, &rbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right,
- cur->bc_nlevels - 1, rbp)))
- return error;
- nptr = 1;
- } else {
- /*
- * Our block is right, pick up the left block.
- */
- rbp = lbp;
- right = left;
- rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
- lbno = be32_to_cpu(right->bb_leftsib);
- if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.a.agno, lbno, 0, &lbp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
- if ((error = xfs_btree_check_sblock(cur, left,
- cur->bc_nlevels - 1, lbp)))
- return error;
- nptr = 2;
}
- /*
- * Fill in the new block's btree header and log it.
- */
- new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
- new->bb_level = cpu_to_be16(cur->bc_nlevels);
- new->bb_numrecs = cpu_to_be16(2);
- new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
- ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
- /*
- * Fill in the key data in the new root.
- */
- {
- xfs_alloc_key_t *kp; /* btree key pointer */
- kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
- if (be16_to_cpu(left->bb_level) > 0) {
- kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur);
- kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);
- } else {
- xfs_alloc_rec_t *rp; /* btree record pointer */
+ XFS_BTREE_STATS_INC(cur, free);
- rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
- kp[0].ar_startblock = rp->ar_startblock;
- kp[0].ar_blockcount = rp->ar_blockcount;
- rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- kp[1].ar_startblock = rp->ar_startblock;
- kp[1].ar_blockcount = rp->ar_blockcount;
- }
- }
- xfs_alloc_log_keys(cur, nbp, 1, 2);
- /*
- * Fill in the pointer data in the new root.
- */
- {
- xfs_alloc_ptr_t *pp; /* btree address pointer */
+ xfs_btree_setbuf(cur, level, NULL);
+ cur->bc_nlevels--;
- pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
- pp[0] = cpu_to_be32(lbno);
- pp[1] = cpu_to_be32(rbno);
- }
- xfs_alloc_log_ptrs(cur, nbp, 1, 2);
- /*
- * Fix up the cursor.
- */
- xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
- cur->bc_ptrs[cur->bc_nlevels] = nptr;
- cur->bc_nlevels++;
- *stat = 1;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
-/*
- * Move 1 record right from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_alloc_rshift(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to shift record on */
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i; /* loop index */
- xfs_alloc_key_t key; /* key value for leaf level upward */
- xfs_buf_t *lbp; /* buffer for left (current) block */
- xfs_alloc_block_t *left; /* left (current) btree block */
- xfs_buf_t *rbp; /* buffer for right neighbor block */
- xfs_alloc_block_t *right; /* right neighbor btree block */
- xfs_alloc_key_t *rkp; /* key pointer for right block */
- xfs_btree_cur_t *tcur; /* temporary cursor */
-
- /*
- * Set up variables for this block as "left".
- */
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
-#endif
- /*
- * If we've got no right sibling then we can't shift an entry right.
- */
- if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * If the cursor entry is the one that would be moved, don't
- * do it... it's too complicated.
- */
- if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
- *stat = 0;
- return 0;
- }
- /*
- * Set up the right neighbor as "right".
- */
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
- 0, &rbp, XFS_ALLOC_BTREE_REF)))
- return error;
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
- return error;
- /*
- * If it's full, it can't take another entry.
- */
- if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
- *stat = 0;
- return 0;
- }
- /*
- * Make a hole at the start of the right neighbor block, then
- * copy the last left block entry to the hole.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* key pointer for left block */
- xfs_alloc_ptr_t *lpp; /* address pointer for left block */
- xfs_alloc_ptr_t *rpp; /* address pointer for right block */
-
- lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
#ifdef DEBUG
- for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
- return error;
- }
-#endif
- memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
- return error;
-#endif
- *rkp = *lkp;
- *rpp = *lpp;
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
+STATIC int
+xfs_allocbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock);
} else {
- xfs_alloc_rec_t *lrp; /* record pointer for left block */
- xfs_alloc_rec_t *rrp; /* record pointer for right block */
-
- lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- *rrp = *lrp;
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- key.ar_startblock = rrp->ar_startblock;
- key.ar_blockcount = rrp->ar_blockcount;
- rkp = &key;
- xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
+ return be32_to_cpu(k1->alloc.ar_blockcount) <
+ be32_to_cpu(k2->alloc.ar_blockcount) ||
+ (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
+ be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock));
}
- /*
- * Decrement and log left's numrecs, bump and log right's numrecs.
- */
- be16_add_cpu(&left->bb_numrecs, -1);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
- be16_add_cpu(&right->bb_numrecs, 1);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
- /*
- * Using a temporary cursor, update the parent key values of the
- * block on the right.
- */
- if ((error = xfs_btree_dup_cursor(cur, &tcur)))
- return error;
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_alloc_increment(tcur, level, &i)) ||
- (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
- goto error0;
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- *stat = 1;
- return 0;
-error0:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
}
-/*
- * Split cur/level block in half.
- * Return new block number and its first record (to be inserted into parent).
- */
-STATIC int /* error */
-xfs_alloc_split(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level to split */
- xfs_agblock_t *bnop, /* output: block number allocated */
- xfs_alloc_key_t *keyp, /* output: first key of new block */
- xfs_btree_cur_t **curp, /* output: new cursor */
- int *stat) /* success/failure */
+STATIC int
+xfs_allocbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
{
- int error; /* error return value */
- int i; /* loop index/record number */
- xfs_agblock_t lbno; /* left (current) block number */
- xfs_buf_t *lbp; /* buffer for left block */
- xfs_alloc_block_t *left; /* left (current) btree block */
- xfs_agblock_t rbno; /* right (new) block number */
- xfs_buf_t *rbp; /* buffer for right block */
- xfs_alloc_block_t *right; /* right (new) btree block */
-
- /*
- * Allocate the new block from the freelist.
- * If we can't do it, we're toast. Give up.
- */
- error = xfs_alloc_get_freelist(cur->bc_tp,
- cur->bc_private.a.agbp, &rbno, 1);
- if (error)
- return error;
- if (rbno == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- xfs_trans_agbtree_delta(cur->bc_tp, 1);
- rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
- rbno, 0);
- /*
- * Set up the new block as "right".
- */
- right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
- /*
- * "Left" is the current (according to the cursor) block.
- */
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
- return error;
-#endif
- /*
- * Fill in the btree header for the new block.
- */
- right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
- right->bb_level = left->bb_level;
- right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
- /*
- * Make sure that if there's an odd number of entries now, that
- * each new block will have the same number of entries.
- */
- if ((be16_to_cpu(left->bb_numrecs) & 1) &&
- cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
- be16_add_cpu(&right->bb_numrecs, 1);
- i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
- /*
- * For non-leaf blocks, copy keys and addresses over to the new block.
- */
- if (level > 0) {
- xfs_alloc_key_t *lkp; /* left btree key pointer */
- xfs_alloc_ptr_t *lpp; /* left btree address pointer */
- xfs_alloc_key_t *rkp; /* right btree key pointer */
- xfs_alloc_ptr_t *rpp; /* right btree address pointer */
-
- lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
- rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
- rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
- return error;
- }
-#endif
- memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- *keyp = *rkp;
+ if (cur->bc_btnum == XFS_BTNUM_BNO) {
+ return be32_to_cpu(r1->alloc.ar_startblock) +
+ be32_to_cpu(r1->alloc.ar_blockcount) <=
+ be32_to_cpu(r2->alloc.ar_startblock);
+ } else {
+ return be32_to_cpu(r1->alloc.ar_blockcount) <
+ be32_to_cpu(r2->alloc.ar_blockcount) ||
+ (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
+ be32_to_cpu(r1->alloc.ar_startblock) <
+ be32_to_cpu(r2->alloc.ar_startblock));
}
- /*
- * For leaf blocks, copy records over to the new block.
- */
- else {
- xfs_alloc_rec_t *lrp; /* left btree record pointer */
- xfs_alloc_rec_t *rrp; /* right btree record pointer */
+}
+#endif /* DEBUG */
- lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
- rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- keyp->ar_startblock = rrp->ar_startblock;
- keyp->ar_blockcount = rrp->ar_blockcount;
- }
- /*
- * Find the left block number by looking in the buffer.
- * Adjust numrecs, sibling pointers.
- */
- lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
- be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
- right->bb_rightsib = left->bb_rightsib;
- left->bb_rightsib = cpu_to_be32(rbno);
- right->bb_leftsib = cpu_to_be32(lbno);
- xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
- xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- /*
- * If there's a block to the new block's right, make that block
- * point back to right instead of to left.
- */
- if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
- xfs_alloc_block_t *rrblock; /* rr btree block */
- xfs_buf_t *rrbp; /* buffer for rrblock */
+#ifdef XFS_BTREE_TRACE
+ktrace_t *xfs_allocbt_trace_buf;
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0,
- &rrbp, XFS_ALLOC_BTREE_REF)))
- return error;
- rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
- if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
- return error;
- rrblock->bb_leftsib = cpu_to_be32(rbno);
- xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
- }
- /*
- * If the cursor is really in the right block, move it there.
- * If it's just pointing past the last entry in left, then we'll
- * insert there, so don't change anything in that case.
- */
- if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
- xfs_btree_setbuf(cur, level, rbp);
- cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
- }
- /*
- * If there are more levels, we'll need another cursor which refers to
- * the right block, no matter where this cursor was.
- */
- if (level + 1 < cur->bc_nlevels) {
- if ((error = xfs_btree_dup_cursor(cur, curp)))
- return error;
- (*curp)->bc_ptrs[level + 1]++;
- }
- *bnop = rbno;
- *stat = 1;
- return 0;
+STATIC void
+xfs_allocbt_trace_enter(
+ struct xfs_btree_cur *cur,
+ const char *func,
+ char *s,
+ int type,
+ int line,
+ __psunsigned_t a0,
+ __psunsigned_t a1,
+ __psunsigned_t a2,
+ __psunsigned_t a3,
+ __psunsigned_t a4,
+ __psunsigned_t a5,
+ __psunsigned_t a6,
+ __psunsigned_t a7,
+ __psunsigned_t a8,
+ __psunsigned_t a9,
+ __psunsigned_t a10)
+{
+ ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
+ (void *)func, (void *)s, NULL, (void *)cur,
+ (void *)a0, (void *)a1, (void *)a2, (void *)a3,
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7,
+ (void *)a8, (void *)a9, (void *)a10);
}
-/*
- * Update keys at all levels from here to the root along the cursor's path.
- */
-STATIC int /* error */
-xfs_alloc_updkey(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_alloc_key_t *keyp, /* new key value to update to */
- int level) /* starting level for update */
+STATIC void
+xfs_allocbt_trace_cursor(
+ struct xfs_btree_cur *cur,
+ __uint32_t *s0,
+ __uint64_t *l0,
+ __uint64_t *l1)
{
- int ptr; /* index of key in block */
-
- /*
- * Go up the tree from this level toward the root.
- * At each level, update the key value to the value input.
- * Stop when we reach a level where the cursor isn't pointing
- * at the first entry in the block.
- */
- for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
- xfs_alloc_block_t *block; /* btree block */
- xfs_buf_t *bp; /* buffer for block */
-#ifdef DEBUG
- int error; /* error return value */
-#endif
- xfs_alloc_key_t *kp; /* ptr to btree block keys */
-
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- ptr = cur->bc_ptrs[level];
- kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
- *kp = *keyp;
- xfs_alloc_log_keys(cur, bp, ptr, ptr);
- }
- return 0;
+ *s0 = cur->bc_private.a.agno;
+ *l0 = cur->bc_rec.a.ar_startblock;
+ *l1 = cur->bc_rec.a.ar_blockcount;
}
-/*
- * Externally visible routines.
- */
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_alloc_decrement(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree, 0 is leaf */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_trace_key(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ __uint64_t *l0,
+ __uint64_t *l1)
{
- xfs_alloc_block_t *block; /* btree block */
- int error; /* error return value */
- int lev; /* btree level */
-
- ASSERT(level < cur->bc_nlevels);
- /*
- * Read-ahead to the left at this level.
- */
- xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
- /*
- * Decrement the ptr at this level. If we're still in the block
- * then we're done.
- */
- if (--cur->bc_ptrs[level] > 0) {
- *stat = 1;
- return 0;
- }
- /*
- * Get a pointer to the btree block.
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level,
- cur->bc_bufs[level])))
- return error;
-#endif
- /*
- * If we just went off the left edge of the tree, return failure.
- */
- if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * March up the tree decrementing pointers.
- * Stop when we don't go off the left edge of a block.
- */
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- if (--cur->bc_ptrs[lev] > 0)
- break;
- /*
- * Read-ahead the left block, we're going to read it
- * in the next loop.
- */
- xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
- }
- /*
- * If we went off the root then we are seriously confused.
- */
- ASSERT(lev < cur->bc_nlevels);
- /*
- * Now walk back down the tree, fixing up the cursor's buffer
- * pointers and key numbers.
- */
- for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
- xfs_agblock_t agbno; /* block number of btree block */
- xfs_buf_t *bp; /* buffer pointer for block */
-
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, agbno, 0, &bp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
- cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
- }
- *stat = 1;
- return 0;
+ *l0 = be32_to_cpu(key->alloc.ar_startblock);
+ *l1 = be32_to_cpu(key->alloc.ar_blockcount);
}
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-int /* error */
-xfs_alloc_delete(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
+STATIC void
+xfs_allocbt_trace_record(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ __uint64_t *l0,
+ __uint64_t *l1,
+ __uint64_t *l2)
{
- int error; /* error return value */
- int i; /* result code */
- int level; /* btree level */
-
- /*
- * Go up the tree, starting at leaf level.
- * If 2 is returned then a join was done; go to the next level.
- * Otherwise we are done.
- */
- for (level = 0, i = 2; i == 2; level++) {
- if ((error = xfs_alloc_delrec(cur, level, &i)))
- return error;
- }
- if (i == 0) {
- for (level = 1; level < cur->bc_nlevels; level++) {
- if (cur->bc_ptrs[level] == 0) {
- if ((error = xfs_alloc_decrement(cur, level, &i)))
- return error;
- break;
- }
- }
- }
- *stat = i;
- return 0;
+ *l0 = be32_to_cpu(rec->alloc.ar_startblock);
+ *l1 = be32_to_cpu(rec->alloc.ar_blockcount);
+ *l2 = 0;
}
+#endif /* XFS_BTREE_TRACE */
+
+static const struct xfs_btree_ops xfs_allocbt_ops = {
+ .rec_len = sizeof(xfs_alloc_rec_t),
+ .key_len = sizeof(xfs_alloc_key_t),
+
+ .dup_cursor = xfs_allocbt_dup_cursor,
+ .set_root = xfs_allocbt_set_root,
+ .kill_root = xfs_allocbt_kill_root,
+ .alloc_block = xfs_allocbt_alloc_block,
+ .free_block = xfs_allocbt_free_block,
+ .update_lastrec = xfs_allocbt_update_lastrec,
+ .get_minrecs = xfs_allocbt_get_minrecs,
+ .get_maxrecs = xfs_allocbt_get_maxrecs,
+ .init_key_from_rec = xfs_allocbt_init_key_from_rec,
+ .init_rec_from_key = xfs_allocbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
+ .key_diff = xfs_allocbt_key_diff,
-/*
- * Get the data from the pointed-to record.
- */
-int /* error */
-xfs_alloc_get_rec(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t *bno, /* output: starting block of extent */
- xfs_extlen_t *len, /* output: length of extent */
- int *stat) /* output: success/failure */
-{
- xfs_alloc_block_t *block; /* btree block */
#ifdef DEBUG
- int error; /* error return value */
+ .keys_inorder = xfs_allocbt_keys_inorder,
+ .recs_inorder = xfs_allocbt_recs_inorder,
#endif
- int ptr; /* record number */
- ptr = cur->bc_ptrs[0];
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
- return error;
+#ifdef XFS_BTREE_TRACE
+ .trace_enter = xfs_allocbt_trace_enter,
+ .trace_cursor = xfs_allocbt_trace_cursor,
+ .trace_key = xfs_allocbt_trace_key,
+ .trace_record = xfs_allocbt_trace_record,
#endif
- /*
- * Off the right end or left end, return failure.
- */
- if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
- *stat = 0;
- return 0;
- }
- /*
- * Point to the record and extract its data.
- */
- {
- xfs_alloc_rec_t *rec; /* record data */
-
- rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- *bno = be32_to_cpu(rec->ar_startblock);
- *len = be32_to_cpu(rec->ar_blockcount);
- }
- *stat = 1;
- return 0;
-}
+};
/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
+ * Allocate a new allocation btree cursor.
*/
-int /* error */
-xfs_alloc_increment(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree, 0 is leaf */
- int *stat) /* success/failure */
+struct xfs_btree_cur * /* new alloc btree cursor */
+xfs_allocbt_init_cursor(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_buf *agbp, /* buffer for agf structure */
+ xfs_agnumber_t agno, /* allocation group number */
+ xfs_btnum_t btnum) /* btree identifier */
{
- xfs_alloc_block_t *block; /* btree block */
- xfs_buf_t *bp; /* tree block buffer */
- int error; /* error return value */
- int lev; /* btree level */
-
- ASSERT(level < cur->bc_nlevels);
- /*
- * Read-ahead to the right at this level.
- */
- xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
- /*
- * Get a pointer to the btree block.
- */
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
- return error;
-#endif
- /*
- * Increment the ptr at this level. If we're still in the block
- * then we're done.
- */
- if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
- *stat = 1;
- return 0;
- }
- /*
- * If we just went off the right edge of the tree, return failure.
- */
- if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
- *stat = 0;
- return 0;
- }
- /*
- * March up the tree incrementing pointers.
- * Stop when we don't go off the right edge of a block.
- */
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- bp = cur->bc_bufs[lev];
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
-#endif
- if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
- break;
- /*
- * Read-ahead the right block, we're going to read it
- * in the next loop.
- */
- xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
- }
- /*
- * If we went off the root then we are seriously confused.
- */
- ASSERT(lev < cur->bc_nlevels);
- /*
- * Now walk back down the tree, fixing up the cursor's buffer
- * pointers and key numbers.
- */
- for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- lev > level; ) {
- xfs_agblock_t agbno; /* block number of btree block */
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_btree_cur *cur;
- agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.a.agno, agbno, 0, &bp,
- XFS_ALLOC_BTREE_REF)))
- return error;
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_ALLOC_BLOCK(bp);
- if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
- return error;
- cur->bc_ptrs[lev] = 1;
- }
- *stat = 1;
- return 0;
-}
+ ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-int /* error */
-xfs_alloc_insert(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i; /* result value, 0 for failure */
- int level; /* current level number in btree */
- xfs_agblock_t nbno; /* new block number (split result) */
- xfs_btree_cur_t *ncur; /* new cursor (split result) */
- xfs_alloc_rec_t nrec; /* record being inserted this level */
- xfs_btree_cur_t *pcur; /* previous level's cursor */
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
- level = 0;
- nbno = NULLAGBLOCK;
- nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
- nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
- ncur = NULL;
- pcur = cur;
- /*
- * Loop going up the tree, starting at the leaf level.
- * Stop when we don't get a split block, that must mean that
- * the insert is finished with this level.
- */
- do {
- /*
- * Insert nrec/nbno into this level of the tree.
- * Note if we fail, nbno will be null.
- */
- if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
- &i))) {
- if (pcur != cur)
- xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
- return error;
- }
- /*
- * See if the cursor we just used is trash.
- * Can't trash the caller's cursor, but otherwise we should
- * if ncur is a new cursor or we're about to be done.
- */
- if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
- cur->bc_nlevels = pcur->bc_nlevels;
- xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
- }
- /*
- * If we got a new cursor, switch to it.
- */
- if (ncur) {
- pcur = ncur;
- ncur = NULL;
- }
- } while (nbno != NULLAGBLOCK);
- *stat = i;
- return 0;
-}
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
+ cur->bc_btnum = btnum;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_eq(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
+ cur->bc_ops = &xfs_allocbt_ops;
+ if (btnum == XFS_BTNUM_CNT)
+ cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_ge(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
-}
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
- */
-int /* error */
-xfs_alloc_lookup_le(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* success/failure */
-{
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
+ return cur;
}
/*
- * Update the record referred to by cur, to the value given by [bno, len].
- * This either works (return 0) or gets an EFSCORRUPTED error.
+ * Calculate number of records in an alloc btree block.
*/
-int /* error */
-xfs_alloc_update(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t bno, /* starting block of extent */
- xfs_extlen_t len) /* length of extent */
+int
+xfs_allocbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
{
- xfs_alloc_block_t *block; /* btree block to update */
- int error; /* error return value */
- int ptr; /* current record number (updating) */
+ blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
- ASSERT(len > 0);
- /*
- * Pick up the a.g. freelist struct and the current block.
- */
- block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
-#ifdef DEBUG
- if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
- return error;
-#endif
- /*
- * Get the address of the rec to be updated.
- */
- ptr = cur->bc_ptrs[0];
- {
- xfs_alloc_rec_t *rp; /* pointer to updated record */
-
- rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
- /*
- * Fill in the new contents and log them.
- */
- rp->ar_startblock = cpu_to_be32(bno);
- rp->ar_blockcount = cpu_to_be32(len);
- xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
- }
- /*
- * If it's the by-size btree and it's the last leaf block and
- * it's the last record... then update the size of the longest
- * extent in the a.g., which we cache in the a.g. freelist header.
- */
- if (cur->bc_btnum == XFS_BTNUM_CNT &&
- be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- ptr == be16_to_cpu(block->bb_numrecs)) {
- xfs_agf_t *agf; /* a.g. freespace header */
- xfs_agnumber_t seqno;
-
- agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
- seqno = be32_to_cpu(agf->agf_seqno);
- cur->bc_mp->m_perag[seqno].pagf_longest = len;
- agf->agf_longest = cpu_to_be32(len);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
- XFS_AGF_LONGEST);
- }
- /*
- * Updating first record in leaf. Pass new key value up to our parent.
- */
- if (ptr == 1) {
- xfs_alloc_key_t key; /* key containing [bno, len] */
-
- key.ar_startblock = cpu_to_be32(bno);
- key.ar_blockcount = cpu_to_be32(len);
- if ((error = xfs_alloc_updkey(cur, &key, 1)))
- return error;
- }
- return 0;
+ if (leaf)
+ return blocklen / sizeof(xfs_alloc_rec_t);
+ return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 5bd1a2c8bd07..a6caa0022c9b 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -24,7 +24,6 @@
struct xfs_buf;
struct xfs_btree_cur;
-struct xfs_btree_sblock;
struct xfs_mount;
/*
@@ -50,16 +49,6 @@ typedef struct xfs_alloc_rec_incore {
/* btree pointer type */
typedef __be32 xfs_alloc_ptr_t;
-/* btree block header type */
-typedef struct xfs_btree_sblock xfs_alloc_block_t;
-
-#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp))
-
-/*
- * Real block structures have a size equal to the disk block size.
- */
-#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
-#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
/*
* Minimum and maximum blocksize and sectorsize.
@@ -83,73 +72,39 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
/*
- * Record, key, and pointer address macros for btree blocks.
- */
-#define XFS_ALLOC_REC_ADDR(bb,i,cur) \
- XFS_BTREE_REC_ADDR(xfs_alloc, bb, i)
-
-#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \
- XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i)
-
-#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \
- XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Get the data from the pointed-to record.
- */
-extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno,
- xfs_extlen_t *len, int *stat);
-
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
-
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
-
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
+ * Btree block header size depends on a superblock flag.
+ *
+ * (not quite yet, but soon)
*/
-extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, int *stat);
+#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
/*
- * Update the record referred to by cur, to the value given by [bno, len].
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len);
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_ALLOC_REC_ADDR(mp, block, index) \
+ ((xfs_alloc_rec_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ (((index) - 1) * sizeof(xfs_alloc_rec_t))))
+
+#define XFS_ALLOC_KEY_ADDR(mp, block, index) \
+ ((xfs_alloc_key_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ ((index) - 1) * sizeof(xfs_alloc_key_t)))
+
+#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \
+ ((xfs_alloc_ptr_t *) \
+ ((char *)(block) + \
+ XFS_ALLOC_BLOCK_LEN(mp) + \
+ (maxrecs) * sizeof(xfs_alloc_key_t) + \
+ ((index) - 1) * sizeof(xfs_alloc_ptr_t)))
+
+extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
+ struct xfs_trans *, struct xfs_buf *,
+ xfs_agnumber_t, xfs_btnum_t);
+extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 0b3b5efe848c..53d5e70d1360 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -41,21 +41,36 @@
#endif
#ifdef XFS_NATIVE_HOST
-#define cpu_to_be16(val) ((__be16)(val))
-#define cpu_to_be32(val) ((__be32)(val))
-#define cpu_to_be64(val) ((__be64)(val))
-#define be16_to_cpu(val) ((__uint16_t)(val))
-#define be32_to_cpu(val) ((__uint32_t)(val))
-#define be64_to_cpu(val) ((__uint64_t)(val))
+#define cpu_to_be16(val) ((__force __be16)(__u16)(val))
+#define cpu_to_be32(val) ((__force __be32)(__u32)(val))
+#define cpu_to_be64(val) ((__force __be64)(__u64)(val))
+#define be16_to_cpu(val) ((__force __u16)(__be16)(val))
+#define be32_to_cpu(val) ((__force __u32)(__be32)(val))
+#define be64_to_cpu(val) ((__force __u64)(__be64)(val))
#else
-#define cpu_to_be16(val) (__swab16((__uint16_t)(val)))
-#define cpu_to_be32(val) (__swab32((__uint32_t)(val)))
-#define cpu_to_be64(val) (__swab64((__uint64_t)(val)))
-#define be16_to_cpu(val) (__swab16((__be16)(val)))
-#define be32_to_cpu(val) (__swab32((__be32)(val)))
-#define be64_to_cpu(val) (__swab64((__be64)(val)))
+#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val)))
+#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val)))
+#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val)))
+#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val)))
+#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val)))
+#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val)))
#endif
+static inline void be16_add_cpu(__be16 *a, __s16 b)
+{
+ *a = cpu_to_be16(be16_to_cpu(*a) + b);
+}
+
+static inline void be32_add_cpu(__be32 *a, __s32 b)
+{
+ *a = cpu_to_be32(be32_to_cpu(*a) + b);
+}
+
+static inline void be64_add_cpu(__be64 *a, __s64 b)
+{
+ *a = cpu_to_be64(be64_to_cpu(*a) + b);
+}
+
#endif /* __KERNEL__ */
/* do we need conversion? */
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 8e0e463dae2d..bca7b243c319 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -61,8 +61,7 @@ static inline int xfs_highbit64(__uint64_t v)
/* Get low bit set out of 32-bit argument, -1 if none set */
static inline int xfs_lowbit32(__uint32_t v)
{
- unsigned long t = v;
- return (v) ? find_first_bit(&t, 32) : -1;
+ return ffs(v) - 1;
}
/* Get low bit set out of 64-bit argument, -1 if none set */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a1aab9275d5a..138308e70d14 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -393,8 +393,8 @@ xfs_bmap_count_leaves(
STATIC void
xfs_bmap_disk_count_leaves(
- xfs_extnum_t idx,
- xfs_bmbt_block_t *block,
+ struct xfs_mount *mp,
+ struct xfs_btree_block *block,
int numrecs,
int *count);
@@ -402,6 +402,53 @@ xfs_bmap_disk_count_leaves(
* Bmap internal routines.
*/
+STATIC int /* error */
+xfs_bmbt_lookup_eq(
+ struct xfs_btree_cur *cur,
+ xfs_fileoff_t off,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.b.br_startoff = off;
+ cur->bc_rec.b.br_startblock = bno;
+ cur->bc_rec.b.br_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+STATIC int /* error */
+xfs_bmbt_lookup_ge(
+ struct xfs_btree_cur *cur,
+ xfs_fileoff_t off,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.b.br_startoff = off;
+ cur->bc_rec.b.br_startblock = bno;
+ cur->bc_rec.b.br_blockcount = len;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+* Update the record referred to by cur to the value given
+ * by [off, bno, len, state].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int
+xfs_bmbt_update(
+ struct xfs_btree_cur *cur,
+ xfs_fileoff_t off,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ xfs_exntst_t state)
+{
+ union xfs_btree_rec rec;
+
+ xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
+ return xfs_btree_update(cur, &rec);
+}
+
/*
* Called from xfs_bmap_add_attrfork to handle btree format files.
*/
@@ -422,15 +469,14 @@ xfs_bmap_add_attrfork_btree(
if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
*flags |= XFS_ILOG_DBROOT;
else {
- cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
- XFS_DATA_FORK);
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
cur->bc_private.b.flist = flist;
cur->bc_private.b.firstblock = *firstblock;
if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
goto error0;
/* must be at least one entry */
XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
- if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
+ if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
goto error0;
if (stat == 0) {
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
@@ -818,10 +864,10 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_blockcount, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -931,7 +977,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1007,7 +1053,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1097,7 +1143,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1152,7 +1198,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1379,16 +1425,16 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_blockcount, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1428,10 +1474,10 @@ xfs_bmap_add_extent_unwritten_real(
&i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1471,10 +1517,10 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_blockcount, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
@@ -1557,7 +1603,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_blockcount - new->br_blockcount,
oldext)))
goto done;
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
if (xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
@@ -1605,7 +1651,7 @@ xfs_bmap_add_extent_unwritten_real(
oldext)))
goto done;
cur->bc_rec.b = *new;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1647,7 +1693,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_blockcount - new->br_blockcount,
oldext)))
goto done;
- if ((error = xfs_bmbt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto done;
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
@@ -1695,7 +1741,7 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -1743,7 +1789,7 @@ xfs_bmap_add_extent_unwritten_real(
cur->bc_rec.b = PREV;
cur->bc_rec.b.br_blockcount =
new->br_startoff - PREV.br_startoff;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
/*
@@ -1758,7 +1804,7 @@ xfs_bmap_add_extent_unwritten_real(
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
/* new middle extent - newext */
cur->bc_rec.b.br_state = new->br_state;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -2106,10 +2152,10 @@ xfs_bmap_add_extent_hole_real(
right.br_blockcount, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+ if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, left.br_startoff,
@@ -2218,7 +2264,7 @@ xfs_bmap_add_extent_hole_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = new->br_state;
- if ((error = xfs_bmbt_insert(cur, &i)))
+ if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
@@ -2996,24 +3042,24 @@ xfs_bmap_btree_to_extents(
int whichfork) /* data or attr fork */
{
/* REFERENCED */
- xfs_bmbt_block_t *cblock;/* child btree block */
+ struct xfs_btree_block *cblock;/* child btree block */
xfs_fsblock_t cbno; /* child block number */
xfs_buf_t *cbp; /* child block's buffer */
int error; /* error return value */
xfs_ifork_t *ifp; /* inode fork data */
xfs_mount_t *mp; /* mount point structure */
__be64 *pp; /* ptr to block address */
- xfs_bmbt_block_t *rblock;/* root btree block */
+ struct xfs_btree_block *rblock;/* root btree block */
+ mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(ifp->if_flags & XFS_IFEXTENTS);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
rblock = ifp->if_broot;
ASSERT(be16_to_cpu(rblock->bb_level) == 1);
ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
- ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
- mp = ip->i_mount;
- pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
+ ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
cbno = be64_to_cpu(*pp);
*logflagsp = 0;
#ifdef DEBUG
@@ -3023,8 +3069,8 @@ xfs_bmap_btree_to_extents(
if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
XFS_BMAP_BTREE_REF)))
return error;
- cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
- if ((error = xfs_btree_check_lblock(cur, cblock, 0, cbp)))
+ cblock = XFS_BUF_TO_BLOCK(cbp);
+ if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
return error;
xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
ip->i_d.di_nblocks--;
@@ -3170,7 +3216,7 @@ xfs_bmap_del_extent(
flags |= XFS_ILOG_FEXT(whichfork);
break;
}
- if ((error = xfs_bmbt_delete(cur, &i)))
+ if ((error = xfs_btree_delete(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
break;
@@ -3254,10 +3300,10 @@ xfs_bmap_del_extent(
got.br_startblock, temp,
got.br_state)))
goto done;
- if ((error = xfs_bmbt_increment(cur, 0, &i)))
+ if ((error = xfs_btree_increment(cur, 0, &i)))
goto done;
cur->bc_rec.b = new;
- error = xfs_bmbt_insert(cur, &i);
+ error = xfs_btree_insert(cur, &i);
if (error && error != ENOSPC)
goto done;
/*
@@ -3404,11 +3450,11 @@ xfs_bmap_extents_to_btree(
int *logflagsp, /* inode logging flags */
int whichfork) /* data or attr fork */
{
- xfs_bmbt_block_t *ablock; /* allocated (child) bt block */
+ struct xfs_btree_block *ablock; /* allocated (child) bt block */
xfs_buf_t *abp; /* buffer for ablock */
xfs_alloc_arg_t args; /* allocation arguments */
xfs_bmbt_rec_t *arp; /* child record pointer */
- xfs_bmbt_block_t *block; /* btree root block */
+ struct xfs_btree_block *block; /* btree root block */
xfs_btree_cur_t *cur; /* bmap btree cursor */
xfs_bmbt_rec_host_t *ep; /* extent record pointer */
int error; /* error return value */
@@ -3428,6 +3474,7 @@ xfs_bmap_extents_to_btree(
*/
xfs_iroot_realloc(ip, 1, whichfork);
ifp->if_flags |= XFS_IFBROOT;
+
/*
* Fill in the root.
*/
@@ -3435,14 +3482,14 @@ xfs_bmap_extents_to_btree(
block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
block->bb_level = cpu_to_be16(1);
block->bb_numrecs = cpu_to_be16(1);
- block->bb_leftsib = cpu_to_be64(NULLDFSBNO);
- block->bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+
/*
* Need a cursor. Can't allocate until bb_level is filled in.
*/
mp = ip->i_mount;
- cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
- whichfork);
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
@@ -3489,12 +3536,12 @@ xfs_bmap_extents_to_btree(
/*
* Fill in the child block.
*/
- ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
+ ablock = XFS_BUF_TO_BLOCK(abp);
ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
ablock->bb_level = 0;
- ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
- ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
- arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+ ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
for (cnt = i = 0; i < nextents; i++) {
ep = xfs_iext_get_ext(ifp, i);
@@ -3505,21 +3552,24 @@ xfs_bmap_extents_to_btree(
}
}
ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
- ablock->bb_numrecs = cpu_to_be16(cnt);
+ xfs_btree_set_numrecs(ablock, cnt);
+
/*
* Fill in the root key and pointer.
*/
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+ kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
+ arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
+ be16_to_cpu(block->bb_level)));
*pp = cpu_to_be64(args.fsbno);
+
/*
* Do all this logging at the end so that
* the root is at the right level.
*/
- xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS);
- xfs_bmbt_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
+ xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
+ xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
ASSERT(*curp == NULL);
*curp = cur;
*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
@@ -4176,7 +4226,7 @@ xfs_bmap_compute_maxlevels(
maxleafents = MAXAEXTNUM;
sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
}
- maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
+ maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
minleafrecs = mp->m_bmap_dmnr[0];
minnoderecs = mp->m_bmap_dmnr[1];
maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -4242,9 +4292,15 @@ xfs_bmap_finish(
* We have a new transaction, so we should return committed=1,
* even though we're returning an error.
*/
- if (error) {
+ if (error)
return error;
- }
+
+ /*
+ * transaction commit worked ok so we can drop the extra ticket
+ * reference that we gained in xfs_trans_dup()
+ */
+ xfs_log_ticket_put(ntp->t_ticket);
+
if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
logcount)))
return error;
@@ -4474,6 +4530,22 @@ xfs_bmap_one_block(
return rval;
}
+STATIC int
+xfs_bmap_sanity_check(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ int level)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+
+ if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
+ be16_to_cpu(block->bb_level) != level ||
+ be16_to_cpu(block->bb_numrecs) == 0 ||
+ be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
+ return 0;
+ return 1;
+}
+
/*
* Read in the extents to if_extents.
* All inode fields are set up by caller, we just traverse the btree
@@ -4486,7 +4558,7 @@ xfs_bmap_read_extents(
xfs_inode_t *ip, /* incore inode */
int whichfork) /* data or attr fork */
{
- xfs_bmbt_block_t *block; /* current btree block */
+ struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
xfs_buf_t *bp; /* buffer for "block" */
int error; /* error return value */
@@ -4510,7 +4582,7 @@ xfs_bmap_read_extents(
*/
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
- pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
ASSERT(bno != NULLDFSBNO);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -4523,13 +4595,13 @@ xfs_bmap_read_extents(
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
return error;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
XFS_WANT_CORRUPTED_GOTO(
- XFS_BMAP_SANITY_CHECK(mp, block, level),
+ xfs_bmap_sanity_check(mp, bp, level),
error0);
if (level == 0)
break;
- pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
xfs_trans_brelse(tp, bp);
@@ -4549,7 +4621,7 @@ xfs_bmap_read_extents(
xfs_extnum_t start;
- num_recs = be16_to_cpu(block->bb_numrecs);
+ num_recs = xfs_btree_get_numrecs(block);
if (unlikely(i + num_recs > room)) {
ASSERT(i + num_recs <= room);
xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
@@ -4561,18 +4633,18 @@ xfs_bmap_read_extents(
goto error0;
}
XFS_WANT_CORRUPTED_GOTO(
- XFS_BMAP_SANITY_CHECK(mp, block, 0),
+ xfs_bmap_sanity_check(mp, bp, 0),
error0);
/*
* Read-ahead the next leaf block, if any.
*/
- nextbno = be64_to_cpu(block->bb_rightsib);
+ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
if (nextbno != NULLFSBLOCK)
xfs_btree_reada_bufl(mp, nextbno, 1);
/*
* Copy records into the extent records.
*/
- frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
+ frp = XFS_BMBT_REC_ADDR(mp, block, 1);
start = i;
for (j = 0; j < num_recs; j++, i++, frp++) {
xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
@@ -4603,7 +4675,7 @@ xfs_bmap_read_extents(
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
return error;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
}
ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
@@ -5029,8 +5101,7 @@ xfs_bmapi(
if (abno == NULLFSBLOCK)
break;
if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
- cur = xfs_btree_init_cursor(mp,
- tp, NULL, 0, XFS_BTNUM_BMAP,
+ cur = xfs_bmbt_init_cursor(mp, tp,
ip, whichfork);
cur->bc_private.b.firstblock =
*firstblock;
@@ -5147,9 +5218,8 @@ xfs_bmapi(
*/
ASSERT(mval->br_blockcount <= len);
if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
- cur = xfs_btree_init_cursor(mp,
- tp, NULL, 0, XFS_BTNUM_BMAP,
- ip, whichfork);
+ cur = xfs_bmbt_init_cursor(mp,
+ tp, ip, whichfork);
cur->bc_private.b.firstblock =
*firstblock;
cur->bc_private.b.flist = flist;
@@ -5440,8 +5510,7 @@ xfs_bunmapi(
logflags = 0;
if (ifp->if_flags & XFS_IFBROOT) {
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
- cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
- whichfork);
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
cur->bc_private.b.flags = 0;
@@ -5742,14 +5811,17 @@ error0:
STATIC int
xfs_getbmapx_fix_eof_hole(
xfs_inode_t *ip, /* xfs incore inode pointer */
- struct getbmap *out, /* output structure */
+ struct getbmapx *out, /* output structure */
int prealloced, /* this is a file with
- * preallocated data space */
+ * preallocated data space */
__int64_t end, /* last block requested */
xfs_fsblock_t startblock)
{
__int64_t fixlen;
xfs_mount_t *mp; /* file system mount point */
+ xfs_ifork_t *ifp; /* inode fork pointer */
+ xfs_extnum_t lastx; /* last extent pointer */
+ xfs_fileoff_t fileblock;
if (startblock == HOLESTARTBLOCK) {
mp = ip->i_mount;
@@ -5763,21 +5835,33 @@ xfs_getbmapx_fix_eof_hole(
out->bmv_length = fixlen;
}
} else {
- out->bmv_block = XFS_FSB_TO_DB(ip, startblock);
+ if (startblock == DELAYSTARTBLOCK)
+ out->bmv_block = -2;
+ else
+ out->bmv_block = XFS_FSB_TO_DB(ip, startblock);
+ fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
+ (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
+ out->bmv_oflags |= BMV_OF_LAST;
}
return 1;
}
/*
- * Fcntl interface to xfs_bmapi.
+ * Get inode's extents as described in bmv, and format for output.
+ * Calls formatter to fill the user's buffer until all extents
+ * are mapped, until the passed-in bmv->bmv_count slots have
+ * been filled, or until the formatter short-circuits the loop,
+ * if it is tracking filled-in extents on its own.
*/
int /* error code */
xfs_getbmap(
xfs_inode_t *ip,
- struct getbmap *bmv, /* user bmap structure */
- void __user *ap, /* pointer to user's array */
- int interface) /* interface flags */
+ struct getbmapx *bmv, /* user bmap structure */
+ xfs_bmap_format_t formatter, /* format to user */
+ void *arg) /* formatter arg */
{
__int64_t bmvend; /* last block requested */
int error; /* return value */
@@ -5790,19 +5874,17 @@ xfs_getbmap(
int nexleft; /* # of user extents left */
int subnex; /* # of bmapi's can do */
int nmap; /* number of map entries */
- struct getbmap out; /* output structure */
+ struct getbmapx out; /* output structure */
int whichfork; /* data or attr fork */
int prealloced; /* this is a file with
* preallocated data space */
- int sh_unwritten; /* true, if unwritten */
- /* extents listed separately */
+ int iflags; /* interface flags */
int bmapi_flags; /* flags for xfs_bmapi */
- __int32_t oflags; /* getbmapx bmv_oflags field */
mp = ip->i_mount;
+ iflags = bmv->bmv_iflags;
- whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
- sh_unwritten = (interface & BMV_IF_PREALLOC) != 0;
+ whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
/* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not
* generate a DMAPI read event. Otherwise, if the DM_EVENT_READ
@@ -5817,7 +5899,7 @@ xfs_getbmap(
* could misinterpret holes in a DMAPI file as true holes,
* when in fact they may represent offline user data.
*/
- if ((interface & BMV_IF_NO_DMAPI_READ) == 0 &&
+ if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 &&
DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
whichfork == XFS_DATA_FORK) {
error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
@@ -5873,8 +5955,9 @@ xfs_getbmap(
xfs_ilock(ip, XFS_IOLOCK_SHARED);
- if (whichfork == XFS_DATA_FORK &&
- (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
+ if (((iflags & BMV_IF_DELALLOC) == 0) &&
+ (whichfork == XFS_DATA_FORK) &&
+ (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
error = xfs_flush_pages(ip, (xfs_off_t)0,
-1, 0, FI_REMAPF);
@@ -5884,7 +5967,8 @@ xfs_getbmap(
}
}
- ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
+ ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) ||
+ ip->i_delayed_blks == 0);
lock = xfs_ilock_map_shared(ip);
@@ -5896,7 +5980,7 @@ xfs_getbmap(
nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
bmapi_flags = XFS_BMAPI_AFLAG(whichfork) |
- ((sh_unwritten) ? 0 : XFS_BMAPI_IGSTATE);
+ ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE);
/*
* Allocate enough space to handle "subnex" maps at a time.
@@ -5906,9 +5990,12 @@ xfs_getbmap(
bmv->bmv_entries = 0;
- if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0) {
- error = 0;
- goto unlock_and_return;
+ if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) {
+ if (((iflags & BMV_IF_DELALLOC) == 0) ||
+ whichfork == XFS_ATTR_FORK) {
+ error = 0;
+ goto unlock_and_return;
+ }
}
nexleft = nex;
@@ -5924,52 +6011,40 @@ xfs_getbmap(
ASSERT(nmap <= subnex);
for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
- nexleft--;
- oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ?
- BMV_OF_PREALLOC : 0;
+ out.bmv_oflags = 0;
+ if (map[i].br_state == XFS_EXT_UNWRITTEN)
+ out.bmv_oflags |= BMV_OF_PREALLOC;
+ else if (map[i].br_startblock == DELAYSTARTBLOCK)
+ out.bmv_oflags |= BMV_OF_DELALLOC;
out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff);
out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
- ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
+ out.bmv_unused1 = out.bmv_unused2 = 0;
+ ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
+ (map[i].br_startblock != DELAYSTARTBLOCK));
if (map[i].br_startblock == HOLESTARTBLOCK &&
whichfork == XFS_ATTR_FORK) {
/* came to the end of attribute fork */
+ out.bmv_oflags |= BMV_OF_LAST;
goto unlock_and_return;
} else {
+ int full = 0; /* user array is full */
+
if (!xfs_getbmapx_fix_eof_hole(ip, &out,
prealloced, bmvend,
map[i].br_startblock)) {
goto unlock_and_return;
}
- /* return either getbmap/getbmapx structure. */
- if (interface & BMV_IF_EXTENDED) {
- struct getbmapx outx;
-
- GETBMAP_CONVERT(out,outx);
- outx.bmv_oflags = oflags;
- outx.bmv_unused1 = outx.bmv_unused2 = 0;
- if (copy_to_user(ap, &outx,
- sizeof(outx))) {
- error = XFS_ERROR(EFAULT);
- goto unlock_and_return;
- }
- } else {
- if (copy_to_user(ap, &out,
- sizeof(out))) {
- error = XFS_ERROR(EFAULT);
- goto unlock_and_return;
- }
- }
+ /* format results & advance arg */
+ error = formatter(&arg, &out, &full);
+ if (error || full)
+ goto unlock_and_return;
+ nexleft--;
bmv->bmv_offset =
out.bmv_offset + out.bmv_length;
bmv->bmv_length = MAX((__int64_t)0,
(__int64_t)(bmvend - bmv->bmv_offset));
bmv->bmv_entries++;
- ap = (interface & BMV_IF_EXTENDED) ?
- (void __user *)
- ((struct getbmapx __user *)ap + 1) :
- (void __user *)
- ((struct getbmap __user *)ap + 1);
}
}
} while (nmap && nexleft && bmv->bmv_length);
@@ -6131,7 +6206,7 @@ xfs_bmap_get_bp(
void
xfs_check_block(
- xfs_bmbt_block_t *block,
+ struct xfs_btree_block *block,
xfs_mount_t *mp,
int root,
short sz)
@@ -6143,36 +6218,29 @@ xfs_check_block(
ASSERT(be16_to_cpu(block->bb_level) > 0);
prevp = NULL;
- for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) {
+ for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
dmxr = mp->m_bmap_dmxr[0];
-
- if (root) {
- keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz);
- } else {
- keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i);
- }
+ keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
if (prevp) {
- xfs_btree_check_key(XFS_BTNUM_BMAP, prevp, keyp);
+ ASSERT(be64_to_cpu(prevp->br_startoff) <
+ be64_to_cpu(keyp->br_startoff));
}
prevp = keyp;
/*
* Compare the block numbers to see if there are dups.
*/
+ if (root)
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
+ else
+ pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
- if (root) {
- pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz);
- } else {
- pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr);
- }
for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
- if (root) {
- thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz);
- } else {
- thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j,
- dmxr);
- }
+ if (root)
+ thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
+ else
+ thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
if (*thispa == *pp) {
cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
__func__, j, i,
@@ -6195,7 +6263,7 @@ xfs_bmap_check_leaf_extents(
xfs_inode_t *ip, /* incore inode pointer */
int whichfork) /* data or attr fork */
{
- xfs_bmbt_block_t *block; /* current btree block */
+ struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
xfs_buf_t *bp; /* buffer for "block" */
int error; /* error return value */
@@ -6223,7 +6291,7 @@ xfs_bmap_check_leaf_extents(
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
- pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
ASSERT(bno != NULLDFSBNO);
@@ -6245,9 +6313,9 @@ xfs_bmap_check_leaf_extents(
if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
goto error_norelse;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
XFS_WANT_CORRUPTED_GOTO(
- XFS_BMAP_SANITY_CHECK(mp, block, level),
+ xfs_bmap_sanity_check(mp, bp, level),
error0);
if (level == 0)
break;
@@ -6258,7 +6326,7 @@ xfs_bmap_check_leaf_extents(
*/
xfs_check_block(block, mp, 0, 0);
- pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
if (bp_release) {
@@ -6280,13 +6348,13 @@ xfs_bmap_check_leaf_extents(
xfs_extnum_t num_recs;
- num_recs = be16_to_cpu(block->bb_numrecs);
+ num_recs = xfs_btree_get_numrecs(block);
/*
* Read-ahead the next leaf block, if any.
*/
- nextbno = be64_to_cpu(block->bb_rightsib);
+ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
/*
* Check all the extents to make sure they are OK.
@@ -6294,13 +6362,17 @@ xfs_bmap_check_leaf_extents(
* conform with the first entry in this one.
*/
- ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
+ ep = XFS_BMBT_REC_ADDR(mp, block, 1);
if (i) {
- xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep);
+ ASSERT(xfs_bmbt_disk_get_startoff(&last) +
+ xfs_bmbt_disk_get_blockcount(&last) <=
+ xfs_bmbt_disk_get_startoff(ep));
}
for (j = 1; j < num_recs; j++) {
- nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1);
- xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp);
+ nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
+ ASSERT(xfs_bmbt_disk_get_startoff(ep) +
+ xfs_bmbt_disk_get_blockcount(ep) <=
+ xfs_bmbt_disk_get_startoff(nextp));
ep = nextp;
}
@@ -6326,7 +6398,7 @@ xfs_bmap_check_leaf_extents(
if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
goto error_norelse;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
}
if (bp_release) {
bp_release = 0;
@@ -6356,7 +6428,7 @@ xfs_bmap_count_blocks(
int whichfork, /* data or attr fork */
int *count) /* out: count of blocks */
{
- xfs_bmbt_block_t *block; /* current btree block */
+ struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
xfs_ifork_t *ifp; /* fork structure */
int level; /* btree level, for checking */
@@ -6379,7 +6451,7 @@ xfs_bmap_count_blocks(
block = ifp->if_broot;
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
- pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
ASSERT(bno != NULLDFSBNO);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -6413,29 +6485,29 @@ xfs_bmap_count_tree(
__be64 *pp;
xfs_fsblock_t bno = blockno;
xfs_fsblock_t nextbno;
- xfs_bmbt_block_t *block, *nextblock;
+ struct xfs_btree_block *block, *nextblock;
int numrecs;
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
return error;
*count += 1;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
if (--level) {
/* Not at node above leafs, count this level of nodes */
- nextbno = be64_to_cpu(block->bb_rightsib);
+ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
while (nextbno != NULLFSBLOCK) {
if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
0, &nbp, XFS_BMAP_BTREE_REF)))
return error;
*count += 1;
- nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp);
- nextbno = be64_to_cpu(nextblock->bb_rightsib);
+ nextblock = XFS_BUF_TO_BLOCK(nbp);
+ nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
xfs_trans_brelse(tp, nbp);
}
/* Dive to the next level */
- pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
if (unlikely((error =
xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
@@ -6448,9 +6520,9 @@ xfs_bmap_count_tree(
} else {
/* count all level 1 nodes and their leaves */
for (;;) {
- nextbno = be64_to_cpu(block->bb_rightsib);
+ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
numrecs = be16_to_cpu(block->bb_numrecs);
- xfs_bmap_disk_count_leaves(0, block, numrecs, count);
+ xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
xfs_trans_brelse(tp, bp);
if (nextbno == NULLFSBLOCK)
break;
@@ -6459,7 +6531,7 @@ xfs_bmap_count_tree(
XFS_BMAP_BTREE_REF)))
return error;
*count += 1;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
+ block = XFS_BUF_TO_BLOCK(bp);
}
}
return 0;
@@ -6489,8 +6561,8 @@ xfs_bmap_count_leaves(
*/
STATIC void
xfs_bmap_disk_count_leaves(
- xfs_extnum_t idx,
- xfs_bmbt_block_t *block,
+ struct xfs_mount *mp,
+ struct xfs_btree_block *block,
int numrecs,
int *count)
{
@@ -6498,7 +6570,7 @@ xfs_bmap_disk_count_leaves(
xfs_bmbt_rec_t *frp;
for (b = 1; b <= numrecs; b++) {
- frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);
+ frp = XFS_BMBT_REC_ADDR(mp, block, b);
*count += xfs_bmbt_disk_get_blockcount(frp);
}
}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 9f3e3a836d15..284571c05ed0 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -137,9 +137,7 @@ typedef struct xfs_bmalloca {
char conv; /* overwriting unwritten extents */
} xfs_bmalloca_t;
-#ifdef __KERNEL__
-
-#if defined(XFS_BMAP_TRACE)
+#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
/*
* Trace operations for bmap extent tracing
*/
@@ -163,9 +161,12 @@ xfs_bmap_trace_exlist(
int whichfork); /* data or attr fork */
#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
xfs_bmap_trace_exlist(__func__,ip,c,w)
-#else
+
+#else /* __KERNEL__ && XFS_BMAP_TRACE */
+
#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
-#endif
+
+#endif /* __KERNEL__ && XFS_BMAP_TRACE */
/*
* Convert inode from non-attributed to attributed.
@@ -206,20 +207,6 @@ xfs_bmap_compute_maxlevels(
int whichfork); /* data or attr fork */
/*
- * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
- * caller. Frees all the extents that need freeing, which must be done
- * last due to locking considerations.
- *
- * Return 1 if the given transaction was committed and a new one allocated,
- * and 0 otherwise.
- */
-int /* error */
-xfs_bmap_finish(
- struct xfs_trans **tp, /* transaction pointer addr */
- xfs_bmap_free_t *flist, /* i/o: list extents to free */
- int *committed); /* xact committed or not */
-
-/*
* Returns the file-relative block number of the first unused block in the file.
* This is the lowest-address hole if the file has holes, else the first block
* past the end of file.
@@ -344,14 +331,43 @@ xfs_bunmapi(
int *done); /* set if not done yet */
/*
- * Fcntl interface to xfs_bmapi.
+ * Check an extent list, which has just been read, for
+ * any bit in the extent flag field.
+ */
+int
+xfs_check_nostate_extents(
+ struct xfs_ifork *ifp,
+ xfs_extnum_t idx,
+ xfs_extnum_t num);
+
+#ifdef __KERNEL__
+
+/*
+ * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
+ * caller. Frees all the extents that need freeing, which must be done
+ * last due to locking considerations.
+ *
+ * Return 1 if the given transaction was committed and a new one allocated,
+ * and 0 otherwise.
+ */
+int /* error */
+xfs_bmap_finish(
+ struct xfs_trans **tp, /* transaction pointer addr */
+ xfs_bmap_free_t *flist, /* i/o: list extents to free */
+ int *committed); /* xact committed or not */
+
+/* bmap to userspace formatter - copy to user & advance pointer */
+typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
+
+/*
+ * Get inode's extents as described in bmv, and format for output.
*/
int /* error code */
xfs_getbmap(
xfs_inode_t *ip,
- struct getbmap *bmv, /* user bmap structure */
- void __user *ap, /* pointer to user's array */
- int iflags); /* interface flags */
+ struct getbmapx *bmv, /* user bmap structure */
+ xfs_bmap_format_t formatter, /* format to user */
+ void *arg); /* formatter arg */
/*
* Check if the endoff is outside the last extent. If so the caller will grow
@@ -375,16 +391,6 @@ xfs_bmap_count_blocks(
int *count);
/*
- * Check an extent list, which has just been read, for
- * any bit in the extent flag field.
- */
-int
-xfs_check_nostate_extents(
- struct xfs_ifork *ifp,
- xfs_extnum_t idx,
- xfs_extnum_t num);
-
-/*
* Search the extent records for the entry containing block bno.
* If bno lies in a hole, point to the next entry. If bno lies
* past eof, *eofp will be set, and *prevp will contain the last
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 23efad29a5cd..8f1ec73725d3 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,1406 +37,13 @@
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_quota.h"
-#if defined(XFS_BMBT_TRACE)
-ktrace_t *xfs_bmbt_trace_buf;
-#endif
-
-/*
- * Prototypes for internal btree functions.
- */
-
-
-STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *);
-STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
-STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *);
-STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *,
- __uint64_t *, xfs_btree_cur_t **, int *);
-STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
-
-
-#if defined(XFS_BMBT_TRACE)
-
-static char ARGS[] = "args";
-static char ENTRY[] = "entry";
-static char ERROR[] = "error";
-#undef EXIT
-static char EXIT[] = "exit";
-
-/*
- * Add a trace buffer entry for the arguments given to the routine,
- * generic form.
- */
-STATIC void
-xfs_bmbt_trace_enter(
- const char *func,
- xfs_btree_cur_t *cur,
- char *s,
- int type,
- int line,
- __psunsigned_t a0,
- __psunsigned_t a1,
- __psunsigned_t a2,
- __psunsigned_t a3,
- __psunsigned_t a4,
- __psunsigned_t a5,
- __psunsigned_t a6,
- __psunsigned_t a7,
- __psunsigned_t a8,
- __psunsigned_t a9,
- __psunsigned_t a10)
-{
- xfs_inode_t *ip;
- int whichfork;
-
- ip = cur->bc_private.b.ip;
- whichfork = cur->bc_private.b.whichfork;
- ktrace_enter(xfs_bmbt_trace_buf,
- (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
- (void *)func, (void *)s, (void *)ip, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
- ASSERT(ip->i_btrace);
- ktrace_enter(ip->i_btrace,
- (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
- (void *)func, (void *)s, (void *)ip, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
-}
-/*
- * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
- */
-STATIC void
-xfs_bmbt_trace_argbi(
- const char *func,
- xfs_btree_cur_t *cur,
- xfs_buf_t *b,
- int i,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBI, line,
- (__psunsigned_t)b, i, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
- */
-STATIC void
-xfs_bmbt_trace_argbii(
- const char *func,
- xfs_btree_cur_t *cur,
- xfs_buf_t *b,
- int i0,
- int i1,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBII, line,
- (__psunsigned_t)b, i0, i1, 0,
- 0, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for 3 block-length args
- * and an integer arg.
- */
-STATIC void
-xfs_bmbt_trace_argfffi(
- const char *func,
- xfs_btree_cur_t *cur,
- xfs_dfiloff_t o,
- xfs_dfsbno_t b,
- xfs_dfilblks_t i,
- int j,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGFFFI, line,
- o >> 32, (int)o, b >> 32, (int)b,
- i >> 32, (int)i, (int)j, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for one integer arg.
- */
-STATIC void
-xfs_bmbt_trace_argi(
- const char *func,
- xfs_btree_cur_t *cur,
- int i,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGI, line,
- i, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, fsblock, key.
- */
-STATIC void
-xfs_bmbt_trace_argifk(
- const char *func,
- xfs_btree_cur_t *cur,
- int i,
- xfs_fsblock_t f,
- xfs_dfiloff_t o,
- int line)
-{
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
- i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32,
- (int)o, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, fsblock, rec.
- */
-STATIC void
-xfs_bmbt_trace_argifr(
- const char *func,
- xfs_btree_cur_t *cur,
- int i,
- xfs_fsblock_t f,
- xfs_bmbt_rec_t *r,
- int line)
-{
- xfs_dfsbno_t b;
- xfs_dfilblks_t c;
- xfs_dfsbno_t d;
- xfs_dfiloff_t o;
- xfs_bmbt_irec_t s;
-
- d = (xfs_dfsbno_t)f;
- xfs_bmbt_disk_get_all(r, &s);
- o = (xfs_dfiloff_t)s.br_startoff;
- b = (xfs_dfsbno_t)s.br_startblock;
- c = s.br_blockcount;
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFR, line,
- i, d >> 32, (int)d, o >> 32,
- (int)o, b >> 32, (int)b, c >> 32,
- (int)c, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, key.
- */
-STATIC void
-xfs_bmbt_trace_argik(
- const char *func,
- xfs_btree_cur_t *cur,
- int i,
- xfs_bmbt_key_t *k,
- int line)
-{
- xfs_dfiloff_t o;
-
- o = be64_to_cpu(k->br_startoff);
- xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
- i, o >> 32, (int)o, 0,
- 0, 0, 0, 0,
- 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for the cursor/operation.
- */
-STATIC void
-xfs_bmbt_trace_cursor(
- const char *func,
- xfs_btree_cur_t *cur,
- char *s,
- int line)
-{
- xfs_bmbt_rec_host_t r;
-
- xfs_bmbt_set_all(&r, &cur->bc_rec.b);
- xfs_bmbt_trace_enter(func, cur, s, XFS_BMBT_KTRACE_CUR, line,
- (cur->bc_nlevels << 24) | (cur->bc_private.b.flags << 16) |
- cur->bc_private.b.allocated,
- r.l0 >> 32, (int)r.l0,
- r.l1 >> 32, (int)r.l1,
- (unsigned long)cur->bc_bufs[0], (unsigned long)cur->bc_bufs[1],
- (unsigned long)cur->bc_bufs[2], (unsigned long)cur->bc_bufs[3],
- (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
- (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
-}
-
-#define XFS_BMBT_TRACE_ARGBI(c,b,i) \
- xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__)
-#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \
- xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__)
-#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \
- xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
-#define XFS_BMBT_TRACE_ARGI(c,i) \
- xfs_bmbt_trace_argi(__func__, c, i, __LINE__)
-#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
- xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__)
-#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
- xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__)
-#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
- xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__)
-#define XFS_BMBT_TRACE_CURSOR(c,s) \
- xfs_bmbt_trace_cursor(__func__, c, s, __LINE__)
-#else
-#define XFS_BMBT_TRACE_ARGBI(c,b,i)
-#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
-#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)
-#define XFS_BMBT_TRACE_ARGI(c,i)
-#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s)
-#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)
-#define XFS_BMBT_TRACE_ARGIK(c,i,k)
-#define XFS_BMBT_TRACE_CURSOR(c,s)
-#endif /* XFS_BMBT_TRACE */
-
-
-/*
- * Internal functions.
- */
-
-/*
- * Delete record pointed to by cur/level.
- */
-STATIC int /* error */
-xfs_bmbt_delrec(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- xfs_bmbt_block_t *block; /* bmap btree block */
- xfs_fsblock_t bno; /* fs-relative block number */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop counter */
- int j; /* temp state */
- xfs_bmbt_key_t key; /* bmap btree key */
- xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
- xfs_fsblock_t lbno; /* left sibling block number */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_bmbt_block_t *left; /* left btree block */
- xfs_bmbt_key_t *lkp; /* left btree key */
- xfs_bmbt_ptr_t *lpp; /* left address pointer */
- int lrecs=0; /* left record count */
- xfs_bmbt_rec_t *lrp; /* left record pointer */
- xfs_mount_t *mp; /* file system mount point */
- xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
- int ptr; /* key/record index */
- xfs_fsblock_t rbno; /* right sibling block number */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_bmbt_block_t *right; /* right btree block */
- xfs_bmbt_key_t *rkp; /* right btree key */
- xfs_bmbt_rec_t *rp; /* pointer to bmap btree rec */
- xfs_bmbt_ptr_t *rpp; /* right address pointer */
- xfs_bmbt_block_t *rrblock; /* right-right btree block */
- xfs_buf_t *rrbp; /* right-right buffer pointer */
- int rrecs=0; /* right record count */
- xfs_bmbt_rec_t *rrp; /* right record pointer */
- xfs_btree_cur_t *tcur; /* temporary btree cursor */
- int numrecs; /* temporary numrec count */
- int numlrecs, numrrecs;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- ptr = cur->bc_ptrs[level];
- tcur = NULL;
- if (ptr == 0) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- block = xfs_bmbt_get_block(cur, level, &bp);
- numrecs = be16_to_cpu(block->bb_numrecs);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
-#endif
- if (ptr > numrecs) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- XFS_STATS_INC(xs_bmbt_delrec);
- if (level > 0) {
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = ptr; i < numrecs; i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- }
-#endif
- if (ptr < numrecs) {
- memmove(&kp[ptr - 1], &kp[ptr],
- (numrecs - ptr) * sizeof(*kp));
- memmove(&pp[ptr - 1], &pp[ptr],
- (numrecs - ptr) * sizeof(*pp));
- xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1);
- xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1);
- }
- } else {
- rp = XFS_BMAP_REC_IADDR(block, 1, cur);
- if (ptr < numrecs) {
- memmove(&rp[ptr - 1], &rp[ptr],
- (numrecs - ptr) * sizeof(*rp));
- xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
- }
- if (ptr == 1) {
- key.br_startoff =
- cpu_to_be64(xfs_bmbt_disk_get_startoff(rp));
- kp = &key;
- }
- }
- numrecs--;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
- /*
- * We're at the root level.
- * First, shrink the root block in-memory.
- * Try to get rid of the next level down.
- * If we can't then there's nothing left to do.
- */
- if (level == cur->bc_nlevels - 1) {
- xfs_iroot_realloc(cur->bc_private.b.ip, -1,
- cur->bc_private.b.whichfork);
- if ((error = xfs_bmbt_killroot(cur))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
- if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- rbno = be64_to_cpu(block->bb_rightsib);
- lbno = be64_to_cpu(block->bb_leftsib);
- /*
- * One child of root, need to get a chance to copy its contents
- * into the root and delete it. Can't go up to next level,
- * there's nothing to delete there.
- */
- if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK &&
- level == cur->bc_nlevels - 2) {
- if ((error = xfs_bmbt_killroot(cur))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK);
- if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- bno = NULLFSBLOCK;
- if (rbno != NULLFSBLOCK) {
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_bmbt_increment(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- rbp = tcur->bc_bufs[level];
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
-#endif
- bno = be64_to_cpu(right->bb_leftsib);
- if (be16_to_cpu(right->bb_numrecs) - 1 >=
- XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
- if ((error = xfs_bmbt_lshift(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_BMAP_BLOCK_IMINRECS(level, tcur));
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- tcur = NULL;
- if (level > 0) {
- if ((error = xfs_bmbt_decrement(cur,
- level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur,
- ERROR);
- goto error0;
- }
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- }
- rrecs = be16_to_cpu(right->bb_numrecs);
- if (lbno != NULLFSBLOCK) {
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- }
- }
- if (lbno != NULLFSBLOCK) {
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- /*
- * decrement to last in block
- */
- if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- lbp = tcur->bc_bufs[level];
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
-#endif
- bno = be64_to_cpu(left->bb_rightsib);
- if (be16_to_cpu(left->bb_numrecs) - 1 >=
- XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
- if ((error = xfs_bmbt_rshift(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (i) {
- ASSERT(be16_to_cpu(block->bb_numrecs) >=
- XFS_BMAP_BLOCK_IMINRECS(level, tcur));
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- tcur = NULL;
- if (level == 0)
- cur->bc_ptrs[0]++;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- }
- lrecs = be16_to_cpu(left->bb_numrecs);
- }
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- tcur = NULL;
- mp = cur->bc_mp;
- ASSERT(bno != NULLFSBLOCK);
- if (lbno != NULLFSBLOCK &&
- lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- rbno = bno;
- right = block;
- rbp = bp;
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
- if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- } else if (rbno != NULLFSBLOCK &&
- rrecs + be16_to_cpu(block->bb_numrecs) <=
- XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- lbno = bno;
- left = block;
- lbp = bp;
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
- if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- lrecs = be16_to_cpu(left->bb_numrecs);
- } else {
- if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- numlrecs = be16_to_cpu(left->bb_numrecs);
- numrrecs = be16_to_cpu(right->bb_numrecs);
- if (level > 0) {
- lkp = XFS_BMAP_KEY_IADDR(left, numlrecs + 1, cur);
- lpp = XFS_BMAP_PTR_IADDR(left, numlrecs + 1, cur);
- rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
- rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < numrrecs; i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- }
-#endif
- memcpy(lkp, rkp, numrrecs * sizeof(*lkp));
- memcpy(lpp, rpp, numrrecs * sizeof(*lpp));
- xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
- xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
- } else {
- lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur);
- rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
- memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
- xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
- }
- be16_add_cpu(&left->bb_numrecs, numrrecs);
- left->bb_rightsib = right->bb_rightsib;
- xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
- if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) {
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp,
- be64_to_cpu(left->bb_rightsib),
- 0, &rrbp, XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
- if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- rrblock->bb_leftsib = cpu_to_be64(lbno);
- xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
- }
- xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1,
- cur->bc_private.b.flist, mp);
- cur->bc_private.b.ip->i_d.di_nblocks--;
- xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(mp, cur->bc_tp, cur->bc_private.b.ip,
- XFS_TRANS_DQ_BCOUNT, -1L);
- xfs_trans_binval(cur->bc_tp, rbp);
- if (bp != lbp) {
- cur->bc_bufs[level] = lbp;
- cur->bc_ptrs[level] += lrecs;
- cur->bc_ra[level] = 0;
- } else if ((error = xfs_bmbt_increment(cur, level + 1, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- goto error0;
- }
- if (level > 0)
- cur->bc_ptrs[level]--;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 2;
- return 0;
-
-error0:
- if (tcur)
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
-}
-
-/*
- * Insert one record/level. Return information to the caller
- * allowing the next level up to proceed if necessary.
- */
-STATIC int /* error */
-xfs_bmbt_insrec(
- xfs_btree_cur_t *cur,
- int level,
- xfs_fsblock_t *bnop,
- xfs_bmbt_rec_t *recp,
- xfs_btree_cur_t **curp,
- int *stat) /* no-go/done/continue */
-{
- xfs_bmbt_block_t *block; /* bmap btree block */
- xfs_buf_t *bp; /* buffer for block */
- int error; /* error return value */
- int i; /* loop index */
- xfs_bmbt_key_t key; /* bmap btree key */
- xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
- int logflags; /* inode logging flags */
- xfs_fsblock_t nbno; /* new block number */
- struct xfs_btree_cur *ncur; /* new btree cursor */
- __uint64_t startoff; /* new btree key value */
- xfs_bmbt_rec_t nrec; /* new record count */
- int optr; /* old key/record index */
- xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
- int ptr; /* key/record index */
- xfs_bmbt_rec_t *rp=NULL; /* pointer to bmap btree rec */
- int numrecs;
-
- ASSERT(level < cur->bc_nlevels);
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
- ncur = NULL;
- key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp));
- optr = ptr = cur->bc_ptrs[level];
- if (ptr == 0) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- XFS_STATS_INC(xs_bmbt_insrec);
- block = xfs_bmbt_get_block(cur, level, &bp);
- numrecs = be16_to_cpu(block->bb_numrecs);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (ptr <= numrecs) {
- if (level == 0) {
- rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
- xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp);
- } else {
- kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
- xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp);
- }
- }
-#endif
- nbno = NULLFSBLOCK;
- if (numrecs == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- if (numrecs < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
- /*
- * A root block, that can be made bigger.
- */
- xfs_iroot_realloc(cur->bc_private.b.ip, 1,
- cur->bc_private.b.whichfork);
- block = xfs_bmbt_get_block(cur, level, &bp);
- } else if (level == cur->bc_nlevels - 1) {
- if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) ||
- *stat == 0) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
- logflags);
- block = xfs_bmbt_get_block(cur, level, &bp);
- } else {
- if ((error = xfs_bmbt_rshift(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (i) {
- /* nothing */
- } else {
- if ((error = xfs_bmbt_lshift(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (i) {
- optr = ptr = cur->bc_ptrs[level];
- } else {
- if ((error = xfs_bmbt_split(cur, level,
- &nbno, &startoff, &ncur,
- &i))) {
- XFS_BMBT_TRACE_CURSOR(cur,
- ERROR);
- return error;
- }
- if (i) {
- block = xfs_bmbt_get_block(
- cur, level, &bp);
-#ifdef DEBUG
- if ((error =
- xfs_btree_check_lblock(cur,
- block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(
- cur, ERROR);
- return error;
- }
-#endif
- ptr = cur->bc_ptrs[level];
- xfs_bmbt_disk_set_allf(&nrec,
- startoff, 0, 0,
- XFS_EXT_NORM);
- } else {
- XFS_BMBT_TRACE_CURSOR(cur,
- EXIT);
- *stat = 0;
- return 0;
- }
- }
- }
- }
- }
- numrecs = be16_to_cpu(block->bb_numrecs);
- if (level > 0) {
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
-#ifdef DEBUG
- for (i = numrecs; i >= ptr; i--) {
- if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1],
- level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memmove(&kp[ptr], &kp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*kp));
- memmove(&pp[ptr], &pp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*pp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, *bnop, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- kp[ptr - 1] = key;
- pp[ptr - 1] = cpu_to_be64(*bnop);
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_bmbt_log_keys(cur, bp, ptr, numrecs);
- xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs);
- } else {
- rp = XFS_BMAP_REC_IADDR(block, 1, cur);
- memmove(&rp[ptr], &rp[ptr - 1],
- (numrecs - ptr + 1) * sizeof(*rp));
- rp[ptr - 1] = *recp;
- numrecs++;
- block->bb_numrecs = cpu_to_be16(numrecs);
- xfs_bmbt_log_recs(cur, bp, ptr, numrecs);
- }
- xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
-#ifdef DEBUG
- if (ptr < numrecs) {
- if (level == 0)
- xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1,
- rp + ptr);
- else
- xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1,
- kp + ptr);
- }
-#endif
- if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- *bnop = nbno;
- if (nbno != NULLFSBLOCK) {
- *recp = nrec;
- *curp = ncur;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-STATIC int
-xfs_bmbt_killroot(
- xfs_btree_cur_t *cur)
-{
- xfs_bmbt_block_t *block;
- xfs_bmbt_block_t *cblock;
- xfs_buf_t *cbp;
- xfs_bmbt_key_t *ckp;
- xfs_bmbt_ptr_t *cpp;
-#ifdef DEBUG
- int error;
-#endif
- int i;
- xfs_bmbt_key_t *kp;
- xfs_inode_t *ip;
- xfs_ifork_t *ifp;
- int level;
- xfs_bmbt_ptr_t *pp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- level = cur->bc_nlevels - 1;
- ASSERT(level >= 1);
- /*
- * Don't deal with the root block needs to be a leaf case.
- * We're just going to turn the thing back into extents anyway.
- */
- if (level == 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
- }
- block = xfs_bmbt_get_block(cur, level, &cbp);
- /*
- * Give up if the root has multiple children.
- */
- if (be16_to_cpu(block->bb_numrecs) != 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
- }
- /*
- * Only do this if the next level will fit.
- * Then the data must be copied up to the inode,
- * instead of freeing the root you free the next level.
- */
- cbp = cur->bc_bufs[level - 1];
- cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
- if (be16_to_cpu(cblock->bb_numrecs) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
- }
- ASSERT(be64_to_cpu(cblock->bb_leftsib) == NULLDFSBNO);
- ASSERT(be64_to_cpu(cblock->bb_rightsib) == NULLDFSBNO);
- ip = cur->bc_private.b.ip;
- ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
- ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
- XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
- i = (int)(be16_to_cpu(cblock->bb_numrecs) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
- if (i) {
- xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
- block = ifp->if_broot;
- }
- be16_add_cpu(&block->bb_numrecs, i);
- ASSERT(block->bb_numrecs == cblock->bb_numrecs);
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
- memcpy(kp, ckp, be16_to_cpu(block->bb_numrecs) * sizeof(*kp));
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
- cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memcpy(pp, cpp, be16_to_cpu(block->bb_numrecs) * sizeof(*pp));
- xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
- cur->bc_private.b.flist, cur->bc_mp);
- ip->i_d.di_nblocks--;
- XFS_TRANS_MOD_DQUOT_BYINO(cur->bc_mp, cur->bc_tp, ip,
- XFS_TRANS_DQ_BCOUNT, -1L);
- xfs_trans_binval(cur->bc_tp, cbp);
- cur->bc_bufs[level - 1] = NULL;
- be16_add_cpu(&block->bb_level, -1);
- xfs_trans_log_inode(cur->bc_tp, ip,
- XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
- cur->bc_nlevels--;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
-}
-
-/*
- * Log key values from the btree block.
- */
-STATIC void
-xfs_bmbt_log_keys(
- xfs_btree_cur_t *cur,
- xfs_buf_t *bp,
- int kfirst,
- int klast)
-{
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast);
- tp = cur->bc_tp;
- if (bp) {
- xfs_bmbt_block_t *block;
- int first;
- xfs_bmbt_key_t *kp;
- int last;
-
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- kp = XFS_BMAP_KEY_DADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(tp, bp, first, last);
- } else {
- xfs_inode_t *ip;
-
- ip = cur->bc_private.b.ip;
- xfs_trans_log_inode(tp, ip,
- XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
-}
-
-/*
- * Log pointer values from the btree block.
- */
-STATIC void
-xfs_bmbt_log_ptrs(
- xfs_btree_cur_t *cur,
- xfs_buf_t *bp,
- int pfirst,
- int plast)
-{
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast);
- tp = cur->bc_tp;
- if (bp) {
- xfs_bmbt_block_t *block;
- int first;
- int last;
- xfs_bmbt_ptr_t *pp;
-
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- pp = XFS_BMAP_PTR_DADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(tp, bp, first, last);
- } else {
- xfs_inode_t *ip;
-
- ip = cur->bc_private.b.ip;
- xfs_trans_log_inode(tp, ip,
- XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
-}
-
-/*
- * Lookup the record. The cursor is made to point to it, based on dir.
- */
-STATIC int /* error */
-xfs_bmbt_lookup(
- xfs_btree_cur_t *cur,
- xfs_lookup_t dir,
- int *stat) /* success/failure */
-{
- xfs_bmbt_block_t *block=NULL;
- xfs_buf_t *bp;
- xfs_daddr_t d;
- xfs_sfiloff_t diff;
- int error; /* error return value */
- xfs_fsblock_t fsbno=0;
- int high;
- int i;
- int keyno=0;
- xfs_bmbt_key_t *kkbase=NULL;
- xfs_bmbt_key_t *kkp;
- xfs_bmbt_rec_t *krbase=NULL;
- xfs_bmbt_rec_t *krp;
- int level;
- int low;
- xfs_mount_t *mp;
- xfs_bmbt_ptr_t *pp;
- xfs_bmbt_irec_t *rp;
- xfs_fileoff_t startoff;
- xfs_trans_t *tp;
-
- XFS_STATS_INC(xs_bmbt_lookup);
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, (int)dir);
- tp = cur->bc_tp;
- mp = cur->bc_mp;
- rp = &cur->bc_rec.b;
- for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
- if (level < cur->bc_nlevels - 1) {
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- bp = cur->bc_bufs[level];
- if (bp && XFS_BUF_ADDR(bp) != d)
- bp = NULL;
- if (!bp) {
- if ((error = xfs_btree_read_bufl(mp, tp, fsbno,
- 0, &bp, XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- xfs_btree_setbuf(cur, level, bp);
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- if ((error = xfs_btree_check_lblock(cur, block,
- level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- } else
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- } else
- block = xfs_bmbt_get_block(cur, level, &bp);
- if (diff == 0)
- keyno = 1;
- else {
- if (level > 0)
- kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur);
- else
- krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
- low = 1;
- if (!(high = be16_to_cpu(block->bb_numrecs))) {
- ASSERT(level == 0);
- cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- while (low <= high) {
- XFS_STATS_INC(xs_bmbt_compare);
- keyno = (low + high) >> 1;
- if (level > 0) {
- kkp = kkbase + keyno - 1;
- startoff = be64_to_cpu(kkp->br_startoff);
- } else {
- krp = krbase + keyno - 1;
- startoff = xfs_bmbt_disk_get_startoff(krp);
- }
- diff = (xfs_sfiloff_t)
- (startoff - rp->br_startoff);
- if (diff < 0)
- low = keyno + 1;
- else if (diff > 0)
- high = keyno - 1;
- else
- break;
- }
- }
- if (level > 0) {
- if (diff > 0 && --keyno < 1)
- keyno = 1;
- pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
- fsbno = be64_to_cpu(*pp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, fsbno, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- cur->bc_ptrs[level] = keyno;
- }
- }
- if (dir != XFS_LOOKUP_LE && diff < 0) {
- keyno++;
- /*
- * If ge search and we went off the end of the block, but it's
- * not the last block, we're in the wrong block.
- */
- if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) &&
- be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) {
- cur->bc_ptrs[0] = keyno;
- if ((error = xfs_bmbt_increment(cur, 0, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- XFS_WANT_CORRUPTED_RETURN(i == 1);
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- }
- else if (dir == XFS_LOOKUP_LE && diff > 0)
- keyno--;
- cur->bc_ptrs[0] = keyno;
- if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- } else {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
- }
- return 0;
-}
-
-/*
- * Move 1 record left from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_bmbt_lshift(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- int error; /* error return value */
-#ifdef DEBUG
- int i; /* loop counter */
-#endif
- xfs_bmbt_key_t key; /* bmap btree key */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_bmbt_block_t *left; /* left btree block */
- xfs_bmbt_key_t *lkp=NULL; /* left btree key */
- xfs_bmbt_ptr_t *lpp; /* left address pointer */
- int lrecs; /* left record count */
- xfs_bmbt_rec_t *lrp=NULL; /* left record pointer */
- xfs_mount_t *mp; /* file system mount point */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_bmbt_block_t *right; /* right btree block */
- xfs_bmbt_key_t *rkp=NULL; /* right btree key */
- xfs_bmbt_ptr_t *rpp=NULL; /* right address pointer */
- xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */
- int rrecs; /* right record count */
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- if (level == cur->bc_nlevels - 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- rbp = cur->bc_bufs[level];
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (be64_to_cpu(right->bb_leftsib) == NULLDFSBNO) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- if (cur->bc_ptrs[level] <= 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- mp = cur->bc_mp;
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(right->bb_leftsib), 0,
- &lbp, XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
- if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (be16_to_cpu(left->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- lrecs = be16_to_cpu(left->bb_numrecs) + 1;
- if (level > 0) {
- lkp = XFS_BMAP_KEY_IADDR(left, lrecs, cur);
- rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
- *lkp = *rkp;
- xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs);
- lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur);
- rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- *lpp = *rpp;
- xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs);
- } else {
- lrp = XFS_BMAP_REC_IADDR(left, lrecs, cur);
- rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
- *lrp = *rrp;
- xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
- }
- left->bb_numrecs = cpu_to_be16(lrecs);
- xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
-#ifdef DEBUG
- if (level > 0)
- xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp);
- else
- xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
-#endif
- rrecs = be16_to_cpu(right->bb_numrecs) - 1;
- right->bb_numrecs = cpu_to_be16(rrecs);
- xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
- if (level > 0) {
-#ifdef DEBUG
- for (i = 0; i < rrecs; i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1],
- level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memmove(rkp, rkp + 1, rrecs * sizeof(*rkp));
- memmove(rpp, rpp + 1, rrecs * sizeof(*rpp));
- xfs_bmbt_log_keys(cur, rbp, 1, rrecs);
- xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs);
- } else {
- memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
- xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
- key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
- rkp = &key;
- }
- if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- cur->bc_ptrs[level]--;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-/*
- * Move 1 record right from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int /* error */
-xfs_bmbt_rshift(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i; /* loop counter */
- xfs_bmbt_key_t key; /* bmap btree key */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_bmbt_block_t *left; /* left btree block */
- xfs_bmbt_key_t *lkp; /* left btree key */
- xfs_bmbt_ptr_t *lpp; /* left address pointer */
- xfs_bmbt_rec_t *lrp; /* left record pointer */
- xfs_mount_t *mp; /* file system mount point */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_bmbt_block_t *right; /* right btree block */
- xfs_bmbt_key_t *rkp; /* right btree key */
- xfs_bmbt_ptr_t *rpp; /* right address pointer */
- xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */
- struct xfs_btree_cur *tcur; /* temporary btree cursor */
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- if (level == cur->bc_nlevels - 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- lbp = cur->bc_bufs[level];
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (be64_to_cpu(left->bb_rightsib) == NULLDFSBNO) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- mp = cur->bc_mp;
- if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(left->bb_rightsib), 0,
- &rbp, XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
- if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (be16_to_cpu(right->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- if (level > 0) {
- lkp = XFS_BMAP_KEY_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- lpp = XFS_BMAP_PTR_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
- rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
- if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- *rkp = *lkp;
- *rpp = *lpp;
- xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- } else {
- lrp = XFS_BMAP_REC_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
- rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
- memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- *rrp = *lrp;
- xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
- rkp = &key;
- }
- be16_add_cpu(&left->bb_numrecs, -1);
- xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
- be16_add_cpu(&right->bb_numrecs, 1);
-#ifdef DEBUG
- if (level > 0)
- xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
- else
- xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1);
-#endif
- xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
- if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_bmbt_increment(tcur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
- goto error1;
- }
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if ((error = xfs_bmbt_updkey(tcur, rkp, level + 1))) {
- XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
- goto error1;
- }
- xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-error0:
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
-error1:
- xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
- return error;
-}
-
/*
* Determine the extent state.
*/
@@ -1453,229 +60,15 @@ xfs_extent_state(
return XFS_EXT_NORM;
}
-
-/*
- * Split cur/level block in half.
- * Return new block number and its first record (to be inserted into parent).
- */
-STATIC int /* error */
-xfs_bmbt_split(
- xfs_btree_cur_t *cur,
- int level,
- xfs_fsblock_t *bnop,
- __uint64_t *startoff,
- xfs_btree_cur_t **curp,
- int *stat) /* success/failure */
-{
- xfs_alloc_arg_t args; /* block allocation args */
- int error; /* error return value */
- int i; /* loop counter */
- xfs_fsblock_t lbno; /* left sibling block number */
- xfs_buf_t *lbp; /* left buffer pointer */
- xfs_bmbt_block_t *left; /* left btree block */
- xfs_bmbt_key_t *lkp; /* left btree key */
- xfs_bmbt_ptr_t *lpp; /* left address pointer */
- xfs_bmbt_rec_t *lrp; /* left record pointer */
- xfs_buf_t *rbp; /* right buffer pointer */
- xfs_bmbt_block_t *right; /* right btree block */
- xfs_bmbt_key_t *rkp; /* right btree key */
- xfs_bmbt_ptr_t *rpp; /* right address pointer */
- xfs_bmbt_block_t *rrblock; /* right-right btree block */
- xfs_buf_t *rrbp; /* right-right buffer pointer */
- xfs_bmbt_rec_t *rrp; /* right record pointer */
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff);
- args.tp = cur->bc_tp;
- args.mp = cur->bc_mp;
- lbp = cur->bc_bufs[level];
- lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
- left = XFS_BUF_TO_BMBT_BLOCK(lbp);
- args.fsbno = cur->bc_private.b.firstblock;
- args.firstblock = args.fsbno;
- args.minleft = 0;
- if (args.fsbno == NULLFSBLOCK) {
- args.fsbno = lbno;
- args.type = XFS_ALLOCTYPE_START_BNO;
- /*
- * Make sure there is sufficient room left in the AG to
- * complete a full tree split for an extent insert. If
- * we are converting the middle part of an extent then
- * we may need space for two tree splits.
- *
- * We are relying on the caller to make the correct block
- * reservation for this operation to succeed. If the
- * reservation amount is insufficient then we may fail a
- * block allocation here and corrupt the filesystem.
- */
- args.minleft = xfs_trans_get_block_res(args.tp);
- } else if (cur->bc_private.b.flist->xbf_low)
- args.type = XFS_ALLOCTYPE_START_BNO;
- else
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- args.mod = args.alignment = args.total = args.isfl =
- args.userdata = args.minalignslop = 0;
- args.minlen = args.maxlen = args.prod = 1;
- args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
- if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return XFS_ERROR(ENOSPC);
- }
- if ((error = xfs_alloc_vextent(&args))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (args.fsbno == NULLFSBLOCK && args.minleft) {
- /*
- * Could not find an AG with enough free space to satisfy
- * a full btree split. Try again without minleft and if
- * successful activate the lowspace algorithm.
- */
- args.fsbno = 0;
- args.type = XFS_ALLOCTYPE_FIRST_AG;
- args.minleft = 0;
- if ((error = xfs_alloc_vextent(&args))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- cur->bc_private.b.flist->xbf_low = 1;
- }
- if (args.fsbno == NULLFSBLOCK) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- ASSERT(args.len == 1);
- cur->bc_private.b.firstblock = args.fsbno;
- cur->bc_private.b.allocated++;
- cur->bc_private.b.ip->i_d.di_nblocks++;
- xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
- XFS_TRANS_DQ_BCOUNT, 1L);
- rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
- right = XFS_BUF_TO_BMBT_BLOCK(rbp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, left, level, rbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- right->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
- right->bb_level = left->bb_level;
- right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
- if ((be16_to_cpu(left->bb_numrecs) & 1) &&
- cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
- be16_add_cpu(&right->bb_numrecs, 1);
- i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
- if (level > 0) {
- lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
- lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
- rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
- rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
- memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
- xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- *startoff = be64_to_cpu(rkp->br_startoff);
- } else {
- lrp = XFS_BMAP_REC_IADDR(left, i, cur);
- rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
- memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
- xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- *startoff = xfs_bmbt_disk_get_startoff(rrp);
- }
- be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
- right->bb_rightsib = left->bb_rightsib;
- left->bb_rightsib = cpu_to_be64(args.fsbno);
- right->bb_leftsib = cpu_to_be64(lbno);
- xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
- xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
- if (be64_to_cpu(right->bb_rightsib) != NULLDFSBNO) {
- if ((error = xfs_btree_read_bufl(args.mp, args.tp,
- be64_to_cpu(right->bb_rightsib), 0, &rrbp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
- if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- rrblock->bb_leftsib = cpu_to_be64(args.fsbno);
- xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
- }
- if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
- xfs_btree_setbuf(cur, level, rbp);
- cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
- }
- if (level + 1 < cur->bc_nlevels) {
- if ((error = xfs_btree_dup_cursor(cur, curp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- (*curp)->bc_ptrs[level + 1]++;
- }
- *bnop = args.fsbno;
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-
-/*
- * Update keys for the record.
- */
-STATIC int
-xfs_bmbt_updkey(
- xfs_btree_cur_t *cur,
- xfs_bmbt_key_t *keyp, /* on-disk format */
- int level)
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
-#ifdef DEBUG
- int error;
-#endif
- xfs_bmbt_key_t *kp;
- int ptr;
-
- ASSERT(level >= 1);
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGIK(cur, level, keyp);
- for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
- block = xfs_bmbt_get_block(cur, level, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- ptr = cur->bc_ptrs[level];
- kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
- *kp = *keyp;
- xfs_bmbt_log_keys(cur, bp, ptr, ptr);
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
-}
-
/*
* Convert on-disk form of btree root to in-memory form.
*/
void
xfs_bmdr_to_bmbt(
+ struct xfs_mount *mp,
xfs_bmdr_block_t *dblock,
int dblocklen,
- xfs_bmbt_block_t *rblock,
+ struct xfs_btree_block *rblock,
int rblocklen)
{
int dmxr;
@@ -1688,129 +81,19 @@ xfs_bmdr_to_bmbt(
rblock->bb_level = dblock->bb_level;
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
- rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
- rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
- dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
- fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1);
- tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
- fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr);
- tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
+ rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+ fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
+ tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
+ fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
+ tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_bmbt_decrement(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
- int error; /* error return value */
- xfs_fsblock_t fsbno;
- int lev;
- xfs_mount_t *mp;
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- ASSERT(level < cur->bc_nlevels);
- if (level < cur->bc_nlevels - 1)
- xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
- if (--cur->bc_ptrs[level] > 0) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- block = xfs_bmbt_get_block(cur, level, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- if (--cur->bc_ptrs[lev] > 0)
- break;
- if (lev < cur->bc_nlevels - 1)
- xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
- }
- if (lev == cur->bc_nlevels) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- tp = cur->bc_tp;
- mp = cur->bc_mp;
- for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
- fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-/*
- * Delete the record pointed to by cur.
- */
-int /* error */
-xfs_bmbt_delete(
- xfs_btree_cur_t *cur,
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i;
- int level;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- for (level = 0, i = 2; i == 2; level++) {
- if ((error = xfs_bmbt_delrec(cur, level, &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
- if (i == 0) {
- for (level = 1; level < cur->bc_nlevels; level++) {
- if (cur->bc_ptrs[level] == 0) {
- if ((error = xfs_bmbt_decrement(cur, level,
- &i))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- break;
- }
- }
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = i;
- return 0;
-}
-
-/*
* Convert a compressed bmap extent record to an uncompressed form.
* This code must be in sync with the routines xfs_bmbt_get_startoff,
* xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
@@ -1864,31 +147,6 @@ xfs_bmbt_get_all(
}
/*
- * Get the block pointer for the given level of the cursor.
- * Fill in the buffer pointer, if applicable.
- */
-xfs_bmbt_block_t *
-xfs_bmbt_get_block(
- xfs_btree_cur_t *cur,
- int level,
- xfs_buf_t **bpp)
-{
- xfs_ifork_t *ifp;
- xfs_bmbt_block_t *rval;
-
- if (level < cur->bc_nlevels - 1) {
- *bpp = cur->bc_bufs[level];
- rval = XFS_BUF_TO_BMBT_BLOCK(*bpp);
- } else {
- *bpp = NULL;
- ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
- cur->bc_private.b.whichfork);
- rval = ifp->if_broot;
- }
- return rval;
-}
-
-/*
* Extract the blockcount field from an in memory bmap extent record.
*/
xfs_filblks_t
@@ -1950,7 +208,8 @@ xfs_bmbt_disk_get_all(
xfs_bmbt_rec_t *r,
xfs_bmbt_irec_t *s)
{
- __xfs_bmbt_get_all(be64_to_cpu(r->l0), be64_to_cpu(r->l1), s);
+ __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
+ get_unaligned_be64(&r->l1), s);
}
/*
@@ -1974,348 +233,6 @@ xfs_bmbt_disk_get_startoff(
XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
}
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int /* error */
-xfs_bmbt_increment(
- xfs_btree_cur_t *cur,
- int level,
- int *stat) /* success/failure */
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
- int error; /* error return value */
- xfs_fsblock_t fsbno;
- int lev;
- xfs_mount_t *mp;
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGI(cur, level);
- ASSERT(level < cur->bc_nlevels);
- if (level < cur->bc_nlevels - 1)
- xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
- block = xfs_bmbt_get_block(cur, level, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
- }
- if (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- block = xfs_bmbt_get_block(cur, lev, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
- break;
- if (lev < cur->bc_nlevels - 1)
- xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
- }
- if (lev == cur->bc_nlevels) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- tp = cur->bc_tp;
- mp = cur->bc_mp;
- for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
- fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
- if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
- XFS_BMAP_BTREE_REF))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- lev--;
- xfs_btree_setbuf(cur, lev, bp);
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- cur->bc_ptrs[lev] = 1;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 1;
- return 0;
-}
-
-/*
- * Insert the current record at the point referenced by cur.
- *
- * A multi-level split of the tree on insert will invalidate the original
- * cursor. All callers of this function should assume that the cursor is
- * no longer valid and revalidate it.
- */
-int /* error */
-xfs_bmbt_insert(
- xfs_btree_cur_t *cur,
- int *stat) /* success/failure */
-{
- int error; /* error return value */
- int i;
- int level;
- xfs_fsblock_t nbno;
- xfs_btree_cur_t *ncur;
- xfs_bmbt_rec_t nrec;
- xfs_btree_cur_t *pcur;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- level = 0;
- nbno = NULLFSBLOCK;
- xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
- ncur = NULL;
- pcur = cur;
- do {
- if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
- &i))) {
- if (pcur != cur)
- xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
- cur->bc_nlevels = pcur->bc_nlevels;
- cur->bc_private.b.allocated +=
- pcur->bc_private.b.allocated;
- pcur->bc_private.b.allocated = 0;
- ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
- XFS_IS_REALTIME_INODE(cur->bc_private.b.ip));
- cur->bc_private.b.firstblock =
- pcur->bc_private.b.firstblock;
- ASSERT(cur->bc_private.b.flist ==
- pcur->bc_private.b.flist);
- xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
- }
- if (ncur) {
- pcur = ncur;
- ncur = NULL;
- }
- } while (nbno != NULLFSBLOCK);
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = i;
- return 0;
-error0:
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
-}
-
-/*
- * Log fields from the btree block header.
- */
-void
-xfs_bmbt_log_block(
- xfs_btree_cur_t *cur,
- xfs_buf_t *bp,
- int fields)
-{
- int first;
- int last;
- xfs_trans_t *tp;
- static const short offsets[] = {
- offsetof(xfs_bmbt_block_t, bb_magic),
- offsetof(xfs_bmbt_block_t, bb_level),
- offsetof(xfs_bmbt_block_t, bb_numrecs),
- offsetof(xfs_bmbt_block_t, bb_leftsib),
- offsetof(xfs_bmbt_block_t, bb_rightsib),
- sizeof(xfs_bmbt_block_t)
- };
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGBI(cur, bp, fields);
- tp = cur->bc_tp;
- if (bp) {
- xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first,
- &last);
- xfs_trans_log_buf(tp, bp, first, last);
- } else
- xfs_trans_log_inode(tp, cur->bc_private.b.ip,
- XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
-}
-
-/*
- * Log record values from the btree block.
- */
-void
-xfs_bmbt_log_recs(
- xfs_btree_cur_t *cur,
- xfs_buf_t *bp,
- int rfirst,
- int rlast)
-{
- xfs_bmbt_block_t *block;
- int first;
- int last;
- xfs_bmbt_rec_t *rp;
- xfs_trans_t *tp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast);
- ASSERT(bp);
- tp = cur->bc_tp;
- block = XFS_BUF_TO_BMBT_BLOCK(bp);
- rp = XFS_BMAP_REC_DADDR(block, 1, cur);
- first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
- last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
- xfs_trans_log_buf(tp, bp, first, last);
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
-}
-
-int /* error */
-xfs_bmbt_lookup_eq(
- xfs_btree_cur_t *cur,
- xfs_fileoff_t off,
- xfs_fsblock_t bno,
- xfs_filblks_t len,
- int *stat) /* success/failure */
-{
- cur->bc_rec.b.br_startoff = off;
- cur->bc_rec.b.br_startblock = bno;
- cur->bc_rec.b.br_blockcount = len;
- return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
-
-int /* error */
-xfs_bmbt_lookup_ge(
- xfs_btree_cur_t *cur,
- xfs_fileoff_t off,
- xfs_fsblock_t bno,
- xfs_filblks_t len,
- int *stat) /* success/failure */
-{
- cur->bc_rec.b.br_startoff = off;
- cur->bc_rec.b.br_startblock = bno;
- cur->bc_rec.b.br_blockcount = len;
- return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
-}
-
-/*
- * Give the bmap btree a new root block. Copy the old broot contents
- * down into a real block and make the broot point to it.
- */
-int /* error */
-xfs_bmbt_newroot(
- xfs_btree_cur_t *cur, /* btree cursor */
- int *logflags, /* logging flags for inode */
- int *stat) /* return status - 0 fail */
-{
- xfs_alloc_arg_t args; /* allocation arguments */
- xfs_bmbt_block_t *block; /* bmap btree block */
- xfs_buf_t *bp; /* buffer for block */
- xfs_bmbt_block_t *cblock; /* child btree block */
- xfs_bmbt_key_t *ckp; /* child key pointer */
- xfs_bmbt_ptr_t *cpp; /* child ptr pointer */
- int error; /* error return code */
-#ifdef DEBUG
- int i; /* loop counter */
-#endif
- xfs_bmbt_key_t *kp; /* pointer to bmap btree key */
- int level; /* btree level */
- xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- level = cur->bc_nlevels - 1;
- block = xfs_bmbt_get_block(cur, level, &bp);
- /*
- * Copy the root into a real block.
- */
- args.mp = cur->bc_mp;
- pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
- args.tp = cur->bc_tp;
- args.fsbno = cur->bc_private.b.firstblock;
- args.mod = args.minleft = args.alignment = args.total = args.isfl =
- args.userdata = args.minalignslop = 0;
- args.minlen = args.maxlen = args.prod = 1;
- args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
- args.firstblock = args.fsbno;
- if (args.fsbno == NULLFSBLOCK) {
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- args.fsbno = be64_to_cpu(*pp);
- args.type = XFS_ALLOCTYPE_START_BNO;
- } else if (cur->bc_private.b.flist->xbf_low)
- args.type = XFS_ALLOCTYPE_START_BNO;
- else
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- if ((error = xfs_alloc_vextent(&args))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- if (args.fsbno == NULLFSBLOCK) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *stat = 0;
- return 0;
- }
- ASSERT(args.len == 1);
- cur->bc_private.b.firstblock = args.fsbno;
- cur->bc_private.b.allocated++;
- cur->bc_private.b.ip->i_d.di_nblocks++;
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
- XFS_TRANS_DQ_BCOUNT, 1L);
- bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
- cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
- *cblock = *block;
- be16_add_cpu(&block->bb_level, 1);
- block->bb_numrecs = cpu_to_be16(1);
- cur->bc_nlevels++;
- cur->bc_ptrs[level + 1] = 1;
- kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
- ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
- memcpy(ckp, kp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*kp));
- cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
-#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- }
-#endif
- memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp));
-#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- *pp = cpu_to_be64(args.fsbno);
- xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs),
- cur->bc_private.b.whichfork);
- xfs_btree_setbuf(cur, level, bp);
- /*
- * Do all this logging at the end so that
- * the root is at the right level.
- */
- xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
- xfs_bmbt_log_keys(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
- xfs_bmbt_log_ptrs(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- *logflags |=
- XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
- *stat = 1;
- return 0;
-}
/*
* Set all the fields in a bmap extent record from the arguments.
@@ -2512,7 +429,8 @@ xfs_bmbt_set_state(
*/
void
xfs_bmbt_to_bmdr(
- xfs_bmbt_block_t *rblock,
+ struct xfs_mount *mp,
+ struct xfs_btree_block *rblock,
int rblocklen,
xfs_bmdr_block_t *dblock,
int dblocklen)
@@ -2524,67 +442,22 @@ xfs_bmbt_to_bmdr(
__be64 *tpp;
ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
- ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO);
- ASSERT(be64_to_cpu(rblock->bb_rightsib) == NULLDFSBNO);
+ ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
+ ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
dblock->bb_level = rblock->bb_level;
dblock->bb_numrecs = rblock->bb_numrecs;
- dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
- fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
- tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1);
- fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
- tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr);
+ dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
+ fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
+ tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
+ fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
+ tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
/*
- * Update the record to the passed values.
- */
-int
-xfs_bmbt_update(
- xfs_btree_cur_t *cur,
- xfs_fileoff_t off,
- xfs_fsblock_t bno,
- xfs_filblks_t len,
- xfs_exntst_t state)
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
- int error;
- xfs_bmbt_key_t key;
- int ptr;
- xfs_bmbt_rec_t *rp;
-
- XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno,
- (xfs_dfilblks_t)len, (int)state);
- block = xfs_bmbt_get_block(cur, 0, &bp);
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, 0, bp))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
-#endif
- ptr = cur->bc_ptrs[0];
- rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
- xfs_bmbt_disk_set_allf(rp, off, bno, len, state);
- xfs_bmbt_log_recs(cur, bp, ptr, ptr);
- if (ptr > 1) {
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
- }
- key.br_startoff = cpu_to_be64(off);
- if ((error = xfs_bmbt_updkey(cur, &key, 1))) {
- XFS_BMBT_TRACE_CURSOR(cur, ERROR);
- return error;
- }
- XFS_BMBT_TRACE_CURSOR(cur, EXIT);
- return 0;
-}
-
-/*
* Check extent records, which have just been read, for
* any bit in the extent flag field. ASSERT on debug
* kernels, as this condition should not occur.
@@ -2608,3 +481,451 @@ xfs_check_nostate_extents(
}
return 0;
}
+
+
+STATIC struct xfs_btree_cur *
+xfs_bmbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ struct xfs_btree_cur *new;
+
+ new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.b.ip, cur->bc_private.b.whichfork);
+
+ /*
+ * Copy the firstblock, flist, and flags values,
+ * since init cursor doesn't get them.
+ */
+ new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
+ new->bc_private.b.flist = cur->bc_private.b.flist;
+ new->bc_private.b.flags = cur->bc_private.b.flags;
+
+ return new;
+}
+
+STATIC void
+xfs_bmbt_update_cursor(
+ struct xfs_btree_cur *src,
+ struct xfs_btree_cur *dst)
+{
+ ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
+ (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
+ ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
+
+ dst->bc_private.b.allocated += src->bc_private.b.allocated;
+ dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
+
+ src->bc_private.b.allocated = 0;
+}
+
+STATIC int
+xfs_bmbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int length,
+ int *stat)
+{
+ xfs_alloc_arg_t args; /* block allocation args */
+ int error; /* error return value */
+
+ memset(&args, 0, sizeof(args));
+ args.tp = cur->bc_tp;
+ args.mp = cur->bc_mp;
+ args.fsbno = cur->bc_private.b.firstblock;
+ args.firstblock = args.fsbno;
+
+ if (args.fsbno == NULLFSBLOCK) {
+ args.fsbno = be64_to_cpu(start->l);
+ args.type = XFS_ALLOCTYPE_START_BNO;
+ /*
+ * Make sure there is sufficient room left in the AG to
+ * complete a full tree split for an extent insert. If
+ * we are converting the middle part of an extent then
+ * we may need space for two tree splits.
+ *
+ * We are relying on the caller to make the correct block
+ * reservation for this operation to succeed. If the
+ * reservation amount is insufficient then we may fail a
+ * block allocation here and corrupt the filesystem.
+ */
+ args.minleft = xfs_trans_get_block_res(args.tp);
+ } else if (cur->bc_private.b.flist->xbf_low) {
+ args.type = XFS_ALLOCTYPE_START_BNO;
+ } else {
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ }
+
+ args.minlen = args.maxlen = args.prod = 1;
+ args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+ if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+ error = XFS_ERROR(ENOSPC);
+ goto error0;
+ }
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ goto error0;
+
+ if (args.fsbno == NULLFSBLOCK && args.minleft) {
+ /*
+ * Could not find an AG with enough free space to satisfy
+ * a full btree split. Try again without minleft and if
+ * successful activate the lowspace algorithm.
+ */
+ args.fsbno = 0;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
+ args.minleft = 0;
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ goto error0;
+ cur->bc_private.b.flist->xbf_low = 1;
+ }
+ if (args.fsbno == NULLFSBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+ ASSERT(args.len == 1);
+ cur->bc_private.b.firstblock = args.fsbno;
+ cur->bc_private.b.allocated++;
+ cur->bc_private.b.ip->i_d.di_nblocks++;
+ xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
+ XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
+ XFS_TRANS_DQ_BCOUNT, 1L);
+
+ new->l = cpu_to_be64(args.fsbno);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+ error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+STATIC int
+xfs_bmbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+ struct xfs_trans *tp = cur->bc_tp;
+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+
+ xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
+ ip->i_d.di_nblocks--;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_binval(tp, bp);
+ return 0;
+}
+
+STATIC int
+xfs_bmbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level == cur->bc_nlevels - 1) {
+ struct xfs_ifork *ifp;
+
+ ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+ cur->bc_private.b.whichfork);
+
+ return xfs_bmbt_maxrecs(cur->bc_mp,
+ ifp->if_broot_bytes, level == 0) / 2;
+ }
+
+ return cur->bc_mp->m_bmap_dmnr[level != 0];
+}
+
+int
+xfs_bmbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level == cur->bc_nlevels - 1) {
+ struct xfs_ifork *ifp;
+
+ ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+ cur->bc_private.b.whichfork);
+
+ return xfs_bmbt_maxrecs(cur->bc_mp,
+ ifp->if_broot_bytes, level == 0);
+ }
+
+ return cur->bc_mp->m_bmap_dmxr[level != 0];
+
+}
+
+/*
+ * Get the maximum records we could store in the on-disk format.
+ *
+ * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
+ * for the root node this checks the available space in the dinode fork
+ * so that we can resize the in-memory buffer to match it. After a
+ * resize to the maximum size this function returns the same value
+ * as xfs_bmbt_get_maxrecs for the root node, too.
+ */
+STATIC int
+xfs_bmbt_get_dmaxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level != cur->bc_nlevels - 1)
+ return cur->bc_mp->m_bmap_dmxr[level != 0];
+ return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize,
+ level == 0);
+}
+
+STATIC void
+xfs_bmbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ key->bmbt.br_startoff =
+ cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
+}
+
+STATIC void
+xfs_bmbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(key->bmbt.br_startoff != 0);
+
+ xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
+ 0, 0, XFS_EXT_NORM);
+}
+
+STATIC void
+xfs_bmbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
+}
+
+STATIC void
+xfs_bmbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ ptr->l = 0;
+}
+
+STATIC __int64_t
+xfs_bmbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
+ cur->bc_rec.b.br_startoff;
+}
+
+#ifdef DEBUG
+STATIC int
+xfs_bmbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be64_to_cpu(k1->bmbt.br_startoff) <
+ be64_to_cpu(k2->bmbt.br_startoff);
+}
+
+STATIC int
+xfs_bmbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
+ xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
+ xfs_bmbt_disk_get_startoff(&r2->bmbt);
+}
+#endif /* DEBUG */
+
+#ifdef XFS_BTREE_TRACE
+ktrace_t *xfs_bmbt_trace_buf;
+
+STATIC void
+xfs_bmbt_trace_enter(
+ struct xfs_btree_cur *cur,
+ const char *func,
+ char *s,
+ int type,
+ int line,
+ __psunsigned_t a0,
+ __psunsigned_t a1,
+ __psunsigned_t a2,
+ __psunsigned_t a3,
+ __psunsigned_t a4,
+ __psunsigned_t a5,
+ __psunsigned_t a6,
+ __psunsigned_t a7,
+ __psunsigned_t a8,
+ __psunsigned_t a9,
+ __psunsigned_t a10)
+{
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+ int whichfork = cur->bc_private.b.whichfork;
+
+ ktrace_enter(xfs_bmbt_trace_buf,
+ (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
+ (void *)func, (void *)s, (void *)ip, (void *)cur,
+ (void *)a0, (void *)a1, (void *)a2, (void *)a3,
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7,
+ (void *)a8, (void *)a9, (void *)a10);
+ ktrace_enter(ip->i_btrace,
+ (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
+ (void *)func, (void *)s, (void *)ip, (void *)cur,
+ (void *)a0, (void *)a1, (void *)a2, (void *)a3,
+ (void *)a4, (void *)a5, (void *)a6, (void *)a7,
+ (void *)a8, (void *)a9, (void *)a10);
+}
+
+STATIC void
+xfs_bmbt_trace_cursor(
+ struct xfs_btree_cur *cur,
+ __uint32_t *s0,
+ __uint64_t *l0,
+ __uint64_t *l1)
+{
+ struct xfs_bmbt_rec_host r;
+
+ xfs_bmbt_set_all(&r, &cur->bc_rec.b);
+
+ *s0 = (cur->bc_nlevels << 24) |
+ (cur->bc_private.b.flags << 16) |
+ cur->bc_private.b.allocated;
+ *l0 = r.l0;
+ *l1 = r.l1;
+}
+
+STATIC void
+xfs_bmbt_trace_key(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ __uint64_t *l0,
+ __uint64_t *l1)
+{
+ *l0 = be64_to_cpu(key->bmbt.br_startoff);
+ *l1 = 0;
+}
+
+STATIC void
+xfs_bmbt_trace_record(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ __uint64_t *l0,
+ __uint64_t *l1,
+ __uint64_t *l2)
+{
+ struct xfs_bmbt_irec irec;
+
+ xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
+ *l0 = irec.br_startoff;
+ *l1 = irec.br_startblock;
+ *l2 = irec.br_blockcount;
+}
+#endif /* XFS_BTREE_TRACE */
+
+static const struct xfs_btree_ops xfs_bmbt_ops = {
+ .rec_len = sizeof(xfs_bmbt_rec_t),
+ .key_len = sizeof(xfs_bmbt_key_t),
+
+ .dup_cursor = xfs_bmbt_dup_cursor,
+ .update_cursor = xfs_bmbt_update_cursor,
+ .alloc_block = xfs_bmbt_alloc_block,
+ .free_block = xfs_bmbt_free_block,
+ .get_maxrecs = xfs_bmbt_get_maxrecs,
+ .get_minrecs = xfs_bmbt_get_minrecs,
+ .get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
+ .init_key_from_rec = xfs_bmbt_init_key_from_rec,
+ .init_rec_from_key = xfs_bmbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
+ .key_diff = xfs_bmbt_key_diff,
+
+#ifdef DEBUG
+ .keys_inorder = xfs_bmbt_keys_inorder,
+ .recs_inorder = xfs_bmbt_recs_inorder,
+#endif
+
+#ifdef XFS_BTREE_TRACE
+ .trace_enter = xfs_bmbt_trace_enter,
+ .trace_cursor = xfs_bmbt_trace_cursor,
+ .trace_key = xfs_bmbt_trace_key,
+ .trace_record = xfs_bmbt_trace_record,
+#endif
+};
+
+/*
+ * Allocate a new bmap btree cursor.
+ */
+struct xfs_btree_cur * /* new bmap btree cursor */
+xfs_bmbt_init_cursor(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_inode *ip, /* inode owning the btree */
+ int whichfork) /* data or attr fork */
+{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_btree_cur *cur;
+
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
+ cur->bc_btnum = XFS_BTNUM_BMAP;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
+
+ cur->bc_ops = &xfs_bmbt_ops;
+ cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
+
+ cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
+ cur->bc_private.b.ip = ip;
+ cur->bc_private.b.firstblock = NULLFSBLOCK;
+ cur->bc_private.b.flist = NULL;
+ cur->bc_private.b.allocated = 0;
+ cur->bc_private.b.flags = 0;
+ cur->bc_private.b.whichfork = whichfork;
+
+ return cur;
+}
+
+/*
+ * Calculate number of records in a bmap btree block.
+ */
+int
+xfs_bmbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
+{
+ blocklen -= XFS_BMBT_BLOCK_LEN(mp);
+
+ if (leaf)
+ return blocklen / sizeof(xfs_bmbt_rec_t);
+ return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
+}
+
+/*
+ * Calculate number of records in a bmap btree inode root.
+ */
+int
+xfs_bmdr_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
+{
+ blocklen -= sizeof(xfs_bmdr_block_t);
+
+ if (leaf)
+ return blocklen / sizeof(xfs_bmdr_rec_t);
+ return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
+}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index cd0d4b4bb816..a4555abb6622 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -21,9 +21,10 @@
#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
struct xfs_btree_cur;
-struct xfs_btree_lblock;
+struct xfs_btree_block;
struct xfs_mount;
struct xfs_inode;
+struct xfs_trans;
/*
* Bmap root header, on-disk form only.
@@ -145,71 +146,60 @@ typedef struct xfs_bmbt_key {
/* btree pointer type */
typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
-/* btree block header type */
-typedef struct xfs_btree_lblock xfs_bmbt_block_t;
-
-#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp))
-
-#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize)
-#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \
- ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \
- (cur)->bc_private.b.whichfork)->if_broot_bytes)
-
-#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
- xfs_bmdr, (lev) == 0) : \
- ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0])))
-#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\
- xfs_bmbt, (lev) == 0) : \
- ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0])))
-
-#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur),\
- xfs_bmdr, (lev) == 0) : \
- ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])))
-#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\
- xfs_bmbt, (lev) == 0) : \
- ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])))
-
-#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i))
-
-#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i))
-
-#define XFS_BMAP_KEY_DADDR(bb,i,cur) \
- (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i))
-
-#define XFS_BMAP_KEY_IADDR(bb,i,cur) \
- (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i))
-
-#define XFS_BMAP_PTR_DADDR(bb,i,cur) \
- (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
- be16_to_cpu((bb)->bb_level), cur)))
-#define XFS_BMAP_PTR_IADDR(bb,i,cur) \
- (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
- be16_to_cpu((bb)->bb_level), cur)))
+/*
+ * Btree block header size depends on a superblock flag.
+ *
+ * (not quite yet, but soon)
+ */
+#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN
+
+#define XFS_BMBT_REC_ADDR(mp, block, index) \
+ ((xfs_bmbt_rec_t *) \
+ ((char *)(block) + \
+ XFS_BMBT_BLOCK_LEN(mp) + \
+ ((index) - 1) * sizeof(xfs_bmbt_rec_t)))
+
+#define XFS_BMBT_KEY_ADDR(mp, block, index) \
+ ((xfs_bmbt_key_t *) \
+ ((char *)(block) + \
+ XFS_BMBT_BLOCK_LEN(mp) + \
+ ((index) - 1) * sizeof(xfs_bmbt_key_t)))
+
+#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \
+ ((xfs_bmbt_ptr_t *) \
+ ((char *)(block) + \
+ XFS_BMBT_BLOCK_LEN(mp) + \
+ (maxrecs) * sizeof(xfs_bmbt_key_t) + \
+ ((index) - 1) * sizeof(xfs_bmbt_ptr_t)))
+
+#define XFS_BMDR_REC_ADDR(block, index) \
+ ((xfs_bmdr_rec_t *) \
+ ((char *)(block) + \
+ sizeof(struct xfs_bmdr_block) + \
+ ((index) - 1) * sizeof(xfs_bmdr_rec_t)))
+
+#define XFS_BMDR_KEY_ADDR(block, index) \
+ ((xfs_bmdr_key_t *) \
+ ((char *)(block) + \
+ sizeof(struct xfs_bmdr_block) + \
+ ((index) - 1) * sizeof(xfs_bmdr_key_t)))
+
+#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \
+ ((xfs_bmdr_ptr_t *) \
+ ((char *)(block) + \
+ sizeof(struct xfs_bmdr_block) + \
+ (maxrecs) * sizeof(xfs_bmdr_key_t) + \
+ ((index) - 1) * sizeof(xfs_bmdr_ptr_t)))
/*
* These are to be used when we know the size of the block and
* we don't have a cursor.
*/
-#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \
- (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i))
-#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
- (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i))
-#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
- (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
-
-#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs)
-#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
+#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
+ XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
- (int)(sizeof(xfs_bmbt_block_t) + \
+ (int)(XFS_BTREE_LBLOCK_LEN + \
((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
#define XFS_BMAP_BROOT_SPACE(bb) \
@@ -223,42 +213,12 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
*/
#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)])
-#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \
- (be32_to_cpu((bb)->bb_magic) == XFS_BMAP_MAGIC && \
- be16_to_cpu((bb)->bb_level) == level && \
- be16_to_cpu((bb)->bb_numrecs) > 0 && \
- be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0])
-
-
-#ifdef __KERNEL__
-
-#if defined(XFS_BMBT_TRACE)
-/*
- * Trace buffer entry types.
- */
-#define XFS_BMBT_KTRACE_ARGBI 1
-#define XFS_BMBT_KTRACE_ARGBII 2
-#define XFS_BMBT_KTRACE_ARGFFFI 3
-#define XFS_BMBT_KTRACE_ARGI 4
-#define XFS_BMBT_KTRACE_ARGIFK 5
-#define XFS_BMBT_KTRACE_ARGIFR 6
-#define XFS_BMBT_KTRACE_ARGIK 7
-#define XFS_BMBT_KTRACE_CUR 8
-
-#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
-#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
-extern ktrace_t *xfs_bmbt_trace_buf;
-#endif
-
/*
* Prototypes for xfs_bmap.c to call.
*/
-extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int);
-extern int xfs_bmbt_decrement(struct xfs_btree_cur *, int, int *);
-extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *);
+extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int,
+ struct xfs_btree_block *, int);
extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
-extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur,
- int, struct xfs_buf **bpp);
extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
@@ -268,22 +228,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
-extern int xfs_bmbt_increment(struct xfs_btree_cur *, int, int *);
-extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *);
-extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
-extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int,
- int);
-extern int xfs_bmbt_lookup_eq(struct xfs_btree_cur *, xfs_fileoff_t,
- xfs_fsblock_t, xfs_filblks_t, int *);
-extern int xfs_bmbt_lookup_ge(struct xfs_btree_cur *, xfs_fileoff_t,
- xfs_fsblock_t, xfs_filblks_t, int *);
-
-/*
- * Give the bmap btree a new root block. Copy the old broot contents
- * down into a real block and make the broot point to it.
- */
-extern int xfs_bmbt_newroot(struct xfs_btree_cur *cur, int *lflags, int *stat);
-
extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
@@ -296,10 +240,15 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
-extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int);
-extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t,
- xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t);
+extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
+ xfs_bmdr_block_t *, int);
+
+extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
+extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
+extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
+
+extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
+ struct xfs_trans *, struct xfs_inode *, int);
-#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index cc593a84c345..7ed59267420d 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -34,7 +34,9 @@
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_inode_item.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_error.h"
@@ -50,135 +52,33 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
};
-/*
- * Checking routine: return maxrecs for the block.
- */
-STATIC int /* number of records fitting in block */
-xfs_btree_maxrecs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_block_t *block) /* generic btree block pointer */
-{
- switch (cur->bc_btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- return (int)XFS_ALLOC_BLOCK_MAXRECS(
- be16_to_cpu(block->bb_h.bb_level), cur);
- case XFS_BTNUM_BMAP:
- return (int)XFS_BMAP_BLOCK_IMAXRECS(
- be16_to_cpu(block->bb_h.bb_level), cur);
- case XFS_BTNUM_INO:
- return (int)XFS_INOBT_BLOCK_MAXRECS(
- be16_to_cpu(block->bb_h.bb_level), cur);
- default:
- ASSERT(0);
- return 0;
- }
-}
-
-/*
- * External routines.
- */
-
-#ifdef DEBUG
-/*
- * Debug routine: check that block header is ok.
- */
-void
-xfs_btree_check_block(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_block_t *block, /* generic btree block pointer */
- int level, /* level of the btree block */
- xfs_buf_t *bp) /* buffer containing block, if any */
-{
- if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
- xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
- bp);
- else
- xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
- bp);
-}
-
-/*
- * Debug routine: check that keys are in the right order.
- */
-void
-xfs_btree_check_key(
- xfs_btnum_t btnum, /* btree identifier */
- void *ak1, /* pointer to left (lower) key */
- void *ak2) /* pointer to right (higher) key */
-{
- switch (btnum) {
- case XFS_BTNUM_BNO: {
- xfs_alloc_key_t *k1;
- xfs_alloc_key_t *k2;
-
- k1 = ak1;
- k2 = ak2;
- ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
- break;
- }
- case XFS_BTNUM_CNT: {
- xfs_alloc_key_t *k1;
- xfs_alloc_key_t *k2;
-
- k1 = ak1;
- k2 = ak2;
- ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
- (k1->ar_blockcount == k2->ar_blockcount &&
- be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
- break;
- }
- case XFS_BTNUM_BMAP: {
- xfs_bmbt_key_t *k1;
- xfs_bmbt_key_t *k2;
-
- k1 = ak1;
- k2 = ak2;
- ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
- break;
- }
- case XFS_BTNUM_INO: {
- xfs_inobt_key_t *k1;
- xfs_inobt_key_t *k2;
-
- k1 = ak1;
- k2 = ak2;
- ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
- break;
- }
- default:
- ASSERT(0);
- }
-}
-#endif /* DEBUG */
-/*
- * Checking routine: check that long form block header is ok.
- */
-/* ARGSUSED */
-int /* error (0 or EFSCORRUPTED) */
+STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_lblock_t *block, /* btree long form block pointer */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* btree long form block pointer */
int level, /* level of the btree block */
- xfs_buf_t *bp) /* buffer for block, if any */
+ struct xfs_buf *bp) /* buffer for block, if any */
{
int lblock_ok; /* block passes checks */
- xfs_mount_t *mp; /* file system mount point */
+ struct xfs_mount *mp; /* file system mount point */
mp = cur->bc_mp;
lblock_ok =
be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
- xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
- block->bb_leftsib &&
- (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
- XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
- block->bb_rightsib &&
- (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
- XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
- if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
+ cur->bc_ops->get_maxrecs(cur, level) &&
+ block->bb_u.l.bb_leftsib &&
+ (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
+ XFS_FSB_SANITY_CHECK(mp,
+ be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
+ block->bb_u.l.bb_rightsib &&
+ (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
+ XFS_FSB_SANITY_CHECK(mp,
+ be64_to_cpu(block->bb_u.l.bb_rightsib)));
+ if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
+ XFS_ERRTAG_BTREE_CHECK_LBLOCK,
XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
if (bp)
xfs_buftrace("LBTREE ERROR", bp);
@@ -189,98 +89,15 @@ xfs_btree_check_lblock(
return 0;
}
-/*
- * Checking routine: check that (long) pointer is ok.
- */
-int /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lptr(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_dfsbno_t ptr, /* btree block disk address */
- int level) /* btree block level */
-{
- xfs_mount_t *mp; /* file system mount point */
-
- mp = cur->bc_mp;
- XFS_WANT_CORRUPTED_RETURN(
- level > 0 &&
- ptr != NULLDFSBNO &&
- XFS_FSB_SANITY_CHECK(mp, ptr));
- return 0;
-}
-
-#ifdef DEBUG
-/*
- * Debug routine: check that records are in the right order.
- */
-void
-xfs_btree_check_rec(
- xfs_btnum_t btnum, /* btree identifier */
- void *ar1, /* pointer to left (lower) record */
- void *ar2) /* pointer to right (higher) record */
-{
- switch (btnum) {
- case XFS_BTNUM_BNO: {
- xfs_alloc_rec_t *r1;
- xfs_alloc_rec_t *r2;
-
- r1 = ar1;
- r2 = ar2;
- ASSERT(be32_to_cpu(r1->ar_startblock) +
- be32_to_cpu(r1->ar_blockcount) <=
- be32_to_cpu(r2->ar_startblock));
- break;
- }
- case XFS_BTNUM_CNT: {
- xfs_alloc_rec_t *r1;
- xfs_alloc_rec_t *r2;
-
- r1 = ar1;
- r2 = ar2;
- ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
- (r1->ar_blockcount == r2->ar_blockcount &&
- be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
- break;
- }
- case XFS_BTNUM_BMAP: {
- xfs_bmbt_rec_t *r1;
- xfs_bmbt_rec_t *r2;
-
- r1 = ar1;
- r2 = ar2;
- ASSERT(xfs_bmbt_disk_get_startoff(r1) +
- xfs_bmbt_disk_get_blockcount(r1) <=
- xfs_bmbt_disk_get_startoff(r2));
- break;
- }
- case XFS_BTNUM_INO: {
- xfs_inobt_rec_t *r1;
- xfs_inobt_rec_t *r2;
-
- r1 = ar1;
- r2 = ar2;
- ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
- be32_to_cpu(r2->ir_startino));
- break;
- }
- default:
- ASSERT(0);
- }
-}
-#endif /* DEBUG */
-
-/*
- * Checking routine: check that block header is ok.
- */
-/* ARGSUSED */
-int /* error (0 or EFSCORRUPTED) */
+STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_sblock_t *block, /* btree short form block pointer */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* btree short form block pointer */
int level, /* level of the btree block */
- xfs_buf_t *bp) /* buffer containing block */
+ struct xfs_buf *bp) /* buffer containing block */
{
- xfs_buf_t *agbp; /* buffer for ag. freespace struct */
- xfs_agf_t *agf; /* ag. freespace structure */
+ struct xfs_buf *agbp; /* buffer for ag. freespace struct */
+ struct xfs_agf *agf; /* ag. freespace structure */
xfs_agblock_t agflen; /* native ag. freespace length */
int sblock_ok; /* block passes checks */
@@ -291,13 +108,13 @@ xfs_btree_check_sblock(
be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
- xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
- (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
- be32_to_cpu(block->bb_leftsib) < agflen) &&
- block->bb_leftsib &&
- (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
- be32_to_cpu(block->bb_rightsib) < agflen) &&
- block->bb_rightsib;
+ cur->bc_ops->get_maxrecs(cur, level) &&
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
+ be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
+ block->bb_u.s.bb_leftsib &&
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
+ be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
+ block->bb_u.s.bb_rightsib;
if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
XFS_ERRTAG_BTREE_CHECK_SBLOCK,
XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
@@ -311,27 +128,78 @@ xfs_btree_check_sblock(
}
/*
- * Checking routine: check that (short) pointer is ok.
+ * Debug routine: check that block header is ok.
+ */
+int
+xfs_btree_check_block(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* generic btree block pointer */
+ int level, /* level of the btree block */
+ struct xfs_buf *bp) /* buffer containing block, if any */
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return xfs_btree_check_lblock(cur, block, level, bp);
+ else
+ return xfs_btree_check_sblock(cur, block, level, bp);
+}
+
+/*
+ * Check that (long) pointer is ok.
*/
int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_dfsbno_t bno, /* btree block disk address */
+ int level) /* btree block level */
+{
+ XFS_WANT_CORRUPTED_RETURN(
+ level > 0 &&
+ bno != NULLDFSBNO &&
+ XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
+ return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check that (short) pointer is ok.
+ */
+STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t ptr, /* btree block disk address */
- int level) /* btree block level */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* btree block disk address */
+ int level) /* btree block level */
{
- xfs_buf_t *agbp; /* buffer for ag. freespace struct */
- xfs_agf_t *agf; /* ag. freespace structure */
+ xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
- agbp = cur->bc_private.a.agbp;
- agf = XFS_BUF_TO_AGF(agbp);
XFS_WANT_CORRUPTED_RETURN(
level > 0 &&
- ptr != NULLAGBLOCK && ptr != 0 &&
- ptr < be32_to_cpu(agf->agf_length));
+ bno != NULLAGBLOCK &&
+ bno != 0 &&
+ bno < agblocks);
return 0;
}
/*
+ * Check that block ptr is ok.
+ */
+STATIC int /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_ptr(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ union xfs_btree_ptr *ptr, /* btree block disk address */
+ int index, /* offset from ptr to check */
+ int level) /* btree block level */
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ return xfs_btree_check_lptr(cur,
+ be64_to_cpu((&ptr->l)[index]), level);
+ } else {
+ return xfs_btree_check_sptr(cur,
+ be32_to_cpu((&ptr->s)[index]), level);
+ }
+}
+#endif
+
+/*
* Delete the btree cursor.
*/
void
@@ -387,16 +255,17 @@ xfs_btree_dup_cursor(
tp = cur->bc_tp;
mp = cur->bc_mp;
+
/*
* Allocate a new cursor like the old one.
*/
- new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
- cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
- cur->bc_private.b.whichfork);
+ new = cur->bc_ops->dup_cursor(cur);
+
/*
* Copy the record currently in the cursor.
*/
new->bc_rec = cur->bc_rec;
+
/*
* For each level current, re-get the buffer and copy the ptr value.
*/
@@ -416,46 +285,174 @@ xfs_btree_dup_cursor(
} else
new->bc_bufs[i] = NULL;
}
- /*
- * For bmap btrees, copy the firstblock, flist, and flags values,
- * since init cursor doesn't get them.
- */
- if (new->bc_btnum == XFS_BTNUM_BMAP) {
- new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
- new->bc_private.b.flist = cur->bc_private.b.flist;
- new->bc_private.b.flags = cur->bc_private.b.flags;
- }
*ncur = new;
return 0;
}
/*
+ * XFS btree block layout and addressing:
+ *
+ * There are two types of blocks in the btree: leaf and non-leaf blocks.
+ *
+ * The leaf record start with a header then followed by records containing
+ * the values. A non-leaf block also starts with the same header, and
+ * then first contains lookup keys followed by an equal number of pointers
+ * to the btree blocks at the previous level.
+ *
+ * +--------+-------+-------+-------+-------+-------+-------+
+ * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
+ * +--------+-------+-------+-------+-------+-------+-------+
+ *
+ * +--------+-------+-------+-------+-------+-------+-------+
+ * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
+ * +--------+-------+-------+-------+-------+-------+-------+
+ *
+ * The header is called struct xfs_btree_block for reasons better left unknown
+ * and comes in different versions for short (32bit) and long (64bit) block
+ * pointers. The record and key structures are defined by the btree instances
+ * and opaque to the btree core. The block pointers are simple disk endian
+ * integers, available in a short (32bit) and long (64bit) variant.
+ *
+ * The helpers below calculate the offset of a given record, key or pointer
+ * into a btree block (xfs_btree_*_offset) or return a pointer to the given
+ * record, key or pointer (xfs_btree_*_addr). Note that all addressing
+ * inside the btree block is done using indices starting at one, not zero!
+ */
+
+/*
+ * Return size of the btree block header for this btree instance.
+ */
+static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
+{
+ return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ XFS_BTREE_LBLOCK_LEN :
+ XFS_BTREE_SBLOCK_LEN;
+}
+
+/*
+ * Return size of btree block pointers for this btree instance.
+ */
+static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
+{
+ return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ sizeof(__be64) : sizeof(__be32);
+}
+
+/*
+ * Calculate offset of the n-th record in a btree block.
+ */
+STATIC size_t
+xfs_btree_rec_offset(
+ struct xfs_btree_cur *cur,
+ int n)
+{
+ return xfs_btree_block_len(cur) +
+ (n - 1) * cur->bc_ops->rec_len;
+}
+
+/*
+ * Calculate offset of the n-th key in a btree block.
+ */
+STATIC size_t
+xfs_btree_key_offset(
+ struct xfs_btree_cur *cur,
+ int n)
+{
+ return xfs_btree_block_len(cur) +
+ (n - 1) * cur->bc_ops->key_len;
+}
+
+/*
+ * Calculate offset of the n-th block pointer in a btree block.
+ */
+STATIC size_t
+xfs_btree_ptr_offset(
+ struct xfs_btree_cur *cur,
+ int n,
+ int level)
+{
+ return xfs_btree_block_len(cur) +
+ cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
+ (n - 1) * xfs_btree_ptr_len(cur);
+}
+
+/*
+ * Return a pointer to the n-th record in the btree block.
+ */
+STATIC union xfs_btree_rec *
+xfs_btree_rec_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ return (union xfs_btree_rec *)
+ ((char *)block + xfs_btree_rec_offset(cur, n));
+}
+
+/*
+ * Return a pointer to the n-th key in the btree block.
+ */
+STATIC union xfs_btree_key *
+xfs_btree_key_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ return (union xfs_btree_key *)
+ ((char *)block + xfs_btree_key_offset(cur, n));
+}
+
+/*
+ * Return a pointer to the n-th block pointer in the btree block.
+ */
+STATIC union xfs_btree_ptr *
+xfs_btree_ptr_addr(
+ struct xfs_btree_cur *cur,
+ int n,
+ struct xfs_btree_block *block)
+{
+ int level = xfs_btree_get_level(block);
+
+ ASSERT(block->bb_level != 0);
+
+ return (union xfs_btree_ptr *)
+ ((char *)block + xfs_btree_ptr_offset(cur, n, level));
+}
+
+/*
+ * Get a the root block which is stored in the inode.
+ *
+ * For now this btree implementation assumes the btree root is always
+ * stored in the if_broot field of an inode fork.
+ */
+STATIC struct xfs_btree_block *
+xfs_btree_get_iroot(
+ struct xfs_btree_cur *cur)
+{
+ struct xfs_ifork *ifp;
+
+ ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
+ return (struct xfs_btree_block *)ifp->if_broot;
+}
+
+/*
* Retrieve the block pointer from the cursor at the given level.
- * This may be a bmap btree root or from a buffer.
+ * This may be an inode btree root or from a buffer.
*/
-STATIC xfs_btree_block_t * /* generic btree block pointer */
+STATIC struct xfs_btree_block * /* generic btree block pointer */
xfs_btree_get_block(
- xfs_btree_cur_t *cur, /* btree cursor */
+ struct xfs_btree_cur *cur, /* btree cursor */
int level, /* level in btree */
- xfs_buf_t **bpp) /* buffer containing the block */
-{
- xfs_btree_block_t *block; /* return value */
- xfs_buf_t *bp; /* return buffer */
- xfs_ifork_t *ifp; /* inode fork pointer */
- int whichfork; /* data or attr fork */
-
- if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
- whichfork = cur->bc_private.b.whichfork;
- ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
- block = (xfs_btree_block_t *)ifp->if_broot;
- bp = NULL;
- } else {
- bp = cur->bc_bufs[level];
- block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buf **bpp) /* buffer containing the block */
+{
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level == cur->bc_nlevels - 1)) {
+ *bpp = NULL;
+ return xfs_btree_get_iroot(cur);
}
- ASSERT(block != NULL);
- *bpp = bp;
- return block;
+
+ *bpp = cur->bc_bufs[level];
+ return XFS_BUF_TO_BLOCK(*bpp);
}
/*
@@ -505,97 +502,6 @@ xfs_btree_get_bufs(
}
/*
- * Allocate a new btree cursor.
- * The cursor is either for allocation (A) or bmap (B) or inodes (I).
- */
-xfs_btree_cur_t * /* new btree cursor */
-xfs_btree_init_cursor(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *agbp, /* (A only) buffer for agf structure */
- /* (I only) buffer for agi structure */
- xfs_agnumber_t agno, /* (AI only) allocation group number */
- xfs_btnum_t btnum, /* btree identifier */
- xfs_inode_t *ip, /* (B only) inode owning the btree */
- int whichfork) /* (B only) data or attr fork */
-{
- xfs_agf_t *agf; /* (A) allocation group freespace */
- xfs_agi_t *agi; /* (I) allocation group inodespace */
- xfs_btree_cur_t *cur; /* return value */
- xfs_ifork_t *ifp; /* (I) inode fork pointer */
- int nlevels=0; /* number of levels in the btree */
-
- ASSERT(xfs_btree_cur_zone != NULL);
- /*
- * Allocate a new cursor.
- */
- cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
- /*
- * Deduce the number of btree levels from the arguments.
- */
- switch (btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- agf = XFS_BUF_TO_AGF(agbp);
- nlevels = be32_to_cpu(agf->agf_levels[btnum]);
- break;
- case XFS_BTNUM_BMAP:
- ifp = XFS_IFORK_PTR(ip, whichfork);
- nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
- break;
- case XFS_BTNUM_INO:
- agi = XFS_BUF_TO_AGI(agbp);
- nlevels = be32_to_cpu(agi->agi_level);
- break;
- default:
- ASSERT(0);
- }
- /*
- * Fill in the common fields.
- */
- cur->bc_tp = tp;
- cur->bc_mp = mp;
- cur->bc_nlevels = nlevels;
- cur->bc_btnum = btnum;
- cur->bc_blocklog = mp->m_sb.sb_blocklog;
- /*
- * Fill in private fields.
- */
- switch (btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- /*
- * Allocation btree fields.
- */
- cur->bc_private.a.agbp = agbp;
- cur->bc_private.a.agno = agno;
- break;
- case XFS_BTNUM_INO:
- /*
- * Inode allocation btree fields.
- */
- cur->bc_private.a.agbp = agbp;
- cur->bc_private.a.agno = agno;
- break;
- case XFS_BTNUM_BMAP:
- /*
- * Bmap btree fields.
- */
- cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
- cur->bc_private.b.ip = ip;
- cur->bc_private.b.firstblock = NULLFSBLOCK;
- cur->bc_private.b.flist = NULL;
- cur->bc_private.b.allocated = 0;
- cur->bc_private.b.flags = 0;
- cur->bc_private.b.whichfork = whichfork;
- break;
- default:
- ASSERT(0);
- }
- return cur;
-}
-
-/*
* Check for the cursor referring to the last block at the given level.
*/
int /* 1=is last block, 0=not last block */
@@ -603,12 +509,12 @@ xfs_btree_islastblock(
xfs_btree_cur_t *cur, /* btree cursor */
int level) /* level to check */
{
- xfs_btree_block_t *block; /* generic btree block pointer */
+ struct xfs_btree_block *block; /* generic btree block pointer */
xfs_buf_t *bp; /* buffer containing block */
block = xfs_btree_get_block(cur, level, &bp);
xfs_btree_check_block(cur, block, level, bp);
- if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
else
return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
@@ -618,12 +524,12 @@ xfs_btree_islastblock(
* Change the cursor to point to the first record at the given level.
* Other levels are unaffected.
*/
-int /* success=1, failure=0 */
+STATIC int /* success=1, failure=0 */
xfs_btree_firstrec(
xfs_btree_cur_t *cur, /* btree cursor */
int level) /* level to change */
{
- xfs_btree_block_t *block; /* generic btree block pointer */
+ struct xfs_btree_block *block; /* generic btree block pointer */
xfs_buf_t *bp; /* buffer containing block */
/*
@@ -634,7 +540,7 @@ xfs_btree_firstrec(
/*
* It's empty, there is no such record.
*/
- if (!block->bb_h.bb_numrecs)
+ if (!block->bb_numrecs)
return 0;
/*
* Set the ptr value to 1, that's the first record/key.
@@ -647,12 +553,12 @@ xfs_btree_firstrec(
* Change the cursor to point to the last record in the current block
* at the given level. Other levels are unaffected.
*/
-int /* success=1, failure=0 */
+STATIC int /* success=1, failure=0 */
xfs_btree_lastrec(
xfs_btree_cur_t *cur, /* btree cursor */
int level) /* level to change */
{
- xfs_btree_block_t *block; /* generic btree block pointer */
+ struct xfs_btree_block *block; /* generic btree block pointer */
xfs_buf_t *bp; /* buffer containing block */
/*
@@ -663,12 +569,12 @@ xfs_btree_lastrec(
/*
* It's empty, there is no such record.
*/
- if (!block->bb_h.bb_numrecs)
+ if (!block->bb_numrecs)
return 0;
/*
* Set the ptr value to numrecs, that's the last record/key.
*/
- cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs);
+ cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
return 1;
}
@@ -817,66 +723,84 @@ xfs_btree_reada_bufs(
xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
}
+STATIC int
+xfs_btree_readahead_lblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
+{
+ int rval = 0;
+ xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
+ xfs_btree_reada_bufl(cur->bc_mp, left, 1);
+ rval++;
+ }
+
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
+ xfs_btree_reada_bufl(cur->bc_mp, right, 1);
+ rval++;
+ }
+
+ return rval;
+}
+
+STATIC int
+xfs_btree_readahead_sblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
+{
+ int rval = 0;
+ xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
+ xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
+
+
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
+ xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+ left, 1);
+ rval++;
+ }
+
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
+ xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+ right, 1);
+ rval++;
+ }
+
+ return rval;
+}
+
/*
* Read-ahead btree blocks, at the given level.
* Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
*/
-int
-xfs_btree_readahead_core(
- xfs_btree_cur_t *cur, /* btree cursor */
+STATIC int
+xfs_btree_readahead(
+ struct xfs_btree_cur *cur, /* btree cursor */
int lev, /* level in btree */
int lr) /* left/right bits */
{
- xfs_alloc_block_t *a;
- xfs_bmbt_block_t *b;
- xfs_inobt_block_t *i;
- int rval = 0;
+ struct xfs_btree_block *block;
+
+ /*
+ * No readahead needed if we are at the root level and the
+ * btree root is stored in the inode.
+ */
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (lev == cur->bc_nlevels - 1))
+ return 0;
+
+ if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
+ return 0;
- ASSERT(cur->bc_bufs[lev] != NULL);
cur->bc_ra[lev] |= lr;
- switch (cur->bc_btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
- if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- be32_to_cpu(a->bb_leftsib), 1);
- rval++;
- }
- if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- be32_to_cpu(a->bb_rightsib), 1);
- rval++;
- }
- break;
- case XFS_BTNUM_BMAP:
- b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
- if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
- xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
- rval++;
- }
- if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
- xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
- rval++;
- }
- break;
- case XFS_BTNUM_INO:
- i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
- if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- be32_to_cpu(i->bb_leftsib), 1);
- rval++;
- }
- if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- be32_to_cpu(i->bb_rightsib), 1);
- rval++;
- }
- break;
- default:
- ASSERT(0);
- }
- return rval;
+ block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return xfs_btree_readahead_lblock(cur, lr, block);
+ return xfs_btree_readahead_sblock(cur, lr, block);
}
/*
@@ -889,7 +813,7 @@ xfs_btree_setbuf(
int lev, /* level in btree */
xfs_buf_t *bp) /* new buffer to set */
{
- xfs_btree_block_t *b; /* btree block */
+ struct xfs_btree_block *b; /* btree block */
xfs_buf_t *obp; /* old buffer pointer */
obp = cur->bc_bufs[lev];
@@ -900,7 +824,7 @@ xfs_btree_setbuf(
if (!bp)
return;
b = XFS_BUF_TO_BLOCK(bp);
- if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
@@ -912,3 +836,2855 @@ xfs_btree_setbuf(
cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
}
}
+
+STATIC int
+xfs_btree_ptr_is_null(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return be64_to_cpu(ptr->l) == NULLFSBLOCK;
+ else
+ return be32_to_cpu(ptr->s) == NULLAGBLOCK;
+}
+
+STATIC void
+xfs_btree_set_ptr_null(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ ptr->l = cpu_to_be64(NULLFSBLOCK);
+ else
+ ptr->s = cpu_to_be32(NULLAGBLOCK);
+}
+
+/*
+ * Get/set/init sibling pointers
+ */
+STATIC void
+xfs_btree_get_sibling(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_ptr *ptr,
+ int lr)
+{
+ ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (lr == XFS_BB_RIGHTSIB)
+ ptr->l = block->bb_u.l.bb_rightsib;
+ else
+ ptr->l = block->bb_u.l.bb_leftsib;
+ } else {
+ if (lr == XFS_BB_RIGHTSIB)
+ ptr->s = block->bb_u.s.bb_rightsib;
+ else
+ ptr->s = block->bb_u.s.bb_leftsib;
+ }
+}
+
+STATIC void
+xfs_btree_set_sibling(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_ptr *ptr,
+ int lr)
+{
+ ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (lr == XFS_BB_RIGHTSIB)
+ block->bb_u.l.bb_rightsib = ptr->l;
+ else
+ block->bb_u.l.bb_leftsib = ptr->l;
+ } else {
+ if (lr == XFS_BB_RIGHTSIB)
+ block->bb_u.s.bb_rightsib = ptr->s;
+ else
+ block->bb_u.s.bb_leftsib = ptr->s;
+ }
+}
+
+STATIC void
+xfs_btree_init_block(
+ struct xfs_btree_cur *cur,
+ int level,
+ int numrecs,
+ struct xfs_btree_block *new) /* new block */
+{
+ new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
+ new->bb_level = cpu_to_be16(level);
+ new->bb_numrecs = cpu_to_be16(numrecs);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
+ new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
+ } else {
+ new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+ new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+ }
+}
+
+/*
+ * Return true if ptr is the last record in the btree and
+ * we need to track updateѕ to this record. The decision
+ * will be further refined in the update_lastrec method.
+ */
+STATIC int
+xfs_btree_is_lastrec(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ int level)
+{
+ union xfs_btree_ptr ptr;
+
+ if (level > 0)
+ return 0;
+ if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
+ return 0;
+
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ if (!xfs_btree_ptr_is_null(cur, &ptr))
+ return 0;
+ return 1;
+}
+
+STATIC void
+xfs_btree_buf_to_ptr(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
+ XFS_BUF_ADDR(bp)));
+ else {
+ ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp,
+ XFS_BUF_ADDR(bp)));
+ }
+}
+
+STATIC xfs_daddr_t
+xfs_btree_ptr_to_daddr(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
+
+ return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+ } else {
+ ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
+ ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
+
+ return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+ be32_to_cpu(ptr->s));
+ }
+}
+
+STATIC void
+xfs_btree_set_refs(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ switch (cur->bc_btnum) {
+ case XFS_BTNUM_BNO:
+ case XFS_BTNUM_CNT:
+ XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
+ break;
+ case XFS_BTNUM_INO:
+ XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
+ break;
+ case XFS_BTNUM_BMAP:
+ XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
+ break;
+ default:
+ ASSERT(0);
+ }
+}
+
+STATIC int
+xfs_btree_get_buf_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int flags,
+ struct xfs_btree_block **block,
+ struct xfs_buf **bpp)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_daddr_t d;
+
+ /* need to sort out how callers deal with failures first */
+ ASSERT(!(flags & XFS_BUF_TRYLOCK));
+
+ d = xfs_btree_ptr_to_daddr(cur, ptr);
+ *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
+ mp->m_bsize, flags);
+
+ ASSERT(*bpp);
+ ASSERT(!XFS_BUF_GETERROR(*bpp));
+
+ *block = XFS_BUF_TO_BLOCK(*bpp);
+ return 0;
+}
+
+/*
+ * Read in the buffer at the given ptr and return the buffer and
+ * the block pointer within the buffer.
+ */
+STATIC int
+xfs_btree_read_buf_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int level,
+ int flags,
+ struct xfs_btree_block **block,
+ struct xfs_buf **bpp)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_daddr_t d;
+ int error;
+
+ /* need to sort out how callers deal with failures first */
+ ASSERT(!(flags & XFS_BUF_TRYLOCK));
+
+ d = xfs_btree_ptr_to_daddr(cur, ptr);
+ error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
+ mp->m_bsize, flags, bpp);
+ if (error)
+ return error;
+
+ ASSERT(*bpp != NULL);
+ ASSERT(!XFS_BUF_GETERROR(*bpp));
+
+ xfs_btree_set_refs(cur, *bpp);
+ *block = XFS_BUF_TO_BLOCK(*bpp);
+
+ error = xfs_btree_check_block(cur, *block, level, *bpp);
+ if (error)
+ xfs_trans_brelse(cur->bc_tp, *bpp);
+ return error;
+}
+
+/*
+ * Copy keys from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *dst_key,
+ union xfs_btree_key *src_key,
+ int numkeys)
+{
+ ASSERT(numkeys >= 0);
+ memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
+}
+
+/*
+ * Copy records from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_recs(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *dst_rec,
+ union xfs_btree_rec *src_rec,
+ int numrecs)
+{
+ ASSERT(numrecs >= 0);
+ memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
+}
+
+/*
+ * Copy block pointers from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_ptrs(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *dst_ptr,
+ union xfs_btree_ptr *src_ptr,
+ int numptrs)
+{
+ ASSERT(numptrs >= 0);
+ memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
+}
+
+/*
+ * Shift keys one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ int dir,
+ int numkeys)
+{
+ char *dst_key;
+
+ ASSERT(numkeys >= 0);
+ ASSERT(dir == 1 || dir == -1);
+
+ dst_key = (char *)key + (dir * cur->bc_ops->key_len);
+ memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
+}
+
+/*
+ * Shift records one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_recs(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ int dir,
+ int numrecs)
+{
+ char *dst_rec;
+
+ ASSERT(numrecs >= 0);
+ ASSERT(dir == 1 || dir == -1);
+
+ dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
+ memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
+}
+
+/*
+ * Shift block pointers one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_ptrs(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int dir,
+ int numptrs)
+{
+ char *dst_ptr;
+
+ ASSERT(numptrs >= 0);
+ ASSERT(dir == 1 || dir == -1);
+
+ dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
+ memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
+}
+
+/*
+ * Log key values from the btree block.
+ */
+STATIC void
+xfs_btree_log_keys(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int first,
+ int last)
+{
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+ if (bp) {
+ xfs_trans_log_buf(cur->bc_tp, bp,
+ xfs_btree_key_offset(cur, first),
+ xfs_btree_key_offset(cur, last + 1) - 1);
+ } else {
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+ xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log record values from the btree block.
+ */
+void
+xfs_btree_log_recs(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int first,
+ int last)
+{
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+ xfs_trans_log_buf(cur->bc_tp, bp,
+ xfs_btree_rec_offset(cur, first),
+ xfs_btree_rec_offset(cur, last + 1) - 1);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log block pointer fields from a btree block (nonleaf).
+ */
+STATIC void
+xfs_btree_log_ptrs(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_buf *bp, /* buffer containing btree block */
+ int first, /* index of first pointer to log */
+ int last) /* index of last pointer to log */
+{
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+ if (bp) {
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ int level = xfs_btree_get_level(block);
+
+ xfs_trans_log_buf(cur->bc_tp, bp,
+ xfs_btree_ptr_offset(cur, first, level),
+ xfs_btree_ptr_offset(cur, last + 1, level) - 1);
+ } else {
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+ xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log fields from a btree block header.
+ */
+void
+xfs_btree_log_block(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_buf *bp, /* buffer containing btree block */
+ int fields) /* mask of fields: XFS_BB_... */
+{
+ int first; /* first byte offset logged */
+ int last; /* last byte offset logged */
+ static const short soffsets[] = { /* table of offsets (short) */
+ offsetof(struct xfs_btree_block, bb_magic),
+ offsetof(struct xfs_btree_block, bb_level),
+ offsetof(struct xfs_btree_block, bb_numrecs),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
+ XFS_BTREE_SBLOCK_LEN
+ };
+ static const short loffsets[] = { /* table of offsets (long) */
+ offsetof(struct xfs_btree_block, bb_magic),
+ offsetof(struct xfs_btree_block, bb_level),
+ offsetof(struct xfs_btree_block, bb_numrecs),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
+ XFS_BTREE_LBLOCK_LEN
+ };
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
+
+ if (bp) {
+ xfs_btree_offsets(fields,
+ (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ loffsets : soffsets,
+ XFS_BB_NUM_BITS, &first, &last);
+ xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+ } else {
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+ xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int /* error */
+xfs_btree_increment(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block;
+ union xfs_btree_ptr ptr;
+ struct xfs_buf *bp;
+ int error; /* error return value */
+ int lev;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ ASSERT(level < cur->bc_nlevels);
+
+ /* Read-ahead to the right at this level. */
+ xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+
+ /* Get a pointer to the btree block. */
+ block = xfs_btree_get_block(cur, level, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ goto error0;
+#endif
+
+ /* We're done if we remain in the block after the increment. */
+ if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
+ goto out1;
+
+ /* Fail if we just went off the right edge of the tree. */
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ if (xfs_btree_ptr_is_null(cur, &ptr))
+ goto out0;
+
+ XFS_BTREE_STATS_INC(cur, increment);
+
+ /*
+ * March up the tree incrementing pointers.
+ * Stop when we don't go off the right edge of a block.
+ */
+ for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+ block = xfs_btree_get_block(cur, lev, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, lev, bp);
+ if (error)
+ goto error0;
+#endif
+
+ if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
+ break;
+
+ /* Read-ahead the right block for the next loop. */
+ xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+ }
+
+ /*
+ * If we went off the root then we are either seriously
+ * confused or have the tree root in an inode.
+ */
+ if (lev == cur->bc_nlevels) {
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ goto out0;
+ ASSERT(0);
+ error = EFSCORRUPTED;
+ goto error0;
+ }
+ ASSERT(lev < cur->bc_nlevels);
+
+ /*
+ * Now walk back down the tree, fixing up the cursor's buffer
+ * pointers and key numbers.
+ */
+ for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
+ union xfs_btree_ptr *ptrp;
+
+ ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
+ error = xfs_btree_read_buf_block(cur, ptrp, --lev,
+ 0, &block, &bp);
+ if (error)
+ goto error0;
+
+ xfs_btree_setbuf(cur, lev, bp);
+ cur->bc_ptrs[lev] = 1;
+ }
+out1:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int /* error */
+xfs_btree_decrement(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block;
+ xfs_buf_t *bp;
+ int error; /* error return value */
+ int lev;
+ union xfs_btree_ptr ptr;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ ASSERT(level < cur->bc_nlevels);
+
+ /* Read-ahead to the left at this level. */
+ xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+
+ /* We're done if we remain in the block after the decrement. */
+ if (--cur->bc_ptrs[level] > 0)
+ goto out1;
+
+ /* Get a pointer to the btree block. */
+ block = xfs_btree_get_block(cur, level, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ goto error0;
+#endif
+
+ /* Fail if we just went off the left edge of the tree. */
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
+ if (xfs_btree_ptr_is_null(cur, &ptr))
+ goto out0;
+
+ XFS_BTREE_STATS_INC(cur, decrement);
+
+ /*
+ * March up the tree decrementing pointers.
+ * Stop when we don't go off the left edge of a block.
+ */
+ for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+ if (--cur->bc_ptrs[lev] > 0)
+ break;
+ /* Read-ahead the left block for the next loop. */
+ xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+ }
+
+ /*
+ * If we went off the root then we are seriously confused.
+ * or the root of the tree is in an inode.
+ */
+ if (lev == cur->bc_nlevels) {
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ goto out0;
+ ASSERT(0);
+ error = EFSCORRUPTED;
+ goto error0;
+ }
+ ASSERT(lev < cur->bc_nlevels);
+
+ /*
+ * Now walk back down the tree, fixing up the cursor's buffer
+ * pointers and key numbers.
+ */
+ for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
+ union xfs_btree_ptr *ptrp;
+
+ ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
+ error = xfs_btree_read_buf_block(cur, ptrp, --lev,
+ 0, &block, &bp);
+ if (error)
+ goto error0;
+ xfs_btree_setbuf(cur, lev, bp);
+ cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
+ }
+out1:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+STATIC int
+xfs_btree_lookup_get_block(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* level in the btree */
+ union xfs_btree_ptr *pp, /* ptr to btree block */
+ struct xfs_btree_block **blkp) /* return btree block */
+{
+ struct xfs_buf *bp; /* buffer pointer for btree block */
+ int error = 0;
+
+ /* special case the root block if in an inode */
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level == cur->bc_nlevels - 1)) {
+ *blkp = xfs_btree_get_iroot(cur);
+ return 0;
+ }
+
+ /*
+ * If the old buffer at this level for the disk address we are
+ * looking for re-use it.
+ *
+ * Otherwise throw it away and get a new one.
+ */
+ bp = cur->bc_bufs[level];
+ if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
+ *blkp = XFS_BUF_TO_BLOCK(bp);
+ return 0;
+ }
+
+ error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
+ if (error)
+ return error;
+
+ xfs_btree_setbuf(cur, level, bp);
+ return 0;
+}
+
+/*
+ * Get current search key. For level 0 we don't actually have a key
+ * structure so we make one up from the record. For all other levels
+ * we just return the right key.
+ */
+STATIC union xfs_btree_key *
+xfs_lookup_get_search_key(
+ struct xfs_btree_cur *cur,
+ int level,
+ int keyno,
+ struct xfs_btree_block *block,
+ union xfs_btree_key *kp)
+{
+ if (level == 0) {
+ cur->bc_ops->init_key_from_rec(kp,
+ xfs_btree_rec_addr(cur, keyno, block));
+ return kp;
+ }
+
+ return xfs_btree_key_addr(cur, keyno, block);
+}
+
+/*
+ * Lookup the record. The cursor is made to point to it, based on dir.
+ * Return 0 if can't find any such record, 1 for success.
+ */
+int /* error */
+xfs_btree_lookup(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_lookup_t dir, /* <=, ==, or >= */
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block; /* current btree block */
+ __int64_t diff; /* difference for the current key */
+ int error; /* error return value */
+ int keyno; /* current key number */
+ int level; /* level in the btree */
+ union xfs_btree_ptr *pp; /* ptr to btree block */
+ union xfs_btree_ptr ptr; /* ptr to btree block */
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, dir);
+
+ XFS_BTREE_STATS_INC(cur, lookup);
+
+ block = NULL;
+ keyno = 0;
+
+ /* initialise start pointer from cursor */
+ cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ pp = &ptr;
+
+ /*
+ * Iterate over each level in the btree, starting at the root.
+ * For each level above the leaves, find the key we need, based
+ * on the lookup record, then follow the corresponding block
+ * pointer down to the next level.
+ */
+ for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+ /* Get the block we need to do the lookup on. */
+ error = xfs_btree_lookup_get_block(cur, level, pp, &block);
+ if (error)
+ goto error0;
+
+ if (diff == 0) {
+ /*
+ * If we already had a key match at a higher level, we
+ * know we need to use the first entry in this block.
+ */
+ keyno = 1;
+ } else {
+ /* Otherwise search this block. Do a binary search. */
+
+ int high; /* high entry number */
+ int low; /* low entry number */
+
+ /* Set low and high entry numbers, 1-based. */
+ low = 1;
+ high = xfs_btree_get_numrecs(block);
+ if (!high) {
+ /* Block is empty, must be an empty leaf. */
+ ASSERT(level == 0 && cur->bc_nlevels == 1);
+
+ cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ /* Binary search the block. */
+ while (low <= high) {
+ union xfs_btree_key key;
+ union xfs_btree_key *kp;
+
+ XFS_BTREE_STATS_INC(cur, compare);
+
+ /* keyno is average of low and high. */
+ keyno = (low + high) >> 1;
+
+ /* Get current search key */
+ kp = xfs_lookup_get_search_key(cur, level,
+ keyno, block, &key);
+
+ /*
+ * Compute difference to get next direction:
+ * - less than, move right
+ * - greater than, move left
+ * - equal, we're done
+ */
+ diff = cur->bc_ops->key_diff(cur, kp);
+ if (diff < 0)
+ low = keyno + 1;
+ else if (diff > 0)
+ high = keyno - 1;
+ else
+ break;
+ }
+ }
+
+ /*
+ * If there are more levels, set up for the next level
+ * by getting the block number and filling in the cursor.
+ */
+ if (level > 0) {
+ /*
+ * If we moved left, need the previous key number,
+ * unless there isn't one.
+ */
+ if (diff > 0 && --keyno < 1)
+ keyno = 1;
+ pp = xfs_btree_ptr_addr(cur, keyno, block);
+
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, pp, 0, level);
+ if (error)
+ goto error0;
+#endif
+ cur->bc_ptrs[level] = keyno;
+ }
+ }
+
+ /* Done with the search. See if we need to adjust the results. */
+ if (dir != XFS_LOOKUP_LE && diff < 0) {
+ keyno++;
+ /*
+ * If ge search and we went off the end of the block, but it's
+ * not the last block, we're in the wrong block.
+ */
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ if (dir == XFS_LOOKUP_GE &&
+ keyno > xfs_btree_get_numrecs(block) &&
+ !xfs_btree_ptr_is_null(cur, &ptr)) {
+ int i;
+
+ cur->bc_ptrs[0] = keyno;
+ error = xfs_btree_increment(cur, 0, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+ }
+ } else if (dir == XFS_LOOKUP_LE && diff > 0)
+ keyno--;
+ cur->bc_ptrs[0] = keyno;
+
+ /* Return if we succeeded or not. */
+ if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
+ *stat = 0;
+ else if (dir != XFS_LOOKUP_EQ || diff == 0)
+ *stat = 1;
+ else
+ *stat = 0;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Update keys at all levels from here to the root along the cursor's path.
+ */
+STATIC int
+xfs_btree_updkey(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *keyp,
+ int level)
+{
+ struct xfs_btree_block *block;
+ struct xfs_buf *bp;
+ union xfs_btree_key *kp;
+ int ptr;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
+
+ ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
+
+ /*
+ * Go up the tree from this level toward the root.
+ * At each level, update the key value to the value input.
+ * Stop when we reach a level where the cursor isn't pointing
+ * at the first entry in the block.
+ */
+ for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+#ifdef DEBUG
+ int error;
+#endif
+ block = xfs_btree_get_block(cur, level, &bp);
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+ }
+#endif
+ ptr = cur->bc_ptrs[level];
+ kp = xfs_btree_key_addr(cur, ptr, block);
+ xfs_btree_copy_keys(cur, kp, keyp, 1);
+ xfs_btree_log_keys(cur, bp, ptr, ptr);
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+}
+
+/*
+ * Update the record referred to by cur to the value in the
+ * given record. This either works (return 0) or gets an
+ * EFSCORRUPTED error.
+ */
+int
+xfs_btree_update(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ struct xfs_btree_block *block;
+ struct xfs_buf *bp;
+ int error;
+ int ptr;
+ union xfs_btree_rec *rp;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGR(cur, rec);
+
+ /* Pick up the current block. */
+ block = xfs_btree_get_block(cur, 0, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, 0, bp);
+ if (error)
+ goto error0;
+#endif
+ /* Get the address of the rec to be updated. */
+ ptr = cur->bc_ptrs[0];
+ rp = xfs_btree_rec_addr(cur, ptr, block);
+
+ /* Fill in the new contents and log them. */
+ xfs_btree_copy_recs(cur, rp, rec, 1);
+ xfs_btree_log_recs(cur, bp, ptr, ptr);
+
+ /*
+ * If we are tracking the last record in the tree and
+ * we are at the far right edge of the tree, update it.
+ */
+ if (xfs_btree_is_lastrec(cur, block, 0)) {
+ cur->bc_ops->update_lastrec(cur, block, rec,
+ ptr, LASTREC_UPDATE);
+ }
+
+ /* Updating first rec in leaf. Pass new key value up to our parent. */
+ if (ptr == 1) {
+ union xfs_btree_key key;
+
+ cur->bc_ops->init_key_from_rec(&key, rec);
+ error = xfs_btree_updkey(cur, &key, 1);
+ if (error)
+ goto error0;
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int /* error */
+xfs_btree_lshift(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat) /* success/failure */
+{
+ union xfs_btree_key key; /* btree key */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ int lrecs; /* left record count */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ int rrecs; /* right record count */
+ union xfs_btree_ptr lptr; /* left btree pointer */
+ union xfs_btree_key *rkp = NULL; /* right btree key */
+ union xfs_btree_ptr *rpp = NULL; /* right address pointer */
+ union xfs_btree_rec *rrp = NULL; /* right record pointer */
+ int error; /* error return value */
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels - 1)
+ goto out0;
+
+ /* Set up variables for this block as "right". */
+ right = xfs_btree_get_block(cur, level, &rbp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, right, level, rbp);
+ if (error)
+ goto error0;
+#endif
+
+ /* If we've got no left sibling then we can't shift an entry left. */
+ xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+ if (xfs_btree_ptr_is_null(cur, &lptr))
+ goto out0;
+
+ /*
+ * If the cursor entry is the one that would be moved, don't
+ * do it... it's too complicated.
+ */
+ if (cur->bc_ptrs[level] <= 1)
+ goto out0;
+
+ /* Set up the left neighbor as "left". */
+ error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
+ if (error)
+ goto error0;
+
+ /* If it's full, it can't take another entry. */
+ lrecs = xfs_btree_get_numrecs(left);
+ if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
+ goto out0;
+
+ rrecs = xfs_btree_get_numrecs(right);
+
+ /*
+ * We add one entry to the left side and remove one for the right side.
+ * Accout for it here, the changes will be updated on disk and logged
+ * later.
+ */
+ lrecs++;
+ rrecs--;
+
+ XFS_BTREE_STATS_INC(cur, lshift);
+ XFS_BTREE_STATS_ADD(cur, moves, 1);
+
+ /*
+ * If non-leaf, copy a key and a ptr to the left block.
+ * Log the changes to the left block.
+ */
+ if (level > 0) {
+ /* It's a non-leaf. Move keys and pointers. */
+ union xfs_btree_key *lkp; /* left btree key */
+ union xfs_btree_ptr *lpp; /* left address pointer */
+
+ lkp = xfs_btree_key_addr(cur, lrecs, left);
+ rkp = xfs_btree_key_addr(cur, 1, right);
+
+ lpp = xfs_btree_ptr_addr(cur, lrecs, left);
+ rpp = xfs_btree_ptr_addr(cur, 1, right);
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, rpp, 0, level);
+ if (error)
+ goto error0;
+#endif
+ xfs_btree_copy_keys(cur, lkp, rkp, 1);
+ xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
+
+ xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
+ xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
+
+ ASSERT(cur->bc_ops->keys_inorder(cur,
+ xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
+ } else {
+ /* It's a leaf. Move records. */
+ union xfs_btree_rec *lrp; /* left record pointer */
+
+ lrp = xfs_btree_rec_addr(cur, lrecs, left);
+ rrp = xfs_btree_rec_addr(cur, 1, right);
+
+ xfs_btree_copy_recs(cur, lrp, rrp, 1);
+ xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
+
+ ASSERT(cur->bc_ops->recs_inorder(cur,
+ xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
+ }
+
+ xfs_btree_set_numrecs(left, lrecs);
+ xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
+
+ xfs_btree_set_numrecs(right, rrecs);
+ xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
+
+ /*
+ * Slide the contents of right down one entry.
+ */
+ XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
+ if (level > 0) {
+ /* It's a nonleaf. operate on keys and ptrs */
+#ifdef DEBUG
+ int i; /* loop index */
+
+ for (i = 0; i < rrecs; i++) {
+ error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
+ if (error)
+ goto error0;
+ }
+#endif
+ xfs_btree_shift_keys(cur,
+ xfs_btree_key_addr(cur, 2, right),
+ -1, rrecs);
+ xfs_btree_shift_ptrs(cur,
+ xfs_btree_ptr_addr(cur, 2, right),
+ -1, rrecs);
+
+ xfs_btree_log_keys(cur, rbp, 1, rrecs);
+ xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
+ } else {
+ /* It's a leaf. operate on records */
+ xfs_btree_shift_recs(cur,
+ xfs_btree_rec_addr(cur, 2, right),
+ -1, rrecs);
+ xfs_btree_log_recs(cur, rbp, 1, rrecs);
+
+ /*
+ * If it's the first record in the block, we'll need a key
+ * structure to pass up to the next level (updkey).
+ */
+ cur->bc_ops->init_key_from_rec(&key,
+ xfs_btree_rec_addr(cur, 1, right));
+ rkp = &key;
+ }
+
+ /* Update the parent key values of right. */
+ error = xfs_btree_updkey(cur, rkp, level + 1);
+ if (error)
+ goto error0;
+
+ /* Slide the cursor value left one. */
+ cur->bc_ptrs[level]--;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int /* error */
+xfs_btree_rshift(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat) /* success/failure */
+{
+ union xfs_btree_key key; /* btree key */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ struct xfs_btree_cur *tcur; /* temporary btree cursor */
+ union xfs_btree_ptr rptr; /* right block pointer */
+ union xfs_btree_key *rkp; /* right btree key */
+ int rrecs; /* right record count */
+ int lrecs; /* left record count */
+ int error; /* error return value */
+ int i; /* loop counter */
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level == cur->bc_nlevels - 1))
+ goto out0;
+
+ /* Set up variables for this block as "left". */
+ left = xfs_btree_get_block(cur, level, &lbp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, left, level, lbp);
+ if (error)
+ goto error0;
+#endif
+
+ /* If we've got no right sibling then we can't shift an entry right. */
+ xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
+ if (xfs_btree_ptr_is_null(cur, &rptr))
+ goto out0;
+
+ /*
+ * If the cursor entry is the one that would be moved, don't
+ * do it... it's too complicated.
+ */
+ lrecs = xfs_btree_get_numrecs(left);
+ if (cur->bc_ptrs[level] >= lrecs)
+ goto out0;
+
+ /* Set up the right neighbor as "right". */
+ error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
+ if (error)
+ goto error0;
+
+ /* If it's full, it can't take another entry. */
+ rrecs = xfs_btree_get_numrecs(right);
+ if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
+ goto out0;
+
+ XFS_BTREE_STATS_INC(cur, rshift);
+ XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+
+ /*
+ * Make a hole at the start of the right neighbor block, then
+ * copy the last left block entry to the hole.
+ */
+ if (level > 0) {
+ /* It's a nonleaf. make a hole in the keys and ptrs */
+ union xfs_btree_key *lkp;
+ union xfs_btree_ptr *lpp;
+ union xfs_btree_ptr *rpp;
+
+ lkp = xfs_btree_key_addr(cur, lrecs, left);
+ lpp = xfs_btree_ptr_addr(cur, lrecs, left);
+ rkp = xfs_btree_key_addr(cur, 1, right);
+ rpp = xfs_btree_ptr_addr(cur, 1, right);
+
+#ifdef DEBUG
+ for (i = rrecs - 1; i >= 0; i--) {
+ error = xfs_btree_check_ptr(cur, rpp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+
+ xfs_btree_shift_keys(cur, rkp, 1, rrecs);
+ xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
+
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, lpp, 0, level);
+ if (error)
+ goto error0;
+#endif
+
+ /* Now put the new data in, and log it. */
+ xfs_btree_copy_keys(cur, rkp, lkp, 1);
+ xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
+
+ xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
+ xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
+
+ ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
+ xfs_btree_key_addr(cur, 2, right)));
+ } else {
+ /* It's a leaf. make a hole in the records */
+ union xfs_btree_rec *lrp;
+ union xfs_btree_rec *rrp;
+
+ lrp = xfs_btree_rec_addr(cur, lrecs, left);
+ rrp = xfs_btree_rec_addr(cur, 1, right);
+
+ xfs_btree_shift_recs(cur, rrp, 1, rrecs);
+
+ /* Now put the new data in, and log it. */
+ xfs_btree_copy_recs(cur, rrp, lrp, 1);
+ xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
+
+ cur->bc_ops->init_key_from_rec(&key, rrp);
+ rkp = &key;
+
+ ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
+ xfs_btree_rec_addr(cur, 2, right)));
+ }
+
+ /*
+ * Decrement and log left's numrecs, bump and log right's numrecs.
+ */
+ xfs_btree_set_numrecs(left, --lrecs);
+ xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
+
+ xfs_btree_set_numrecs(right, ++rrecs);
+ xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
+
+ /*
+ * Using a temporary cursor, update the parent key values of the
+ * block on the right.
+ */
+ error = xfs_btree_dup_cursor(cur, &tcur);
+ if (error)
+ goto error0;
+ i = xfs_btree_lastrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ error = xfs_btree_increment(tcur, level, &i);
+ if (error)
+ goto error1;
+
+ error = xfs_btree_updkey(tcur, rkp, level + 1);
+ if (error)
+ goto error1;
+
+ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+
+error1:
+ XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
+ xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and the key to its first
+ * record (to be inserted into parent).
+ */
+STATIC int /* error */
+xfs_btree_split(
+ struct xfs_btree_cur *cur,
+ int level,
+ union xfs_btree_ptr *ptrp,
+ union xfs_btree_key *key,
+ struct xfs_btree_cur **curp,
+ int *stat) /* success/failure */
+{
+ union xfs_btree_ptr lptr; /* left sibling block ptr */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ union xfs_btree_ptr rptr; /* right sibling block ptr */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ union xfs_btree_ptr rrptr; /* right-right sibling ptr */
+ struct xfs_buf *rrbp; /* right-right buffer pointer */
+ struct xfs_btree_block *rrblock; /* right-right btree block */
+ int lrecs;
+ int rrecs;
+ int src_index;
+ int error; /* error return value */
+#ifdef DEBUG
+ int i;
+#endif
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
+
+ XFS_BTREE_STATS_INC(cur, split);
+
+ /* Set up left block (current one). */
+ left = xfs_btree_get_block(cur, level, &lbp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, left, level, lbp);
+ if (error)
+ goto error0;
+#endif
+
+ xfs_btree_buf_to_ptr(cur, lbp, &lptr);
+
+ /* Allocate the new block. If we can't do it, we're toast. Give up. */
+ error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
+ if (error)
+ goto error0;
+ if (*stat == 0)
+ goto out0;
+ XFS_BTREE_STATS_INC(cur, alloc);
+
+ /* Set up the new block as "right". */
+ error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
+ if (error)
+ goto error0;
+
+ /* Fill in the btree header for the new right block. */
+ xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
+
+ /*
+ * Split the entries between the old and the new block evenly.
+ * Make sure that if there's an odd number of entries now, that
+ * each new block will have the same number of entries.
+ */
+ lrecs = xfs_btree_get_numrecs(left);
+ rrecs = lrecs / 2;
+ if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
+ rrecs++;
+ src_index = (lrecs - rrecs + 1);
+
+ XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+
+ /*
+ * Copy btree block entries from the left block over to the
+ * new block, the right. Update the right block and log the
+ * changes.
+ */
+ if (level > 0) {
+ /* It's a non-leaf. Move keys and pointers. */
+ union xfs_btree_key *lkp; /* left btree key */
+ union xfs_btree_ptr *lpp; /* left address pointer */
+ union xfs_btree_key *rkp; /* right btree key */
+ union xfs_btree_ptr *rpp; /* right address pointer */
+
+ lkp = xfs_btree_key_addr(cur, src_index, left);
+ lpp = xfs_btree_ptr_addr(cur, src_index, left);
+ rkp = xfs_btree_key_addr(cur, 1, right);
+ rpp = xfs_btree_ptr_addr(cur, 1, right);
+
+#ifdef DEBUG
+ for (i = src_index; i < rrecs; i++) {
+ error = xfs_btree_check_ptr(cur, lpp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+
+ xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
+ xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
+
+ xfs_btree_log_keys(cur, rbp, 1, rrecs);
+ xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
+
+ /* Grab the keys to the entries moved to the right block */
+ xfs_btree_copy_keys(cur, key, rkp, 1);
+ } else {
+ /* It's a leaf. Move records. */
+ union xfs_btree_rec *lrp; /* left record pointer */
+ union xfs_btree_rec *rrp; /* right record pointer */
+
+ lrp = xfs_btree_rec_addr(cur, src_index, left);
+ rrp = xfs_btree_rec_addr(cur, 1, right);
+
+ xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
+ xfs_btree_log_recs(cur, rbp, 1, rrecs);
+
+ cur->bc_ops->init_key_from_rec(key,
+ xfs_btree_rec_addr(cur, 1, right));
+ }
+
+
+ /*
+ * Find the left block number by looking in the buffer.
+ * Adjust numrecs, sibling pointers.
+ */
+ xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
+ xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
+ xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+ xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
+
+ lrecs -= rrecs;
+ xfs_btree_set_numrecs(left, lrecs);
+ xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
+
+ xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
+ xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+
+ /*
+ * If there's a block to the new block's right, make that block
+ * point back to right instead of to left.
+ */
+ if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
+ error = xfs_btree_read_buf_block(cur, &rrptr, level,
+ 0, &rrblock, &rrbp);
+ if (error)
+ goto error0;
+ xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
+ xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+ }
+ /*
+ * If the cursor is really in the right block, move it there.
+ * If it's just pointing past the last entry in left, then we'll
+ * insert there, so don't change anything in that case.
+ */
+ if (cur->bc_ptrs[level] > lrecs + 1) {
+ xfs_btree_setbuf(cur, level, rbp);
+ cur->bc_ptrs[level] -= lrecs;
+ }
+ /*
+ * If there are more levels, we'll need another cursor which refers
+ * the right block, no matter where this cursor was.
+ */
+ if (level + 1 < cur->bc_nlevels) {
+ error = xfs_btree_dup_cursor(cur, curp);
+ if (error)
+ goto error0;
+ (*curp)->bc_ptrs[level + 1]++;
+ }
+ *ptrp = rptr;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Copy the old inode root contents into a real block and make the
+ * broot point to it.
+ */
+int /* error */
+xfs_btree_new_iroot(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int *logflags, /* logging flags for inode */
+ int *stat) /* return status - 0 fail */
+{
+ struct xfs_buf *cbp; /* buffer for cblock */
+ struct xfs_btree_block *block; /* btree block */
+ struct xfs_btree_block *cblock; /* child btree block */
+ union xfs_btree_key *ckp; /* child key pointer */
+ union xfs_btree_ptr *cpp; /* child ptr pointer */
+ union xfs_btree_key *kp; /* pointer to btree key */
+ union xfs_btree_ptr *pp; /* pointer to block addr */
+ union xfs_btree_ptr nptr; /* new block addr */
+ int level; /* btree level */
+ int error; /* error return code */
+#ifdef DEBUG
+ int i; /* loop counter */
+#endif
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_STATS_INC(cur, newroot);
+
+ ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+
+ level = cur->bc_nlevels - 1;
+
+ block = xfs_btree_get_iroot(cur);
+ pp = xfs_btree_ptr_addr(cur, 1, block);
+
+ /* Allocate the new block. If we can't do it, we're toast. Give up. */
+ error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
+ if (error)
+ goto error0;
+ if (*stat == 0) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+ }
+ XFS_BTREE_STATS_INC(cur, alloc);
+
+ /* Copy the root into a real block. */
+ error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
+ if (error)
+ goto error0;
+
+ memcpy(cblock, block, xfs_btree_block_len(cur));
+
+ be16_add_cpu(&block->bb_level, 1);
+ xfs_btree_set_numrecs(block, 1);
+ cur->bc_nlevels++;
+ cur->bc_ptrs[level + 1] = 1;
+
+ kp = xfs_btree_key_addr(cur, 1, block);
+ ckp = xfs_btree_key_addr(cur, 1, cblock);
+ xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
+
+ cpp = xfs_btree_ptr_addr(cur, 1, cblock);
+#ifdef DEBUG
+ for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
+ error = xfs_btree_check_ptr(cur, pp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+ xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
+
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, &nptr, 0, level);
+ if (error)
+ goto error0;
+#endif
+ xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
+
+ xfs_iroot_realloc(cur->bc_private.b.ip,
+ 1 - xfs_btree_get_numrecs(cblock),
+ cur->bc_private.b.whichfork);
+
+ xfs_btree_setbuf(cur, level, cbp);
+
+ /*
+ * Do all this logging at the end so that
+ * the root is at the right level.
+ */
+ xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
+ xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
+ xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
+
+ *logflags |=
+ XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
+ *stat = 1;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Allocate a new root block, fill it in.
+ */
+STATIC int /* error */
+xfs_btree_new_root(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block; /* one half of the old root block */
+ struct xfs_buf *bp; /* buffer containing block */
+ int error; /* error return value */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ struct xfs_buf *nbp; /* new (root) buffer */
+ struct xfs_btree_block *new; /* new (root) btree block */
+ int nptr; /* new value for key index, 1 or 2 */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ union xfs_btree_ptr rptr;
+ union xfs_btree_ptr lptr;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_STATS_INC(cur, newroot);
+
+ /* initialise our start point from the cursor */
+ cur->bc_ops->init_ptr_from_cur(cur, &rptr);
+
+ /* Allocate the new block. If we can't do it, we're toast. Give up. */
+ error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
+ if (error)
+ goto error0;
+ if (*stat == 0)
+ goto out0;
+ XFS_BTREE_STATS_INC(cur, alloc);
+
+ /* Set up the new block. */
+ error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
+ if (error)
+ goto error0;
+
+ /* Set the root in the holding structure increasing the level by 1. */
+ cur->bc_ops->set_root(cur, &lptr, 1);
+
+ /*
+ * At the previous root level there are now two blocks: the old root,
+ * and the new block generated when it was split. We don't know which
+ * one the cursor is pointing at, so we set up variables "left" and
+ * "right" for each case.
+ */
+ block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
+ if (error)
+ goto error0;
+#endif
+
+ xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+ if (!xfs_btree_ptr_is_null(cur, &rptr)) {
+ /* Our block is left, pick up the right block. */
+ lbp = bp;
+ xfs_btree_buf_to_ptr(cur, lbp, &lptr);
+ left = block;
+ error = xfs_btree_read_buf_block(cur, &rptr,
+ cur->bc_nlevels - 1, 0, &right, &rbp);
+ if (error)
+ goto error0;
+ bp = rbp;
+ nptr = 1;
+ } else {
+ /* Our block is right, pick up the left block. */
+ rbp = bp;
+ xfs_btree_buf_to_ptr(cur, rbp, &rptr);
+ right = block;
+ xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+ error = xfs_btree_read_buf_block(cur, &lptr,
+ cur->bc_nlevels - 1, 0, &left, &lbp);
+ if (error)
+ goto error0;
+ bp = lbp;
+ nptr = 2;
+ }
+ /* Fill in the new block's btree header and log it. */
+ xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
+ xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
+ ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
+ !xfs_btree_ptr_is_null(cur, &rptr));
+
+ /* Fill in the key data in the new root. */
+ if (xfs_btree_get_level(left) > 0) {
+ xfs_btree_copy_keys(cur,
+ xfs_btree_key_addr(cur, 1, new),
+ xfs_btree_key_addr(cur, 1, left), 1);
+ xfs_btree_copy_keys(cur,
+ xfs_btree_key_addr(cur, 2, new),
+ xfs_btree_key_addr(cur, 1, right), 1);
+ } else {
+ cur->bc_ops->init_key_from_rec(
+ xfs_btree_key_addr(cur, 1, new),
+ xfs_btree_rec_addr(cur, 1, left));
+ cur->bc_ops->init_key_from_rec(
+ xfs_btree_key_addr(cur, 2, new),
+ xfs_btree_rec_addr(cur, 1, right));
+ }
+ xfs_btree_log_keys(cur, nbp, 1, 2);
+
+ /* Fill in the pointer data in the new root. */
+ xfs_btree_copy_ptrs(cur,
+ xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
+ xfs_btree_copy_ptrs(cur,
+ xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
+ xfs_btree_log_ptrs(cur, nbp, 1, 2);
+
+ /* Fix up the cursor. */
+ xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+ cur->bc_ptrs[cur->bc_nlevels] = nptr;
+ cur->bc_nlevels++;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+}
+
+STATIC int
+xfs_btree_make_block_unfull(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* btree level */
+ int numrecs,/* # of recs in block */
+ int *oindex,/* old tree index */
+ int *index, /* new tree index */
+ union xfs_btree_ptr *nptr, /* new btree ptr */
+ struct xfs_btree_cur **ncur, /* new btree cursor */
+ union xfs_btree_rec *nrec, /* new record */
+ int *stat)
+{
+ union xfs_btree_key key; /* new btree key value */
+ int error = 0;
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels - 1) {
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+
+ if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
+ /* A root block that can be made bigger. */
+
+ xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
+ } else {
+ /* A root block that needs replacing */
+ int logflags = 0;
+
+ error = xfs_btree_new_iroot(cur, &logflags, stat);
+ if (error || *stat == 0)
+ return error;
+
+ xfs_trans_log_inode(cur->bc_tp, ip, logflags);
+ }
+
+ return 0;
+ }
+
+ /* First, try shifting an entry to the right neighbor. */
+ error = xfs_btree_rshift(cur, level, stat);
+ if (error || *stat)
+ return error;
+
+ /* Next, try shifting an entry to the left neighbor. */
+ error = xfs_btree_lshift(cur, level, stat);
+ if (error)
+ return error;
+
+ if (*stat) {
+ *oindex = *index = cur->bc_ptrs[level];
+ return 0;
+ }
+
+ /*
+ * Next, try splitting the current block in half.
+ *
+ * If this works we have to re-set our variables because we
+ * could be in a different block now.
+ */
+ error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
+ if (error || *stat == 0)
+ return error;
+
+
+ *index = cur->bc_ptrs[level];
+ cur->bc_ops->init_rec_from_key(&key, nrec);
+ return 0;
+}
+
+/*
+ * Insert one record/level. Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ */
+STATIC int
+xfs_btree_insrec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* level to insert record at */
+ union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
+ union xfs_btree_rec *recp, /* i/o: record data inserted */
+ struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_block *block; /* btree block */
+ struct xfs_buf *bp; /* buffer for block */
+ union xfs_btree_key key; /* btree key */
+ union xfs_btree_ptr nptr; /* new block ptr */
+ struct xfs_btree_cur *ncur; /* new btree cursor */
+ union xfs_btree_rec nrec; /* new record count */
+ int optr; /* old key/record index */
+ int ptr; /* key/record index */
+ int numrecs;/* number of records */
+ int error; /* error return value */
+#ifdef DEBUG
+ int i;
+#endif
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
+
+ ncur = NULL;
+
+ /*
+ * If we have an external root pointer, and we've made it to the
+ * root level, allocate a new root block and we're done.
+ */
+ if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ (level >= cur->bc_nlevels)) {
+ error = xfs_btree_new_root(cur, stat);
+ xfs_btree_set_ptr_null(cur, ptrp);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return error;
+ }
+
+ /* If we're off the left edge, return failure. */
+ ptr = cur->bc_ptrs[level];
+ if (ptr == 0) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ /* Make a key out of the record data to be inserted, and save it. */
+ cur->bc_ops->init_key_from_rec(&key, recp);
+
+ optr = ptr;
+
+ XFS_BTREE_STATS_INC(cur, insrec);
+
+ /* Get pointers to the btree buffer and block. */
+ block = xfs_btree_get_block(cur, level, &bp);
+ numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ goto error0;
+
+ /* Check that the new entry is being inserted in the right place. */
+ if (ptr <= numrecs) {
+ if (level == 0) {
+ ASSERT(cur->bc_ops->recs_inorder(cur, recp,
+ xfs_btree_rec_addr(cur, ptr, block)));
+ } else {
+ ASSERT(cur->bc_ops->keys_inorder(cur, &key,
+ xfs_btree_key_addr(cur, ptr, block)));
+ }
+ }
+#endif
+
+ /*
+ * If the block is full, we can't insert the new entry until we
+ * make the block un-full.
+ */
+ xfs_btree_set_ptr_null(cur, &nptr);
+ if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
+ error = xfs_btree_make_block_unfull(cur, level, numrecs,
+ &optr, &ptr, &nptr, &ncur, &nrec, stat);
+ if (error || *stat == 0)
+ goto error0;
+ }
+
+ /*
+ * The current block may have changed if the block was
+ * previously full and we have just made space in it.
+ */
+ block = xfs_btree_get_block(cur, level, &bp);
+ numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ return error;
+#endif
+
+ /*
+ * At this point we know there's room for our new entry in the block
+ * we're pointing at.
+ */
+ XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
+
+ if (level > 0) {
+ /* It's a nonleaf. make a hole in the keys and ptrs */
+ union xfs_btree_key *kp;
+ union xfs_btree_ptr *pp;
+
+ kp = xfs_btree_key_addr(cur, ptr, block);
+ pp = xfs_btree_ptr_addr(cur, ptr, block);
+
+#ifdef DEBUG
+ for (i = numrecs - ptr; i >= 0; i--) {
+ error = xfs_btree_check_ptr(cur, pp, i, level);
+ if (error)
+ return error;
+ }
+#endif
+
+ xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
+ xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
+
+#ifdef DEBUG
+ error = xfs_btree_check_ptr(cur, ptrp, 0, level);
+ if (error)
+ goto error0;
+#endif
+
+ /* Now put the new data in, bump numrecs and log it. */
+ xfs_btree_copy_keys(cur, kp, &key, 1);
+ xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
+ numrecs++;
+ xfs_btree_set_numrecs(block, numrecs);
+ xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
+ xfs_btree_log_keys(cur, bp, ptr, numrecs);
+#ifdef DEBUG
+ if (ptr < numrecs) {
+ ASSERT(cur->bc_ops->keys_inorder(cur, kp,
+ xfs_btree_key_addr(cur, ptr + 1, block)));
+ }
+#endif
+ } else {
+ /* It's a leaf. make a hole in the records */
+ union xfs_btree_rec *rp;
+
+ rp = xfs_btree_rec_addr(cur, ptr, block);
+
+ xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
+
+ /* Now put the new data in, bump numrecs and log it. */
+ xfs_btree_copy_recs(cur, rp, recp, 1);
+ xfs_btree_set_numrecs(block, ++numrecs);
+ xfs_btree_log_recs(cur, bp, ptr, numrecs);
+#ifdef DEBUG
+ if (ptr < numrecs) {
+ ASSERT(cur->bc_ops->recs_inorder(cur, rp,
+ xfs_btree_rec_addr(cur, ptr + 1, block)));
+ }
+#endif
+ }
+
+ /* Log the new number of records in the btree header. */
+ xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
+
+ /* If we inserted at the start of a block, update the parents' keys. */
+ if (optr == 1) {
+ error = xfs_btree_updkey(cur, &key, level + 1);
+ if (error)
+ goto error0;
+ }
+
+ /*
+ * If we are tracking the last record in the tree and
+ * we are at the far right edge of the tree, update it.
+ */
+ if (xfs_btree_is_lastrec(cur, block, level)) {
+ cur->bc_ops->update_lastrec(cur, block, recp,
+ ptr, LASTREC_INSREC);
+ }
+
+ /*
+ * Return the new block number, if any.
+ * If there is one, give back a record value and a cursor too.
+ */
+ *ptrp = nptr;
+ if (!xfs_btree_ptr_is_null(cur, &nptr)) {
+ *recp = nrec;
+ *curp = ncur;
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Insert the record at the point referenced by cur.
+ *
+ * A multi-level split of the tree on insert will invalidate the original
+ * cursor. All callers of this function should assume that the cursor is
+ * no longer valid and revalidate it.
+ */
+int
+xfs_btree_insert(
+ struct xfs_btree_cur *cur,
+ int *stat)
+{
+ int error; /* error return value */
+ int i; /* result value, 0 for failure */
+ int level; /* current level number in btree */
+ union xfs_btree_ptr nptr; /* new block number (split result) */
+ struct xfs_btree_cur *ncur; /* new cursor (split result) */
+ struct xfs_btree_cur *pcur; /* previous level's cursor */
+ union xfs_btree_rec rec; /* record to insert */
+
+ level = 0;
+ ncur = NULL;
+ pcur = cur;
+
+ xfs_btree_set_ptr_null(cur, &nptr);
+ cur->bc_ops->init_rec_from_cur(cur, &rec);
+
+ /*
+ * Loop going up the tree, starting at the leaf level.
+ * Stop when we don't get a split block, that must mean that
+ * the insert is finished with this level.
+ */
+ do {
+ /*
+ * Insert nrec/nptr into this level of the tree.
+ * Note if we fail, nptr will be null.
+ */
+ error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
+ if (error) {
+ if (pcur != cur)
+ xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+ goto error0;
+ }
+
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+ level++;
+
+ /*
+ * See if the cursor we just used is trash.
+ * Can't trash the caller's cursor, but otherwise we should
+ * if ncur is a new cursor or we're about to be done.
+ */
+ if (pcur != cur &&
+ (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
+ /* Save the state from the cursor before we trash it */
+ if (cur->bc_ops->update_cursor)
+ cur->bc_ops->update_cursor(pcur, cur);
+ cur->bc_nlevels = pcur->bc_nlevels;
+ xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+ }
+ /* If we got a new cursor, switch to it. */
+ if (ncur) {
+ pcur = ncur;
+ ncur = NULL;
+ }
+ } while (!xfs_btree_ptr_is_null(cur, &nptr));
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = i;
+ return 0;
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Try to merge a non-leaf block back into the inode root.
+ *
+ * Note: the killroot names comes from the fact that we're effectively
+ * killing the old root block. But because we can't just delete the
+ * inode we have to copy the single block it was pointing to into the
+ * inode.
+ */
+int
+xfs_btree_kill_iroot(
+ struct xfs_btree_cur *cur)
+{
+ int whichfork = cur->bc_private.b.whichfork;
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_btree_block *block;
+ struct xfs_btree_block *cblock;
+ union xfs_btree_key *kp;
+ union xfs_btree_key *ckp;
+ union xfs_btree_ptr *pp;
+ union xfs_btree_ptr *cpp;
+ struct xfs_buf *cbp;
+ int level;
+ int index;
+ int numrecs;
+#ifdef DEBUG
+ union xfs_btree_ptr ptr;
+ int i;
+#endif
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_nlevels > 1);
+
+ /*
+ * Don't deal with the root block needs to be a leaf case.
+ * We're just going to turn the thing back into extents anyway.
+ */
+ level = cur->bc_nlevels - 1;
+ if (level == 1)
+ goto out0;
+
+ /*
+ * Give up if the root has multiple children.
+ */
+ block = xfs_btree_get_iroot(cur);
+ if (xfs_btree_get_numrecs(block) != 1)
+ goto out0;
+
+ cblock = xfs_btree_get_block(cur, level - 1, &cbp);
+ numrecs = xfs_btree_get_numrecs(cblock);
+
+ /*
+ * Only do this if the next level will fit.
+ * Then the data must be copied up to the inode,
+ * instead of freeing the root you free the next level.
+ */
+ if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
+ goto out0;
+
+ XFS_BTREE_STATS_INC(cur, killroot);
+
+#ifdef DEBUG
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
+ ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
+ xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+ ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
+#endif
+
+ index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
+ if (index) {
+ xfs_iroot_realloc(cur->bc_private.b.ip, index,
+ cur->bc_private.b.whichfork);
+ block = ifp->if_broot;
+ }
+
+ be16_add_cpu(&block->bb_numrecs, index);
+ ASSERT(block->bb_numrecs == cblock->bb_numrecs);
+
+ kp = xfs_btree_key_addr(cur, 1, block);
+ ckp = xfs_btree_key_addr(cur, 1, cblock);
+ xfs_btree_copy_keys(cur, kp, ckp, numrecs);
+
+ pp = xfs_btree_ptr_addr(cur, 1, block);
+ cpp = xfs_btree_ptr_addr(cur, 1, cblock);
+#ifdef DEBUG
+ for (i = 0; i < numrecs; i++) {
+ int error;
+
+ error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+ }
+ }
+#endif
+ xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
+
+ cur->bc_ops->free_block(cur, cbp);
+ XFS_BTREE_STATS_INC(cur, free);
+
+ cur->bc_bufs[level - 1] = NULL;
+ be16_add_cpu(&block->bb_level, -1);
+ xfs_trans_log_inode(cur->bc_tp, ip,
+ XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+ cur->bc_nlevels--;
+out0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ return 0;
+}
+
+STATIC int
+xfs_btree_dec_cursor(
+ struct xfs_btree_cur *cur,
+ int level,
+ int *stat)
+{
+ int error;
+ int i;
+
+ if (level > 0) {
+ error = xfs_btree_decrement(cur, level, &i);
+ if (error)
+ return error;
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+}
+
+/*
+ * Single level of the btree record deletion routine.
+ * Delete record pointed to by cur/level.
+ * Remove the record from its block then rebalance the tree.
+ * Return 0 for error, 1 for done, 2 to go on to the next level.
+ */
+STATIC int /* error */
+xfs_btree_delrec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* level removing record from */
+ int *stat) /* fail/done/go-on */
+{
+ struct xfs_btree_block *block; /* btree block */
+ union xfs_btree_ptr cptr; /* current block ptr */
+ struct xfs_buf *bp; /* buffer for block */
+ int error; /* error return value */
+ int i; /* loop counter */
+ union xfs_btree_key key; /* storage for keyp */
+ union xfs_btree_key *keyp = &key; /* passed to the next level */
+ union xfs_btree_ptr lptr; /* left sibling block ptr */
+ struct xfs_buf *lbp; /* left buffer pointer */
+ struct xfs_btree_block *left; /* left btree block */
+ int lrecs = 0; /* left record count */
+ int ptr; /* key/record index */
+ union xfs_btree_ptr rptr; /* right sibling block ptr */
+ struct xfs_buf *rbp; /* right buffer pointer */
+ struct xfs_btree_block *right; /* right btree block */
+ struct xfs_btree_block *rrblock; /* right-right btree block */
+ struct xfs_buf *rrbp; /* right-right buffer pointer */
+ int rrecs = 0; /* right record count */
+ struct xfs_btree_cur *tcur; /* temporary btree cursor */
+ int numrecs; /* temporary numrec count */
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+ XFS_BTREE_TRACE_ARGI(cur, level);
+
+ tcur = NULL;
+
+ /* Get the index of the entry being deleted, check for nothing there. */
+ ptr = cur->bc_ptrs[level];
+ if (ptr == 0) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ /* Get the buffer & block containing the record or key/ptr. */
+ block = xfs_btree_get_block(cur, level, &bp);
+ numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+ goto error0;
+#endif
+
+ /* Fail if we're off the end of the block. */
+ if (ptr > numrecs) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ XFS_BTREE_STATS_INC(cur, delrec);
+ XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
+
+ /* Excise the entries being deleted. */
+ if (level > 0) {
+ /* It's a nonleaf. operate on keys and ptrs */
+ union xfs_btree_key *lkp;
+ union xfs_btree_ptr *lpp;
+
+ lkp = xfs_btree_key_addr(cur, ptr + 1, block);
+ lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
+
+#ifdef DEBUG
+ for (i = 0; i < numrecs - ptr; i++) {
+ error = xfs_btree_check_ptr(cur, lpp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+
+ if (ptr < numrecs) {
+ xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
+ xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
+ xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
+ xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
+ }
+
+ /*
+ * If it's the first record in the block, we'll need to pass a
+ * key up to the next level (updkey).
+ */
+ if (ptr == 1)
+ keyp = xfs_btree_key_addr(cur, 1, block);
+ } else {
+ /* It's a leaf. operate on records */
+ if (ptr < numrecs) {
+ xfs_btree_shift_recs(cur,
+ xfs_btree_rec_addr(cur, ptr + 1, block),
+ -1, numrecs - ptr);
+ xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
+ }
+
+ /*
+ * If it's the first record in the block, we'll need a key
+ * structure to pass up to the next level (updkey).
+ */
+ if (ptr == 1) {
+ cur->bc_ops->init_key_from_rec(&key,
+ xfs_btree_rec_addr(cur, 1, block));
+ keyp = &key;
+ }
+ }
+
+ /*
+ * Decrement and log the number of entries in the block.
+ */
+ xfs_btree_set_numrecs(block, --numrecs);
+ xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
+
+ /*
+ * If we are tracking the last record in the tree and
+ * we are at the far right edge of the tree, update it.
+ */
+ if (xfs_btree_is_lastrec(cur, block, level)) {
+ cur->bc_ops->update_lastrec(cur, block, NULL,
+ ptr, LASTREC_DELREC);
+ }
+
+ /*
+ * We're at the root level. First, shrink the root block in-memory.
+ * Try to get rid of the next level down. If we can't then there's
+ * nothing left to do.
+ */
+ if (level == cur->bc_nlevels - 1) {
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+ xfs_iroot_realloc(cur->bc_private.b.ip, -1,
+ cur->bc_private.b.whichfork);
+
+ error = xfs_btree_kill_iroot(cur);
+ if (error)
+ goto error0;
+
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ *stat = 1;
+ return 0;
+ }
+
+ /*
+ * If this is the root level, and there's only one entry left,
+ * and it's NOT the leaf level, then we can get rid of this
+ * level.
+ */
+ if (numrecs == 1 && level > 0) {
+ union xfs_btree_ptr *pp;
+ /*
+ * pp is still set to the first pointer in the block.
+ * Make it the new root of the btree.
+ */
+ pp = xfs_btree_ptr_addr(cur, 1, block);
+ error = cur->bc_ops->kill_root(cur, bp, level, pp);
+ if (error)
+ goto error0;
+ } else if (level > 0) {
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ }
+ *stat = 1;
+ return 0;
+ }
+
+ /*
+ * If we deleted the leftmost entry in the block, update the
+ * key values above us in the tree.
+ */
+ if (ptr == 1) {
+ error = xfs_btree_updkey(cur, keyp, level + 1);
+ if (error)
+ goto error0;
+ }
+
+ /*
+ * If the number of records remaining in the block is at least
+ * the minimum, we're done.
+ */
+ if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ return 0;
+ }
+
+ /*
+ * Otherwise, we have to move some records around to keep the
+ * tree balanced. Look at the left and right sibling blocks to
+ * see if we can re-balance by moving only one record.
+ */
+ xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+ xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
+
+ if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+ /*
+ * One child of root, need to get a chance to copy its contents
+ * into the root and delete it. Can't go up to next level,
+ * there's nothing to delete there.
+ */
+ if (xfs_btree_ptr_is_null(cur, &rptr) &&
+ xfs_btree_ptr_is_null(cur, &lptr) &&
+ level == cur->bc_nlevels - 2) {
+ error = xfs_btree_kill_iroot(cur);
+ if (!error)
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ return 0;
+ }
+ }
+
+ ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
+ !xfs_btree_ptr_is_null(cur, &lptr));
+
+ /*
+ * Duplicate the cursor so our btree manipulations here won't
+ * disrupt the next level up.
+ */
+ error = xfs_btree_dup_cursor(cur, &tcur);
+ if (error)
+ goto error0;
+
+ /*
+ * If there's a right sibling, see if it's ok to shift an entry
+ * out of it.
+ */
+ if (!xfs_btree_ptr_is_null(cur, &rptr)) {
+ /*
+ * Move the temp cursor to the last entry in the next block.
+ * Actually any entry but the first would suffice.
+ */
+ i = xfs_btree_lastrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ error = xfs_btree_increment(tcur, level, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ i = xfs_btree_lastrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ /* Grab a pointer to the block. */
+ right = xfs_btree_get_block(tcur, level, &rbp);
+#ifdef DEBUG
+ error = xfs_btree_check_block(tcur, right, level, rbp);
+ if (error)
+ goto error0;
+#endif
+ /* Grab the current block number, for future use. */
+ xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
+
+ /*
+ * If right block is full enough so that removing one entry
+ * won't make it too empty, and left-shifting an entry out
+ * of right to us works, we're done.
+ */
+ if (xfs_btree_get_numrecs(right) - 1 >=
+ cur->bc_ops->get_minrecs(tcur, level)) {
+ error = xfs_btree_lshift(tcur, level, &i);
+ if (error)
+ goto error0;
+ if (i) {
+ ASSERT(xfs_btree_get_numrecs(block) >=
+ cur->bc_ops->get_minrecs(tcur, level));
+
+ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+ tcur = NULL;
+
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ return 0;
+ }
+ }
+
+ /*
+ * Otherwise, grab the number of records in right for
+ * future reference, and fix up the temp cursor to point
+ * to our block again (last record).
+ */
+ rrecs = xfs_btree_get_numrecs(right);
+ if (!xfs_btree_ptr_is_null(cur, &lptr)) {
+ i = xfs_btree_firstrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ error = xfs_btree_decrement(tcur, level, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+ }
+ }
+
+ /*
+ * If there's a left sibling, see if it's ok to shift an entry
+ * out of it.
+ */
+ if (!xfs_btree_ptr_is_null(cur, &lptr)) {
+ /*
+ * Move the temp cursor to the first entry in the
+ * previous block.
+ */
+ i = xfs_btree_firstrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ error = xfs_btree_decrement(tcur, level, &i);
+ if (error)
+ goto error0;
+ i = xfs_btree_firstrec(tcur, level);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+ /* Grab a pointer to the block. */
+ left = xfs_btree_get_block(tcur, level, &lbp);
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, left, level, lbp);
+ if (error)
+ goto error0;
+#endif
+ /* Grab the current block number, for future use. */
+ xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
+
+ /*
+ * If left block is full enough so that removing one entry
+ * won't make it too empty, and right-shifting an entry out
+ * of left to us works, we're done.
+ */
+ if (xfs_btree_get_numrecs(left) - 1 >=
+ cur->bc_ops->get_minrecs(tcur, level)) {
+ error = xfs_btree_rshift(tcur, level, &i);
+ if (error)
+ goto error0;
+ if (i) {
+ ASSERT(xfs_btree_get_numrecs(block) >=
+ cur->bc_ops->get_minrecs(tcur, level));
+ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+ tcur = NULL;
+ if (level == 0)
+ cur->bc_ptrs[0]++;
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+ }
+ }
+
+ /*
+ * Otherwise, grab the number of records in right for
+ * future reference.
+ */
+ lrecs = xfs_btree_get_numrecs(left);
+ }
+
+ /* Delete the temp cursor, we're done with it. */
+ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+ tcur = NULL;
+
+ /* If here, we need to do a join to keep the tree balanced. */
+ ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
+
+ if (!xfs_btree_ptr_is_null(cur, &lptr) &&
+ lrecs + xfs_btree_get_numrecs(block) <=
+ cur->bc_ops->get_maxrecs(cur, level)) {
+ /*
+ * Set "right" to be the starting block,
+ * "left" to be the left neighbor.
+ */
+ rptr = cptr;
+ right = block;
+ rbp = bp;
+ error = xfs_btree_read_buf_block(cur, &lptr, level,
+ 0, &left, &lbp);
+ if (error)
+ goto error0;
+
+ /*
+ * If that won't work, see if we can join with the right neighbor block.
+ */
+ } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
+ rrecs + xfs_btree_get_numrecs(block) <=
+ cur->bc_ops->get_maxrecs(cur, level)) {
+ /*
+ * Set "left" to be the starting block,
+ * "right" to be the right neighbor.
+ */
+ lptr = cptr;
+ left = block;
+ lbp = bp;
+ error = xfs_btree_read_buf_block(cur, &rptr, level,
+ 0, &right, &rbp);
+ if (error)
+ goto error0;
+
+ /*
+ * Otherwise, we can't fix the imbalance.
+ * Just return. This is probably a logic error, but it's not fatal.
+ */
+ } else {
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ return 0;
+ }
+
+ rrecs = xfs_btree_get_numrecs(right);
+ lrecs = xfs_btree_get_numrecs(left);
+
+ /*
+ * We're now going to join "left" and "right" by moving all the stuff
+ * in "right" to "left" and deleting "right".
+ */
+ XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+ if (level > 0) {
+ /* It's a non-leaf. Move keys and pointers. */
+ union xfs_btree_key *lkp; /* left btree key */
+ union xfs_btree_ptr *lpp; /* left address pointer */
+ union xfs_btree_key *rkp; /* right btree key */
+ union xfs_btree_ptr *rpp; /* right address pointer */
+
+ lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
+ lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
+ rkp = xfs_btree_key_addr(cur, 1, right);
+ rpp = xfs_btree_ptr_addr(cur, 1, right);
+#ifdef DEBUG
+ for (i = 1; i < rrecs; i++) {
+ error = xfs_btree_check_ptr(cur, rpp, i, level);
+ if (error)
+ goto error0;
+ }
+#endif
+ xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
+ xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
+
+ xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
+ xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
+ } else {
+ /* It's a leaf. Move records. */
+ union xfs_btree_rec *lrp; /* left record pointer */
+ union xfs_btree_rec *rrp; /* right record pointer */
+
+ lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
+ rrp = xfs_btree_rec_addr(cur, 1, right);
+
+ xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
+ xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
+ }
+
+ XFS_BTREE_STATS_INC(cur, join);
+
+ /*
+ * Fix up the the number of records and right block pointer in the
+ * surviving block, and log it.
+ */
+ xfs_btree_set_numrecs(left, lrecs + rrecs);
+ xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
+ xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
+ xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+
+ /* If there is a right sibling, point it to the remaining block. */
+ xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
+ if (!xfs_btree_ptr_is_null(cur, &cptr)) {
+ error = xfs_btree_read_buf_block(cur, &cptr, level,
+ 0, &rrblock, &rrbp);
+ if (error)
+ goto error0;
+ xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
+ xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+ }
+
+ /* Free the deleted block. */
+ error = cur->bc_ops->free_block(cur, rbp);
+ if (error)
+ goto error0;
+ XFS_BTREE_STATS_INC(cur, free);
+
+ /*
+ * If we joined with the left neighbor, set the buffer in the
+ * cursor to the left block, and fix up the index.
+ */
+ if (bp != lbp) {
+ cur->bc_bufs[level] = lbp;
+ cur->bc_ptrs[level] += lrecs;
+ cur->bc_ra[level] = 0;
+ }
+ /*
+ * If we joined with the right neighbor and there's a level above
+ * us, increment the cursor at that level.
+ */
+ else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
+ (level + 1 < cur->bc_nlevels)) {
+ error = xfs_btree_increment(cur, level + 1, &i);
+ if (error)
+ goto error0;
+ }
+
+ /*
+ * Readjust the ptr at this level if it's not a leaf, since it's
+ * still pointing at the deletion point, which makes the cursor
+ * inconsistent. If this makes the ptr 0, the caller fixes it up.
+ * We can't use decrement because it would change the next level up.
+ */
+ if (level > 0)
+ cur->bc_ptrs[level]--;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ /* Return value means the next level up has something to do. */
+ *stat = 2;
+ return 0;
+
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (tcur)
+ xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int /* error */
+xfs_btree_delete(
+ struct xfs_btree_cur *cur,
+ int *stat) /* success/failure */
+{
+ int error; /* error return value */
+ int level;
+ int i;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ /*
+ * Go up the tree, starting at leaf level.
+ *
+ * If 2 is returned then a join was done; go to the next level.
+ * Otherwise we are done.
+ */
+ for (level = 0, i = 2; i == 2; level++) {
+ error = xfs_btree_delrec(cur, level, &i);
+ if (error)
+ goto error0;
+ }
+
+ if (i == 0) {
+ for (level = 1; level < cur->bc_nlevels; level++) {
+ if (cur->bc_ptrs[level] == 0) {
+ error = xfs_btree_decrement(cur, level, &i);
+ if (error)
+ goto error0;
+ break;
+ }
+ }
+ }
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = i;
+ return 0;
+error0:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int /* error */
+xfs_btree_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ union xfs_btree_rec **recp, /* output: btree record */
+ int *stat) /* output: success/failure */
+{
+ struct xfs_btree_block *block; /* btree block */
+ struct xfs_buf *bp; /* buffer pointer */
+ int ptr; /* record number */
+#ifdef DEBUG
+ int error; /* error return value */
+#endif
+
+ ptr = cur->bc_ptrs[0];
+ block = xfs_btree_get_block(cur, 0, &bp);
+
+#ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, 0, bp);
+ if (error)
+ return error;
+#endif
+
+ /*
+ * Off the right end or left end, return failure.
+ */
+ if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
+ *stat = 0;
+ return 0;
+ }
+
+ /*
+ * Point to the record and extract its data.
+ */
+ *recp = xfs_btree_rec_addr(cur, ptr, block);
+ *stat = 1;
+ return 0;
+}
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 1f528a2a3754..789fffdf8b2f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -39,39 +39,19 @@ extern kmem_zone_t *xfs_btree_cur_zone;
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
/*
- * Short form header: space allocation btrees.
- */
-typedef struct xfs_btree_sblock {
- __be32 bb_magic; /* magic number for block type */
- __be16 bb_level; /* 0 is a leaf */
- __be16 bb_numrecs; /* current # of data records */
- __be32 bb_leftsib; /* left sibling block or NULLAGBLOCK */
- __be32 bb_rightsib; /* right sibling block or NULLAGBLOCK */
-} xfs_btree_sblock_t;
-
-/*
- * Long form header: bmap btrees.
- */
-typedef struct xfs_btree_lblock {
- __be32 bb_magic; /* magic number for block type */
- __be16 bb_level; /* 0 is a leaf */
- __be16 bb_numrecs; /* current # of data records */
- __be64 bb_leftsib; /* left sibling block or NULLDFSBNO */
- __be64 bb_rightsib; /* right sibling block or NULLDFSBNO */
-} xfs_btree_lblock_t;
-
-/*
- * Combined header and structure, used by common code.
+ * Generic btree header.
+ *
+ * This is a comination of the actual format used on disk for short and long
+ * format btrees. The first three fields are shared by both format, but
+ * the pointers are different and should be used with care.
+ *
+ * To get the size of the actual short or long form headers please use
+ * the size macros below. Never use sizeof(xfs_btree_block).
*/
-typedef struct xfs_btree_hdr
-{
+struct xfs_btree_block {
__be32 bb_magic; /* magic number for block type */
__be16 bb_level; /* 0 is a leaf */
__be16 bb_numrecs; /* current # of data records */
-} xfs_btree_hdr_t;
-
-typedef struct xfs_btree_block {
- xfs_btree_hdr_t bb_h; /* header */
union {
struct {
__be32 bb_leftsib;
@@ -82,7 +62,36 @@ typedef struct xfs_btree_block {
__be64 bb_rightsib;
} l; /* long form pointers */
} bb_u; /* rest */
-} xfs_btree_block_t;
+};
+
+#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
+#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
+
+
+/*
+ * Generic key, ptr and record wrapper structures.
+ *
+ * These are disk format structures, and are converted where necessary
+ * by the btree specific code that needs to interpret them.
+ */
+union xfs_btree_ptr {
+ __be32 s; /* short form ptr */
+ __be64 l; /* long form ptr */
+};
+
+union xfs_btree_key {
+ xfs_bmbt_key_t bmbt;
+ xfs_bmdr_key_t bmbr; /* bmbt root block */
+ xfs_alloc_key_t alloc;
+ xfs_inobt_key_t inobt;
+};
+
+union xfs_btree_rec {
+ xfs_bmbt_rec_t bmbt;
+ xfs_bmdr_rec_t bmbr; /* bmbt root block */
+ xfs_alloc_rec_t alloc;
+ xfs_inobt_rec_t inobt;
+};
/*
* For logging record fields.
@@ -96,46 +105,131 @@ typedef struct xfs_btree_block {
#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
/*
- * Boolean to select which form of xfs_btree_block_t.bb_u to use.
- */
-#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP)
-
-/*
* Magic numbers for btree blocks.
*/
extern const __uint32_t xfs_magics[];
/*
- * Maximum and minimum records in a btree block.
- * Given block size, type prefix, and leaf flag (0 or 1).
- * The divisor below is equivalent to lf ? (e1) : (e2) but that produces
- * compiler warnings.
- */
-#define XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) \
- ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \
- (((lf) * (uint)sizeof(t ## _rec_t)) + \
- ((1 - (lf)) * \
- ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t))))))
-#define XFS_BTREE_BLOCK_MINRECS(bsz,t,lf) \
- (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2)
-
-/*
- * Record, key, and pointer address calculation macros.
- * Given block size, type prefix, block pointer, and index of requested entry
- * (first entry numbered 1).
- */
-#define XFS_BTREE_REC_ADDR(t,bb,i) \
- ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \
- ((i) - 1) * sizeof(t ## _rec_t)))
-#define XFS_BTREE_KEY_ADDR(t,bb,i) \
- ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \
- ((i) - 1) * sizeof(t ## _key_t)))
-#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \
- ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \
- (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t)))
+ * Generic stats interface
+ */
+#define __XFS_BTREE_STATS_INC(type, stat) \
+ XFS_STATS_INC(xs_ ## type ## _2_ ## stat)
+#define XFS_BTREE_STATS_INC(cur, stat) \
+do { \
+ switch (cur->bc_btnum) { \
+ case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \
+ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
+ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
+ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
+ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
+ } \
+} while (0)
+
+#define __XFS_BTREE_STATS_ADD(type, stat, val) \
+ XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val)
+#define XFS_BTREE_STATS_ADD(cur, stat, val) \
+do { \
+ switch (cur->bc_btnum) { \
+ case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \
+ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
+ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
+ case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
+ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
+ } \
+} while (0)
#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */
+struct xfs_btree_ops {
+ /* size of the key and record structures */
+ size_t key_len;
+ size_t rec_len;
+
+ /* cursor operations */
+ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *);
+ void (*update_cursor)(struct xfs_btree_cur *src,
+ struct xfs_btree_cur *dst);
+
+ /* update btree root pointer */
+ void (*set_root)(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *nptr, int level_change);
+ int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
+ int level, union xfs_btree_ptr *newroot);
+
+ /* block allocation / freeing */
+ int (*alloc_block)(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start_bno,
+ union xfs_btree_ptr *new_bno,
+ int length, int *stat);
+ int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
+
+ /* update last record information */
+ void (*update_lastrec)(struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ union xfs_btree_rec *rec,
+ int ptr, int reason);
+
+ /* records in block/level */
+ int (*get_minrecs)(struct xfs_btree_cur *cur, int level);
+ int (*get_maxrecs)(struct xfs_btree_cur *cur, int level);
+
+ /* records on disk. Matter for the root in inode case. */
+ int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level);
+
+ /* init values of btree structures */
+ void (*init_key_from_rec)(union xfs_btree_key *key,
+ union xfs_btree_rec *rec);
+ void (*init_rec_from_key)(union xfs_btree_key *key,
+ union xfs_btree_rec *rec);
+ void (*init_rec_from_cur)(struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec);
+ void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr);
+
+ /* difference between key value and cursor value */
+ __int64_t (*key_diff)(struct xfs_btree_cur *cur,
+ union xfs_btree_key *key);
+
+#ifdef DEBUG
+ /* check that k1 is lower than k2 */
+ int (*keys_inorder)(struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2);
+
+ /* check that r1 is lower than r2 */
+ int (*recs_inorder)(struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2);
+#endif
+
+ /* btree tracing */
+#ifdef XFS_BTREE_TRACE
+ void (*trace_enter)(struct xfs_btree_cur *, const char *,
+ char *, int, int, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t,
+ __psunsigned_t, __psunsigned_t);
+ void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
+ __uint64_t *, __uint64_t *);
+ void (*trace_key)(struct xfs_btree_cur *,
+ union xfs_btree_key *, __uint64_t *,
+ __uint64_t *);
+ void (*trace_record)(struct xfs_btree_cur *,
+ union xfs_btree_rec *, __uint64_t *,
+ __uint64_t *, __uint64_t *);
+#endif
+};
+
+/*
+ * Reasons for the update_lastrec method to be called.
+ */
+#define LASTREC_UPDATE 0
+#define LASTREC_INSREC 1
+#define LASTREC_DELREC 2
+
+
/*
* Btree cursor structure.
* This collects all information needed by the btree code in one place.
@@ -144,6 +238,8 @@ typedef struct xfs_btree_cur
{
struct xfs_trans *bc_tp; /* transaction we're in, if any */
struct xfs_mount *bc_mp; /* file system mount struct */
+ const struct xfs_btree_ops *bc_ops;
+ uint bc_flags; /* btree features - below */
union {
xfs_alloc_rec_incore_t a;
xfs_bmbt_irec_t b;
@@ -175,94 +271,40 @@ typedef struct xfs_btree_cur
} bc_private; /* per-btree type data */
} xfs_btree_cur_t;
+/* cursor flags */
+#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
+#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
+#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
+
+
#define XFS_BTREE_NOERROR 0
#define XFS_BTREE_ERROR 1
/*
* Convert from buffer to btree block header.
*/
-#define XFS_BUF_TO_BLOCK(bp) ((xfs_btree_block_t *)XFS_BUF_PTR(bp))
-#define XFS_BUF_TO_LBLOCK(bp) ((xfs_btree_lblock_t *)XFS_BUF_PTR(bp))
-#define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp))
-#ifdef __KERNEL__
-
-#ifdef DEBUG
/*
- * Debug routine: check that block header is ok.
+ * Check that block header is ok.
*/
-void
+int
xfs_btree_check_block(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_block_t *block, /* generic btree block pointer */
- int level, /* level of the btree block */
- struct xfs_buf *bp); /* buffer containing block, if any */
-
-/*
- * Debug routine: check that keys are in the right order.
- */
-void
-xfs_btree_check_key(
- xfs_btnum_t btnum, /* btree identifier */
- void *ak1, /* pointer to left (lower) key */
- void *ak2); /* pointer to right (higher) key */
-
-/*
- * Debug routine: check that records are in the right order.
- */
-void
-xfs_btree_check_rec(
- xfs_btnum_t btnum, /* btree identifier */
- void *ar1, /* pointer to left (lower) record */
- void *ar2); /* pointer to right (higher) record */
-#else
-#define xfs_btree_check_block(a,b,c,d)
-#define xfs_btree_check_key(a,b,c)
-#define xfs_btree_check_rec(a,b,c)
-#endif /* DEBUG */
-
-/*
- * Checking routine: check that long form block header is ok.
- */
-int /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_lblock_t *block, /* btree long form block pointer */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ struct xfs_btree_block *block, /* generic btree block pointer */
int level, /* level of the btree block */
struct xfs_buf *bp); /* buffer containing block, if any */
/*
- * Checking routine: check that (long) pointer is ok.
+ * Check that (long) pointer is ok.
*/
int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
- xfs_btree_cur_t *cur, /* btree cursor */
+ struct xfs_btree_cur *cur, /* btree cursor */
xfs_dfsbno_t ptr, /* btree block disk address */
int level); /* btree block level */
-#define xfs_btree_check_lptr_disk(cur, ptr, level) \
- xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level)
-
-/*
- * Checking routine: check that short form block header is ok.
- */
-int /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_sblock_t *block, /* btree short form block pointer */
- int level, /* level of the btree block */
- struct xfs_buf *bp); /* buffer containing block */
-
-/*
- * Checking routine: check that (short) pointer is ok.
- */
-int /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sptr(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_agblock_t ptr, /* btree block disk address */
- int level); /* btree block level */
-
/*
* Delete the btree cursor.
*/
@@ -281,15 +323,6 @@ xfs_btree_dup_cursor(
xfs_btree_cur_t **ncur);/* output cursor */
/*
- * Change the cursor to point to the first record in the current block
- * at the given level. Other levels are unaffected.
- */
-int /* success=1, failure=0 */
-xfs_btree_firstrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level); /* level to change */
-
-/*
* Get a buffer for the block, return it with no data read.
* Long-form addressing.
*/
@@ -313,20 +346,6 @@ xfs_btree_get_bufs(
uint lock); /* lock flags for get_buf */
/*
- * Allocate a new btree cursor.
- * The cursor is either for allocation (A) or bmap (B).
- */
-xfs_btree_cur_t * /* new btree cursor */
-xfs_btree_init_cursor(
- struct xfs_mount *mp, /* file system mount point */
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_buf *agbp, /* (A only) buffer for agf structure */
- xfs_agnumber_t agno, /* (A only) allocation group number */
- xfs_btnum_t btnum, /* btree identifier */
- struct xfs_inode *ip, /* (B only) inode owning the btree */
- int whichfork); /* (B only) data/attr fork */
-
-/*
* Check for the cursor referring to the last block at the given level.
*/
int /* 1=is last block, 0=not last block */
@@ -335,15 +354,6 @@ xfs_btree_islastblock(
int level); /* level to check */
/*
- * Change the cursor to point to the last record in the current block
- * at the given level. Other levels are unaffected.
- */
-int /* success=1, failure=0 */
-xfs_btree_lastrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level); /* level to change */
-
-/*
* Compute first and last byte offsets for the fields given.
* Interprets the offsets table, which contains struct field offsets.
*/
@@ -404,39 +414,53 @@ xfs_btree_reada_bufs(
xfs_extlen_t count); /* count of filesystem blocks */
/*
- * Read-ahead btree blocks, at the given level.
- * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
+ * Set the buffer for level "lev" in the cursor to bp, releasing
+ * any previous buffer.
*/
-int /* readahead block count */
-xfs_btree_readahead_core(
+void
+xfs_btree_setbuf(
xfs_btree_cur_t *cur, /* btree cursor */
int lev, /* level in btree */
- int lr); /* left/right bits */
+ struct xfs_buf *bp); /* new buffer to set */
-static inline int /* readahead block count */
-xfs_btree_readahead(
- xfs_btree_cur_t *cur, /* btree cursor */
- int lev, /* level in btree */
- int lr) /* left/right bits */
-{
- if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
- return 0;
- return xfs_btree_readahead_core(cur, lev, lr);
-}
+/*
+ * Common btree core entry points.
+ */
+int xfs_btree_increment(struct xfs_btree_cur *, int, int *);
+int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
+int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
+int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
+int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
+int xfs_btree_kill_iroot(struct xfs_btree_cur *);
+int xfs_btree_insert(struct xfs_btree_cur *, int *);
+int xfs_btree_delete(struct xfs_btree_cur *, int *);
+int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
+/*
+ * Internal btree helpers also used by xfs_bmap.c.
+ */
+void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
+void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int);
/*
- * Set the buffer for level "lev" in the cursor to bp, releasing
- * any previous buffer.
+ * Helpers.
*/
-void
-xfs_btree_setbuf(
- xfs_btree_cur_t *cur, /* btree cursor */
- int lev, /* level in btree */
- struct xfs_buf *bp); /* new buffer to set */
+static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
+{
+ return be16_to_cpu(block->bb_numrecs);
+}
+
+static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
+ __uint16_t numrecs)
+{
+ block->bb_numrecs = cpu_to_be16(numrecs);
+}
-#endif /* __KERNEL__ */
+static inline int xfs_btree_get_level(struct xfs_btree_block *block)
+{
+ return be16_to_cpu(block->bb_level);
+}
/*
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
new file mode 100644
index 000000000000..44ff942a0fda
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
+
+STATIC void
+xfs_btree_trace_ptr(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr ptr,
+ __psunsigned_t *high,
+ __psunsigned_t *low)
+{
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ __u64 val = be64_to_cpu(ptr.l);
+ *high = val >> 32;
+ *low = (int)val;
+ } else {
+ *high = 0;
+ *low = be32_to_cpu(ptr.s);
+ }
+}
+
+/*
+ * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
+ */
+void
+xfs_btree_trace_argbi(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *b,
+ int i,
+ int line)
+{
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
+ line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
+ */
+void
+xfs_btree_trace_argbii(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *b,
+ int i0,
+ int i1,
+ int line)
+{
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
+ line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
+ 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for 3 block-length args
+ * and an integer arg.
+ */
+void
+xfs_btree_trace_argfffi(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ xfs_dfiloff_t o,
+ xfs_dfsbno_t b,
+ xfs_dfilblks_t i,
+ int j,
+ int line)
+{
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
+ line,
+ o >> 32, (int)o,
+ b >> 32, (int)b,
+ i >> 32, (int)i,
+ (int)j, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for one integer arg.
+ */
+void
+xfs_btree_trace_argi(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int i,
+ int line)
+{
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
+ line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for int, fsblock, key.
+ */
+void
+xfs_btree_trace_argipk(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int i,
+ union xfs_btree_ptr ptr,
+ union xfs_btree_key *key,
+ int line)
+{
+ __psunsigned_t high, low;
+ __uint64_t l0, l1;
+
+ xfs_btree_trace_ptr(cur, ptr, &high, &low);
+ cur->bc_ops->trace_key(cur, key, &l0, &l1);
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
+ line, i, high, low,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for int, fsblock, rec.
+ */
+void
+xfs_btree_trace_argipr(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int i,
+ union xfs_btree_ptr ptr,
+ union xfs_btree_rec *rec,
+ int line)
+{
+ __psunsigned_t high, low;
+ __uint64_t l0, l1, l2;
+
+ xfs_btree_trace_ptr(cur, ptr, &high, &low);
+ cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
+ line, i,
+ high, low,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ l2 >> 32, (int)l2,
+ 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for int, key.
+ */
+void
+xfs_btree_trace_argik(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int i,
+ union xfs_btree_key *key,
+ int line)
+{
+ __uint64_t l0, l1;
+
+ cur->bc_ops->trace_key(cur, key, &l0, &l1);
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
+ line, i,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ 0, 0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for arguments, for record.
+ */
+void
+xfs_btree_trace_argr(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ int line)
+{
+ __uint64_t l0, l1, l2;
+
+ cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
+ cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
+ line,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ l2 >> 32, (int)l2,
+ 0, 0, 0, 0, 0);
+}
+
+/*
+ * Add a trace buffer entry for the cursor/operation.
+ */
+void
+xfs_btree_trace_cursor(
+ const char *func,
+ struct xfs_btree_cur *cur,
+ int type,
+ int line)
+{
+ __uint32_t s0;
+ __uint64_t l0, l1;
+ char *s;
+
+ switch (type) {
+ case XBT_ARGS:
+ s = "args";
+ break;
+ case XBT_ENTRY:
+ s = "entry";
+ break;
+ case XBT_ERROR:
+ s = "error";
+ break;
+ case XBT_EXIT:
+ s = "exit";
+ break;
+ default:
+ s = "unknown";
+ break;
+ }
+
+ cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
+ cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
+ s0,
+ l0 >> 32, (int)l0,
+ l1 >> 32, (int)l1,
+ (__psunsigned_t)cur->bc_bufs[0],
+ (__psunsigned_t)cur->bc_bufs[1],
+ (__psunsigned_t)cur->bc_bufs[2],
+ (__psunsigned_t)cur->bc_bufs[3],
+ (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
+ (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
+}
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
new file mode 100644
index 000000000000..b3f5eb3c3c6c
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_BTREE_TRACE_H__
+#define __XFS_BTREE_TRACE_H__
+
+struct xfs_btree_cur;
+struct xfs_buf;
+
+
+/*
+ * Trace hooks.
+ * i,j = integer (32 bit)
+ * b = btree block buffer (xfs_buf_t)
+ * p = btree ptr
+ * r = btree record
+ * k = btree key
+ */
+
+#ifdef XFS_BTREE_TRACE
+
+/*
+ * Trace buffer entry types.
+ */
+#define XFS_BTREE_KTRACE_ARGBI 1
+#define XFS_BTREE_KTRACE_ARGBII 2
+#define XFS_BTREE_KTRACE_ARGFFFI 3
+#define XFS_BTREE_KTRACE_ARGI 4
+#define XFS_BTREE_KTRACE_ARGIPK 5
+#define XFS_BTREE_KTRACE_ARGIPR 6
+#define XFS_BTREE_KTRACE_ARGIK 7
+#define XFS_BTREE_KTRACE_ARGR 8
+#define XFS_BTREE_KTRACE_CUR 9
+
+/*
+ * Sub-types for cursor traces.
+ */
+#define XBT_ARGS 0
+#define XBT_ENTRY 1
+#define XBT_ERROR 2
+#define XBT_EXIT 3
+
+void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
+ struct xfs_buf *, int, int);
+void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
+ struct xfs_buf *, int, int, int);
+void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *,
+ xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int);
+void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
+void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
+ union xfs_btree_ptr, union xfs_btree_key *, int);
+void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
+ union xfs_btree_ptr, union xfs_btree_rec *, int);
+void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
+ union xfs_btree_key *, int);
+void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
+ union xfs_btree_rec *, int);
+void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
+
+
+#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */
+extern ktrace_t *xfs_allocbt_trace_buf;
+
+#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */
+extern ktrace_t *xfs_inobt_trace_buf;
+
+#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
+#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
+extern ktrace_t *xfs_bmbt_trace_buf;
+
+
+#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
+ xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
+#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
+ xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
+#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \
+ xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
+#define XFS_BTREE_TRACE_ARGI(c, i) \
+ xfs_btree_trace_argi(__func__, c, i, __LINE__)
+#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
+ xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
+#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \
+ xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
+#define XFS_BTREE_TRACE_ARGIK(c, i, k) \
+ xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
+#define XFS_BTREE_TRACE_ARGR(c, r) \
+ xfs_btree_trace_argr(__func__, c, r, __LINE__)
+#define XFS_BTREE_TRACE_CURSOR(c, t) \
+ xfs_btree_trace_cursor(__func__, c, t, __LINE__)
+#else
+#define XFS_BTREE_TRACE_ARGBI(c, b, i)
+#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
+#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)
+#define XFS_BTREE_TRACE_ARGI(c, i)
+#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
+#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
+#define XFS_BTREE_TRACE_ARGIK(c, i, k)
+#define XFS_BTREE_TRACE_ARGR(c, r)
+#define XFS_BTREE_TRACE_CURSOR(c, t)
+#endif /* XFS_BTREE_TRACE */
+
+#endif /* __XFS_BTREE_TRACE_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 002fc2617c8e..92af4098c7e8 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -375,7 +375,7 @@ xfs_buf_item_unpin(
xfs_buf_log_item_t *bip,
int stale)
{
- xfs_mount_t *mp;
+ struct xfs_ail *ailp;
xfs_buf_t *bp;
int freed;
@@ -387,7 +387,7 @@ xfs_buf_item_unpin(
xfs_buftrace("XFS_UNPIN", bp);
freed = atomic_dec_and_test(&bip->bli_refcount);
- mp = bip->bli_item.li_mountp;
+ ailp = bip->bli_item.li_ailp;
xfs_bunpin(bp);
if (freed && stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
@@ -399,17 +399,17 @@ xfs_buf_item_unpin(