aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-07-17 12:53:50 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:04 -0400
commit: e6dcd2dc9c489108648e2ed543315dd134d50a9a (patch)
tree: cddf6f588b65e28c5feb8bff89b22d8ff70f8a50 /fs/btrfs/transaction.c
parent: Btrfs: Drop some verbose printks (diff)
download: linux-dev-e6dcd2dc9c489108648e2ed543315dd134d50a9a.tar.xz
download: linux-dev-e6dcd2dc9c489108648e2ed543315dd134d50a9a.zip
Btrfs: New data=ordered implementation
The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time.

The new code changes the way data allocations and extents work:

* When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated and inserted into a per-inode rbtree to track the pending extents.

* As each page is written, EXTENT_ORDERED is cleared on the bytes corresponding to that page.

* When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, the previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--fs/btrfs/transaction.c67
1 files changed, 9 insertions, 58 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a8a3cb03de59..86a5acc19ce7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root)
cur_trans->start_time = get_seconds();
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
- btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
extent_io_tree_init(&cur_trans->dirty_pages,
root->fs_info->btree_inode->i_mapping,
GFP_NOFS);
@@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
wake_up(&cur_trans->writer_wait);
if (cur_trans->in_commit && throttle) {
- int ret;
+ DEFINE_WAIT(wait);
mutex_unlock(&root->fs_info->trans_mutex);
- ret = wait_for_commit(root, cur_trans);
- BUG_ON(ret);
+ prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
+ TASK_UNINTERRUPTIBLE);
+ schedule();
+ finish_wait(&root->fs_info->transaction_throttle, &wait);
mutex_lock(&root->fs_info->trans_mutex);
}
@@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
return ret;
}
-int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_transaction *cur_trans = trans->transaction;
- struct inode *inode;
- u64 root_objectid = 0;
- u64 objectid = 0;
- int ret;
-
- atomic_inc(&root->fs_info->throttles);
- while(1) {
- ret = btrfs_find_first_ordered_inode(
- &cur_trans->ordered_inode_tree,
- &root_objectid, &objectid, &inode);
- if (!ret)
- break;
-
- mutex_unlock(&root->fs_info->trans_mutex);
-
- if (S_ISREG(inode->i_mode)) {
- atomic_inc(&BTRFS_I(inode)->ordered_writeback);
- filemap_fdatawrite(inode->i_mapping);
- atomic_dec(&BTRFS_I(inode)->ordered_writeback);
- }
- iput(inode);
-
- mutex_lock(&root->fs_info->trans_mutex);
- }
- while(1) {
- root_objectid = 0;
- objectid = 0;
- ret = btrfs_find_del_first_ordered_inode(
- &cur_trans->ordered_inode_tree,
- &root_objectid, &objectid, &inode);
- if (!ret)
- break;
- mutex_unlock(&root->fs_info->trans_mutex);
-
- if (S_ISREG(inode->i_mode)) {
- atomic_inc(&BTRFS_I(inode)->ordered_writeback);
- filemap_write_and_wait(inode->i_mapping);
- atomic_dec(&BTRFS_I(inode)->ordered_writeback);
- }
- atomic_dec(&inode->i_count);
- iput(inode);
-
- mutex_lock(&root->fs_info->trans_mutex);
- }
- atomic_dec(&root->fs_info->throttles);
- return 0;
-}
-
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_pending_snapshot *pending)
@@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
extent_io_tree_init(pinned_copy,
root->fs_info->btree_inode->i_mapping, GFP_NOFS);
+printk("commit trans %Lu\n", trans->transid);
trans->transaction->in_commit = 1;
cur_trans = trans->transaction;
if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait);
- ret = btrfs_write_ordered_inodes(trans, root);
-
} while (cur_trans->num_writers > 1 ||
(cur_trans->num_joined != joined));
@@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_copy_pinned(root, pinned_copy);
+ wake_up(&root->fs_info->transaction_throttle);
+
mutex_unlock(&root->fs_info->trans_mutex);
ret = btrfs_write_and_wait_transaction(trans, root);
BUG_ON(ret);
@@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
mutex_unlock(&root->fs_info->trans_mutex);
+printk("done commit trans %Lu\n", trans->transid);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
if (root->fs_info->closing) {