aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-07 11:34:19 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-07 11:34:19 -0700
commit9f2e3a53f7ec9ef55e9d01bc29a6285d291c151e (patch)
treec25b0eb20dac1a39a6b55c521b2658dcceb7d532 /fs/btrfs/tree-log.c
parentMerge branch 'stable-fodder' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs (diff)
parentbtrfs: Use kvmalloc for allocating compressed path context (diff)
downloadlinux-dev-9f2e3a53f7ec9ef55e9d01bc29a6285d291c151e.tar.xz
linux-dev-9f2e3a53f7ec9ef55e9d01bc29a6285d291c151e.zip
Merge tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "This time the majority of changes are cleanups, though there's still a number of changes of user interest. User visible changes: - better read time and write checks to catch errors early and before writing data to disk (to catch potential memory corruption on data that get checksummed) - qgroups + metadata relocation: last speed up patch int the series to address the slowness, there should be no overhead comparing balance with and without qgroups - FIEMAP ioctl does not start a transaction unnecessarily, this can result in a speed up and less blocking due to IO - LOGICAL_INO (v1, v2) does not start transaction unnecessarily, this can speed up the mentioned ioctl and scrub as well - fsync on files with many (but not too many) hardlinks is faster, finer decision if the links should be fsynced individually or completely - send tries harder to find ranges to clone - trim/discard will skip unallocated chunks that haven't been touched since the last mount Fixes: - send flushes delayed allocation before start, otherwise it could miss some changes in case of a very recent rw->ro switch of a subvolume - fix fallocate with qgroups that could lead to space accounting underflow, reported as a warning - trim/discard ioctl honours the requested range - starting send and dedupe on a subvolume at the same time will let only one of them succeed, this is to prevent changes that send could miss due to dedupe; both operations are restartable Core changes: - more tree-checker validations, errors reported by fuzzing tools: - device item - inode item - block group profiles - tracepoints for extent buffer locking - async cow preallocates memory to avoid errors happening too deep in the call chain - metadata reservations for delalloc reworked to better adapt in many-writers/low-space scenarios - improved space flushing logic for intense DIO vs buffered workloads - lots of cleanups - removed unused struct members - redundant argument removal - properties and xattrs - extent buffer locking - selftests - use common file type conversions - many-argument functions reduction" * tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (227 commits) btrfs: Use kvmalloc for allocating compressed path context btrfs: Factor out common extent locking code in submit_compressed_extents btrfs: Set io_tree only once in submit_compressed_extents btrfs: Replace clear_extent_bit with unlock_extent btrfs: Make compress_file_range take only struct async_chunk btrfs: Remove fs_info from struct async_chunk btrfs: Rename async_cow to async_chunk btrfs: Preallocate chunks in cow_file_range_async btrfs: reserve delalloc metadata differently btrfs: track DIO bytes in flight btrfs: merge calls of btrfs_setxattr and btrfs_setxattr_trans in btrfs_set_prop btrfs: delete unused function btrfs_set_prop_trans btrfs: start transaction in xattr_handler_set_prop btrfs: drop local copy of inode i_mode btrfs: drop old_fsflags in btrfs_ioctl_setflags btrfs: modify local copy of btrfs_inode flags btrfs: drop useless inode i_flags copy and restore btrfs: start transaction in btrfs_ioctl_setflags() btrfs: export btrfs_set_prop btrfs: refactor btrfs_set_props to validate externally ...
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c289
1 files changed, 225 insertions, 64 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 561884f60d35..6adcd8a2c5c7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -139,7 +139,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
mutex_lock(&root->log_mutex);
if (root->log_root) {
- if (btrfs_need_log_full_commit(fs_info, trans)) {
+ if (btrfs_need_log_full_commit(trans)) {
ret = -EAGAIN;
goto out;
}
@@ -225,6 +225,17 @@ void btrfs_end_log_trans(struct btrfs_root *root)
}
}
+static int btrfs_write_tree_block(struct extent_buffer *buf)
+{
+ return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
+ buf->start + buf->len - 1);
+}
+
+static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
+{
+ filemap_fdatawait_range(buf->pages[0]->mapping,
+ buf->start, buf->start + buf->len - 1);
+}
/*
* the walk control struct is used to pass state down the chain when
@@ -304,7 +315,7 @@ static int process_one_buffer(struct btrfs_root *log,
if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
if (wc->pin && btrfs_header_level(eb) == 0)
- ret = btrfs_exclude_logged_extents(fs_info, eb);
+ ret = btrfs_exclude_logged_extents(eb);
if (wc->write)
btrfs_write_tree_block(eb);
if (wc->wait)
@@ -333,7 +344,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
u32 item_size;
u64 saved_i_size = 0;
@@ -454,10 +464,9 @@ insert:
found_size = btrfs_item_size_nr(path->nodes[0],
path->slots[0]);
if (found_size > item_size)
- btrfs_truncate_item(fs_info, path, item_size, 1);
+ btrfs_truncate_item(path, item_size, 1);
else if (found_size < item_size)
- btrfs_extend_item(fs_info, path,
- item_size - found_size);
+ btrfs_extend_item(path, item_size - found_size);
} else if (ret) {
return ret;
}
@@ -694,9 +703,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
goto out;
if (ins.objectid > 0) {
+ struct btrfs_ref ref = { 0 };
u64 csum_start;
u64 csum_end;
LIST_HEAD(ordered_sums);
+
/*
* is this extent already allocated in the extent
* allocation tree? If so, just add a reference
@@ -704,10 +715,13 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
ins.offset);
if (ret == 0) {
- ret = btrfs_inc_extent_ref(trans, root,
- ins.objectid, ins.offset,
- 0, root->root_key.objectid,
+ btrfs_init_generic_ref(&ref,
+ BTRFS_ADD_DELAYED_REF,
+ ins.objectid, ins.offset, 0);
+ btrfs_init_data_ref(&ref,
+ root->root_key.objectid,
key->objectid, offset);
+ ret = btrfs_inc_extent_ref(trans, &ref);
if (ret)
goto out;
} else {
@@ -2725,7 +2739,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
- clean_tree_block(fs_info, next);
+ btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
} else {
@@ -2809,7 +2823,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
- clean_tree_block(fs_info, next);
+ btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
} else {
@@ -2891,7 +2905,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
- clean_tree_block(fs_info, next);
+ btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
} else {
@@ -3066,7 +3080,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
/* bail out if we need to do a full commit */
- if (btrfs_need_log_full_commit(fs_info, trans)) {
+ if (btrfs_need_log_full_commit(trans)) {
ret = -EAGAIN;
mutex_unlock(&root->log_mutex);
goto out;
@@ -3085,7 +3099,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (ret) {
blk_finish_plug(&plug);
btrfs_abort_transaction(trans, ret);
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
mutex_unlock(&root->log_mutex);
goto out;
}
@@ -3127,7 +3141,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
list_del_init(&root_log_ctx.list);
blk_finish_plug(&plug);
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
if (ret != -ENOSPC) {
btrfs_abort_transaction(trans, ret);
@@ -3173,7 +3187,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* now that we've moved on to the tree of log tree roots,
* check the full commit flag again
*/
- if (btrfs_need_log_full_commit(fs_info, trans)) {
+ if (btrfs_need_log_full_commit(trans)) {
blk_finish_plug(&plug);
btrfs_wait_tree_log_extents(log, mark);
mutex_unlock(&log_root_tree->log_mutex);
@@ -3186,7 +3200,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW);
blk_finish_plug(&plug);
if (ret) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
btrfs_abort_transaction(trans, ret);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
@@ -3196,7 +3210,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_wait_tree_log_extents(log_root_tree,
EXTENT_NEW | EXTENT_DIRTY);
if (ret) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
@@ -3218,7 +3232,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
*/
ret = write_all_supers(fs_info, 1);
if (ret) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
btrfs_abort_transaction(trans, ret);
goto out_wake_log_root;
}
@@ -3422,7 +3436,7 @@ fail:
out_unlock:
mutex_unlock(&dir->log_mutex);
if (ret == -ENOSPC) {
- btrfs_set_log_full_commit(root->fs_info, trans);
+ btrfs_set_log_full_commit(trans);
ret = 0;
} else if (ret < 0)
btrfs_abort_transaction(trans, ret);
@@ -3438,7 +3452,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *inode, u64 dirid)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *log;
u64 index;
int ret;
@@ -3456,7 +3469,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
dirid, &index);
mutex_unlock(&inode->log_mutex);
if (ret == -ENOSPC) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
ret = 0;
} else if (ret < 0 && ret != -ENOENT)
btrfs_abort_transaction(trans, ret);
@@ -5442,7 +5455,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
* Make sure any commits to the log are forced to be full
* commits.
*/
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
ret = true;
}
mutex_unlock(&inode->log_mutex);
@@ -5819,6 +5832,190 @@ out:
return ret;
}
+static int log_new_ancestors(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_key found_key;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+
+ while (true) {
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ const u64 last_committed = fs_info->last_trans_committed;
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ struct btrfs_key search_key;
+ struct inode *inode;
+ int ret = 0;
+
+ btrfs_release_path(path);
+
+ search_key.objectid = found_key.offset;
+ search_key.type = BTRFS_INODE_ITEM_KEY;
+ search_key.offset = 0;
+ inode = btrfs_iget(fs_info->sb, &search_key, root, NULL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (BTRFS_I(inode)->generation > last_committed)
+ ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
+ LOG_INODE_EXISTS,
+ 0, LLONG_MAX, ctx);
+ iput(inode);
+ if (ret)
+ return ret;
+
+ if (search_key.objectid == BTRFS_FIRST_FREE_OBJECTID)
+ break;
+
+ search_key.type = BTRFS_INODE_REF_KEY;
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ return -ENOENT;
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+ if (found_key.objectid != search_key.objectid ||
+ found_key.type != BTRFS_INODE_REF_KEY)
+ return -ENOENT;
+ }
+ return 0;
+}
+
+static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct dentry *parent,
+ struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct dentry *old_parent = NULL;
+ struct super_block *sb = inode->vfs_inode.i_sb;
+ int ret = 0;
+
+ while (true) {
+ if (!parent || d_really_is_negative(parent) ||
+ sb != parent->d_sb)
+ break;
+
+ inode = BTRFS_I(d_inode(parent));
+ if (root != inode->root)
+ break;
+
+ if (inode->generation > fs_info->last_trans_committed) {
+ ret = btrfs_log_inode(trans, root, inode,
+ LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
+ if (ret)
+ break;
+ }
+ if (IS_ROOT(parent))
+ break;
+
+ parent = dget_parent(parent);
+ dput(old_parent);
+ old_parent = parent;
+ }
+ dput(old_parent);
+
+ return ret;
+}
+
+static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct dentry *parent,
+ struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_root *root = inode->root;
+ const u64 ino = btrfs_ino(inode);
+ struct btrfs_path *path;
+ struct btrfs_key search_key;
+ int ret;
+
+ /*
+ * For a single hard link case, go through a fast path that does not
+ * need to iterate the fs/subvolume tree.
+ */
+ if (inode->vfs_inode.i_nlink < 2)
+ return log_new_ancestors_fast(trans, inode, parent, ctx);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ search_key.objectid = ino;
+ search_key.type = BTRFS_INODE_REF_KEY;
+ search_key.offset = 0;
+again:
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret == 0)
+ path->slots[0]++;
+
+ while (true) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ struct btrfs_key found_key;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+ if (found_key.objectid != ino ||
+ found_key.type > BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ /*
+ * Don't deal with extended references because they are rare
+ * cases and too complex to deal with (we would need to keep
+ * track of which subitem we are processing for each item in
+ * this loop, etc). So just return some error to fallback to
+ * a transaction commit.
+ */
+ if (found_key.type == BTRFS_INODE_EXTREF_KEY) {
+ ret = -EMLINK;
+ goto out;
+ }
+
+ /*
+ * Logging ancestors needs to do more searches on the fs/subvol
+ * tree, so it releases the path as needed to avoid deadlocks.
+ * Keep track of the last inode ref key and resume from that key
+ * after logging all new ancestors for the current hard link.
+ */
+ memcpy(&search_key, &found_key, sizeof(search_key));
+
+ ret = log_new_ancestors(trans, root, path, ctx);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+ goto again;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
@@ -5836,11 +6033,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct super_block *sb;
- struct dentry *old_parent = NULL;
int ret = 0;
u64 last_committed = fs_info->last_trans_committed;
bool log_dentries = false;
- struct btrfs_inode *orig_inode = inode;
sb = inode->vfs_inode.i_sb;
@@ -5946,56 +6141,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
* and has a link count of 2.
*/
if (inode->last_unlink_trans > last_committed) {
- ret = btrfs_log_all_parents(trans, orig_inode, ctx);
+ ret = btrfs_log_all_parents(trans, inode, ctx);
if (ret)
goto end_trans;
}
- /*
- * If a new hard link was added to the inode in the current transaction
- * and its link count is now greater than 1, we need to fallback to a
- * transaction commit, otherwise we can end up not logging all its new
- * parents for all the hard links. Here just from the dentry used to
- * fsync, we can not visit the ancestor inodes for all the other hard
- * links to figure out if any is new, so we fallback to a transaction
- * commit (instead of adding a lot of complexity of scanning a btree,
- * since this scenario is not a common use case).
- */
- if (inode->vfs_inode.i_nlink > 1 &&
- inode->last_link_trans > last_committed) {
- ret = -EMLINK;
+ ret = log_all_new_ancestors(trans, inode, parent, ctx);
+ if (ret)
goto end_trans;
- }
- while (1) {
- if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
- break;
-
- inode = BTRFS_I(d_inode(parent));
- if (root != inode->root)
- break;
-
- if (inode->generation > last_committed) {
- ret = btrfs_log_inode(trans, root, inode,
- LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
- if (ret)
- goto end_trans;
- }
- if (IS_ROOT(parent))
- break;
-
- parent = dget_parent(parent);
- dput(old_parent);
- old_parent = parent;
- }
if (log_dentries)
- ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+ ret = log_new_dir_dentries(trans, root, inode, ctx);
else
ret = 0;
end_trans:
- dput(old_parent);
if (ret < 0) {
- btrfs_set_log_full_commit(fs_info, trans);
+ btrfs_set_log_full_commit(trans);
ret = 1;
}