aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-25 12:01:49 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-25 12:01:49 -0800
commit97d0bf96a0d0986f466c3ff59f2ace801e33dc69 (patch)
tree34dc9957a14562f70a174c8ae5af322cea5befa9 /fs/btrfs/transaction.c
parentMerge tag 'mtd/for-5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux (diff)
parentbtrfs: drop bdev argument from submit_extent_page (diff)
downloadlinux-dev-97d0bf96a0d0986f466c3ff59f2ace801e33dc69.tar.xz
linux-dev-97d0bf96a0d0986f466c3ff59f2ace801e33dc69.zip
Merge tag 'for-5.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "User visible changes: - new block group profiles: RAID1 with 3- and 4- copies - RAID1 in btrfs has always 2 copies, now add support for 3 and 4 - this is an incompat feature (named RAID1C34) - recommended use of RAID1C3 is replacement of RAID6 profile on metadata, this brings a more reliable resiliency against 2 device loss/damage - support for new checksums - per-filesystem, set at mkfs time - fast hash (crc32c successor): xxhash, 64bit digest - strong hashes (both 256bit): sha256 (slower, FIPS), blake2b (faster) - the blake2b module goes via the crypto tree, btrfs.ko has a soft dependency - speed up lseek, don't take inode locks unnecessarily, this can speed up parallel SEEK_CUR/SEEK_SET/SEEK_END by 80% - send: - allow clone operations within the same file - limit maximum number of sent clone references to avoid slow backref walking - error message improvements: device scan prints process name and PID Core changes: - cleanups - remove unique workqueue helpers, used to provide a way to avoid deadlocks in the workqueue code, now done in a simpler way - remove lots of indirect function calls in compression code - extent IO tree code moved out of extent_io.c - cleanup backup superblock handling at mount time - transaction life cycle documentation and cleanups - locking code cleanups, annotations and documentation - add more cold, const, pure function attributes - removal of unused or redundant struct members or variables - new tree-checker sanity tests - try to detect missing INODE_ITEM, cross-reference checks of DIR_ITEM, DIR_INDEX, INODE_REF, and XATTR_* items - remove own bio scheduling code (used to avoid checksum submissions being stuck behind other IO), replaced by cgroup controller-based code to allow better control and avoid priority inversions in cases where the custom and cgroup scheduling disagreed Fixes: - avoid getting stuck during cyclic writebacks - fix trimming of ranges crossing block group boundaries - fix rename exchange on subvolumes, all involved subvolumes need to be recorded in the transaction" * tag 'for-5.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (137 commits) btrfs: drop bdev argument from submit_extent_page btrfs: remove extent_map::bdev btrfs: drop bio_set_dev where not needed btrfs: get bdev directly from fs_devices in submit_extent_page btrfs: record all roots for rename exchange on a subvol Btrfs: fix block group remaining RO forever after error during device replace btrfs: scrub: Don't check free space before marking a block group RO btrfs: change btrfs_fs_devices::rotating to bool btrfs: change btrfs_fs_devices::seeding to bool btrfs: rename btrfs_block_group_cache btrfs: block-group: Reuse the item key from caller of read_one_block_group() btrfs: block-group: Refactor btrfs_read_block_groups() btrfs: document extent buffer locking btrfs: access eb::blocking_writers according to ACCESS_ONCE policies btrfs: set blocking_writers directly, no increment or decrement btrfs: merge blocking_writers branches in btrfs_tree_read_lock btrfs: drop incompat bit for raid1c34 after last block group is gone btrfs: add incompat for raid1 with 3, 4 copies btrfs: add support for 4-copy replication (raid1c4) btrfs: add support for 3-copy replication (raid1c3) ...
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--fs/btrfs/transaction.c98
1 files changed, 81 insertions, 17 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8624bdee8c5b..cfc08ef9b876 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -24,9 +24,79 @@
#define BTRFS_ROOT_TRANS_TAG 0
+/*
+ * Transaction states and transitions
+ *
+ * No running transaction (fs tree blocks are not modified)
+ * |
+ * | To next stage:
+ * | Call start_transaction() variants. Except btrfs_join_transaction_nostart().
+ * V
+ * Transaction N [[TRANS_STATE_RUNNING]]
+ * |
+ * | New trans handles can be attached to transaction N by calling all
+ * | start_transaction() variants.
+ * |
+ * | To next stage:
+ * | Call btrfs_commit_transaction() on any trans handle attached to
+ * | transaction N
+ * V
+ * Transaction N [[TRANS_STATE_COMMIT_START]]
+ * |
+ * | Will wait for previous running transaction to completely finish if there
+ * | is one
+ * |
+ * | Then one of the following happes:
+ * | - Wait for all other trans handle holders to release.
+ * | The btrfs_commit_transaction() caller will do the commit work.
+ * | - Wait for current transaction to be committed by others.
+ * | Other btrfs_commit_transaction() caller will do the commit work.
+ * |
+ * | At this stage, only btrfs_join_transaction*() variants can attach
+ * | to this running transaction.
+ * | All other variants will wait for current one to finish and attach to
+ * | transaction N+1.
+ * |
+ * | To next stage:
+ * | Caller is chosen to commit transaction N, and all other trans handle
+ * | haven been released.
+ * V
+ * Transaction N [[TRANS_STATE_COMMIT_DOING]]
+ * |
+ * | The heavy lifting transaction work is started.
+ * | From running delayed refs (modifying extent tree) to creating pending
+ * | snapshots, running qgroups.
+ * | In short, modify supporting trees to reflect modifications of subvolume
+ * | trees.
+ * |
+ * | At this stage, all start_transaction() calls will wait for this
+ * | transaction to finish and attach to transaction N+1.
+ * |
+ * | To next stage:
+ * | Until all supporting trees are updated.
+ * V
+ * Transaction N [[TRANS_STATE_UNBLOCKED]]
+ * | Transaction N+1
+ * | All needed trees are modified, thus we only [[TRANS_STATE_RUNNING]]
+ * | need to write them back to disk and update |
+ * | super blocks. |
+ * | |
+ * | At this stage, new transaction is allowed to |
+ * | start. |
+ * | All new start_transaction() calls will be |
+ * | attached to transid N+1. |
+ * | |
+ * | To next stage: |
+ * | Until all tree blocks are super blocks are |
+ * | written to block devices |
+ * V |
+ * Transaction N [[TRANS_STATE_COMPLETED]] V
+ * All tree blocks and super blocks are written. Transaction N+1
+ * This transaction is finished and all its [[TRANS_STATE_COMMIT_START]]
+ * data structures will be cleaned up. | Life goes on
+ */
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_RUNNING] = 0U,
- [TRANS_STATE_BLOCKED] = __TRANS_START,
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
__TRANS_ATTACH |
@@ -63,10 +133,10 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
* discard the physical locations of the block groups.
*/
while (!list_empty(&transaction->deleted_bgs)) {
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
cache = list_first_entry(&transaction->deleted_bgs,
- struct btrfs_block_group_cache,
+ struct btrfs_block_group,
bg_list);
list_del_init(&cache->bg_list);
btrfs_put_block_group_trimming(cache);
@@ -383,7 +453,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
- return (trans->state >= TRANS_STATE_BLOCKED &&
+ return (trans->state >= TRANS_STATE_COMMIT_START &&
trans->state < TRANS_STATE_UNBLOCKED &&
!trans->aborted);
}
@@ -570,7 +640,7 @@ again:
INIT_LIST_HEAD(&h->new_bgs);
smp_mb();
- if (cur_trans->state >= TRANS_STATE_BLOCKED &&
+ if (cur_trans->state >= TRANS_STATE_COMMIT_START &&
may_wait_transaction(fs_info, type)) {
current->journal_info = h;
btrfs_commit_transaction(h);
@@ -659,7 +729,7 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
true);
}
-struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
+struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
BTRFS_RESERVE_NO_FLUSH, true);
@@ -798,7 +868,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
struct btrfs_transaction *cur_trans = trans->transaction;
smp_mb();
- if (cur_trans->state >= TRANS_STATE_BLOCKED ||
+ if (cur_trans->state >= TRANS_STATE_COMMIT_START ||
cur_trans->delayed_refs.flushing)
return 1;
@@ -831,7 +901,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_transaction *cur_trans = trans->transaction;
- int lock = (trans->type != TRANS_JOIN_NOLOCK);
int err = 0;
if (refcount_read(&trans->use_count) > 1) {
@@ -847,13 +916,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_chunk_metadata(trans);
- if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
- if (throttle)
- return btrfs_commit_transaction(trans);
- else
- wake_up_process(info->transaction_kthread);
- }
-
if (trans->type & __TRANS_FREEZABLE)
sb_end_intwrite(info->sb);
@@ -990,7 +1052,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
return werr;
}
-int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
+static int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages)
{
bool errors = false;
@@ -1875,7 +1937,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_group_cache *block_group, *tmp;
+ struct btrfs_block_group *block_group, *tmp;
list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
btrfs_delayed_refs_rsv_release(fs_info, 1);
@@ -1949,6 +2011,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
struct btrfs_transaction *prev_trans = NULL;
int ret;
+ ASSERT(refcount_read(&trans->use_count) == 1);
+
/* Stop the commit early if ->aborted is set */
if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;