diff options
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/commit.c | 14 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 123 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 33 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 45 |
4 files changed, 121 insertions, 94 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 75ea4e9a5cab..4305a1ac808a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -353,7 +353,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) struct buffer_head *descriptor; struct buffer_head **wbuf = journal->j_wbuf; int bufs; - int flags; + int escape; int err; unsigned long long blocknr; ktime_t start_time; @@ -660,10 +660,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ set_bit(BH_JWrite, &jh2bh(jh)->b_state); JBUFFER_TRACE(jh, "ph3: write metadata"); - flags = jbd2_journal_write_metadata_buffer(commit_transaction, + escape = jbd2_journal_write_metadata_buffer(commit_transaction, jh, &wbuf[bufs], blocknr); - if (flags < 0) { - jbd2_journal_abort(journal, flags); + if (escape < 0) { + jbd2_journal_abort(journal, escape); continue; } jbd2_file_log_bh(&io_bufs, wbuf[bufs]); @@ -672,7 +672,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) buffer */ tag_flag = 0; - if (flags & 1) + if (escape) tag_flag |= JBD2_FLAG_ESCAPE; if (!first_tag) tag_flag |= JBD2_FLAG_SAME_UUID; @@ -766,7 +766,7 @@ start_journal_io: if (first_block < journal->j_tail) freed += journal->j_last - journal->j_first; /* Update tail only if we free significant amount of space */ - if (freed < jbd2_journal_get_max_txn_bufs(journal)) + if (freed < journal->j_max_transaction_buffers) update_tail = 0; } J_ASSERT(commit_transaction->t_state == T_COMMIT); @@ -1107,7 +1107,7 @@ restart_loop: commit_transaction->t_state = T_COMMIT_CALLBACK; J_ASSERT(commit_transaction == journal->j_committing_transaction); - journal->j_commit_sequence = commit_transaction->t_tid; + WRITE_ONCE(journal->j_commit_sequence, commit_transaction->t_tid); journal->j_committing_transaction = NULL; commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0d2825c131cc..1ebf2393bfb7 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -220,19 +220,12 @@ loop: * so we don't sleep */ DEFINE_WAIT(wait); - int should_sleep = 1; prepare_to_wait(&journal->j_wait_commit, &wait, TASK_INTERRUPTIBLE); - if (journal->j_commit_sequence != journal->j_commit_request) - should_sleep = 0; transaction = journal->j_running_transaction; - if (transaction && time_after_eq(jiffies, - transaction->t_expires)) - should_sleep = 0; - if (journal->j_flags & JBD2_UNMOUNT) - should_sleep = 0; - if (should_sleep) { + if (transaction == NULL || + time_before(jiffies, transaction->t_expires)) { write_unlock(&journal->j_state_lock); schedule(); write_lock(&journal->j_state_lock); @@ -316,11 +309,8 @@ static void journal_kill_thread(journal_t *journal) * * Return value: * <0: Error - * >=0: Finished OK - * - * On success: - * Bit 0 set == escape performed on the data - * Bit 1 set == buffer copy-out performed (kfree the data after IO) + * =0: Finished OK without escape + * =1: Finished OK with escape */ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, @@ -328,7 +318,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct buffer_head **bh_out, sector_t blocknr) { - int need_copy_out = 0; int done_copy_out = 0; int do_escape = 0; char *mapped_data; @@ -355,7 +344,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, atomic_set(&new_bh->b_count, 1); spin_lock(&jh_in->b_state_lock); -repeat: /* * If a new transaction has already done a buffer copy-out, then * we use that version of the data for the commit. @@ -365,8 +353,8 @@ repeat: new_folio = virt_to_folio(jh_in->b_frozen_data); new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data); } else { - new_folio = jh2bh(jh_in)->b_folio; - new_offset = offset_in_folio(new_folio, jh2bh(jh_in)->b_data); + new_folio = bh_in->b_folio; + new_offset = offset_in_folio(new_folio, bh_in->b_data); } mapped_data = kmap_local_folio(new_folio, new_offset); @@ -383,54 +371,52 @@ repeat: /* * Check for escaping */ - if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) { - need_copy_out = 1; + if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) do_escape = 1; - } kunmap_local(mapped_data); /* * Do we need to do a data copy? */ - if (need_copy_out && !done_copy_out) { + if (do_escape && !done_copy_out) { char *tmp; spin_unlock(&jh_in->b_state_lock); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); if (!tmp) { brelse(new_bh); + free_buffer_head(new_bh); return -ENOMEM; } spin_lock(&jh_in->b_state_lock); if (jh_in->b_frozen_data) { jbd2_free(tmp, bh_in->b_size); - goto repeat; + goto copy_done; } jh_in->b_frozen_data = tmp; memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size); - - new_folio = virt_to_folio(tmp); - new_offset = offset_in_folio(new_folio, tmp); - done_copy_out = 1; - /* * This isn't strictly necessary, as we're using frozen * data for the escaping, but it keeps consistency with * b_frozen_data usage. */ jh_in->b_frozen_triggers = jh_in->b_triggers; + +copy_done: + new_folio = virt_to_folio(jh_in->b_frozen_data); + new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data); + done_copy_out = 1; } /* * Did we need to do an escaping? Now we've done all the * copying, we can finally do so. + * b_frozen_data is from jbd2_alloc() which always provides an + * address from the direct kernels mapping. */ - if (do_escape) { - mapped_data = kmap_local_folio(new_folio, new_offset); - *((unsigned int *)mapped_data) = 0; - kunmap_local(mapped_data); - } + if (do_escape) + *((unsigned int *)jh_in->b_frozen_data) = 0; folio_set_bh(new_bh, new_folio, new_offset); new_bh->b_size = bh_in->b_size; @@ -454,7 +440,7 @@ repeat: set_buffer_shadow(bh_in); spin_unlock(&jh_in->b_state_lock); - return do_escape | (done_copy_out << 1); + return do_escape; } /* @@ -789,17 +775,7 @@ EXPORT_SYMBOL(jbd2_fc_end_commit_fallback); /* Return 1 when transaction with given tid has already committed. */ int jbd2_transaction_committed(journal_t *journal, tid_t tid) { - int ret = 1; - - read_lock(&journal->j_state_lock); - if (journal->j_running_transaction && - journal->j_running_transaction->t_tid == tid) - ret = 0; - if (journal->j_committing_transaction && - journal->j_committing_transaction->t_tid == tid) - ret = 0; - read_unlock(&journal->j_state_lock); - return ret; + return tid_geq(READ_ONCE(journal->j_commit_sequence), tid); } EXPORT_SYMBOL(jbd2_transaction_committed); @@ -1451,6 +1427,48 @@ static int journal_revoke_records_per_block(journal_t *journal) return space / record_size; } +static int jbd2_journal_get_max_txn_bufs(journal_t *journal) +{ + return (journal->j_total_len - journal->j_fc_wbufsize) / 3; +} + +/* + * Base amount of descriptor blocks we reserve for each transaction. + */ +static int jbd2_descriptor_blocks_per_trans(journal_t *journal) +{ + int tag_space = journal->j_blocksize - sizeof(journal_header_t); + int tags_per_block; + + /* Subtract UUID */ + tag_space -= 16; + if (jbd2_journal_has_csum_v2or3(journal)) + tag_space -= sizeof(struct jbd2_journal_block_tail); + /* Commit code leaves a slack space of 16 bytes at the end of block */ + tags_per_block = (tag_space - 16) / journal_tag_bytes(journal); + /* + * Revoke descriptors are accounted separately so we need to reserve + * space for commit block and normal transaction descriptor blocks. + */ + return 1 + DIV_ROUND_UP(jbd2_journal_get_max_txn_bufs(journal), + tags_per_block); +} + +/* + * Initialize number of blocks each transaction reserves for its bookkeeping + * and maximum number of blocks a transaction can use. This needs to be called + * after the journal size and the fastcommit area size are initialized. + */ +static void jbd2_journal_init_transaction_limits(journal_t *journal) +{ + journal->j_revoke_records_per_block = + journal_revoke_records_per_block(journal); + journal->j_transaction_overhead_buffers = + jbd2_descriptor_blocks_per_trans(journal); + journal->j_max_transaction_buffers = + jbd2_journal_get_max_txn_bufs(journal); +} + /* * Load the on-disk journal superblock and read the key fields into the * journal_t. @@ -1492,8 +1510,8 @@ static int journal_load_superblock(journal_t *journal) if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); - journal->j_revoke_records_per_block = - journal_revoke_records_per_block(journal); + /* After journal features are set, we can compute transaction limits */ + jbd2_journal_init_transaction_limits(journal); if (jbd2_has_feature_fast_commit(journal)) { journal->j_fc_last = be32_to_cpu(sb->s_maxlen); @@ -1599,7 +1617,6 @@ static journal_t *journal_init_common(struct block_device *bdev, journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan; journal->j_shrinker->count_objects = jbd2_journal_shrink_count; - journal->j_shrinker->batch = journal->j_max_transaction_buffers; journal->j_shrinker->private_data = journal; shrinker_register(journal->j_shrinker); @@ -1743,8 +1760,6 @@ static int journal_reset(journal_t *journal) journal->j_commit_sequence = journal->j_transaction_sequence - 1; journal->j_commit_request = journal->j_commit_sequence; - journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); - /* * Now that journal recovery is done, turn fast commits off here. This * way, if fast commit was enabled before the crash but if now FS has @@ -2285,8 +2300,6 @@ jbd2_journal_initialize_fast_commit(journal_t *journal) journal->j_fc_first = journal->j_last + 1; journal->j_fc_off = 0; journal->j_free = journal->j_last - journal->j_first; - journal->j_max_transaction_buffers = - jbd2_journal_get_max_txn_bufs(journal); return 0; } @@ -2374,8 +2387,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat, sb->s_feature_ro_compat |= cpu_to_be32(ro); sb->s_feature_incompat |= cpu_to_be32(incompat); unlock_buffer(journal->j_sb_buffer); - journal->j_revoke_records_per_block = - journal_revoke_records_per_block(journal); + jbd2_journal_init_transaction_limits(journal); return 1; #undef COMPAT_FEATURE_ON @@ -2406,8 +2418,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, sb->s_feature_compat &= ~cpu_to_be32(compat); sb->s_feature_ro_compat &= ~cpu_to_be32(ro); sb->s_feature_incompat &= ~cpu_to_be32(incompat); - journal->j_revoke_records_per_block = - journal_revoke_records_per_block(journal); + jbd2_journal_init_transaction_limits(journal); } EXPORT_SYMBOL(jbd2_journal_clear_features); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 1f7664984d6e..667f67342c52 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -19,6 +19,7 @@ #include <linux/errno.h> #include <linux/crc32.h> #include <linux/blkdev.h> +#include <linux/string_choices.h> #endif /* @@ -374,7 +375,7 @@ int jbd2_journal_skip_recovery(journal_t *journal) be32_to_cpu(journal->j_superblock->s_sequence); jbd2_debug(1, "JBD2: ignoring %d transaction%s from the journal.\n", - dropped, (dropped == 1) ? "" : "s"); + dropped, str_plural(dropped)); #endif journal->j_transaction_sequence = ++info.end_transaction; journal->j_head = info.head_block; @@ -443,6 +444,27 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) return provided == cpu_to_be32(calculated); } +static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf) +{ + struct commit_header *h; + __be32 provided; + __u32 calculated; + void *tmpbuf; + + tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL); + if (!tmpbuf) + return false; + + memcpy(tmpbuf, buf, sizeof(struct commit_header)); + h = tmpbuf; + provided = h->h_chksum[0]; + h->h_chksum[0] = 0; + calculated = jbd2_chksum(j, j->j_csum_seed, tmpbuf, j->j_blocksize); + kfree(tmpbuf); + + return provided == cpu_to_be32(calculated); +} + static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, journal_block_tag3_t *tag3, void *buf, __u32 sequence) @@ -810,6 +832,13 @@ static int do_one_pass(journal_t *journal, if (pass == PASS_SCAN && !jbd2_commit_block_csum_verify(journal, bh->b_data)) { + if (jbd2_commit_block_csum_verify_partial( + journal, + bh->b_data)) { + pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n", + next_commit_ID, next_log_block); + goto chksum_ok; + } chksum_error: if (commit_time < last_trans_commit_time) goto ignore_crc_mismatch; @@ -824,6 +853,7 @@ static int do_one_pass(journal_t *journal, } } if (pass == PASS_SCAN) { + chksum_ok: last_trans_commit_time = commit_time; head_block = next_log_block; } @@ -843,6 +873,7 @@ static int do_one_pass(journal_t *journal, next_log_block); need_check_commit_time = true; } + /* If we aren't in the REVOKE pass, then we can * just skip over this block. */ if (pass != PASS_REVOKE) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index cb0b8d6fc0c6..66513c18ca29 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -63,28 +63,6 @@ void jbd2_journal_free_transaction(transaction_t *transaction) } /* - * Base amount of descriptor blocks we reserve for each transaction. - */ -static int jbd2_descriptor_blocks_per_trans(journal_t *journal) -{ - int tag_space = journal->j_blocksize - sizeof(journal_header_t); - int tags_per_block; - - /* Subtract UUID */ - tag_space -= 16; - if (jbd2_journal_has_csum_v2or3(journal)) - tag_space -= sizeof(struct jbd2_journal_block_tail); - /* Commit code leaves a slack space of 16 bytes at the end of block */ - tags_per_block = (tag_space - 16) / journal_tag_bytes(journal); - /* - * Revoke descriptors are accounted separately so we need to reserve - * space for commit block and normal transaction descriptor blocks. - */ - return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers, - tags_per_block); -} - -/* * jbd2_get_transaction: obtain a new transaction_t object. * * Simply initialise a new transaction. Initialize it in @@ -109,7 +87,7 @@ static void jbd2_get_transaction(journal_t *journal, transaction->t_expires = jiffies + journal->j_commit_interval; atomic_set(&transaction->t_updates, 0); atomic_set(&transaction->t_outstanding_credits, - jbd2_descriptor_blocks_per_trans(journal) + + journal->j_transaction_overhead_buffers + atomic_read(&journal->j_reserved_credits)); atomic_set(&transaction->t_outstanding_revokes, 0); atomic_set(&transaction->t_handle_count, 0); @@ -213,6 +191,13 @@ static void sub_reserved_credits(journal_t *journal, int blocks) wake_up(&journal->j_wait_reserved); } +/* Maximum number of blocks for user transaction payload */ +static int jbd2_max_user_trans_buffers(journal_t *journal) +{ + return journal->j_max_transaction_buffers - + journal->j_transaction_overhead_buffers; +} + /* * Wait until we can add credits for handle to the running transaction. Called * with j_state_lock held for reading. Returns 0 if handle joined the running @@ -262,12 +247,12 @@ __must_hold(&journal->j_state_lock) * big to fit this handle? Wait until reserved credits are freed. */ if (atomic_read(&journal->j_reserved_credits) + total > - journal->j_max_transaction_buffers) { + jbd2_max_user_trans_buffers(journal)) { read_unlock(&journal->j_state_lock); jbd2_might_wait_for_commit(journal); wait_event(journal->j_wait_reserved, atomic_read(&journal->j_reserved_credits) + total <= - journal->j_max_transaction_buffers); + jbd2_max_user_trans_buffers(journal)); __acquire(&journal->j_state_lock); /* fake out sparse */ return 1; } @@ -307,14 +292,14 @@ __must_hold(&journal->j_state_lock) needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits); /* We allow at most half of a transaction to be reserved */ - if (needed > journal->j_max_transaction_buffers / 2) { + if (needed > jbd2_max_user_trans_buffers(journal) / 2) { sub_reserved_credits(journal, rsv_blocks); atomic_sub(total, &t->t_outstanding_credits); read_unlock(&journal->j_state_lock); jbd2_might_wait_for_commit(journal); wait_event(journal->j_wait_reserved, atomic_read(&journal->j_reserved_credits) + rsv_blocks - <= journal->j_max_transaction_buffers / 2); + <= jbd2_max_user_trans_buffers(journal) / 2); __acquire(&journal->j_state_lock); /* fake out sparse */ return 1; } @@ -344,12 +329,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle, * size and limit the number of total credits to not exceed maximum * transaction size per operation. */ - if ((rsv_blocks > journal->j_max_transaction_buffers / 2) || - (rsv_blocks + blocks > journal->j_max_transaction_buffers)) { + if (rsv_blocks > jbd2_max_user_trans_buffers(journal) / 2 || + rsv_blocks + blocks > jbd2_max_user_trans_buffers(journal)) { printk(KERN_ERR "JBD2: %s wants too many credits " "credits:%d rsv_credits:%d max:%d\n", current->comm, blocks, rsv_blocks, - journal->j_max_transaction_buffers); + jbd2_max_user_trans_buffers(journal)); WARN_ON(1); return -ENOSPC; } |