From f5113effc2a2ee6b86a4b345ce557353dcbcfffe Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:01:45 -0400 Subject: jbd2: don't create journal_head for temporary journal buffers When writing metadata to the journal, we create temporary buffer heads for that task. We also attach journal heads to these buffer heads but the only purpose of the journal heads is to keep buffers linked in transaction's BJ_IO list. We remove the need for journal heads by reusing buffer_head's b_assoc_buffers list for that purpose. Also since BJ_IO list is just a temporary list for transaction commit, we use a private list in jbd2_journal_commit_transaction() for that thus removing BJ_IO list from transaction completely. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/jbd2/checkpoint.c') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index c78841ee81cf..2735fef6e55e 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -690,7 +690,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(transaction->t_state == T_FINISHED); J_ASSERT(transaction->t_buffers == NULL); J_ASSERT(transaction->t_forget == NULL); - J_ASSERT(transaction->t_iobuf_list == NULL); J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); -- cgit v1.2.3-59-g8ed1b From e5a120aeb57f40ae568a5ca1dd6ace53d0213582 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:06:01 -0400 Subject: jbd2: remove journal_head from descriptor buffers Similarly as for metadata buffers, also log descriptor buffers don't really need the journal head. So strip it and remove BJ_LogCtl list. 
Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 1 - fs/jbd2/commit.c | 78 ++++++++++++++++++++------------------------------- fs/jbd2/journal.c | 4 +-- fs/jbd2/revoke.c | 49 ++++++++++++++++---------------- fs/jbd2/transaction.c | 6 ---- include/linux/jbd2.h | 19 +++++-------- 6 files changed, 64 insertions(+), 93 deletions(-) (limited to 'fs/jbd2/checkpoint.c') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 2735fef6e55e..65ec076e41f2 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -691,7 +691,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(transaction->t_buffers == NULL); J_ASSERT(transaction->t_forget == NULL); J_ASSERT(transaction->t_shadow_list == NULL); - J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(atomic_read(&transaction->t_updates) == 0); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 57bd2ff97888..7c6f7eea2316 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -85,8 +85,7 @@ nope: __brelse(bh); } -static void jbd2_commit_block_csum_set(journal_t *j, - struct journal_head *descriptor) +static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) { struct commit_header *h; __u32 csum; @@ -94,12 +93,11 @@ static void jbd2_commit_block_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - h = (struct commit_header *)(jh2bh(descriptor)->b_data); + h = (struct commit_header *)(bh->b_data); h->h_chksum_type = 0; h->h_chksum_size = 0; h->h_chksum[0] = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); h->h_chksum[0] = cpu_to_be32(csum); } @@ -116,7 +114,6 @@ static int journal_submit_commit_record(journal_t *journal, struct buffer_head **cbh, 
__u32 crc32_sum) { - struct journal_head *descriptor; struct commit_header *tmp; struct buffer_head *bh; int ret; @@ -127,12 +124,10 @@ static int journal_submit_commit_record(journal_t *journal, if (is_journal_aborted(journal)) return 0; - descriptor = jbd2_journal_get_descriptor_buffer(journal); - if (!descriptor) + bh = jbd2_journal_get_descriptor_buffer(journal); + if (!bh) return 1; - bh = jh2bh(descriptor); - tmp = (struct commit_header *)bh->b_data; tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); @@ -146,9 +141,9 @@ static int journal_submit_commit_record(journal_t *journal, tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; tmp->h_chksum[0] = cpu_to_be32(crc32_sum); } - jbd2_commit_block_csum_set(journal, descriptor); + jbd2_commit_block_csum_set(journal, bh); - JBUFFER_TRACE(descriptor, "submit commit block"); + BUFFER_TRACE(bh, "submit commit block"); lock_buffer(bh); clear_buffer_dirty(bh); set_buffer_uptodate(bh); @@ -180,7 +175,6 @@ static int journal_wait_on_commit_record(journal_t *journal, if (unlikely(!buffer_uptodate(bh))) ret = -EIO; put_bh(bh); /* One for getblk() */ - jbd2_journal_put_journal_head(bh2jh(bh)); return ret; } @@ -321,7 +315,7 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, } static void jbd2_descr_block_csum_set(journal_t *j, - struct journal_head *descriptor) + struct buffer_head *bh) { struct jbd2_journal_block_tail *tail; __u32 csum; @@ -329,12 +323,10 @@ static void jbd2_descr_block_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - tail = (struct jbd2_journal_block_tail *) - (jh2bh(descriptor)->b_data + j->j_blocksize - + tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); tail->t_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); 
tail->t_checksum = cpu_to_be32(csum); } @@ -369,7 +361,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) { struct transaction_stats_s stats; transaction_t *commit_transaction; - struct journal_head *jh, *descriptor; + struct journal_head *jh; + struct buffer_head *descriptor; struct buffer_head **wbuf = journal->j_wbuf; int bufs; int flags; @@ -394,6 +387,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int update_tail; int csum_size = 0; LIST_HEAD(io_bufs); + LIST_HEAD(log_bufs); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) csum_size = sizeof(struct jbd2_journal_block_tail); @@ -547,7 +541,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) blk_start_plug(&plug); jbd2_journal_write_revoke_records(journal, commit_transaction, - WRITE_SYNC); + &log_bufs, WRITE_SYNC); blk_finish_plug(&plug); jbd_debug(3, "JBD2: commit phase 2\n"); @@ -573,8 +567,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) atomic_read(&commit_transaction->t_outstanding_credits)); err = 0; - descriptor = NULL; bufs = 0; + descriptor = NULL; blk_start_plug(&plug); while (commit_transaction->t_buffers) { @@ -606,8 +600,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) record the metadata buffer. 
*/ if (!descriptor) { - struct buffer_head *bh; - J_ASSERT (bufs == 0); jbd_debug(4, "JBD2: get descriptor\n"); @@ -618,26 +610,26 @@ void jbd2_journal_commit_transaction(journal_t *journal) continue; } - bh = jh2bh(descriptor); jbd_debug(4, "JBD2: got buffer %llu (%p)\n", - (unsigned long long)bh->b_blocknr, bh->b_data); - header = (journal_header_t *)&bh->b_data[0]; + (unsigned long long)descriptor->b_blocknr, + descriptor->b_data); + header = (journal_header_t *)descriptor->b_data; header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); header->h_sequence = cpu_to_be32(commit_transaction->t_tid); - tagp = &bh->b_data[sizeof(journal_header_t)]; - space_left = bh->b_size - sizeof(journal_header_t); + tagp = &descriptor->b_data[sizeof(journal_header_t)]; + space_left = descriptor->b_size - + sizeof(journal_header_t); first_tag = 1; - set_buffer_jwrite(bh); - set_buffer_dirty(bh); - wbuf[bufs++] = bh; + set_buffer_jwrite(descriptor); + set_buffer_dirty(descriptor); + wbuf[bufs++] = descriptor; /* Record it so that we can wait for IO completion later */ - BUFFER_TRACE(bh, "ph3: file as descriptor"); - jbd2_journal_file_buffer(descriptor, commit_transaction, - BJ_LogCtl); + BUFFER_TRACE(descriptor, "ph3: file as descriptor"); + jbd2_file_log_bh(&log_bufs, descriptor); } /* Where is the buffer to be written? 
*/ @@ -864,26 +856,19 @@ start_journal_io: jbd_debug(3, "JBD2: commit phase 4\n"); /* Here we wait for the revoke record and descriptor record buffers */ - wait_for_ctlbuf: - while (commit_transaction->t_log_list != NULL) { + while (!list_empty(&log_bufs)) { struct buffer_head *bh; - jh = commit_transaction->t_log_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_ctlbuf; - } - if (cond_resched()) - goto wait_for_ctlbuf; + bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers); + wait_on_buffer(bh); + cond_resched(); if (unlikely(!buffer_uptodate(bh))) err = -EIO; BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); clear_buffer_jwrite(bh); - jbd2_journal_unfile_buffer(journal, jh); - jbd2_journal_put_journal_head(jh); + jbd2_unfile_log_bh(bh); __brelse(bh); /* One for getblk */ /* AKPM: bforget here */ } @@ -934,7 +919,6 @@ start_journal_io: J_ASSERT(commit_transaction->t_buffers == NULL); J_ASSERT(commit_transaction->t_checkpoint_list == NULL); J_ASSERT(commit_transaction->t_shadow_list == NULL); - J_ASSERT(commit_transaction->t_log_list == NULL); restart_loop: /* diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 45cdc080e466..b0a8d1e4703e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -790,7 +790,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, * But we don't bother doing that, so there will be coherency problems with * mmaps of blockdevs which hold live JBD-controlled filesystems. 
*/ -struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) +struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; unsigned long long blocknr; @@ -809,7 +809,7 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) set_buffer_uptodate(bh); unlock_buffer(bh); BUFFER_TRACE(bh, "return this buffer"); - return jbd2_journal_add_journal_head(bh); + return bh; } /* diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f30b80b4ce8b..198c9c10276d 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -122,9 +122,10 @@ struct jbd2_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, - struct journal_head **, int *, + struct list_head *, + struct buffer_head **, int *, struct jbd2_revoke_record_s *, int); -static void flush_descriptor(journal_t *, struct journal_head *, int, int); +static void flush_descriptor(journal_t *, struct buffer_head *, int, int); #endif /* Utility functions to maintain the revoke table */ @@ -531,9 +532,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) */ void jbd2_journal_write_revoke_records(journal_t *journal, transaction_t *transaction, + struct list_head *log_bufs, int write_op) { - struct journal_head *descriptor; + struct buffer_head *descriptor; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; struct list_head *hash_list; @@ -553,7 +555,7 @@ void jbd2_journal_write_revoke_records(journal_t *journal, while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s *) hash_list->next; - write_one_revoke_record(journal, transaction, + write_one_revoke_record(journal, transaction, log_bufs, &descriptor, &offset, record, write_op); count++; @@ -573,13 +575,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, - struct journal_head **descriptorp, + struct list_head *log_bufs, 
+ struct buffer_head **descriptorp, int *offsetp, struct jbd2_revoke_record_s *record, int write_op) { int csum_size = 0; - struct journal_head *descriptor; + struct buffer_head *descriptor; int offset; journal_header_t *header; @@ -609,26 +612,26 @@ static void write_one_revoke_record(journal_t *journal, descriptor = jbd2_journal_get_descriptor_buffer(journal); if (!descriptor) return; - header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; + header = (journal_header_t *)descriptor->b_data; header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); header->h_sequence = cpu_to_be32(transaction->t_tid); /* Record it so that we can wait for IO completion later */ - JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); - jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); + BUFFER_TRACE(descriptor, "file in log_bufs"); + jbd2_file_log_bh(log_bufs, descriptor); offset = sizeof(jbd2_journal_revoke_header_t); *descriptorp = descriptor; } if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { - * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); offset += 8; } else { - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be32 *)(&descriptor->b_data[offset])) = cpu_to_be32(record->blocknr); offset += 4; } @@ -636,8 +639,7 @@ static void write_one_revoke_record(journal_t *journal, *offsetp = offset; } -static void jbd2_revoke_csum_set(journal_t *j, - struct journal_head *descriptor) +static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) { struct jbd2_journal_revoke_tail *tail; __u32 csum; @@ -645,12 +647,10 @@ static void jbd2_revoke_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - tail = (struct jbd2_journal_revoke_tail *) - (jh2bh(descriptor)->b_data + j->j_blocksize - + tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - 
sizeof(struct jbd2_journal_revoke_tail)); tail->r_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); tail->r_checksum = cpu_to_be32(csum); } @@ -662,25 +662,24 @@ static void jbd2_revoke_csum_set(journal_t *j, */ static void flush_descriptor(journal_t *journal, - struct journal_head *descriptor, + struct buffer_head *descriptor, int offset, int write_op) { jbd2_journal_revoke_header_t *header; - struct buffer_head *bh = jh2bh(descriptor); if (is_journal_aborted(journal)) { - put_bh(bh); + put_bh(descriptor); return; } - header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; + header = (jbd2_journal_revoke_header_t *)descriptor->b_data; header->r_count = cpu_to_be32(offset); jbd2_revoke_csum_set(journal, descriptor); - set_buffer_jwrite(bh); - BUFFER_TRACE(bh, "write"); - set_buffer_dirty(bh); - write_dirty_buffer(bh, write_op); + set_buffer_jwrite(descriptor); + BUFFER_TRACE(descriptor, "write"); + set_buffer_dirty(descriptor); + write_dirty_buffer(descriptor, write_op); } #endif diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 983010900258..f1c5392e62b6 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1637,9 +1637,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) case BJ_Shadow: list = &transaction->t_shadow_list; break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; case BJ_Reserved: list = &transaction->t_reserved_list; break; @@ -2148,9 +2145,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, case BJ_Shadow: list = &transaction->t_shadow_list; break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; case BJ_Reserved: list = &transaction->t_reserved_list; break; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 57210844b228..b7dc40da99e0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -530,12 +530,6 @@ 
struct transaction_s */ struct journal_head *t_shadow_list; - /* - * Doubly-linked circular list of control buffers being written to the - * log. [j_list_lock] - */ - struct journal_head *t_log_list; - /* * List of inodes whose data we've modified in data=ordered mode. * [j_list_lock] @@ -995,7 +989,7 @@ static inline void jbd2_unfile_log_bh(struct buffer_head *bh) } /* Log buffer allocation */ -extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); +struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); @@ -1179,8 +1173,10 @@ extern int jbd2_journal_init_revoke_caches(void); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); -extern void jbd2_journal_write_revoke_records(journal_t *, - transaction_t *, int); +extern void jbd2_journal_write_revoke_records(journal_t *journal, + transaction_t *transaction, + struct list_head *log_bufs, + int write_op); /* Recovery revoke support */ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); @@ -1288,9 +1284,8 @@ static inline int jbd_space_needed(journal_t *journal) #define BJ_Metadata 1 /* Normal journaled metadata */ #define BJ_Forget 2 /* Buffer superseded by this transaction */ #define BJ_Shadow 3 /* Buffer contents being shadowed to the log */ -#define BJ_LogCtl 4 /* Buffer contains log descriptors */ -#define BJ_Reserved 5 /* Buffer is reserved for access by journal */ -#define BJ_Types 6 +#define BJ_Reserved 4 /* Buffer is reserved for access by journal */ +#define BJ_Types 5 extern int jbd_blocks_per_page(struct inode *inode); -- cgit v1.2.3-59-g8ed1b From 76c39904561004ac8675f858a290129e439d5168 Mon Sep 17 00:00:00 2001 From: Jan Kara 
Date: Tue, 4 Jun 2013 12:12:57 -0400 Subject: jbd2: cleanup needed free block estimates when starting a transaction __jbd2_log_space_left() and jbd_space_needed() were kind of odd. jbd_space_needed() accounted also credits needed for currently committing transaction while it didn't account for credits needed for control blocks. __jbd2_log_space_left() then accounted for control blocks as a fraction of free space. Since results of these two functions are always only compared against each other, this works correctly but is somewhat strange. Move the estimates so that jbd_space_needed() returns number of blocks needed for a transaction including control blocks and __jbd2_log_space_left() returns free space in the journal (with the committing transaction already subtracted). Rename functions to jbd2_log_space_left() and jbd2_space_needed() while we are changing them. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 8 ++++---- fs/jbd2/journal.c | 29 ----------------------------- fs/jbd2/transaction.c | 9 +++++---- include/linux/jbd2.h | 32 ++++++++++++++++++++++++++------ 4 files changed, 35 insertions(+), 43 deletions(-) (limited to 'fs/jbd2/checkpoint.c') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 65ec076e41f2..a572383bcf99 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -120,8 +120,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) int nblocks, space_left; /* assert_spin_locked(&journal->j_state_lock); */ - nblocks = jbd_space_needed(journal); - while (__jbd2_log_space_left(journal) < nblocks) { + nblocks = jbd2_space_needed(journal); + while (jbd2_log_space_left(journal) < nblocks) { if (journal->j_flags & JBD2_ABORT) return; write_unlock(&journal->j_state_lock); @@ -140,8 +140,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) */ write_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); - nblocks = jbd_space_needed(journal); - space_left = 
__jbd2_log_space_left(journal); + nblocks = jbd2_space_needed(journal); + space_left = jbd2_log_space_left(journal); if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; tid_t tid = 0; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5ef0712e2f7a..8e5486d62e89 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -477,35 +477,6 @@ repeat: * journal, so that we can begin checkpointing when appropriate. */ -/* - * __jbd2_log_space_left: Return the number of free blocks left in the journal. - * - * Called with the journal already locked. - * - * Called under j_state_lock - */ - -int __jbd2_log_space_left(journal_t *journal) -{ - int left = journal->j_free; - - /* assert_spin_locked(&journal->j_state_lock); */ - - /* - * Be pessimistic here about the number of those free blocks which - * might be required for log descriptor control blocks. - */ - -#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ - - left -= MIN_LOG_RESERVED_BLOCKS; - - if (left <= 0) - return 0; - left -= (left >> 3); - return left; -} - /* * Called with j_state_lock locked for writing. * Returns true if a transaction commit was started. diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 60361c634b5d..f9cd43190b43 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -283,12 +283,12 @@ repeat: * reduce the free space arbitrarily. Be careful to account for * those buffers when checkpointing. 
*/ - if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); atomic_sub(nblocks, &transaction->t_outstanding_credits); read_unlock(&journal->j_state_lock); write_lock(&journal->j_state_lock); - if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) __jbd2_log_wait_for_space(journal); write_unlock(&journal->j_state_lock); goto repeat; @@ -306,7 +306,7 @@ repeat: jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", handle, nblocks, atomic_read(&transaction->t_outstanding_credits), - __jbd2_log_space_left(journal)); + jbd2_log_space_left(journal)); read_unlock(&journal->j_state_lock); lock_map_acquire(&handle->h_lockdep_map); @@ -441,7 +441,8 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) goto unlock; } - if (wanted > __jbd2_log_space_left(journal)) { + if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) > + jbd2_log_space_left(journal)) { jbd_debug(3, "denied handle %p %d blocks: " "insufficient log space\n", handle, nblocks); goto unlock; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e33e84b3d5c8..7a1f6cd864c8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1220,7 +1220,6 @@ extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); * transitions on demand. */ -int __jbd2_log_space_left(journal_t *); /* Called with journal locked */ int jbd2_log_start_commit(journal_t *journal, tid_t tid); int __jbd2_log_start_commit(journal_t *journal, tid_t tid); int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); @@ -1290,17 +1289,38 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); +/* + * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for + * transaction control blocks. 
+ */ +#define JBD2_CONTROL_BLOCKS_SHIFT 5 + /* * Return the minimum number of blocks which must be free in the journal * before a new transaction may be started. Must be called under j_state_lock. */ -static inline int jbd_space_needed(journal_t *journal) +static inline int jbd2_space_needed(journal_t *journal) { int nblocks = journal->j_max_transaction_buffers; - if (journal->j_committing_transaction) - nblocks += atomic_read(&journal->j_committing_transaction-> - t_outstanding_credits); - return nblocks; + return nblocks + (nblocks >> JBD2_CONTROL_BLOCKS_SHIFT); +} + +/* + * Return number of free blocks in the log. Must be called under j_state_lock. + */ +static inline unsigned long jbd2_log_space_left(journal_t *journal) +{ + /* Allow for rounding errors */ + unsigned long free = journal->j_free - 32; + + if (journal->j_committing_transaction) { + unsigned long committing = atomic_read(&journal-> + j_committing_transaction->t_outstanding_credits); + + /* Transaction + control blocks */ + free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT); + } + return free; } /* -- cgit v1.2.3-59-g8ed1b From f29fad72105287e6899d9128a9d494514f220e77 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:24:11 -0400 Subject: jbd2: remove unused waitqueues j_wait_logspace and j_wait_checkpoint are unused. Remove them. 
Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 4 ---- fs/jbd2/journal.c | 2 -- include/linux/jbd2.h | 8 -------- 3 files changed, 14 deletions(-) (limited to 'fs/jbd2/checkpoint.c') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index a572383bcf99..75a15f371b00 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -625,10 +625,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) __jbd2_journal_drop_transaction(journal, transaction); jbd2_journal_free_transaction(transaction); - - /* Just in case anybody was waiting for more transactions to be - checkpointed... */ - wake_up(&journal->j_wait_logspace); ret = 1; out: return ret; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8e5486d62e89..f43f97ba002e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1027,9 +1027,7 @@ static journal_t * journal_init_common (void) return NULL; init_waitqueue_head(&journal->j_wait_transaction_locked); - init_waitqueue_head(&journal->j_wait_logspace); init_waitqueue_head(&journal->j_wait_done_commit); - init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); mutex_init(&journal->j_barrier); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 7a1f6cd864c8..8028dd581cb0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -687,9 +687,7 @@ jbd2_time_diff(unsigned long start, unsigned long end) * waiting for checkpointing * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction * to start committing, or for a barrier lock to be released - * @j_wait_logspace: Wait queue for waiting for checkpointing to complete * @j_wait_done_commit: Wait queue for waiting for commit to complete - * @j_wait_checkpoint: Wait queue to trigger checkpointing * @j_wait_commit: Wait queue to trigger commit * @j_wait_updates: Wait queue to wait for updates to complete * 
@j_checkpoint_mutex: Mutex for locking against concurrent checkpoints @@ -794,15 +792,9 @@ struct journal_s */ wait_queue_head_t j_wait_transaction_locked; - /* Wait queue for waiting for checkpointing to complete */ - wait_queue_head_t j_wait_logspace; - /* Wait queue for waiting for commit to complete */ wait_queue_head_t j_wait_done_commit; - /* Wait queue to trigger checkpointing */ - wait_queue_head_t j_wait_checkpoint; - /* Wait queue to trigger commit */ wait_queue_head_t j_wait_commit; -- cgit v1.2.3-59-g8ed1b From 0ef54180e0187117062939202b96faf04c8673bc Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 12 Jun 2013 22:47:35 -0400 Subject: jbd2: drop checkpoint mutex when waiting in __jbd2_log_wait_for_space() While trying to debug an issue under extreme I/O loading on preempt-rt kernels, the following backtrace was observed via SysRQ output: rm D ffff8802203afbc0 4600 4878 4748 0x00000000 ffff8802217bfb78 0000000000000082 ffff88021fc2bb80 ffff88021fc2bb80 ffff88021fc2bb80 ffff8802217bffd8 ffff8802217bffd8 ffff8802217bffd8 ffff88021f1d4c80 ffff88021fc2bb80 ffff8802217bfb88 ffff88022437b000 Call Trace: [] schedule+0x24/0x70 [] jbd2_log_wait_commit+0xbd/0x140 [] ? __init_waitqueue_head+0x50/0x50 [] jbd2_log_do_checkpoint+0xf5/0x520 [] __jbd2_log_wait_for_space+0xa9/0x1f0 [] start_this_handle.isra.10+0x2e0/0x530 [] ? __init_waitqueue_head+0x50/0x50 [] jbd2__journal_start+0xc3/0x110 [] ? ext4_rmdir+0x6e/0x230 [] jbd2_journal_start+0xe/0x10 [] ext4_journal_start_sb+0x5b/0x160 [] ext4_rmdir+0x6e/0x230 [] vfs_rmdir+0xd5/0x140 [] do_rmdir+0xdf/0x120 [] ? task_work_run+0x44/0x80 [] ? do_notify_resume+0x89/0x100 [] ? int_signal+0x12/0x17 [] sys_unlinkat+0x25/0x40 [] system_call_fastpath+0x16/0x1b What is interesting here, is that we call log_wait_commit, from within wait_for_space, but we are still holding the checkpoint_mutex as it surrounds mostly the whole of wait_for_space.
And then, as we are waiting, journal_commit_transaction can run, and if the JBD2_FLUSHED bit is set, then we will also try to take the same checkpoint_mutex. It seems that we need to drop the checkpoint_mutex while sitting in jbd2_log_wait_commit, if we want to guarantee that progress can be made by jbd2_journal_commit_transaction(). There does not seem to be anything preempt-rt specific about this, other than perhaps increasing the odds of it happening. Signed-off-by: Paul Gortmaker Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/jbd2/checkpoint.c') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 75a15f371b00..7f34f4716165 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -156,7 +156,15 @@ void __jbd2_log_wait_for_space(journal_t *journal) /* We were able to recover space; yay! */ ; } else if (tid) { + /* + * jbd2_journal_commit_transaction() may want + * to take the checkpoint_mutex if JBD2_FLUSHED + * is set. So we need to temporarily drop it. + */ + mutex_unlock(&journal->j_checkpoint_mutex); jbd2_log_wait_commit(journal, tid); + write_lock(&journal->j_state_lock); + continue; } else { printk(KERN_ERR "%s: needed %d blocks and " "only had %d space available\n", -- cgit v1.2.3-59-g8ed1b