diff options
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r-- | fs/f2fs/node.c | 591 |
1 files changed, 395 insertions, 196 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9d02cdcdbb07..983572f23896 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -8,7 +8,7 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/mpage.h> -#include <linux/backing-dev.h> +#include <linux/sched/mm.h> #include <linux/blkdev.h> #include <linux/pagevec.h> #include <linux/swap.h> @@ -17,7 +17,7 @@ #include "node.h" #include "segment.h" #include "xattr.h" -#include "trace.h" +#include "iostat.h" #include <trace/events/f2fs.h> #define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) @@ -36,6 +36,7 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", __func__, nid); + f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } return 0; @@ -44,11 +45,15 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct sysinfo val; unsigned long avail_ram; unsigned long mem_size = 0; bool res = false; + if (!nm_i) + return true; + si_meminfo(&val); /* only uses low memory */ @@ -62,8 +67,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> - PAGE_SHIFT; + mem_size = (nm_i->nat_cnt[TOTAL_NAT] * + sizeof(struct nat_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); if (excess_cached_nats(sbi)) res = false; @@ -86,10 +91,24 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); - } else if (type == INMEM_PAGES) { - /* it allows 20% / total_ram for inmemory pages */ - mem_size = get_pages(sbi, F2FS_INMEM_PAGES); - res = mem_size < (val.totalram / 5); + } else if (type == DISCARD_CACHE) { + mem_size = (atomic_read(&dcc->discard_cmd_cnt) * + sizeof(struct discard_cmd)) >> PAGE_SHIFT; + res = mem_size < (avail_ram * nm_i->ram_thresh / 100); + } else if (type == COMPRESS_PAGE) { +#ifdef CONFIG_F2FS_FS_COMPRESSION + unsigned long free_ram = val.freeram; + + /* + * free memory is lower than watermark or cached page count + * exceed threshold, deny caching compress page. + */ + res = (free_ram > avail_ram * sbi->compress_watermark / 100) && + (COMPRESS_MAPPING(sbi)->nrpages < + free_ram * sbi->compress_percent / 100); +#else + res = false; +#endif } else { if (!sbi->sb->s_bdi->wb.dirty_exceeded) return true; @@ -109,7 +128,7 @@ static void clear_node_page_dirty(struct page *page) static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid)); + return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) @@ -141,14 +160,13 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) return dst_page; } -static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail) +static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, + nid_t nid, bool no_fail) { struct nat_entry *new; - if (no_fail) - new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO); - else - new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO); + new = f2fs_kmem_cache_alloc(nat_entry_slab, + GFP_F2FS_ZERO, no_fail, sbi); if (new) { nat_set_nid(new, nid); nat_reset_flag(new); @@ -177,7 +195,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, list_add_tail(&ne->list, &nm_i->nat_entries); spin_unlock(&nm_i->nat_list_lock); - nm_i->nat_cnt++; + nm_i->nat_cnt[TOTAL_NAT]++; + nm_i->nat_cnt[RECLAIMABLE_NAT]++; return ne; } @@ -207,7 +226,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) { radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); - nm_i->nat_cnt--; + nm_i->nat_cnt[TOTAL_NAT]--; + nm_i->nat_cnt[RECLAIMABLE_NAT]--; __free_nat_entry(e); } @@ -219,7 +239,8 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { - head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); + head = f2fs_kmem_cache_alloc(nat_entry_set_slab, + GFP_NOFS, true, NULL); INIT_LIST_HEAD(&head->entry_list); INIT_LIST_HEAD(&head->set_list); @@ -253,7 +274,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; - nm_i->dirty_nat_cnt++; + nm_i->nat_cnt[DIRTY_NAT]++; + nm_i->nat_cnt[RECLAIMABLE_NAT]--; set_nat_flag(ne, IS_DIRTY, true); refresh_list: spin_lock(&nm_i->nat_list_lock); @@ -273,7 +295,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, set_nat_flag(ne, IS_DIRTY, false); set->entry_cnt--; - nm_i->dirty_nat_cnt--; + nm_i->nat_cnt[DIRTY_NAT]--; + nm_i->nat_cnt[RECLAIMABLE_NAT]++; } static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, @@ -304,7 +327,8 @@ static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, unsigned long flags; unsigned int seq_id; - fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS); + fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, + GFP_NOFS, true, NULL); get_page(page); fn->page = page; @@ -355,14 +379,14 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need; } @@ -372,11 +396,11 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -386,13 +410,13 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need_update; } @@ -403,11 +427,15 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *new, *e; - new = __alloc_nat_entry(nid, false); + /* Let's mitigate lock contention of nat_tree_lock during checkpoint */ + if (f2fs_rwsem_is_locked(&sbi->cp_global_sem)) + return; + + new = __alloc_nat_entry(sbi, nid, false); if (!new) return; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) e = __init_nat_entry(nm_i, new, ne, false); @@ -416,7 +444,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, nat_get_blkaddr(e) != le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); if (e != new) __free_nat_entry(new); } @@ -426,9 +454,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - struct nat_entry *new = __alloc_nat_entry(ni->nid, true); + struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = __init_nat_entry(nm_i, new, NULL, true); @@ -459,6 +487,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* increment version no as node is removed */ if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { unsigned char version = nat_get_version(e); + nat_set_version(e, inc_node_version(version)); } @@ -476,7 +505,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -484,7 +513,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - if (!down_write_trylock(&nm_i->nat_tree_lock)) + if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock)) return 0; spin_lock(&nm_i->nat_list_lock); @@ -506,15 +535,12 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) } spin_unlock(&nm_i->nat_list_lock); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } -/* - * This function always returns success - */ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, - struct node_info *ni) + struct node_info *ni, bool checkpoint_context) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -529,22 +555,32 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, int i; ni->nid = nid; - +retry: /* Check nat cache */ - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return 0; } - memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + /* + * Check current segment summary by trying to grab journal_rwsem first. + * This sem is on the critical path on the checkpoint requiring the above + * nat_tree_lock. Therefore, we should retry, if we failed to grab here + * while not bothering checkpoint. + */ + if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { + down_read(&curseg->journal_rwsem); + } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) || + !down_read_trylock(&curseg->journal_rwsem)) { + f2fs_up_read(&nm_i->nat_tree_lock); + goto retry; + } - /* Check current segment summary */ - down_read(&curseg->journal_rwsem); i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); if (i >= 0) { ne = nat_in_journal(journal, i); @@ -552,13 +588,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, } up_read(&curseg->journal_rwsem); if (i >= 0) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); goto cache; } /* Fill node_info from nat page */ index = current_nat_addr(sbi, nid); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); page = f2fs_get_meta_page(sbi, index); if (IS_ERR(page)) @@ -621,10 +657,10 @@ pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) switch (dn->max_level) { case 3: base += 2 * indirect_blks; - /* fall through */ + fallthrough; case 2: base += 2 * direct_blks; - /* fall through */ + fallthrough; case 1: base += direct_index; break; @@ -716,8 +752,7 @@ got: /* * Caller should call f2fs_put_dnode(dn). * Also, it should grab and release a rwsem by calling f2fs_lock_op() and - * f2fs_unlock_op() only if ro is not set RDONLY_NODE. - * In the case of RDONLY_NODE, we don't need to care about mutex. + * f2fs_unlock_op() only if mode is set with ALLOC_NODE. */ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { @@ -809,8 +844,27 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) dn->nid = nids[level]; dn->ofs_in_node = offset[level]; dn->node_page = npage[level]; - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); + + if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && + f2fs_sb_has_readonly(sbi)) { + unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn); + block_t blkaddr; + + if (!c_len) + goto out; + + blkaddr = f2fs_data_blkaddr(dn); + if (blkaddr == COMPRESS_ADDR) + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + 1); + + f2fs_update_extent_tree_range_compressed(dn->inode, + index, blkaddr, + F2FS_I(dn->inode)->i_cluster_size, + c_len); + } +out: return 0; release_pages: @@ -835,7 +889,7 @@ static int truncate_node(struct dnode_of_data *dn) int err; pgoff_t index; - err = f2fs_get_node_info(sbi, dn->nid, &ni); + err = f2fs_get_node_info(sbi, dn->nid, &ni, false); if (err) return err; @@ -1046,8 +1100,10 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(inode, from, offset, noffset); - if (level < 0) + if (level < 0) { + trace_f2fs_truncate_inode_blocks_exit(inode, level); return level; + } page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -1188,8 +1244,9 @@ int f2fs_remove_inode_page(struct inode *inode) } if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { - f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, (unsigned long long)inode->i_blocks); + f2fs_warn(F2FS_I_SB(inode), + "f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); } @@ -1231,12 +1288,17 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) goto fail; #ifdef CONFIG_F2FS_CHECK_FS - err = f2fs_get_node_info(sbi, dn->nid, &new_ni); + err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false); if (err) { dec_valid_node_count(sbi, dn->inode, !ofs); goto fail; } - f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); + if (unlikely(new_ni.blk_addr != NULL_ADDR)) { + err = -EFSCORRUPTED; + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + goto fail; + } #endif new_ni.nid = dn->nid; new_ni.ino = dn->inode->i_ino; @@ -1271,7 +1333,7 @@ fail: * 0: f2fs_put_page(page, 0) * LOCKED_PAGE or error: f2fs_put_page(page, 1) */ -static int read_node_page(struct page *page, int op_flags) +static int read_node_page(struct page *page, blk_opf_t op_flags) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; @@ -1293,18 +1355,25 @@ static int read_node_page(struct page *page, int op_flags) return LOCKED_PAGE; } - err = f2fs_get_node_info(sbi, page->index, &ni); + err = f2fs_get_node_info(sbi, page->index, &ni, false); if (err) return err; - if (unlikely(ni.blk_addr == NULL_ADDR) || + /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ + if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) || is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { ClearPageUptodate(page); return -ENOENT; } fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; - return f2fs_submit_page_bio(&fio); + + err = f2fs_submit_page_bio(&fio); + + if (!err) + f2fs_update_iostat(sbi, NULL, FS_NODE_READ_IO, F2FS_BLKSIZE); + + return err; } /* @@ -1349,8 +1418,7 @@ repeat: err = read_node_page(page, 0); if (err < 0) { - f2fs_put_page(page, 1); - return ERR_PTR(err); + goto out_put_err; } else if (err == LOCKED_PAGE) { err = 0; goto page_hit; @@ -1376,18 +1444,23 @@ repeat: goto out_err; } page_hit: - if(unlikely(nid != nid_of_node(page))) { - f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + if (likely(nid == nid_of_node(page))) + return page; + + f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", nid, nid_of_node(page), ino_of_node(page), ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(page)); - err = -EINVAL; + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; out_err: - ClearPageUptodate(page); - f2fs_put_page(page, 1); - return ERR_PTR(err); - } - return page; + ClearPageUptodate(page); +out_put_err: + /* ENOENT comes from read_node_page which is not an error. */ + if (err != -ENOENT) + f2fs_handle_page_eio(sbi, page->index, NODE); + f2fs_put_page(page, 1); + return ERR_PTR(err); } struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) @@ -1518,8 +1591,12 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, trace_f2fs_writepage(page, NODE); - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; + if (unlikely(f2fs_cp_error(sbi))) { + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; + } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; @@ -1533,21 +1610,21 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, nid = nid_of_node(page); f2fs_bug_on(sbi, page->index != nid); - if (f2fs_get_node_info(sbi, nid, &ni)) + if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; if (wbc->for_reclaim) { - if (!down_read_trylock(&sbi->node_write)) + if (!f2fs_down_read_trylock(&sbi->node_write)) goto redirty_out; } else { - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); } /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); unlock_page(page); return 0; } @@ -1555,27 +1632,28 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (__is_valid_data_blkaddr(ni.blk_addr) && !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); goto redirty_out; } - if (atomic && !test_opt(sbi, NOBARRIER)) + if (atomic && !test_opt(sbi, NOBARRIER) && !f2fs_sb_has_blkzoned(sbi)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; - set_page_writeback(page); - ClearPageError(page); - + /* should add to global list before clearing PAGECACHE status */ if (f2fs_in_warm_node_list(sbi, page)) { seq = f2fs_add_fsync_node_entry(sbi, page); if (seq_id) *seq_id = seq; } + set_page_writeback(page); + ClearPageError(page); + fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); @@ -1709,6 +1787,7 @@ continue_unlock: if (!atomic || page == last_page) { set_fsync_mark(page, 1); + percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(page)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) @@ -1716,7 +1795,7 @@ continue_unlock: set_dentry_mark(page, f2fs_need_dentry_mark(sbi, ino)); } - /* may be written by other thread */ + /* may be written by other thread */ if (!PageDirty(page)) set_page_dirty(page); } @@ -1760,7 +1839,7 @@ continue_unlock: out: if (nwritten) f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); - return ret ? -EIO: 0; + return ret ? -EIO : 0; } static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data) @@ -1804,6 +1883,51 @@ static bool flush_dirty_inode(struct page *page) return true; } +void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) +{ + pgoff_t index = 0; + struct pagevec pvec; + int nr_pages; + + pagevec_init(&pvec); + + while ((nr_pages = pagevec_lookup_tag(&pvec, + NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) { + int i; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + if (!IS_DNODE(page)) + continue; + + lock_page(page); + + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { +continue_unlock: + unlock_page(page); + continue; + } + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + /* flush inline_data, if it's async context. */ + if (page_private_inline(page)) { + clear_page_private_inline(page); + unlock_page(page); + flush_inline_data(sbi, ino_of_node(page)); + continue; + } + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + } +} + int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, bool do_balance, enum iostat_type io_type) @@ -1827,7 +1951,6 @@ next_step: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; bool submitted = false; - bool may_dirty = true; /* give a priority to WB_SYNC threads */ if (atomic_read(&sbi->wb_sync_req[NODE]) && @@ -1867,21 +1990,22 @@ continue_unlock: goto continue_unlock; } + /* flush inline_data/inode, if it's async context. */ + if (!do_balance) + goto write_node; + /* flush inline_data */ - if (is_inline_node(page)) { - clear_inline_node(page); + if (page_private_inline(page)) { + clear_page_private_inline(page); unlock_page(page); flush_inline_data(sbi, ino_of_node(page)); goto lock_node; } /* flush dirty inode */ - if (IS_INODE(page) && may_dirty) { - may_dirty = false; - if (flush_dirty_inode(page)) - goto lock_node; - } - + if (IS_INODE(page) && flush_dirty_inode(page)) + goto lock_node; +write_node: f2fs_wait_on_page_writeback(page, NODE, true, true); if (!clear_page_dirty_for_io(page)) @@ -1979,7 +2103,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, goto skip_write; /* balancing f2fs's metadata in background */ - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, true); /* collect a number of dirty node pages and write together */ if (wbc->sync_mode != WB_SYNC_ALL && @@ -1989,8 +2113,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[NODE]); - else if (atomic_read(&sbi->wb_sync_req[NODE])) + else if (atomic_read(&sbi->wb_sync_req[NODE])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } trace_f2fs_writepages(mapping->host, wbc, NODE); @@ -2010,24 +2138,23 @@ skip_write: return 0; } -static int f2fs_set_node_page_dirty(struct page *page) +static bool f2fs_dirty_node_folio(struct address_space *mapping, + struct folio *folio) { - trace_f2fs_set_page_dirty(page, NODE); + trace_f2fs_set_page_dirty(&folio->page, NODE); - if (!PageUptodate(page)) - SetPageUptodate(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); #ifdef CONFIG_F2FS_CHECK_FS - if (IS_INODE(page)) - f2fs_inode_chksum_set(F2FS_P_SB(page), page); + if (IS_INODE(&folio->page)) + f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page); #endif - if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); - inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); - f2fs_set_page_private(page, 0); - f2fs_trace_pid(page); - return 1; + if (filemap_dirty_folio(mapping, folio)) { + inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); + set_page_private_reference(&folio->page); + return true; } - return 0; + return false; } /* @@ -2036,12 +2163,10 @@ static int f2fs_set_node_page_dirty(struct page *page) const struct address_space_operations f2fs_node_aops = { .writepage = f2fs_write_node_page, .writepages = f2fs_write_node_pages, - .set_page_dirty = f2fs_set_node_page_dirty, - .invalidatepage = f2fs_invalidate_page, - .releasepage = f2fs_release_page, -#ifdef CONFIG_MIGRATION - .migratepage = f2fs_migrate_page, -#endif + .dirty_folio = f2fs_dirty_node_folio, + .invalidate_folio = f2fs_invalidate_folio, + .release_folio = f2fs_release_folio, + .migrate_folio = filemap_migrate_folio, }; static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, @@ -2051,18 +2176,16 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, } static int __insert_free_nid(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_state state) + struct free_nid *i) { struct f2fs_nm_info *nm_i = NM_I(sbi); - int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); + if (err) return err; - f2fs_bug_on(sbi, state != i->state); - nm_i->nid_cnt[state]++; - if (state == FREE_NID) - list_add_tail(&i->list, &nm_i->free_nid_list); + nm_i->nid_cnt[FREE_NID]++; + list_add_tail(&i->list, &nm_i->free_nid_list); return 0; } @@ -2100,6 +2223,24 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i, } } +bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int i; + bool ret = true; + + f2fs_down_read(&nm_i->nat_tree_lock); + for (i = 0; i < nm_i->nat_blocks; i++) { + if (!test_bit_le(i, nm_i->nat_block_bitmap)) { + ret = false; + break; + } + } + f2fs_up_read(&nm_i->nat_tree_lock); + + return ret; +} + static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set, bool build) { @@ -2141,7 +2282,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, if (unlikely(f2fs_check_nid_range(sbi, nid))) return false; - i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); + i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS, true, NULL); i->nid = nid; i->state = FREE_NID; @@ -2184,7 +2325,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, } } ret = true; - err = __insert_free_nid(sbi, i, FREE_NID); + err = __insert_free_nid(sbi, i); err_out: if (update) { update_free_nid_bitmap(sbi, nid, ret, build); @@ -2278,7 +2419,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) unsigned int i, idx; nid_t nid; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) @@ -2301,7 +2442,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) out: scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, @@ -2314,6 +2455,9 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (unlikely(nid >= nm_i->max_nid)) nid = 0; + if (unlikely(nid % NAT_ENTRY_PER_BLOCK)) + nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK; + /* Enough entries */ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) return 0; @@ -2333,7 +2477,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); while (1) { if (!test_bit_le(NAT_BLOCK_OFFSET(nid), @@ -2348,7 +2492,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, } if (ret) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } @@ -2368,7 +2512,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, /* find free nids from current sum_pages */ scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2485,7 +2629,6 @@ void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i, *next; int nr = nr_shrink; if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) @@ -2494,23 +2637,29 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) if (!mutex_trylock(&nm_i->build_lock)) return 0; - spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { - if (nr_shrink <= 0 || - nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) - break; + while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) { + struct free_nid *i, *next; + unsigned int batch = SHRINK_NID_BATCH_SIZE; - __remove_free_nid(sbi, i, FREE_NID); - kmem_cache_free(free_nid_slab, i); - nr_shrink--; + spin_lock(&nm_i->nid_list_lock); + list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { + if (!nr_shrink || !batch || + nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) + break; + __remove_free_nid(sbi, i, FREE_NID); + kmem_cache_free(free_nid_slab, i); + nr_shrink--; + batch--; + } + spin_unlock(&nm_i->nid_list_lock); } - spin_unlock(&nm_i->nid_list_lock); + mutex_unlock(&nm_i->build_lock); return nr - nr_shrink; } -void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) +int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; size_t inline_size; @@ -2518,13 +2667,20 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) struct f2fs_inode *ri; ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); ri = F2FS_INODE(page); if (ri->i_inline & F2FS_INLINE_XATTR) { - set_inode_flag(inode, FI_INLINE_XATTR); + if (!f2fs_has_inline_xattr(inode)) { + set_inode_flag(inode, FI_INLINE_XATTR); + stat_inc_inline_xattr(inode); + } } else { - clear_inode_flag(inode, FI_INLINE_XATTR); + if (f2fs_has_inline_xattr(inode)) { + stat_dec_inline_xattr(inode); + clear_inode_flag(inode, FI_INLINE_XATTR); + } goto update_inode; } @@ -2537,6 +2693,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) update_inode: f2fs_update_inode(inode, ipage); f2fs_put_page(ipage, 1); + return 0; } int f2fs_recover_xattr_data(struct inode *inode, struct page *page) @@ -2553,7 +2710,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) goto recover_xnid; /* 1: invalidate the previous xattr nid */ - err = f2fs_get_node_info(sbi, prev_xnid, &ni); + err = f2fs_get_node_info(sbi, prev_xnid, &ni, false); if (err) return err; @@ -2593,7 +2750,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) struct page *ipage; int err; - err = f2fs_get_node_info(sbi, ino, &old_ni); + err = f2fs_get_node_info(sbi, ino, &old_ni, false); if (err) return err; @@ -2602,7 +2759,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) retry: ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); if (!ipage) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_NOFS); goto retry; } @@ -2617,7 +2774,7 @@ retry: src = F2FS_INODE(page); dst = F2FS_INODE(ipage); - memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src); + memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); dst->i_size = 0; dst->i_blocks = cpu_to_le64(1); dst->i_links = cpu_to_le32(1); @@ -2670,7 +2827,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, sum_entry = &sum->entries[0]; for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { - nrpages = min(last_offset - i, BIO_MAX_PAGES); + nrpages = bio_max_segs(last_offset - i); /* readahead node pages */ f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true); @@ -2708,11 +2865,14 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) struct f2fs_nat_entry raw_ne; nid_t nid = le32_to_cpu(nid_in_journal(journal, i)); + if (f2fs_check_nid_range(sbi, nid)) + continue; + raw_ne = nat_in_journal(journal, i); ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - ne = __alloc_nat_entry(nid, true); + ne = __alloc_nat_entry(sbi, nid, true); __init_nat_entry(nm_i, ne, &raw_ne, true); } @@ -2752,7 +2912,23 @@ add_out: list_add_tail(&nes->set_list, head); } -static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, +static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs, + unsigned int valid) +{ + if (valid == 0) { + __set_bit_le(nat_ofs, nm_i->empty_nat_bits); + __clear_bit_le(nat_ofs, nm_i->full_nat_bits); + return; + } + + __clear_bit_le(nat_ofs, nm_i->empty_nat_bits); + if (valid == NAT_ENTRY_PER_BLOCK) + __set_bit_le(nat_ofs, nm_i->full_nat_bits); + else + __clear_bit_le(nat_ofs, nm_i->full_nat_bits); +} + +static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, struct page *page) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -2761,7 +2937,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, int valid = 0; int i = 0; - if (!enabled_nat_bits(sbi, NULL)) + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) return; if (nat_index == 0) { @@ -2772,17 +2948,36 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR) valid++; } - if (valid == 0) { - __set_bit_le(nat_index, nm_i->empty_nat_bits); - __clear_bit_le(nat_index, nm_i->full_nat_bits); - return; + + __update_nat_bits(nm_i, nat_index, valid); +} + +void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_ofs; + + f2fs_down_read(&nm_i->nat_tree_lock); + + for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) { + unsigned int valid = 0, nid_ofs = 0; + + /* handle nid zero due to it should never be used */ + if (unlikely(nat_ofs == 0)) { + valid = 1; + nid_ofs = 1; + } + + for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) { + if (!test_bit_le(nid_ofs, + nm_i->free_nid_bitmap[nat_ofs])) + valid++; + } + + __update_nat_bits(nm_i, nat_ofs, valid); } - __clear_bit_le(nat_index, nm_i->empty_nat_bits); - if (valid == NAT_ENTRY_PER_BLOCK) - __set_bit_le(nat_index, nm_i->full_nat_bits); - else - __clear_bit_le(nat_index, nm_i->full_nat_bits); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, @@ -2801,7 +2996,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, * #1, flush nat entries to journal in current hot data summary block. * #2, flush nat entries to nat page. */ - if (enabled_nat_bits(sbi, cpc) || + if ((cpc->reason & CP_UMOUNT) || !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) to_journal = false; @@ -2848,7 +3043,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { up_write(&curseg->journal_rwsem); } else { - __update_nat_bits(sbi, start_nid, page); + update_nat_bits(sbi, start_nid, page); f2fs_put_page(page, 1); } @@ -2875,30 +3070,35 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) LIST_HEAD(sets); int err = 0; - /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ - if (enabled_nat_bits(sbi, cpc)) { - down_write(&nm_i->nat_tree_lock); + /* + * during unmount, let's flush nat_bits before checking + * nat_cnt[DIRTY_NAT]. + */ + if (cpc->reason & CP_UMOUNT) { + f2fs_down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } - if (!nm_i->dirty_nat_cnt) + if (!nm_i->nat_cnt[DIRTY_NAT]) return 0; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them * into nat entry set. */ - if (enabled_nat_bits(sbi, cpc) || - !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) + if (cpc->reason & CP_UMOUNT || + !__has_cursum_space(journal, + nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL)) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, set_idx, SETVEC_SIZE, setvec))) { unsigned idx; + set_idx = setvec[found - 1]->set + 1; for (idx = 0; idx < found; idx++) __adjust_nat_entry_set(setvec[idx], &sets, @@ -2912,7 +3112,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ return err; @@ -2927,15 +3127,18 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) __u64 cp_ver = cur_cp_version(ckpt); block_t nat_bits_addr; - if (!enabled_nat_bits(sbi, NULL)) - return 0; - nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); - nm_i->nat_bits = f2fs_kzalloc(sbi, + nm_i->nat_bits = f2fs_kvzalloc(sbi, nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; + nm_i->full_nat_bits = nm_i->nat_bits + 8; + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; + + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) + return 0; + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) { @@ -2952,13 +3155,12 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) cp_ver |= (cur_cp_crc(ckpt) << 32); if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) { - disable_nat_bits(sbi, true); + clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG); + f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)", + cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits)); return 0; } - nm_i->full_nat_bits = nm_i->nat_bits + 8; - nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; - f2fs_notice(sbi, "Found nat_bits in checkpoint"); return 0; } @@ -2969,7 +3171,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) unsigned int i = 0; nid_t nid, last_nid; - if (!enabled_nat_bits(sbi, NULL)) + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) return; for (i = 0; i < nm_i->nat_blocks; i++) { @@ -3017,10 +3219,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi) F2FS_RESERVED_NODE_NUM; nm_i->nid_cnt[FREE_NID] = 0; nm_i->nid_cnt[PREALLOC_NID] = 0; - nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; + nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); @@ -3031,14 +3233,11 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->nid_list_lock); - init_rwsem(&nm_i->nat_tree_lock); + init_f2fs_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); - if (!version_bitmap) - return -EFAULT; - nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, GFP_KERNEL); if (!nm_i->nat_bitmap) @@ -3064,9 +3263,9 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) int i; nm_i->free_nid_bitmap = - f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *), - nm_i->nat_blocks), - GFP_KERNEL); + f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *), + nm_i->nat_blocks), + GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; @@ -3140,7 +3339,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -3154,7 +3353,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } } - f2fs_bug_on(sbi, nm_i->nat_cnt); + f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]); /* destroy nat set cache */ nid = 0; @@ -3170,7 +3369,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); if (nm_i->free_nid_bitmap) { @@ -3188,27 +3387,27 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) kvfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; - kvfree(nm_i); + kfree(nm_i); } int __init f2fs_create_node_manager_caches(void) { - nat_entry_slab = f2fs_kmem_cache_create("nat_entry", + nat_entry_slab = f2fs_kmem_cache_create("f2fs_nat_entry", sizeof(struct nat_entry)); if (!nat_entry_slab) goto fail; - free_nid_slab = f2fs_kmem_cache_create("free_nid", + free_nid_slab = f2fs_kmem_cache_create("f2fs_free_nid", sizeof(struct free_nid)); if (!free_nid_slab) goto destroy_nat_entry; - nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", + nat_entry_set_slab = f2fs_kmem_cache_create("f2fs_nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) goto destroy_free_nid; - fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry", + fsync_node_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_node_entry", sizeof(struct fsync_node_entry)); if (!fsync_node_entry_slab) goto destroy_nat_entry_set; |