From e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae Mon Sep 17 00:00:00 2001 From: Tim Murray Date: Fri, 7 Jan 2022 12:48:44 -0800 Subject: f2fs: move f2fs to use reader-unfair rwsems f2fs rw_semaphores work better if writers can starve readers, especially for the checkpoint thread, because writers are strictly more important than reader threads. This prevents significant priority inversion between low-priority readers that blocked while trying to acquire the read lock and a second acquisition of the write lock that might be blocking high priority work. Signed-off-by: Tim Murray Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 34 ++++++++-------- fs/f2fs/compress.c | 6 +-- fs/f2fs/data.c | 50 +++++++++++------------ fs/f2fs/dir.c | 12 +++--- fs/f2fs/f2fs.h | 110 ++++++++++++++++++++++++++++++++++++++++---------- fs/f2fs/file.c | 112 +++++++++++++++++++++++++-------------------------- fs/f2fs/gc.c | 46 ++++++++++----------- fs/f2fs/inline.c | 4 +- fs/f2fs/namei.c | 34 ++++++++-------- fs/f2fs/node.c | 84 +++++++++++++++++++------------------- fs/f2fs/recovery.c | 4 +- fs/f2fs/segment.c | 44 ++++++++++---------- fs/f2fs/super.c | 56 +++++++++++++------------- fs/f2fs/sysfs.c | 4 +- fs/f2fs/verity.c | 4 +- fs/f2fs/xattr.c | 12 +++--- 16 files changed, 342 insertions(+), 274 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 982f0170639f..deeda95688f0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -351,13 +351,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* if locked failed, cp will flush dirty pages instead */ - if (!down_write_trylock(&sbi->cp_global_sem)) + if (!f2fs_down_write_trylock(&sbi->cp_global_sem)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -1159,7 +1159,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi) if (!is_journalled_quota(sbi)) return false; - if (!down_write_trylock(&sbi->quota_sem)) + if (!f2fs_down_write_trylock(&sbi->quota_sem)) return true; if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { ret = false; @@ -1171,7 +1171,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi) } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) { ret = true; } - up_write(&sbi->quota_sem); + f2fs_up_write(&sbi->quota_sem); return ret; } @@ -1228,10 +1228,10 @@ retry_flush_dents: * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. inode->i_blocks can be updated. 
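For context while reading the mechanical conversions below: the new primitive is a plain rw_semaphore paired with a waitqueue on which would-be readers park. Condensed from the f2fs.h hunk later in this patch, with explanatory comments added (the comments are not part of the patch), the core looks roughly like this:

	struct f2fs_rwsem {
		struct rw_semaphore internal_rwsem;	/* does the actual locking */
		wait_queue_head_t read_waiters;		/* parking lot for readers */
	};

	static inline void f2fs_down_read(struct f2fs_rwsem *sem)
	{
		/*
		 * Readers only ever use the trylock, so they are never queued on
		 * internal_rwsem; once a writer is waiting, new readers fail the
		 * trylock and sleep here instead of getting in the writer's way.
		 */
		wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem));
	}

	static inline void f2fs_up_write(struct f2fs_rwsem *sem)
	{
		up_write(&sem->internal_rwsem);
		/* Releasing the write lock is what lets parked readers retry. */
		wake_up_all(&sem->read_waiters);
	}
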
*/ - down_write(&sbi->node_change); + f2fs_down_write(&sbi->node_change); if (get_pages(sbi, F2FS_DIRTY_IMETA)) { - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); f2fs_unlock_all(sbi); err = f2fs_sync_inode_meta(sbi); if (err) @@ -1241,15 +1241,15 @@ retry_flush_dents: } retry_flush_nodes: - down_write(&sbi->node_write); + f2fs_down_write(&sbi->node_write); if (get_pages(sbi, F2FS_DIRTY_NODES)) { - up_write(&sbi->node_write); + f2fs_up_write(&sbi->node_write); atomic_inc(&sbi->wb_sync_req[NODE]); err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); atomic_dec(&sbi->wb_sync_req[NODE]); if (err) { - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); f2fs_unlock_all(sbi); return err; } @@ -1262,13 +1262,13 @@ retry_flush_nodes: * dirty node blocks and some checkpoint values by block allocation. */ __prepare_cp_block(sbi); - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); return err; } static void unblock_operations(struct f2fs_sb_info *sbi) { - up_write(&sbi->node_write); + f2fs_up_write(&sbi->node_write); f2fs_unlock_all(sbi); } @@ -1612,7 +1612,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_warn(sbi, "Start checkpoint disabled!"); } if (cpc->reason != CP_RESIZE) - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->cp_global_sem); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || @@ -1693,7 +1693,7 @@ stop: trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: if (cpc->reason != CP_RESIZE) - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); return err; } @@ -1741,9 +1741,9 @@ static int __write_checkpoint_sync(struct f2fs_sb_info *sbi) struct cp_control cpc = { .reason = CP_SYNC, }; int err; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); return err; } @@ -1831,9 +1831,9 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi) if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) { int ret; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); ret = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); return ret; } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d0c3aeba5945..67bac2792e57 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1267,7 +1267,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, * checkpoint. This can only happen to quota writes which can cause * the below discard race condition. 
*/ - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); } else if (!f2fs_trylock_op(sbi)) { goto out_free; } @@ -1384,7 +1384,7 @@ unlock_continue: f2fs_put_dnode(&dn); if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); else f2fs_unlock_op(sbi); @@ -1410,7 +1410,7 @@ out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); else f2fs_unlock_op(sbi); out_free: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c417864c66a..0f124e8de1d4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -590,7 +590,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - down_write(&io->io_rwsem); + f2fs_down_write(&io->io_rwsem); /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { @@ -601,7 +601,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); - up_write(&io->io_rwsem); + f2fs_up_write(&io->io_rwsem); } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, @@ -616,9 +616,9 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - down_read(&io->io_rwsem); + f2fs_down_read(&io->io_rwsem); ret = __has_merged_page(io->bio, inode, page, ino); - up_read(&io->io_rwsem); + f2fs_up_read(&io->io_rwsem); } if (ret) __f2fs_submit_merged_write(sbi, type, temp); @@ -742,9 +742,9 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) f2fs_bug_on(sbi, 1); - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_add_tail(&be->list, &io->bio_list); - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } static void del_bio_entry(struct bio_entry *be) @@ -766,7 +766,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct list_head *head = &io->bio_list; struct bio_entry *be; - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (be->bio != *bio) continue; @@ -790,7 +790,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, __submit_bio(sbi, *bio, DATA); break; } - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } if (ret) { @@ -816,7 +816,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, if (list_empty(head)) continue; - down_read(&io->bio_list_lock); + f2fs_down_read(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (target) found = (target == be->bio); @@ -826,14 +826,14 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, if (found) break; } - up_read(&io->bio_list_lock); + f2fs_up_read(&io->bio_list_lock); if (!found) continue; found = false; - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (target) found = (target == be->bio); @@ -846,7 +846,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, break; } } - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } if (found) @@ -906,7 +906,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) f2fs_bug_on(sbi, is_read_io(fio->op)); - down_write(&io->io_rwsem); + f2fs_down_write(&io->io_rwsem); next: if (fio->in_list) { 
spin_lock(&io->io_lock); @@ -973,7 +973,7 @@ out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); - up_write(&io->io_rwsem); + f2fs_up_write(&io->io_rwsem); } static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, @@ -1383,9 +1383,9 @@ void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) - down_read(&sbi->node_change); + f2fs_down_read(&sbi->node_change); else - up_read(&sbi->node_change); + f2fs_up_read(&sbi->node_change); } else { if (lock) f2fs_lock_op(sbi); @@ -2749,13 +2749,13 @@ write: * the below discard race condition. */ if (IS_NOQUOTA(inode)) - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); goto done; } @@ -3213,14 +3213,14 @@ void f2fs_write_failed(struct inode *inode, loff_t to) /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ if (to > i_size && !f2fs_verity_in_progress(inode)) { - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_pagecache(inode, i_size); f2fs_truncate_blocks(inode, i_size, true); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -3721,13 +3721,13 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, unsigned int end_sec = secidx + blkcnt / blk_per_sec; int ret = 0; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); for (; secidx < end_sec; secidx++) { - down_write(&sbi->pin_sem); + f2fs_down_write(&sbi->pin_sem); f2fs_lock_op(sbi); f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); @@ -3741,7 +3741,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, page = f2fs_get_lock_data_page(inode, blkidx, true); if (IS_ERR(page)) { - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); ret = PTR_ERR(page); goto done; } @@ -3754,7 +3754,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, ret = filemap_fdatawrite(inode->i_mapping); - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); if (ret) break; @@ -3765,7 +3765,7 @@ done: clear_inode_flag(inode, FI_ALIGNED_WRITE); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1820e9c106f7..011df7058c42 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -766,7 +766,7 @@ add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); if (inode) { - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -793,7 +793,7 @@ add_dentry: f2fs_update_parent_metadata(dir, inode, current_depth); fail: if (inode) - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); f2fs_put_page(dentry_page, 1); @@ -858,7 +858,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) struct page *page; int err = 0; - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = 
f2fs_init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -869,7 +869,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) clear_inode_flag(inode, FI_NEW_INODE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); return err; } @@ -877,7 +877,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, false); @@ -888,7 +888,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) f2fs_i_links_write(inode, false); f2fs_i_size_write(inode, 0); } - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); if (inode->i_nlink == 0) f2fs_add_orphan_inode(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb22fa91c2b2..8178a9152e49 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -123,6 +123,18 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +/* + * An implementation of an rwsem that is explicitly unfair to readers. This + * prevents priority inversion when a low-priority reader acquires the read lock + * while sleeping on the write lock but the write lock is needed by + * higher-priority clients. + */ + +struct f2fs_rwsem { + struct rw_semaphore internal_rwsem; + wait_queue_head_t read_waiters; +}; + struct f2fs_mount_info { unsigned int opt; int write_io_size_bits; /* Write IO size bits */ @@ -752,7 +764,7 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ - struct rw_semaphore i_sem; /* protect fi info */ + struct f2fs_rwsem i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ @@ -777,8 +789,8 @@ struct f2fs_inode_info { struct extent_tree *extent_tree; /* cached extent_tree entry */ /* avoid racing between foreground op and gc */ - struct rw_semaphore i_gc_rwsem[2]; - struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */ + struct f2fs_rwsem i_gc_rwsem[2]; + struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */ int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ @@ -904,7 +916,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat entry tree */ + struct f2fs_rwsem nat_tree_lock; /* protect nat entry tree */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ @@ -1017,7 +1029,7 @@ struct f2fs_sm_info { struct dirty_seglist_info *dirty_info; /* dirty segment information */ struct curseg_info *curseg_array; /* active segment information */ - struct rw_semaphore curseg_lock; /* for preventing curseg change */ + struct f2fs_rwsem curseg_lock; /* for preventing curseg change */ block_t seg0_blkaddr; /* block address of 0'th segment */ block_t main_blkaddr; /* start block address of main area */ @@ -1201,11 +1213,11 @@ struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; 
/* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ - struct rw_semaphore io_rwsem; /* blocking op for bio */ + struct f2fs_rwsem io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ struct list_head bio_list; /* bio entry list head */ - struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */ + struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) @@ -1571,7 +1583,7 @@ struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ - struct rw_semaphore sb_lock; /* lock for raw super block */ + struct f2fs_rwsem sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ @@ -1591,7 +1603,7 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ /* keep migration IO order for LFS mode */ - struct rw_semaphore io_order_lock; + struct f2fs_rwsem io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ @@ -1599,10 +1611,10 @@ struct f2fs_sb_info { int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ - struct rw_semaphore cp_global_sem; /* checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ - struct rw_semaphore node_write; /* locking node writes */ - struct rw_semaphore node_change; /* locking node change */ + struct f2fs_rwsem cp_global_sem; /* checkpoint procedure lock */ + struct f2fs_rwsem cp_rwsem; /* blocking FS operations */ + struct f2fs_rwsem node_write; /* locking node writes */ + struct f2fs_rwsem node_change; /* locking node change */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ @@ -1662,7 +1674,7 @@ struct f2fs_sb_info { block_t unusable_block_count; /* # of blocks saved by last cp */ unsigned int nquota_files; /* # of quota sysfile */ - struct rw_semaphore quota_sem; /* blocking cp for flags */ + struct f2fs_rwsem quota_sem; /* blocking cp for flags */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; @@ -1678,7 +1690,7 @@ struct f2fs_sb_info { struct f2fs_mount_info mount_opt; /* mount options */ /* for cleaning operations */ - struct rw_semaphore gc_lock; /* + struct f2fs_rwsem gc_lock; /* * semaphore for GC, avoid * race between GC and GC or CP */ @@ -1698,7 +1710,7 @@ struct f2fs_sb_info { /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; - struct rw_semaphore pin_sem; + struct f2fs_rwsem pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -2092,9 +2104,65 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +static inline void init_f2fs_rwsem(struct f2fs_rwsem *sem) +{ + init_rwsem(&sem->internal_rwsem); + init_waitqueue_head(&sem->read_waiters); +} + +static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem) +{ + return rwsem_is_locked(&sem->internal_rwsem); +} + +static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem) +{ + return 
rwsem_is_contended(&sem->internal_rwsem); +} + +static inline void f2fs_down_read(struct f2fs_rwsem *sem) +{ + wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem)); +} + +static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem) +{ + return down_read_trylock(&sem->internal_rwsem); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_read_nested(&sem->internal_rwsem, subclass); +} +#else +#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) +#endif + +static inline void f2fs_up_read(struct f2fs_rwsem *sem) +{ + up_read(&sem->internal_rwsem); +} + +static inline void f2fs_down_write(struct f2fs_rwsem *sem) +{ + down_write(&sem->internal_rwsem); +} + +static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) +{ + return down_write_trylock(&sem->internal_rwsem); +} + +static inline void f2fs_up_write(struct f2fs_rwsem *sem) +{ + up_write(&sem->internal_rwsem); + wake_up_all(&sem->read_waiters); +} + static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + f2fs_down_read(&sbi->cp_rwsem); } static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) @@ -2103,22 +2171,22 @@ static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) f2fs_show_injection_info(sbi, FAULT_LOCK_OP); return 0; } - return down_read_trylock(&sbi->cp_rwsem); + return f2fs_down_read_trylock(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + f2fs_up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write(&sbi->cp_rwsem); + f2fs_down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - up_write(&sbi->cp_rwsem); + f2fs_up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3c98ef6af97d..6ccdd6e347e2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -237,13 +237,13 @@ static void try_to_fix_pino(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); nid_t pino; - down_write(&fi->i_sem); + f2fs_down_write(&fi->i_sem); if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { f2fs_i_pino_write(inode, pino); file_got_pino(inode); } - up_write(&fi->i_sem); + f2fs_up_write(&fi->i_sem); } static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, @@ -318,9 +318,9 @@ go_write: * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ - down_read(&F2FS_I(inode)->i_sem); + f2fs_down_read(&F2FS_I(inode)->i_sem); cp_reason = need_do_checkpoint(inode); - up_read(&F2FS_I(inode)->i_sem); + f2fs_up_read(&F2FS_I(inode)->i_sem); if (cp_reason) { /* all the dirty node pages should be flushed for POR */ @@ -958,7 +958,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, return err; } - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_setsize(inode, attr->ia_size); @@ -970,7 +970,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, * larger than i_size. 
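The heaviest user of the wrapper is the f2fs_lock_op()/f2fs_lock_all() pair just above: ordinary filesystem operations take sbi->cp_rwsem as readers, while the checkpoint path takes it as the writer. A minimal sketch of that calling convention — example_op() is a made-up caller for illustration, not something in this patch; only the locking helpers are real:

	static int example_op(struct f2fs_sb_info *sbi)
	{
		f2fs_lock_op(sbi);	/* f2fs_down_read(&sbi->cp_rwsem) */

		/* ... dirty node/dentry pages, allocate blocks ... */

		f2fs_unlock_op(sbi);	/* f2fs_up_read(): no wake-up needed on the read side */
		return 0;
	}

	/*
	 * Checkpoint side: f2fs_lock_all() is f2fs_down_write(&sbi->cp_rwsem),
	 * so once the checkpoint thread starts waiting for the write lock, new
	 * f2fs_lock_op() callers park on read_waiters instead of slipping in
	 * ahead of it, and f2fs_unlock_all() then releases them in one batch.
	 */
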
*/ filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (err) return err; @@ -1112,7 +1112,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_pagecache_range(inode, blk_start, blk_end - 1); @@ -1122,7 +1122,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) f2fs_unlock_op(sbi); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -1355,7 +1355,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); f2fs_lock_op(sbi); @@ -1365,7 +1365,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_unlock_op(sbi); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } @@ -1500,7 +1500,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, unsigned int end_offset; pgoff_t end; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); truncate_pagecache_range(inode, @@ -1514,7 +1514,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) { f2fs_unlock_op(sbi); filemap_invalidate_unlock(mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -1526,7 +1526,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, f2fs_unlock_op(sbi); filemap_invalidate_unlock(mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_balance_fs(sbi, dn.node_changed); @@ -1600,7 +1600,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); truncate_pagecache(inode, offset); @@ -1618,7 +1618,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_unlock_op(sbi); } filemap_invalidate_unlock(mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* write out all moved pages, if possible */ filemap_invalidate_lock(mapping); @@ -1674,13 +1674,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset, next_alloc: if (has_not_enough_free_secs(sbi, 0, GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err && err != -ENODATA && err != -EAGAIN) goto out_err; } - down_write(&sbi->pin_sem); + f2fs_down_write(&sbi->pin_sem); f2fs_lock_op(sbi); f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); @@ -1690,7 +1690,7 @@ next_alloc: err 
= f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); file_dont_truncate(inode); - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); expanded += map.m_len; sec_len -= map.m_len; @@ -2020,7 +2020,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (ret) goto out; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* * Should wait end_io to count F2FS_WB_CP_DATA correctly by @@ -2031,7 +2031,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -2044,7 +2044,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) /* add inode in inmem_list first and set atomic_file */ set_inode_flag(inode, FI_ATOMIC_FILE); clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; @@ -2351,7 +2351,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) return err; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) goto got_it; @@ -2370,7 +2370,7 @@ got_it: 16)) err = -EFAULT; out_err: - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); mnt_drop_write_file(filp); return err; } @@ -2447,12 +2447,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) return ret; if (!sync) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO); @@ -2483,12 +2483,12 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) do_more: if (!range->sync) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, range->sync, true, false, @@ -2820,10 +2820,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_balance_fs(sbi, true); - down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); if (src != dst) { ret = -EBUSY; - if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) + if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) goto out_src; } @@ -2841,9 +2841,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_unlock_op(sbi); if (src != dst) - up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); out_src: - up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); out_unlock: if (src != dst) inode_unlock(dst); @@ -2938,7 +2938,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) end_segno = min(start_segno + range.segments, dev_end_segno); while (start_segno < end_segno) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } @@ -3215,9 +3215,9 @@ int f2fs_precache_extents(struct inode *inode) while (map.m_lblk < end) { map.m_len = end - map.m_lblk; - down_write(&fi->i_gc_rwsem[WRITE]); + 
f2fs_down_write(&fi->i_gc_rwsem[WRITE]); err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); - up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; @@ -3294,11 +3294,11 @@ static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) if (!vbuf) return -ENOMEM; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); count = utf16s_to_utf8s(sbi->raw_super->volume_name, ARRAY_SIZE(sbi->raw_super->volume_name), UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); if (copy_to_user((char __user *)arg, vbuf, min(FSLABEL_MAX, count))) @@ -3326,7 +3326,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) if (err) goto out; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); memset(sbi->raw_super->volume_name, 0, sizeof(sbi->raw_super->volume_name)); @@ -3336,7 +3336,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) err = f2fs_commit_super(sbi, false); - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); mnt_drop_write_file(filp); out: @@ -3462,7 +3462,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) goto out; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3499,7 +3499,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); out: inode_unlock(inode); @@ -3615,7 +3615,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3652,7 +3652,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (ret >= 0) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); @@ -3770,7 +3770,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) if (ret) goto err; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); ret = filemap_write_and_wait_range(mapping, range.start, @@ -3859,7 +3859,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) prev_block, len, range.flags); out: filemap_invalidate_unlock(mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); err: inode_unlock(inode); file_end_write(filp); @@ -4291,12 +4291,12 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) trace_f2fs_direct_IO_enter(inode, iocb, count, READ); if (iocb->ki_flags & IOCB_NOWAIT) { - if (!down_read_trylock(&fi->i_gc_rwsem[READ])) { + if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { ret = -EAGAIN; goto out; } } else { - down_read(&fi->i_gc_rwsem[READ]); + f2fs_down_read(&fi->i_gc_rwsem[READ]); } /* @@ -4315,7 +4315,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) ret = iomap_dio_complete(dio); } - 
up_read(&fi->i_gc_rwsem[READ]); + f2fs_up_read(&fi->i_gc_rwsem[READ]); file_accessed(file); out: @@ -4497,12 +4497,12 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, goto out; } - if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) { + if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { ret = -EAGAIN; goto out; } - if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { - up_read(&fi->i_gc_rwsem[WRITE]); + if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { + f2fs_up_read(&fi->i_gc_rwsem[WRITE]); ret = -EAGAIN; goto out; } @@ -4511,9 +4511,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, if (ret) goto out; - down_read(&fi->i_gc_rwsem[WRITE]); + f2fs_down_read(&fi->i_gc_rwsem[WRITE]); if (do_opu) - down_read(&fi->i_gc_rwsem[READ]); + f2fs_down_read(&fi->i_gc_rwsem[READ]); } if (whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; @@ -4542,8 +4542,8 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, if (whint_mode == WHINT_MODE_OFF) iocb->ki_hint = hint; if (do_opu) - up_read(&fi->i_gc_rwsem[READ]); - up_read(&fi->i_gc_rwsem[WRITE]); + f2fs_up_read(&fi->i_gc_rwsem[READ]); + f2fs_up_read(&fi->i_gc_rwsem[WRITE]); if (ret < 0) goto out; @@ -4644,12 +4644,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't leave any preallocated blocks around past i_size. */ if (preallocated && i_size_read(inode) < target_size) { - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); if (!f2fs_truncate(inode)) file_dont_truncate(inode); filemap_invalidate_unlock(inode->i_mapping); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } else { file_dont_truncate(inode); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ee308a8de432..0a6b0a8ae97e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -105,21 +105,21 @@ static int gc_thread_func(void *data) spin_unlock(&sbi->gc_urgent_high_lock); wait_ms = gc_th->urgent_sleep_time; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); goto do_gc; } if (foreground) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); goto do_gc; - } else if (!down_write_trylock(&sbi->gc_lock)) { + } else if (!f2fs_down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); stat_io_skip_bggc_count(sbi); goto next; } @@ -1230,7 +1230,7 @@ static int move_data_block(struct inode *inode, block_t bidx, fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; if (lfs_mode) - down_write(&fio.sbi->io_order_lock); + f2fs_down_write(&fio.sbi->io_order_lock); mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), fio.old_blkaddr, false); @@ -1316,7 +1316,7 @@ recover_block: true, true, true); up_out: if (lfs_mode) - up_write(&fio.sbi->io_order_lock); + f2fs_up_write(&fio.sbi->io_order_lock); put_out: f2fs_put_dnode(&dn); out: @@ -1475,7 +1475,7 @@ next_step: special_file(inode->i_mode)) continue; - if (!down_write_trylock( + if (!f2fs_down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { iput(inode); sbi->skipped_gc_rwsem++; @@ -1488,7 +1488,7 @@ next_step: if (f2fs_post_read_required(inode)) { int err = ra_data_block(inode, start_bidx); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if 
(err) { iput(inode); continue; @@ -1499,7 +1499,7 @@ next_step: data_page = f2fs_get_read_data_page(inode, start_bidx, REQ_RAHEAD, true); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); continue; @@ -1518,14 +1518,14 @@ next_step: int err; if (S_ISREG(inode->i_mode)) { - if (!down_write_trylock(&fi->i_gc_rwsem[READ])) { + if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[READ])) { sbi->skipped_gc_rwsem++; continue; } - if (!down_write_trylock( + if (!f2fs_down_write_trylock( &fi->i_gc_rwsem[WRITE])) { sbi->skipped_gc_rwsem++; - up_write(&fi->i_gc_rwsem[READ]); + f2fs_up_write(&fi->i_gc_rwsem[READ]); continue; } locked = true; @@ -1548,8 +1548,8 @@ next_step: submitted++; if (locked) { - up_write(&fi->i_gc_rwsem[WRITE]); - up_write(&fi->i_gc_rwsem[READ]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[READ]); } stat_inc_data_blk_count(sbi, 1, gc_type); @@ -1807,7 +1807,7 @@ stop: reserved_segments(sbi), prefree_segments(sbi)); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); put_gc_inode(&gc_list); @@ -1936,7 +1936,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) long long block_count; int segs = secs * sbi->segs_per_sec; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); section_count = le32_to_cpu(raw_sb->section_count); segment_count = le32_to_cpu(raw_sb->segment_count); @@ -1957,7 +1957,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) cpu_to_le32(dev_segs + segs); } - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) @@ -2031,7 +2031,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); /* stop other GC */ - if (!down_write_trylock(&sbi->gc_lock)) + if (!f2fs_down_write_trylock(&sbi->gc_lock)) return -EAGAIN; /* stop CP to protect MAIN_SEC in free_segment_range */ @@ -2051,15 +2051,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) out_unlock: f2fs_unlock_op(sbi); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); if (err) return err; set_sbi_flag(sbi, SBI_IS_RESIZEFS); freeze_super(sbi->sb); - down_write(&sbi->gc_lock); - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->cp_global_sem); spin_lock(&sbi->stat_lock); if (shrunk_blocks + valid_user_blocks(sbi) + @@ -2104,8 +2104,8 @@ recover_out: spin_unlock(&sbi->stat_lock); } out_err: - up_write(&sbi->cp_global_sem); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->gc_lock); thaw_super(sbi->sb); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); return err; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4b5cefa3f90c..a578bf83b803 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -629,7 +629,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, } if (inode) { - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, fname, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -658,7 +658,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, f2fs_update_parent_metadata(dir, inode, 0); fail: if (inode) - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); out: f2fs_put_page(ipage, 1); return err; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a728a0af9ce0..0347c5780910 
100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -196,7 +196,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; int i, cold_count, hot_count; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; @@ -206,7 +206,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * break; } - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); if (i == cold_count + hot_count) return; @@ -299,19 +299,19 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, (!ext_cnt && !noext_cnt)) return; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; for (i = cold_count; i < cold_count + hot_count; i++) { if (is_extension_exist(name, extlist[i], false)) { - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); return; } } - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); for (i = 0; i < noext_cnt; i++) { if (is_extension_exist(name, noext[i], false)) { @@ -1023,11 +1023,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, new_page = NULL; new_inode->i_ctime = current_time(new_inode); - down_write(&F2FS_I(new_inode)->i_sem); + f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (old_dir_entry) f2fs_i_links_write(new_inode, false); f2fs_i_links_write(new_inode, false); - up_write(&F2FS_I(new_inode)->i_sem); + f2fs_up_write(&F2FS_I(new_inode)->i_sem); if (!new_inode->i_nlink) f2fs_add_orphan_inode(new_inode); @@ -1048,13 +1048,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(new_dir, true); } - down_write(&F2FS_I(old_inode)->i_sem); + f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry || whiteout) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(old_inode, new_dir->i_ino); - up_write(&F2FS_I(old_inode)->i_sem); + f2fs_up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); @@ -1214,38 +1214,38 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, /* update directory entry info of old dir inode */ f2fs_set_link(old_dir, old_entry, old_page, new_inode); - down_write(&F2FS_I(old_inode)->i_sem); + f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(old_inode, new_dir->i_ino); - up_write(&F2FS_I(old_inode)->i_sem); + f2fs_up_write(&F2FS_I(old_inode)->i_sem); old_dir->i_ctime = current_time(old_dir); if (old_nlink) { - down_write(&F2FS_I(old_dir)->i_sem); + f2fs_down_write(&F2FS_I(old_dir)->i_sem); f2fs_i_links_write(old_dir, old_nlink > 0); - up_write(&F2FS_I(old_dir)->i_sem); + f2fs_up_write(&F2FS_I(old_dir)->i_sem); } f2fs_mark_inode_dirty_sync(old_dir, false); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); - down_write(&F2FS_I(new_inode)->i_sem); + f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (!new_dir_entry) file_lost_pino(new_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(new_inode, old_dir->i_ino); - up_write(&F2FS_I(new_inode)->i_sem); + f2fs_up_write(&F2FS_I(new_inode)->i_sem); new_dir->i_ctime = 
current_time(new_dir); if (new_nlink) { - down_write(&F2FS_I(new_dir)->i_sem); + f2fs_down_write(&F2FS_I(new_dir)->i_sem); f2fs_i_links_write(new_dir, new_nlink > 0); - up_write(&F2FS_I(new_dir)->i_sem); + f2fs_up_write(&F2FS_I(new_dir)->i_sem); } f2fs_mark_inode_dirty_sync(new_dir, false); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 50b2874e758c..93512f8859d5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -382,14 +382,14 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need; } @@ -399,11 +399,11 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -413,13 +413,13 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need_update; } @@ -431,14 +431,14 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct nat_entry *new, *e; /* Let's mitigate lock contention of nat_tree_lock during checkpoint */ - if (rwsem_is_locked(&sbi->cp_global_sem)) + if (f2fs_rwsem_is_locked(&sbi->cp_global_sem)) return; new = __alloc_nat_entry(sbi, nid, false); if (!new) return; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) e = __init_nat_entry(nm_i, new, ne, false); @@ -447,7 +447,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, nat_get_blkaddr(e) != le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); if (e != new) __free_nat_entry(new); } @@ -459,7 +459,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct nat_entry *e; struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = __init_nat_entry(nm_i, new, NULL, true); @@ -508,7 +508,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -516,7 +516,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - if (!down_write_trylock(&nm_i->nat_tree_lock)) + if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock)) return 0; spin_lock(&nm_i->nat_list_lock); @@ -538,7 +538,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int 
nr_shrink) } spin_unlock(&nm_i->nat_list_lock); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -560,13 +560,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ni->nid = nid; retry: /* Check nat cache */ - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return 0; } @@ -576,11 +576,11 @@ retry: * nat_tree_lock. Therefore, we should retry, if we failed to grab here * while not bothering checkpoint. */ - if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { + if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { down_read(&curseg->journal_rwsem); - } else if (rwsem_is_contended(&nm_i->nat_tree_lock) || + } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) || !down_read_trylock(&curseg->journal_rwsem)) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); goto retry; } @@ -589,15 +589,15 @@ retry: ne = nat_in_journal(journal, i); node_info_from_raw_nat(ni, &ne); } - up_read(&curseg->journal_rwsem); + up_read(&curseg->journal_rwsem); if (i >= 0) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); goto cache; } /* Fill node_info from nat page */ index = current_nat_addr(sbi, nid); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); page = f2fs_get_meta_page(sbi, index); if (IS_ERR(page)) @@ -1609,17 +1609,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, goto redirty_out; if (wbc->for_reclaim) { - if (!down_read_trylock(&sbi->node_write)) + if (!f2fs_down_read_trylock(&sbi->node_write)) goto redirty_out; } else { - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); } /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); unlock_page(page); return 0; } @@ -1627,7 +1627,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (__is_valid_data_blkaddr(ni.blk_addr) && !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); goto redirty_out; } @@ -1648,7 +1648,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); @@ -2225,14 +2225,14 @@ bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi) unsigned int i; bool ret = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) { ret = false; break; } } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return ret; } @@ -2415,7 +2415,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) unsigned int i, idx; nid_t nid; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) @@ -2438,7 +2438,7 @@ static void 
scan_free_nid_bits(struct f2fs_sb_info *sbi) out: scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, @@ -2473,7 +2473,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); while (1) { if (!test_bit_le(NAT_BLOCK_OFFSET(nid), @@ -2488,7 +2488,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, } if (ret) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } @@ -2508,7 +2508,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, /* find free nids from current sum_pages */ scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2953,7 +2953,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) { unsigned int valid = 0, nid_ofs = 0; @@ -2973,7 +2973,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) __update_nat_bits(nm_i, nat_ofs, valid); } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, @@ -3071,15 +3071,15 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * nat_cnt[DIRTY_NAT]. */ if (cpc->reason & CP_UMOUNT) { - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } if (!nm_i->nat_cnt[DIRTY_NAT]) return 0; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -3108,7 +3108,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ return err; @@ -3228,7 +3228,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->nid_list_lock); - init_rwsem(&nm_i->nat_tree_lock); + init_f2fs_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -3334,7 +3334,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -3364,7 +3364,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); if (nm_i->free_nid_bitmap) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9683c80ff8c2..10d152cfa58d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -796,7 +796,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) INIT_LIST_HEAD(&dir_list); /* 
prevent checkpoint */ - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->cp_global_sem); /* step #1: find fsynced inode numbers */ err = find_fsync_dnodes(sbi, &inode_list, check_only); @@ -845,7 +845,7 @@ skip: if (!err) clear_sbi_flag(sbi, SBI_POR_DOING); - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list, err); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1dabc8244083..216538b57331 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -471,7 +471,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) f2fs_balance_fs(sbi, true); - down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); f2fs_lock_op(sbi); set_inode_flag(inode, FI_ATOMIC_COMMIT); @@ -483,7 +483,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) clear_inode_flag(inode, FI_ATOMIC_COMMIT); f2fs_unlock_op(sbi); - up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); return err; } @@ -521,7 +521,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) io_schedule(); finish_wait(&sbi->gc_thread->fggc_wq, &wait); } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); f2fs_gc(sbi, false, false, false, NULL_SEGNO); } } @@ -529,7 +529,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi) { - int factor = rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2; + int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2; unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS); unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA); unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES); @@ -570,7 +570,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) /* there is background inflight IO or foreground operation recently */ if (is_inflight_io(sbi, REQ_TIME) || - (!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem))) + (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem))) return; /* exceed periodical checkpoint timeout threshold */ @@ -2821,7 +2821,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) if (!sbi->am.atgc_enabled) return; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); @@ -2831,7 +2831,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) up_write(&SIT_I(sbi)->sentry_lock); mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) @@ -2982,7 +2982,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); @@ -3006,7 +3006,7 @@ unlock: type, segno, curseg->segno); mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, @@ -3038,23 +3038,23 @@ static void __allocate_new_section(struct f2fs_sb_info *sbi, void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); 
__allocate_new_section(sbi, type, force); up_write(&SIT_I(sbi)->sentry_lock); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } static const struct segment_allocation default_salloc_ops = { @@ -3192,9 +3192,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (sbi->discard_blks == 0) goto out; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); if (err) goto out; @@ -3431,7 +3431,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); @@ -3514,7 +3514,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, @@ -3550,7 +3550,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); if (keep_order) - down_read(&fio->sbi->io_order_lock); + f2fs_down_read(&fio->sbi->io_order_lock); reallocate: f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio); @@ -3570,7 +3570,7 @@ reallocate: f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); if (keep_order) - up_read(&fio->sbi->io_order_lock); + f2fs_up_read(&fio->sbi->io_order_lock); } void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, @@ -3705,7 +3705,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, se = get_seg_entry(sbi, segno); type = se->type; - down_write(&SM_I(sbi)->curseg_lock); + f2fs_down_write(&SM_I(sbi)->curseg_lock); if (!recover_curseg) { /* for recovery flow */ @@ -3774,7 +3774,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); - up_write(&SM_I(sbi)->curseg_lock); + f2fs_up_write(&SM_I(sbi)->curseg_lock); } void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, @@ -5258,7 +5258,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sm_info->sit_entry_set); - init_rwsem(&sm_info->curseg_lock); + init_f2fs_rwsem(&sm_info->curseg_lock); if (!f2fs_readonly(sbi->sb)) { err = f2fs_create_flush_cmd_control(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 76e6a3df9aba..9af6c20532ec 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1355,16 +1355,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); - init_rwsem(&fi->i_sem); + init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); 
INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); - init_rwsem(&fi->i_gc_rwsem[READ]); - init_rwsem(&fi->i_gc_rwsem[WRITE]); - init_rwsem(&fi->i_xattr_sem); + init_f2fs_rwsem(&fi->i_gc_rwsem[READ]); + init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]); + init_f2fs_rwsem(&fi->i_xattr_sem); /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -2088,7 +2088,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; @@ -2110,7 +2110,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); err = f2fs_write_checkpoint(sbi, &cpc); @@ -2122,7 +2122,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) spin_unlock(&sbi->stat_lock); out_unlock: - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); restore_flag: sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; @@ -2142,12 +2142,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) if (unlikely(retry < 0)) f2fs_warn(sbi, "checkpoint=enable has some unwritten data."); - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); clear_sbi_flag(sbi, SBI_CP_DISABLED); set_sbi_flag(sbi, SBI_IS_DIRTY); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); f2fs_sync_fs(sbi->sb, 1); } @@ -2707,18 +2707,18 @@ int f2fs_quota_sync(struct super_block *sb, int type) /* * do_quotactl * f2fs_quota_sync - * down_read(quota_sem) + * f2fs_down_read(quota_sem) * dquot_writeback_dquots() * f2fs_dquot_commit * block_operation - * down_read(quota_sem) + * f2fs_down_read(quota_sem) */ f2fs_lock_op(sbi); - down_read(&sbi->quota_sem); + f2fs_down_read(&sbi->quota_sem); ret = f2fs_quota_sync_file(sbi, cnt); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); f2fs_unlock_op(sbi); inode_unlock(dqopt->files[cnt]); @@ -2843,11 +2843,11 @@ static int f2fs_dquot_commit(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); + f2fs_down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); ret = dquot_commit(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); return ret; } @@ -2856,11 +2856,11 @@ static int f2fs_dquot_acquire(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read(&sbi->quota_sem); + f2fs_down_read(&sbi->quota_sem); ret = dquot_acquire(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); return ret; } @@ -3601,14 +3601,14 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - init_rwsem(&sbi->io_order_lock); + init_f2fs_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); - init_rwsem(&sbi->sb_lock); - init_rwsem(&sbi->pin_sem); + init_f2fs_rwsem(&sbi->sb_lock); + init_f2fs_rwsem(&sbi->pin_sem); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -4067,11 +4067,11 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = 
valid_super_block; - init_rwsem(&sbi->gc_lock); + init_f2fs_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); - init_rwsem(&sbi->cp_global_sem); - init_rwsem(&sbi->node_write); - init_rwsem(&sbi->node_change); + init_f2fs_rwsem(&sbi->cp_global_sem); + init_f2fs_rwsem(&sbi->node_write); + init_f2fs_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); @@ -4092,18 +4092,18 @@ try_onemore: } for (j = HOT; j < n; j++) { - init_rwsem(&sbi->write_io[i][j].io_rwsem); + init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem); sbi->write_io[i][j].sbi = sbi; sbi->write_io[i][j].bio = NULL; spin_lock_init(&sbi->write_io[i][j].io_lock); INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); - init_rwsem(&sbi->write_io[i][j].bio_list_lock); + init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); } } - init_rwsem(&sbi->cp_rwsem); - init_rwsem(&sbi->quota_sem); + init_f2fs_rwsem(&sbi->cp_rwsem); + init_f2fs_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index df406c16b2eb..ce70e798d0d4 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -363,7 +363,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (!strlen(name) || strlen(name) >= F2FS_EXTENSION_LEN) return -EINVAL; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); ret = f2fs_update_extension_list(sbi, name, hot, set); if (ret) @@ -373,7 +373,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (ret) f2fs_update_extension_list(sbi, name, hot, !set); out: - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); return ret ? ret : count; } diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index fe5acdccaae1..3d793202cc9f 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -208,7 +208,7 @@ cleanup: * from re-instantiating cached pages we are truncating (since unlike * normal file accesses, garbage collection isn't limited by i_size). 
*/ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); truncate_inode_pages(inode->i_mapping, inode->i_size); err2 = f2fs_truncate(inode); if (err2) { @@ -216,7 +216,7 @@ cleanup: err2); set_sbi_flag(sbi, SBI_NEED_FSCK); } - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); return err ?: err2; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 8e5cd9c916ff..c76c15086e5f 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -525,10 +525,10 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - down_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, &entry, &base_addr, &base_size, &is_inline); - up_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -562,9 +562,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) int error; size_t rest = buffer_size; - down_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); error = read_all_xattrs(inode, NULL, &base_addr); - up_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -786,9 +786,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - down_write(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_write(&F2FS_I(inode)->i_xattr_sem); err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); - up_write(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_write(&F2FS_I(inode)->i_xattr_sem); f2fs_unlock_op(sbi); f2fs_update_time(sbi, REQ_TIME); -- cgit v1.2.3-59-g8ed1b From 7d19e3dab0002e527052b0aaf986e8c32e5537bf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Jan 2022 11:48:02 +0800 Subject: f2fs: fix to enable ATGC correctly via gc_idle sysfs interface It needs to assign sbi->gc_mode with GC_IDLE_AT rather than GC_AT when user tries to enable ATGC via gc_idle sysfs interface, fix it. Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection") Cc: Zhipeng Tan Signed-off-by: Jicheng Shao Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ce70e798d0d4..2bccdaedfb00 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -481,7 +481,7 @@ out: } else if (t == GC_IDLE_AT) { if (!sbi->am.atgc_enabled) return -EINVAL; - sbi->gc_mode = GC_AT; + sbi->gc_mode = GC_IDLE_AT; } else { sbi->gc_mode = GC_NORMAL; } -- cgit v1.2.3-59-g8ed1b From d2d8e896485a52554cea486816c171dc7240792e Mon Sep 17 00:00:00 2001 From: Konstantin Vyshetsky Date: Mon, 13 Dec 2021 17:12:03 -0800 Subject: f2fs: move discard parameters into discard_cmd_control This patch unifies parameters related to how often discard is issued and how many requests go out at the same time by placing them in discard_cmd_control. The move will allow the parameters to be modified in the future without relying on hard-coded values. 
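For illustration, this is the shape of the change in __init_discard_policy(): the DEF_* compile-time constants are replaced by per-superblock fields of discard_cmd_control. The field names below are taken from the patch that follows; the condensed helper itself is only a sketch, not part of the patch.

	/* Sketch: background discard policy now reads tunables from dcc */
	static void sketch_init_bg_policy(struct discard_policy *dpolicy,
					  struct discard_cmd_control *dcc)
	{
		dpolicy->max_requests = dcc->max_discard_request;	/* was DEF_MAX_DISCARD_REQUEST */
		dpolicy->min_interval = dcc->min_discard_issue_time;	/* was DEF_MIN_DISCARD_ISSUE_TIME */
		dpolicy->mid_interval = dcc->mid_discard_issue_time;	/* was DEF_MID_DISCARD_ISSUE_TIME */
		dpolicy->max_interval = dcc->max_discard_issue_time;	/* was DEF_MAX_DISCARD_ISSUE_TIME */
	}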
Signed-off-by: Konstantin Vyshetsky Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++++ fs/f2fs/segment.c | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8178a9152e49..63c90416364b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -398,6 +398,10 @@ struct discard_cmd_control { struct mutex cmd_lock; unsigned int nr_discards; /* # of discards in the list */ unsigned int max_discards; /* max. discards to be issued */ + unsigned int max_discard_request; /* max. discard request per round */ + unsigned int min_discard_issue_time; /* min. interval between discard issue */ + unsigned int mid_discard_issue_time; /* mid. interval between discard issue */ + unsigned int max_discard_issue_time; /* max. interval between discard issue */ unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 216538b57331..56211e201d51 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1156,14 +1156,14 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->ordered = false; dpolicy->granularity = granularity; - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->max_requests = dcc->max_discard_request; dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->timeout = false; if (discard_type == DPOLICY_BG) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->min_interval = dcc->min_discard_issue_time; + dpolicy->mid_interval = dcc->mid_discard_issue_time; + dpolicy->max_interval = dcc->max_discard_issue_time; dpolicy->io_aware = true; dpolicy->sync = false; dpolicy->ordered = true; @@ -1171,12 +1171,12 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->granularity = 1; if (atomic_read(&dcc->discard_cmd_cnt)) dpolicy->max_interval = - DEF_MIN_DISCARD_ISSUE_TIME; + dcc->min_discard_issue_time; } } else if (discard_type == DPOLICY_FORCE) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->min_interval = dcc->min_discard_issue_time; + dpolicy->mid_interval = dcc->mid_discard_issue_time; + dpolicy->max_interval = dcc->max_discard_issue_time; dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; @@ -1781,7 +1781,7 @@ static int issue_discard_thread(void *data) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; struct discard_policy dpolicy; - unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + unsigned int wait_ms = dcc->min_discard_issue_time; int issued; set_freezable(); @@ -2180,6 +2180,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST; + dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; + dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; + dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME; dcc->undiscard_blks = 0; dcc->next_pos = 0; dcc->root = RB_ROOT_CACHED; -- cgit v1.2.3-59-g8ed1b From b2e4a2b300e5e2042e8d92ec16fc124222b7ecc9 Mon Sep 17 00:00:00 2001 
From: Konstantin Vyshetsky Date: Mon, 13 Dec 2021 17:12:43 -0800 Subject: f2fs: expose discard related parameters in sysfs This patch exposes max_discard_request, min_discard_issue_time, mid_discard_issue_time, and max_discard_issue_time in sysfs. This will allow the user to fine tune discard operations. Signed-off-by: Konstantin Vyshetsky Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 27 +++++++++++++++++++++++++++ fs/f2fs/sysfs.c | 8 ++++++++ 2 files changed, 35 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 2416b03ff283..87d3884c90ea 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -98,6 +98,33 @@ Description: Controls the issue rate of discard commands that consist of small checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. +What: /sys/fs/f2fs//max_discard_request +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the number of discards a thread will issue at a time. + Higher number will allow the discard thread to finish its work + faster, at the cost of higher latency for incomming I/O. + +What: /sys/fs/f2fs//min_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait between + issuing discard requests when there are discards to be issued and + no I/O aware interruptions occur. + +What: /sys/fs/f2fs//mid_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait between + issuing discard requests when there are discards to be issued and + an I/O aware interruption occurs. + +What: /sys/fs/f2fs//max_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait when there are + no discard operations to be issued. 
+ What: /sys/fs/f2fs//discard_granularity Date: July 2017 Contact: "Chao Yu" diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 2bccdaedfb00..281bc0133ee6 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -716,6 +716,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_request); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); @@ -832,6 +836,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(reclaim_segments), ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), + ATTR_LIST(max_discard_request), + ATTR_LIST(min_discard_issue_time), + ATTR_LIST(mid_discard_issue_time), + ATTR_LIST(max_discard_issue_time), ATTR_LIST(discard_granularity), ATTR_LIST(pending_discard), ATTR_LIST(batched_trim_sections), -- cgit v1.2.3-59-g8ed1b From 6d18762ed5cd549fde74fd0e05d4d87bac5a3beb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 11:21:14 +0800 Subject: f2fs: fix to unlock page correctly in error path of is_alive() As Pavel Machek reported in below link [1]: After commit 77900c45ee5c ("f2fs: fix to do sanity check in is_alive()"), node page should be unlock via calling f2fs_put_page() in the error path of is_alive(), otherwise, f2fs may hang when it tries to lock the node page, fix it. [1] https://lore.kernel.org/stable/20220124203637.GA19321@duo.ucw.cz/ Fixes: 77900c45ee5c ("f2fs: fix to do sanity check in is_alive()") Cc: Reported-by: Pavel Machek Signed-off-by: Pavel Machek Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 0a6b0a8ae97e..2d53ef121e76 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1038,8 +1038,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, set_sbi_flag(sbi, SBI_NEED_FSCK); } - if (f2fs_check_nid_range(sbi, dni->ino)) + if (f2fs_check_nid_range(sbi, dni->ino)) { + f2fs_put_page(node_page, 1); return false; + } *nofs = ofs_of_node(node_page); source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); -- cgit v1.2.3-59-g8ed1b From 430f163b01888dc26696365d9c1053ba9d6c7d92 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 08:34:10 +0800 Subject: f2fs: adjust readahead block number during recovery In a fragmented image, entries in dnode block list may locate in incontiguous physical block address space, however, in recovery flow, we will always readahead BIO_MAX_VECS size blocks, so in such case, current readahead policy is low efficient, let's adjust readahead window size dynamically based on consecutiveness of dnode blocks. 
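In effect the readahead window follows a clamped multiplicative increase/decrease rule. A sketch of the behaviour of adjust_por_ra_blocks() introduced below; the constant names come from the patch, while the helper signature here is simplified for illustration only.

	/* Sketch only: how the recovery readahead window evolves per dnode block.
	 * It starts at RECOVERY_MAX_RA_BLOCKS (BIO_MAX_VECS) and is adjusted as
	 * the chain of fsynced dnode blocks is walked.
	 */
	static unsigned int sketch_next_window(unsigned int ra_blocks,
					       bool next_is_consecutive,
					       bool next_is_segment_start)
	{
		if (next_is_consecutive)	/* blkaddr + 1 == next_blkaddr */
			return min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, ra_blocks * 2);
		if (!next_is_segment_start)	/* next_blkaddr % blocks_per_seg != 0 */
			return max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, ra_blocks / 2);
		return ra_blocks;		/* jump to a segment start: keep window */
	}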
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++-- fs/f2fs/f2fs.h | 6 +++++- fs/f2fs/recovery.c | 27 ++++++++++++++++++++++++--- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index deeda95688f0..a13b6b4af220 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -282,18 +282,22 @@ out: return blkno - start; } -void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, + unsigned int ra_blocks) { struct page *page; bool readahead = false; + if (ra_blocks == RECOVERY_MIN_RA_BLOCKS) + return; + page = find_get_page(META_MAPPING(sbi), index); if (!page || !PageUptodate(page)) readahead = true; f2fs_put_page(page, 0); if (readahead) - f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true); + f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 63c90416364b..51c1392708e6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -590,6 +590,9 @@ enum { /* number of extent info in extent cache we try to shrink */ #define EXTENT_CACHE_SHRINK_NUMBER 128 +#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS +#define RECOVERY_MIN_RA_BLOCKS 1 + struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ union { @@ -3651,7 +3654,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); -void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, + unsigned int ra_blocks); long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type); void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 10d152cfa58d..2af503f75b4f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -343,6 +343,19 @@ static int recover_inode(struct inode *inode, struct page *page) return 0; } +static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, + unsigned int ra_blocks, unsigned int blkaddr, + unsigned int next_blkaddr) +{ + if (blkaddr + 1 == next_blkaddr) + ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, + ra_blocks * 2); + else if (next_blkaddr % sbi->blocks_per_seg) + ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, + ra_blocks / 2); + return ra_blocks; +} + static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, bool check_only) { @@ -350,6 +363,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct page *page = NULL; block_t blkaddr; unsigned int loop_cnt = 0; + unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg - valid_user_blocks(sbi); int err = 0; @@ -424,11 +438,14 @@ next: break; } + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, + next_blkaddr_of_node(page)); + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); - f2fs_ra_meta_pages_cond(sbi, blkaddr); + f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } return err; } @@ -704,6 +721,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, struct page *page = NULL; int err = 0; block_t blkaddr; + unsigned int 
ra_blocks = RECOVERY_MAX_RA_BLOCKS; /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -715,8 +733,6 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - f2fs_ra_meta_pages_cond(sbi, blkaddr); - page = f2fs_get_tmp_page(sbi, blkaddr); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -759,9 +775,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, if (entry->blkaddr == blkaddr) list_move_tail(&entry->list, tmp_inode_list); next: + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, + next_blkaddr_of_node(page)); + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } if (!err) f2fs_allocate_new_segments(sbi); -- cgit v1.2.3-59-g8ed1b From 1018a5463a063715365784704c4e8cdf2eec4b04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 15:19:46 +0800 Subject: f2fs: introduce F2FS_IPU_HONOR_OPU_WRITE ipu policy Once F2FS_IPU_FORCE policy is enabled in some cases: a) f2fs forces to use F2FS_IPU_FORCE in a small-sized volume b) user sets F2FS_IPU_FORCE policy via sysfs Then we may fail to defragment file due to IPU policy check, it doesn't make sense, let's introduce a new IPU policy to allow OPU during file defragmentation. In small-sized volume, let's enable F2FS_IPU_HONOR_OPU_WRITE policy by default. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 3 ++- fs/f2fs/data.c | 18 +++++++++++++----- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 18 +++++++++++------- fs/f2fs/segment.h | 5 ++++- fs/f2fs/super.c | 3 ++- 6 files changed, 34 insertions(+), 16 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 87d3884c90ea..7b50bf82f14d 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -55,8 +55,9 @@ Description: Controls the in-place-update policy. 0x04 F2FS_IPU_UTIL 0x08 F2FS_IPU_SSR_UTIL 0x10 F2FS_IPU_FSYNC - 0x20 F2FS_IPU_ASYNC, + 0x20 F2FS_IPU_ASYNC 0x40 F2FS_IPU_NOCACHE + 0x80 F2FS_IPU_HONOR_OPU_WRITE ==== ================= Refer segment.h for details. 
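Since ipu_policy is a bitmask, the new policy is tested like the existing bits. Condensed from the data.c hunk that follows, the check added to check_inplace_update_policy() amounts to the excerpt below (illustrative fragment, not a complete function):

	/* honor OPU for inodes tagged FI_OPU_WRITE before any force-IPU bits */
	if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) &&
			is_inode_flag_set(inode, FI_OPU_WRITE))
		return false;	/* skip in-place update, write out of place instead */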
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0f124e8de1d4..6b5f389ba998 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2460,6 +2460,9 @@ static inline bool check_inplace_update_policy(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int policy = SM_I(sbi)->ipu_policy; + if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) && + is_inode_flag_set(inode, FI_OPU_WRITE)) + return false; if (policy & (0x1 << F2FS_IPU_FORCE)) return true; if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) @@ -2530,6 +2533,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return true; + if (is_inode_flag_set(inode, FI_OPU_WRITE)) + return true; + if (fio) { if (page_private_gcing(fio->page)) return true; @@ -3154,8 +3160,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping, f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; - /* skip writing during file defragment */ - if (is_inode_flag_set(inode, FI_DO_DEFRAG)) + /* skip writing in file defragment preparing stage */ + if (is_inode_flag_set(inode, FI_SKIP_WRITES)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, DATA); @@ -3725,6 +3731,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); + set_inode_flag(inode, FI_OPU_WRITE); for (; secidx < end_sec; secidx++) { f2fs_down_write(&sbi->pin_sem); @@ -3733,7 +3740,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); f2fs_unlock_op(sbi); - set_inode_flag(inode, FI_DO_DEFRAG); + set_inode_flag(inode, FI_SKIP_WRITES); for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { struct page *page; @@ -3750,7 +3757,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, f2fs_put_page(page, 1); } - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); ret = filemap_fdatawrite(inode->i_mapping); @@ -3761,7 +3768,8 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, } done: - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); + clear_inode_flag(inode, FI_OPU_WRITE); clear_inode_flag(inode, FI_ALIGNED_WRITE); filemap_invalidate_unlock(inode->i_mapping); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 51c1392708e6..3b4bf1c3f1ed 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -740,7 +740,8 @@ enum { FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ + FI_SKIP_WRITES, /* should skip data page writeback */ + FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */ FI_HOT_DATA, /* indicate file is hot */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6ccdd6e347e2..42fbdcf0ccc9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2559,10 +2559,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, bool fragmented = false; int err; - /* if in-place-update policy is enabled, don't waste time here */ - if (f2fs_should_update_inplace(inode, NULL)) - return -EINVAL; - pg_start = range->start >> PAGE_SHIFT; pg_end = (range->start + range->len) >> PAGE_SHIFT; @@ -2570,6 +2566,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info 
*sbi, inode_lock(inode); + /* if in-place-update policy is enabled, don't waste time here */ + set_inode_flag(inode, FI_OPU_WRITE); + if (f2fs_should_update_inplace(inode, NULL)) { + err = -EINVAL; + goto out; + } + /* writeback all dirty pages in the range */ err = filemap_write_and_wait_range(inode->i_mapping, range->start, range->start + range->len - 1); @@ -2651,7 +2654,7 @@ do_map: goto check; } - set_inode_flag(inode, FI_DO_DEFRAG); + set_inode_flag(inode, FI_SKIP_WRITES); idx = map.m_lblk; while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { @@ -2676,15 +2679,16 @@ check: if (map.m_lblk < pg_end && cnt < blk_per_seg) goto do_map; - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); err = filemap_fdatawrite(inode->i_mapping); if (err) goto out; } clear_out: - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); out: + clear_inode_flag(inode, FI_OPU_WRITE); inode_unlock(inode); if (!err) range->len = (u64)total << PAGE_SHIFT; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 0291cd55cf09..5c94caf0c0a1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -651,7 +651,9 @@ static inline int utilization(struct f2fs_sb_info *sbi) * pages over min_fsync_blocks. (=default option) * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. * F2FS_IPU_NOCACHE - disable IPU bio cache. - * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode) + * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has + * FI_OPU_WRITE flag. + * F2FS_IPU_DISABLE - disable IPU. (=default option in LFS mode) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 @@ -667,6 +669,7 @@ enum { F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, F2FS_IPU_NOCACHE, + F2FS_IPU_HONOR_OPU_WRITE, }; static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9af6c20532ec..806836184ebc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3957,7 +3957,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; if (f2fs_block_unit_discard(sbi)) sm_i->dcc_info->discard_granularity = 1; - sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE | + 1 << F2FS_IPU_HONOR_OPU_WRITE; } sbi->readdir_ra = 1; -- cgit v1.2.3-59-g8ed1b From 47c8ebcce85ed7113e9e3e3f1d8c6374fa87848e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Jan 2022 13:31:43 -0800 Subject: f2fs: add a way to limit roll forward recovery time This adds a sysfs entry to call checkpoint during fsync() in order to avoid long elapsed time to run roll-forward recovery when booting the device. Default value doesn't enforce the limitation which is same as before. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/checkpoint.c | 1 + fs/f2fs/debug.c | 3 +++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/node.c | 2 ++ fs/f2fs/node.h | 3 +++ fs/f2fs/recovery.c | 4 ++++ fs/f2fs/super.c | 14 ++++++++++++-- fs/f2fs/sysfs.c | 2 ++ 9 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 7b50bf82f14d..58bf0dc83712 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -568,3 +568,9 @@ Contact: "Daeho Jeong" Description: You can set the trial count limit for GC urgent high mode with this value. If GC thread gets to the limit, the mode will turn back to GC normal mode. 
By default, the value is zero, which means there is no limit like before. + +What: /sys/fs/f2fs//max_roll_forward_node_blocks +Date: January 2022 +Contact: "Jaegeuk Kim" +Description: Controls max # of node block writes to be used for roll forward + recovery. This can limit the roll forward recovery time. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a13b6b4af220..203a1577942d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1547,6 +1547,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* update user_block_counts */ sbi->last_valid_block_count = sbi->total_valid_block_count; percpu_counter_set(&sbi->alloc_valid_block_count, 0); + percpu_counter_set(&sbi->rf_node_block_count, 0); /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8c50518475a9..9a13902c7702 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -532,6 +532,9 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_meta, si->meta_pages); seq_printf(s, " - imeta: %4d\n", si->ndirty_imeta); + seq_printf(s, " - fsync mark: %4lld\n", + percpu_counter_sum_positive( + &si->sbi->rf_node_block_count)); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3b4bf1c3f1ed..c9515c3c54fd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -917,6 +917,7 @@ struct f2fs_nm_info { nid_t max_nid; /* maximum possible node ids */ nid_t available_nids; /* # of available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ + nid_t max_rf_node_blocks; /* max # of nodes for recovery */ unsigned int ram_thresh; /* control the memory footprint */ unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */ unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */ @@ -1688,6 +1689,8 @@ struct f2fs_sb_info { atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* # of node block writes as roll forward recovery */ + struct percpu_counter rf_node_block_count; /* writeback control */ atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 93512f8859d5..0d9883457579 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1782,6 +1782,7 @@ continue_unlock: if (!atomic || page == last_page) { set_fsync_mark(page, 1); + percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(page)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) @@ -3218,6 +3219,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; + nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 18b98cf0465b..4c1d34bfea78 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -31,6 +31,9 @@ /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 +/* control total # of node writes used for roll-fowrad recovery */ +#define DEF_RF_NODE_BLOCKS 0 + /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 #define SETVEC_SIZE 32 diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 
2af503f75b4f..ab33e474af07 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -56,6 +56,10 @@ bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) return false; + if (NM_I(sbi)->max_rf_node_blocks && + percpu_counter_sum_positive(&sbi->rf_node_block_count) >= + NM_I(sbi)->max_rf_node_blocks) + return false; return true; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 806836184ebc..f816d7d1987d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1501,8 +1501,9 @@ static void f2fs_free_inode(struct inode *inode) static void destroy_percpu_info(struct f2fs_sb_info *sbi) { - percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); + percpu_counter_destroy(&sbi->rf_node_block_count); + percpu_counter_destroy(&sbi->alloc_valid_block_count); } static void destroy_device_list(struct f2fs_sb_info *sbi) @@ -3619,11 +3620,20 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; + err = percpu_counter_init(&sbi->rf_node_block_count, 0, GFP_KERNEL); + if (err) + goto err_valid_block; + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); if (err) - percpu_counter_destroy(&sbi->alloc_valid_block_count); + goto err_node_block; + return 0; +err_node_block: + percpu_counter_destroy(&sbi->rf_node_block_count); +err_valid_block: + percpu_counter_destroy(&sbi->alloc_valid_block_count); return err; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 281bc0133ee6..47efcf233afd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -732,6 +732,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, max_roll_forward_node_blocks, max_rf_node_blocks); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); @@ -855,6 +856,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), ATTR_LIST(dirty_nats_ratio), + ATTR_LIST(max_roll_forward_node_blocks), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), ATTR_LIST(discard_idle_interval), -- cgit v1.2.3-59-g8ed1b From 984fc4e76d63345499f01c0c198a4b44860cf027 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 13:24:56 +0800 Subject: f2fs: support idmapped mounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables idmapped mounts for f2fs, since all dedicated helpers for this functionality existsm, so, in this patch we just pass down the user_namespace argument from the VFS methods to the relevant helpers. Simple idmap example on f2fs image: 1. truncate -s 128M f2fs.img 2. mkfs.f2fs f2fs.img 3. mount f2fs.img /mnt/f2fs/ 4. touch /mnt/f2fs/file 5. ls -ln /mnt/f2fs/ total 0 -rw-r--r-- 1 0 0 0 2月 4 13:17 file 6. ./mount-idmapped --map-mount b:0:1001:1 /mnt/f2fs/ /mnt/scratch_f2fs/ 7. 
ls -ln /mnt/scratch_f2fs/ total 0 -rw-r--r-- 1 1001 1001 0 2月 4 13:17 file Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 21 ++++++++++++--------- fs/f2fs/file.c | 23 ++++++++++++++--------- fs/f2fs/namei.c | 41 +++++++++++++++++++++++------------------ fs/f2fs/super.c | 2 +- 4 files changed, 50 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 16e826e01f09..eaa240b21f07 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -204,8 +204,9 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu) return __f2fs_get_acl(inode, type, NULL); } -static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p, - struct posix_acl **acl) +static int f2fs_acl_update_mode(struct user_namespace *mnt_userns, + struct inode *inode, umode_t *mode_p, + struct posix_acl **acl) { umode_t mode = inode->i_mode; int error; @@ -218,14 +219,15 @@ static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p, return error; if (error == 0) *acl = NULL; - if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) && - !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) + if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) && + !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) mode &= ~S_ISGID; *mode_p = mode; return 0; } -static int __f2fs_set_acl(struct inode *inode, int type, +static int __f2fs_set_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { int name_index; @@ -238,7 +240,8 @@ static int __f2fs_set_acl(struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl && !ipage) { - error = f2fs_acl_update_mode(inode, &mode, &acl); + error = f2fs_acl_update_mode(mnt_userns, inode, + &mode, &acl); if (error) return error; set_acl_inode(inode, mode); @@ -279,7 +282,7 @@ int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; - return __f2fs_set_acl(inode, type, acl, NULL); + return __f2fs_set_acl(mnt_userns, inode, type, acl, NULL); } /* @@ -419,7 +422,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, f2fs_mark_inode_dirty_sync(inode, true); if (default_acl) { - error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, default_acl, ipage); posix_acl_release(default_acl); } else { @@ -427,7 +430,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, } if (acl) { if (!error) - error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); } else { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 42fbdcf0ccc9..cfdc41f87f5d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -844,7 +844,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, STATX_ATTR_NODUMP | STATX_ATTR_VERITY); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(mnt_userns, inode, stat); /* we need to show initial sectors used for inline_data/dentries */ if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || @@ -904,7 +904,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, !f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(mnt_userns, dentry, attr); if (err) return err; @@ -980,10 +980,10 @@ int 
f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, spin_unlock(&F2FS_I(inode)->i_size_lock); } - __setattr_copy(&init_user_ns, inode, attr); + __setattr_copy(mnt_userns, inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(&init_user_ns, inode, f2fs_get_inode_mode(inode)); + err = posix_acl_chmod(mnt_userns, inode, f2fs_get_inode_mode(inode)); if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) @@ -1989,11 +1989,12 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; if (!S_ISREG(inode->i_mode)) @@ -2058,9 +2059,10 @@ out: static int f2fs_ioc_commit_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -2100,9 +2102,10 @@ err_out: static int f2fs_ioc_start_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; if (!S_ISREG(inode->i_mode)) @@ -2135,9 +2138,10 @@ out: static int f2fs_ioc_release_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -2164,9 +2168,10 @@ out: static int f2fs_ioc_abort_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0347c5780910..2b23a76bdae9 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -22,7 +22,8 @@ #include "acl.h" #include -static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) +static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, + struct inode *dir, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); nid_t ino; @@ -46,7 +47,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(mnt_userns, inode, dir, mode); inode->i_ino = ino; inode->i_blocks = 0; @@ -67,7 +68,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else - F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + F2FS_I(inode)->i_projid = make_kprojid(mnt_userns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); @@ -349,7 +350,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = 
f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -679,7 +680,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); + inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -750,7 +751,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, S_IFDIR | mode); + inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -807,7 +808,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -834,8 +835,9 @@ out: return err; } -static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, - umode_t mode, struct inode **whiteout) +static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, + struct inode **whiteout) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; @@ -845,7 +847,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -909,20 +911,22 @@ static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - return __f2fs_tmpfile(dir, dentry, mode, NULL); + return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, NULL); } -static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) +static int f2fs_create_whiteout(struct user_namespace *mnt_userns, + struct inode *dir, struct inode **whiteout) { if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) return -EIO; - return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); + return __f2fs_tmpfile(mnt_userns, dir, NULL, + S_IFCHR | WHITEOUT_MODE, whiteout); } -static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) +static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags) { struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = d_inode(old_dentry); @@ -960,7 +964,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(old_dir, &whiteout); + err = f2fs_create_whiteout(mnt_userns, old_dir, &whiteout); if (err) return err; } @@ -1300,7 +1304,8 @@ static int f2fs_rename2(struct user_namespace *mnt_userns, * VFS has already handled the new dentry existence case, * here, we just deal with "RENAME_NOREPLACE" as regular rename. 
*/ - return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags); + return f2fs_rename(mnt_userns, old_dir, old_dentry, + new_dir, new_dentry, flags); } static const char *f2fs_encrypted_get_link(struct dentry *dentry, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f816d7d1987d..22fb4d3b1170 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4539,7 +4539,7 @@ static struct file_system_type f2fs_fs_type = { .name = "f2fs", .mount = f2fs_mount, .kill_sb = kill_f2fs_super, - .fs_flags = FS_REQUIRES_DEV, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("f2fs"); -- cgit v1.2.3-59-g8ed1b From 2fef99b8372c1ae3d8445ab570e888b5a358dbe9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 11 Feb 2022 18:56:46 -0800 Subject: f2fs: fix missing free nid in f2fs_handle_failed_inode This patch fixes xfstests/generic/475 failure. [ 293.680694] F2FS-fs (dm-1): May loss orphan inode, run fsck to fix. [ 293.685358] Buffer I/O error on dev dm-1, logical block 8388592, async page read [ 293.691527] Buffer I/O error on dev dm-1, logical block 8388592, async page read [ 293.691764] sh (7615): drop_caches: 3 [ 293.691819] sh (7616): drop_caches: 3 [ 293.694017] Buffer I/O error on dev dm-1, logical block 1, async page read [ 293.695659] sh (7618): drop_caches: 3 [ 293.696979] sh (7617): drop_caches: 3 [ 293.700290] sh (7623): drop_caches: 3 [ 293.708621] sh (7626): drop_caches: 3 [ 293.711386] sh (7628): drop_caches: 3 [ 293.711825] sh (7627): drop_caches: 3 [ 293.716738] sh (7630): drop_caches: 3 [ 293.719613] sh (7632): drop_caches: 3 [ 293.720971] sh (7633): drop_caches: 3 [ 293.727741] sh (7634): drop_caches: 3 [ 293.730783] sh (7636): drop_caches: 3 [ 293.732681] sh (7635): drop_caches: 3 [ 293.732988] sh (7637): drop_caches: 3 [ 293.738836] sh (7639): drop_caches: 3 [ 293.740568] sh (7641): drop_caches: 3 [ 293.743053] sh (7640): drop_caches: 3 [ 293.821889] ------------[ cut here ]------------ [ 293.824654] kernel BUG at fs/f2fs/node.c:3334! [ 293.826226] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 293.828713] CPU: 0 PID: 7653 Comm: umount Tainted: G OE 5.17.0-rc1-custom #1 [ 293.830946] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 293.832526] RIP: 0010:f2fs_destroy_node_manager+0x33f/0x350 [f2fs] [ 293.833905] Code: e8 d6 3d f9 f9 48 8b 45 d0 65 48 2b 04 25 28 00 00 00 75 1a 48 81 c4 28 03 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 0b [ 293.837783] RSP: 0018:ffffb04ec31e7a20 EFLAGS: 00010202 [ 293.839062] RAX: 0000000000000001 RBX: ffff9df947db2eb8 RCX: 0000000080aa0072 [ 293.840666] RDX: 0000000000000000 RSI: ffffe86c0432a140 RDI: ffffffffc0b72a21 [ 293.842261] RBP: ffffb04ec31e7d70 R08: ffff9df94ca85780 R09: 0000000080aa0072 [ 293.843909] R10: ffff9df94ca85700 R11: ffff9df94e1ccf58 R12: ffff9df947db2e00 [ 293.845594] R13: ffff9df947db2ed0 R14: ffff9df947db2eb8 R15: ffff9df947db2eb8 [ 293.847855] FS: 00007f5a97379800(0000) GS:ffff9dfa77c00000(0000) knlGS:0000000000000000 [ 293.850647] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 293.852940] CR2: 00007f5a97528730 CR3: 000000010bc76005 CR4: 0000000000370ef0 [ 293.854680] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 293.856423] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 293.858380] Call Trace: [ 293.859302] [ 293.860311] ? ttwu_do_wakeup+0x1c/0x170 [ 293.861800] ? ttwu_do_activate+0x6d/0xb0 [ 293.863057] ? _raw_spin_unlock_irqrestore+0x29/0x40 [ 293.864411] ? try_to_wake_up+0x9d/0x5e0 [ 293.865618] ? 
debug_smp_processor_id+0x17/0x20 [ 293.866934] ? debug_smp_processor_id+0x17/0x20 [ 293.868223] ? free_unref_page+0xbf/0x120 [ 293.869470] ? __free_slab+0xcb/0x1c0 [ 293.870614] ? preempt_count_add+0x7a/0xc0 [ 293.871811] ? __slab_free+0xa0/0x2d0 [ 293.872918] ? __wake_up_common_lock+0x8a/0xc0 [ 293.874186] ? __slab_free+0xa0/0x2d0 [ 293.875305] ? free_inode_nonrcu+0x20/0x20 [ 293.876466] ? free_inode_nonrcu+0x20/0x20 [ 293.877650] ? debug_smp_processor_id+0x17/0x20 [ 293.878949] ? call_rcu+0x11a/0x240 [ 293.880060] ? f2fs_destroy_stats+0x59/0x60 [f2fs] [ 293.881437] ? kfree+0x1fe/0x230 [ 293.882674] f2fs_put_super+0x160/0x390 [f2fs] [ 293.883978] generic_shutdown_super+0x7a/0x120 [ 293.885274] kill_block_super+0x27/0x50 [ 293.886496] kill_f2fs_super+0x7f/0x100 [f2fs] [ 293.887806] deactivate_locked_super+0x35/0xa0 [ 293.889271] deactivate_super+0x40/0x50 [ 293.890513] cleanup_mnt+0x139/0x190 [ 293.891689] __cleanup_mnt+0x12/0x20 [ 293.892850] task_work_run+0x64/0xa0 [ 293.894035] exit_to_user_mode_prepare+0x1b7/0x1c0 [ 293.895409] syscall_exit_to_user_mode+0x27/0x50 [ 293.896872] do_syscall_64+0x48/0xc0 [ 293.898090] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 293.899517] RIP: 0033:0x7f5a975cd25b Fixes: 7735730d39d7 ("f2fs: fix to propagate error from __get_meta_page()") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0ec8e32a00b4..ab8e0c06c78c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -885,6 +885,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); + set_inode_flag(inode, FI_FREE_NID); f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } -- cgit v1.2.3-59-g8ed1b From c7f91bd4102902384137dd5c50d04bfed27050dd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Feb 2022 10:43:13 -0800 Subject: f2fs: Restore rwsem lockdep support Lockdep uses lock class keys in its analysis. init_rwsem() instantiates one lock class key with each init_rwsem() user as follows: #define init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ __init_rwsem((sem), #sem, &__key); \ } while (0) Commit e4544b63a7ee ("f2fs: move f2fs to use reader-unfair rwsems") reduced the number of lock class keys from one per init_rwsem() user to one per file in which init_f2fs_rwsem() is used. This causes the same lock class key to be associated with multiple f2fs rwsems and also triggers a number of false positive lockdep deadlock reports. Fix this by again instantiating one lock class key with each init_f2fs_rwsem() caller. 
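Lock class keys matter here because lockdep groups locks by lock_class_key, not by lock instance: with one key per file, every f2fs_rwsem initialised in the same source file falls into the same class, so nesting two unrelated semaphores looks like recursive locking. A hedged illustration, not taken from the patch:

	/* Illustration only: a and b are distinct locks, but if they share a
	 * lock_class_key, lockdep reports the second acquisition as a possible
	 * recursive deadlock. Per-call-site keys (restored below) keep them in
	 * separate classes.
	 */
	struct f2fs_rwsem a, b;

	init_f2fs_rwsem(&a);	/* with this fix: its own static key */
	init_f2fs_rwsem(&b);	/* with this fix: a different static key */

	f2fs_down_read(&a);
	f2fs_down_read(&b);	/* shared key: false positive; per-site keys: clean */
	f2fs_up_read(&b);
	f2fs_up_read(&a);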
Cc: Tim Murray Reported-by: syzbot+0b9cadf5fc45a98a5083@syzkaller.appspotmail.com Fixes: e4544b63a7ee ("f2fs: move f2fs to use reader-unfair rwsems") Signed-off-by: Bart Van Assche Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c9515c3c54fd..47bf9e30913f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2115,9 +2115,17 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock_irqrestore(&sbi->cp_lock, flags); } -static inline void init_f2fs_rwsem(struct f2fs_rwsem *sem) +#define init_f2fs_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_f2fs_rwsem((sem), #sem, &__key); \ +} while (0) + +static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem, + const char *sem_name, struct lock_class_key *key) { - init_rwsem(&sem->internal_rwsem); + __init_rwsem(&sem->internal_rwsem, sem_name, key); init_waitqueue_head(&sem->read_waiters); } -- cgit v1.2.3-59-g8ed1b From 680af5b824a52faa819167628665804a14f0e0df Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Tue, 15 Feb 2022 17:27:21 +0900 Subject: f2fs: quota: fix loop condition at f2fs_quota_sync() cnt should be passed to sb_has_quota_active() instead of type to check active quota properly. Moreover, when the type is -1, the compiler with enough inline knowledge can discard sb_has_quota_active() check altogether, causing a NULL pointer dereference at the following inode_lock(dqopt->files[cnt]): [ 2.796010] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0 [ 2.796024] Mem abort info: [ 2.796025] ESR = 0x96000005 [ 2.796028] EC = 0x25: DABT (current EL), IL = 32 bits [ 2.796029] SET = 0, FnV = 0 [ 2.796031] EA = 0, S1PTW = 0 [ 2.796032] Data abort info: [ 2.796034] ISV = 0, ISS = 0x00000005 [ 2.796035] CM = 0, WnR = 0 [ 2.796046] user pgtable: 4k pages, 39-bit VAs, pgdp=00000003370d1000 [ 2.796048] [00000000000000a0] pgd=0000000000000000, pud=0000000000000000 [ 2.796051] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 2.796056] CPU: 7 PID: 640 Comm: f2fs_ckpt-259:7 Tainted: G S 5.4.179-arter97-r8-64666-g2f16e087f9d8 #1 [ 2.796057] Hardware name: Qualcomm Technologies, Inc. 
Lahaina MTP lemonadep (DT) [ 2.796059] pstate: 80c00005 (Nzcv daif +PAN +UAO) [ 2.796065] pc : down_write+0x28/0x70 [ 2.796070] lr : f2fs_quota_sync+0x100/0x294 [ 2.796071] sp : ffffffa3f48ffc30 [ 2.796073] x29: ffffffa3f48ffc30 x28: 0000000000000000 [ 2.796075] x27: ffffffa3f6d718b8 x26: ffffffa415fe9d80 [ 2.796077] x25: ffffffa3f7290048 x24: 0000000000000001 [ 2.796078] x23: 0000000000000000 x22: ffffffa3f7290000 [ 2.796080] x21: ffffffa3f72904a0 x20: ffffffa3f7290110 [ 2.796081] x19: ffffffa3f77a9800 x18: ffffffc020aae038 [ 2.796083] x17: ffffffa40e38e040 x16: ffffffa40e38e6d0 [ 2.796085] x15: ffffffa40e38e6cc x14: ffffffa40e38e6d0 [ 2.796086] x13: 00000000000004f6 x12: 00162c44ff493000 [ 2.796088] x11: 0000000000000400 x10: ffffffa40e38c948 [ 2.796090] x9 : 0000000000000000 x8 : 00000000000000a0 [ 2.796091] x7 : 0000000000000000 x6 : 0000d1060f00002a [ 2.796093] x5 : ffffffa3f48ff718 x4 : 000000000000000d [ 2.796094] x3 : 00000000060c0000 x2 : 0000000000000001 [ 2.796096] x1 : 0000000000000000 x0 : 00000000000000a0 [ 2.796098] Call trace: [ 2.796100] down_write+0x28/0x70 [ 2.796102] f2fs_quota_sync+0x100/0x294 [ 2.796104] block_operations+0x120/0x204 [ 2.796106] f2fs_write_checkpoint+0x11c/0x520 [ 2.796107] __checkpoint_and_complete_reqs+0x7c/0xd34 [ 2.796109] issue_checkpoint_thread+0x6c/0xb8 [ 2.796112] kthread+0x138/0x414 [ 2.796114] ret_from_fork+0x10/0x18 [ 2.796117] Code: aa0803e0 aa1f03e1 52800022 aa0103e9 (c8e97d02) [ 2.796120] ---[ end trace 96e942e8eb6a0b53 ]--- [ 2.800116] Kernel panic - not syncing: Fatal exception [ 2.800120] SMP: stopping secondary CPUs Fixes: 9de71ede81e6 ("f2fs: quota: fix potential deadlock") Cc: # v5.15+ Signed-off-by: Juhyung Park Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 22fb4d3b1170..8e3840973077 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2689,7 +2689,7 @@ int f2fs_quota_sync(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; - int ret; + int ret = 0; /* * Now when everything is written we can discard the pagecache so @@ -2700,8 +2700,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (type != -1 && cnt != type) continue; - if (!sb_has_quota_active(sb, type)) - return 0; + if (!sb_has_quota_active(sb, cnt)) + continue; inode_lock(dqopt->files[cnt]); -- cgit v1.2.3-59-g8ed1b From 344150999b7fc88502a65bbb147a47503eca2033 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Jan 2022 13:44:49 +0800 Subject: f2fs: fix to avoid potential deadlock Quoted from Jing Xia's report, there is a potential deadlock may happen between kworker and checkpoint as below: [T:writeback] [T:checkpoint] - wb_writeback - blk_start_plug bio contains NodeA was plugged in writeback threads - do_writepages -- sync write inodeB, inc wb_sync_req[DATA] - f2fs_write_data_pages - f2fs_write_single_data_page -- write last dirty page - f2fs_do_write_data_page - set_page_writeback -- clear page dirty flag and PAGECACHE_TAG_DIRTY tag in radix tree - f2fs_outplace_write_data - f2fs_update_data_blkaddr - f2fs_wait_on_page_writeback -- wait NodeA to writeback here - inode_dec_dirty_pages - writeback_sb_inodes - writeback_single_inode - do_writepages - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA] - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one - requeue_inode -- requeue inode to 
wb->b_dirty queue due to non-zero.pages_skipped - blk_finish_plug Let's try to avoid deadlock condition by forcing unplugging previous bio via blk_finish_plug(current->plug) once we'v skipped writeback in writepages() due to valid sbi->wb_sync_req[DATA/NODE]. Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE") Signed-off-by: Zhiguo Niu Signed-off-by: Jing Xia Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- fs/f2fs/node.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6b5f389ba998..b09f401f8960 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3169,8 +3169,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping, /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[DATA]); - else if (atomic_read(&sbi->wb_sync_req[DATA])) + else if (atomic_read(&sbi->wb_sync_req[DATA])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } if (__should_serialize_io(inode, wbc)) { mutex_lock(&sbi->writepages); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0d9883457579..fe8c38c9ac77 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2112,8 +2112,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[NODE]); - else if (atomic_read(&sbi->wb_sync_req[NODE])) + else if (atomic_read(&sbi->wb_sync_req[NODE])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } trace_f2fs_writepages(mapping->host, wbc, NODE); -- cgit v1.2.3-59-g8ed1b From f41ee8b91c00770d718be2ff4852a80017ae9ab3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Mar 2022 09:49:13 +0800 Subject: f2fs: fix to do sanity check on curseg->alloc_type As Wenqing Liu reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215657 - Overview UBSAN: array-index-out-of-bounds in fs/f2fs/segment.c:3460:2 when mount and operate a corrupted image - Reproduce tested on kernel 5.17-rc4, 5.17-rc6 1. mkdir test_crash 2. cd test_crash 3. unzip tmp2.zip 4. mkdir mnt 5. 
./single_test.sh f2fs 2 - Kernel dump [ 46.434454] loop0: detected capacity change from 0 to 131072 [ 46.529839] F2FS-fs (loop0): Mounted with checkpoint version = 7548c2d9 [ 46.738319] ================================================================================ [ 46.738412] UBSAN: array-index-out-of-bounds in fs/f2fs/segment.c:3460:2 [ 46.738475] index 231 is out of range for type 'unsigned int [2]' [ 46.738539] CPU: 2 PID: 939 Comm: umount Not tainted 5.17.0-rc6 #1 [ 46.738547] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 46.738551] Call Trace: [ 46.738556] [ 46.738563] dump_stack_lvl+0x47/0x5c [ 46.738581] ubsan_epilogue+0x5/0x50 [ 46.738592] __ubsan_handle_out_of_bounds+0x68/0x80 [ 46.738604] f2fs_allocate_data_block+0xdff/0xe60 [f2fs] [ 46.738819] do_write_page+0xef/0x210 [f2fs] [ 46.738934] f2fs_do_write_node_page+0x3f/0x80 [f2fs] [ 46.739038] __write_node_page+0x2b7/0x920 [f2fs] [ 46.739162] f2fs_sync_node_pages+0x943/0xb00 [f2fs] [ 46.739293] f2fs_write_checkpoint+0x7bb/0x1030 [f2fs] [ 46.739405] kill_f2fs_super+0x125/0x150 [f2fs] [ 46.739507] deactivate_locked_super+0x60/0xc0 [ 46.739517] deactivate_super+0x70/0xb0 [ 46.739524] cleanup_mnt+0x11a/0x200 [ 46.739532] __cleanup_mnt+0x16/0x20 [ 46.739538] task_work_run+0x67/0xa0 [ 46.739547] exit_to_user_mode_prepare+0x18c/0x1a0 [ 46.739559] syscall_exit_to_user_mode+0x26/0x40 [ 46.739568] do_syscall_64+0x46/0xb0 [ 46.739584] entry_SYSCALL_64_after_hwframe+0x44/0xae The root cause is we missed to do sanity check on curseg->alloc_type, result in out-of-bound accessing on sbi->block_count[] array, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 56211e201d51..012524db7437 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4793,6 +4793,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) sanity_check_seg_type(sbi, curseg->seg_type); + if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) { + f2fs_err(sbi, + "Current segment has invalid alloc_type:%d", + curseg->alloc_type); + return -EFSCORRUPTED; + } + if (f2fs_test_bit(blkofs, se->cur_valid_map)) goto out; -- cgit v1.2.3-59-g8ed1b From 50c63009f6ab37eb78d8b4f9bc606a12b2b46505 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 2 Mar 2022 11:39:08 -0800 Subject: f2fs: avoid an infinite loop in f2fs_sync_dirty_inodes If one read IO is always failing, we can fall into an infinite loop in f2fs_sync_dirty_inodes. This happens during xfstests/generic/475. [ 142.803335] Buffer I/O error on dev dm-1, logical block 8388592, async page read ... [ 382.887210] submit_bio_noacct+0xdd/0x2a0 [ 382.887213] submit_bio+0x80/0x110 [ 382.887223] __submit_bio+0x4d/0x300 [f2fs] [ 382.887282] f2fs_submit_page_bio+0x125/0x200 [f2fs] [ 382.887299] __get_meta_page+0xc9/0x280 [f2fs] [ 382.887315] f2fs_get_meta_page+0x13/0x20 [f2fs] [ 382.887331] f2fs_get_node_info+0x317/0x3c0 [f2fs] [ 382.887350] f2fs_do_write_data_page+0x327/0x6f0 [f2fs] [ 382.887367] f2fs_write_single_data_page+0x5b7/0x960 [f2fs] [ 382.887386] f2fs_write_cache_pages+0x302/0x890 [f2fs] [ 382.887405] ? preempt_count_add+0x7a/0xc0 [ 382.887408] f2fs_write_data_pages+0xfd/0x320 [f2fs] [ 382.887425] ? 
_raw_spin_unlock+0x1a/0x30 [ 382.887428] do_writepages+0xd3/0x1d0 [ 382.887432] filemap_fdatawrite_wbc+0x69/0x90 [ 382.887434] filemap_fdatawrite+0x50/0x70 [ 382.887437] f2fs_sync_dirty_inodes+0xa4/0x270 [f2fs] [ 382.887453] f2fs_write_checkpoint+0x189/0x1640 [f2fs] [ 382.887469] ? schedule_timeout+0x114/0x150 [ 382.887471] ? ttwu_do_activate+0x6d/0xb0 [ 382.887473] ? preempt_count_add+0x7a/0xc0 [ 382.887476] kill_f2fs_super+0xca/0x100 [f2fs] [ 382.887491] deactivate_locked_super+0x35/0xa0 [ 382.887494] deactivate_super+0x40/0x50 [ 382.887497] cleanup_mnt+0x139/0x190 [ 382.887499] __cleanup_mnt+0x12/0x20 [ 382.887501] task_work_run+0x64/0xa0 [ 382.887505] exit_to_user_mode_prepare+0x1b7/0x1c0 [ 382.887508] syscall_exit_to_user_mode+0x27/0x50 [ 382.887510] do_syscall_64+0x48/0xc0 [ 382.887513] entry_SYSCALL_64_after_hwframe+0x44/0xae Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++++++ fs/f2fs/f2fs.h | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 203a1577942d..871eee35a32f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -98,6 +98,13 @@ repeat: } if (unlikely(!PageUptodate(page))) { + if (page->index == sbi->metapage_eio_ofs && + sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO) { + set_ckpt_flags(sbi, CP_ERROR_FLAG); + } else { + sbi->metapage_eio_ofs = page->index; + sbi->metapage_eio_cnt = 0; + } f2fs_put_page(page, 1); return ERR_PTR(-EIO); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 47bf9e30913f..efc4f1fe2ffd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -577,6 +577,9 @@ enum { /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 +/* maximum retry of EIO'ed meta page */ +#define MAX_RETRY_META_PAGE_EIO 100 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ @@ -1614,6 +1617,8 @@ struct f2fs_sb_info { /* keep migration IO order for LFS mode */ struct f2fs_rwsem io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ + pgoff_t metapage_eio_ofs; /* EIO page offset */ + int metapage_eio_cnt; /* EIO count */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ -- cgit v1.2.3-59-g8ed1b From 7f8e249dccc4c530cf26c2c091f5bb64e701c262 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 Mar 2022 13:21:27 -0800 Subject: f2fs: introduce F2FS_UNFAIR_RWSEM to support unfair rwsem Unfair rwsem should be used when blk-cg is on. Otherwise, there is regression. FYI, we noticed a -26.7% regression of aim7.jobs-per-min due to commit: commit: e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae ("f2fs: move f2fs to use reader-unfair rwsems") https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master in testcase: aim7 on test machine: 88 threads 2 sockets Intel(R) Xeon(R) Gold 6238M CPU @ 2.10GHz with 128G memory with following parameters: disk: 4BRD_12G md: RAID0 fs: f2fs test: sync_disk_rw load: 100 cpufreq_governor: performance ucode: 0x500320a test-description: AIM7 is a traditional UNIX system level benchmark suite which is used to test and measure the performance of multiuser system. 
test-url: https://sourceforge.net/projects/aimbench/files/aim-suite7/ Reported-by: kernel test robot Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 7 +++++++ fs/f2fs/f2fs.h | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index f46a7339d6cf..03ef087537c7 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -143,3 +143,10 @@ config F2FS_IOSTAT Support getting IO statistics through sysfs and printing out periodic IO statistics tracepoint events. You have to turn on "iostat_enable" sysfs node to enable this feature. + +config F2FS_UNFAIR_RWSEM + bool "F2FS unfair rw_semaphore" + depends on F2FS_FS && BLK_CGROUP + help + Use unfair rw_semaphore, if system configured IO priority by block + cgroup. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index efc4f1fe2ffd..68d791ec8b27 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -132,7 +132,9 @@ typedef u32 nid_t; struct f2fs_rwsem { struct rw_semaphore internal_rwsem; +#ifdef CONFIG_F2FS_UNFAIR_RWSEM wait_queue_head_t read_waiters; +#endif }; struct f2fs_mount_info { @@ -2131,7 +2133,9 @@ static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem, const char *sem_name, struct lock_class_key *key) { __init_rwsem(&sem->internal_rwsem, sem_name, key); +#ifdef CONFIG_F2FS_UNFAIR_RWSEM init_waitqueue_head(&sem->read_waiters); +#endif } static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem) @@ -2146,7 +2150,11 @@ static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem) static inline void f2fs_down_read(struct f2fs_rwsem *sem) { +#ifdef CONFIG_F2FS_UNFAIR_RWSEM wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem)); +#else + down_read(&sem->internal_rwsem); +#endif } static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem) @@ -2181,7 +2189,9 @@ static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) static inline void f2fs_up_write(struct f2fs_rwsem *sem) { up_write(&sem->internal_rwsem); +#ifdef CONFIG_F2FS_UNFAIR_RWSEM wake_up_all(&sem->read_waiters); +#endif } static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) -- cgit v1.2.3-59-g8ed1b From d13732cc0cc920ee58d45a38d8472769171aaa1d Mon Sep 17 00:00:00 2001 From: Jia Yang Date: Wed, 9 Mar 2022 19:09:35 +0800 Subject: f2fs: remove unnecessary read for F2FS_FITS_IN_INODE F2FS_FITS_IN_INODE only cares the type of f2fs inode, so there is no need to read node page of f2fs inode. 
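The node page read can go away because the fits-in-inode check is purely a size/offset computation on the struct type. A simplified userspace sketch of that pattern (not the exact f2fs macro; the struct and the 16-byte bound are made up) shows that the pointer argument is only used inside sizeof()/typeof(), so it is never dereferenced and may safely be NULL:

/* Simplified sketch; demo_inode and the bound are illustrative only. */
#include <stddef.h>
#include <stdio.h>

struct demo_inode {
	unsigned short i_extra_isize;
	unsigned int   i_projid;
};

/* Only offsetof()/sizeof() are used, so ptr is never dereferenced. */
#define FIELD_FITS(ptr, extra_isize, field)				\
	(offsetof(typeof(*(ptr)), field) + sizeof((ptr)->field) <=	\
	 (size_t)(extra_isize))

int main(void)
{
	struct demo_inode *ri = NULL;	/* safe: used in unevaluated contexts only */

	printf("fits: %d\n", FIELD_FITS(ri, 16, i_projid));
	return 0;
}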
Signed-off-by: Jia Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cfdc41f87f5d..7049be29bc2e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -812,7 +812,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, { struct inode *inode = d_inode(path->dentry); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode *ri; + struct f2fs_inode *ri = NULL; unsigned int flags; if (f2fs_has_extra_attr(inode) && @@ -2999,7 +2999,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *ipage; + struct f2fs_inode *ri = NULL; kprojid_t kprojid; int err; @@ -3023,17 +3023,8 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) if (IS_NOQUOTA(inode)) return err; - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); - - if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize, - i_projid)) { - err = -EOVERFLOW; - f2fs_put_page(ipage, 1); - return err; - } - f2fs_put_page(ipage, 1); + if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) + return -EOVERFLOW; err = f2fs_dquot_initialize(inode); if (err) -- cgit v1.2.3-59-g8ed1b From ba900534f807f0b327c92d5141c85d2313e2d55c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 4 Mar 2022 09:40:05 -0800 Subject: f2fs: don't get FREEZE lock in f2fs_evict_inode in frozen fs Let's purge inode cache in order to avoid the below deadlock. [freeze test] shrinkder freeze_super - pwercpu_down_write(SB_FREEZE_FS) - super_cache_scan - down_read(&sb->s_umount) - prune_icache_sb - dispose_list - evict - f2fs_evict_inode thaw_super - down_write(&sb->s_umount); - __percpu_down_read(SB_FREEZE_FS) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 1 + fs/f2fs/debug.c | 1 + fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 6 ++++-- fs/f2fs/super.c | 4 ++++ 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 58bf0dc83712..5a5f3c5445f6 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -458,6 +458,7 @@ Description: Show status of f2fs superblock in real time. 
0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP 0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted 0x2000 SBI_IS_RESIZEFS resizefs is in process + 0x4000 SBI_IS_FREEZING freefs is in process ====== ===================== ================================= What: /sys/fs/f2fs//ckpt_thread_ioprio diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 9a13902c7702..cba5eab24595 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -338,6 +338,7 @@ static char *s_flag[] = { [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush", [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair", [SBI_IS_RESIZEFS] = " resizefs", + [SBI_IS_FREEZING] = " freezefs", }; static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 68d791ec8b27..da729f53daa8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1293,6 +1293,7 @@ enum { SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ SBI_IS_RESIZEFS, /* resizefs is in process */ + SBI_IS_FREEZING, /* freezefs is in process */ }; enum { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ab8e0c06c78c..71f232dcf3c2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); - sb_start_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -809,7 +810,8 @@ retry: if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); } - sb_end_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8e3840973077..4b570b5c2674 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb) /* ensure no checkpoint required */ if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list)) return -EINVAL; + + /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ + set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } static int f2fs_unfreeze(struct super_block *sb) { + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } -- cgit v1.2.3-59-g8ed1b From 98237fcda4a24e67b0a4498c17d5aa4ad4537bc7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 7 Mar 2022 17:27:46 -0800 Subject: f2fs: use spin_lock to avoid hang [14696.634553] task:cat state:D stack: 0 pid:1613738 ppid:1613735 flags:0x00000004 [14696.638285] Call Trace: [14696.639038] [14696.640032] __schedule+0x302/0x930 [14696.640969] schedule+0x58/0xd0 [14696.641799] schedule_preempt_disabled+0x18/0x30 [14696.642890] __mutex_lock.constprop.0+0x2fb/0x4f0 [14696.644035] ? mod_objcg_state+0x10c/0x310 [14696.645040] ? obj_cgroup_charge+0xe1/0x170 [14696.646067] __mutex_lock_slowpath+0x13/0x20 [14696.647126] mutex_lock+0x34/0x40 [14696.648070] stat_show+0x25/0x17c0 [f2fs] [14696.649218] seq_read_iter+0x120/0x4b0 [14696.650289] ? aa_file_perm+0x12a/0x500 [14696.651357] ? 
lru_cache_add+0x1c/0x20 [14696.652470] seq_read+0xfd/0x140 [14696.653445] full_proxy_read+0x5c/0x80 [14696.654535] vfs_read+0xa0/0x1a0 [14696.655497] ksys_read+0x67/0xe0 [14696.656502] __x64_sys_read+0x1a/0x20 [14696.657580] do_syscall_64+0x3b/0xc0 [14696.658671] entry_SYSCALL_64_after_hwframe+0x44/0xae [14696.660068] RIP: 0033:0x7efe39df1cb2 [14696.661133] RSP: 002b:00007ffc8badd948 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [14696.662958] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007efe39df1cb2 [14696.664757] RDX: 0000000000020000 RSI: 00007efe399df000 RDI: 0000000000000003 [14696.666542] RBP: 00007efe399df000 R08: 00007efe399de010 R09: 00007efe399de010 [14696.668363] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000000 [14696.670155] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 [14696.671965] [14696.672826] task:umount state:D stack: 0 pid:1614985 ppid:1614984 flags:0x00004000 [14696.674930] Call Trace: [14696.675903] [14696.676780] __schedule+0x302/0x930 [14696.677927] schedule+0x58/0xd0 [14696.679019] schedule_preempt_disabled+0x18/0x30 [14696.680412] __mutex_lock.constprop.0+0x2fb/0x4f0 [14696.681783] ? destroy_inode+0x65/0x80 [14696.683006] __mutex_lock_slowpath+0x13/0x20 [14696.684305] mutex_lock+0x34/0x40 [14696.685442] f2fs_destroy_stats+0x1e/0x60 [f2fs] [14696.686803] f2fs_put_super+0x158/0x390 [f2fs] [14696.688238] generic_shutdown_super+0x7a/0x120 [14696.689621] kill_block_super+0x27/0x50 [14696.690894] kill_f2fs_super+0x7f/0x100 [f2fs] [14696.692311] deactivate_locked_super+0x35/0xa0 [14696.693698] deactivate_super+0x40/0x50 [14696.694985] cleanup_mnt+0x139/0x190 [14696.696209] __cleanup_mnt+0x12/0x20 [14696.697390] task_work_run+0x64/0xa0 [14696.698587] exit_to_user_mode_prepare+0x1b7/0x1c0 [14696.700053] syscall_exit_to_user_mode+0x27/0x50 [14696.701418] do_syscall_64+0x48/0xc0 [14696.702630] entry_SYSCALL_64_after_hwframe+0x44/0xae Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index cba5eab24595..6d26872c7364 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static DEFINE_MUTEX(f2fs_stat_mutex); +static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -345,8 +345,9 @@ static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; + unsigned long flags; - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_for_each_entry(si, &f2fs_stat_list, stat_list) { update_general_status(si->sbi); @@ -577,7 +578,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); return 0; } @@ -588,6 +589,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + unsigned long flags; int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); @@ -623,9 +625,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->max_aw_cnt, 0); atomic_set(&sbi->max_vw_cnt, 0); - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_add_tail(&si->stat_list, &f2fs_stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, 
flags); return 0; } @@ -633,10 +635,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned long flags; - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_del(&si->stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); kfree(si); } -- cgit v1.2.3-59-g8ed1b From 646f64b576f7a80af0dd555e25c79ee8af058681 Mon Sep 17 00:00:00 2001 From: Wang Xiaojun Date: Mon, 14 Mar 2022 15:15:15 +0800 Subject: f2fs: remove redundant parameter judgment iput() has already judged the incoming parameter, so there is no need to repeat the judgment here. Signed-off-by: Wang Xiaojun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2b23a76bdae9..906e9e301ac8 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1111,8 +1111,7 @@ out_dir: out_old: f2fs_put_page(old_page, 0); out: - if (whiteout) - iput(whiteout); + iput(whiteout); return err; } -- cgit v1.2.3-59-g8ed1b From d284af43f703760e261b1601378a0c13a19d5f1f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Mar 2022 18:20:00 +0800 Subject: f2fs: compress: fix to print raw data size in error path of lz4 decompression In lz4_decompress_pages(), if size of decompressed data is not equal to expected one, we should print the size rather than size of target buffer for decompressed data, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 67bac2792e57..11e99bf6286c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -314,10 +314,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) } if (ret != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, " + printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, " "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, - dic->rlen, + F2FS_I_SB(dic->inode)->sb->s_id, ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; } -- cgit v1.2.3-59-g8ed1b From d98af5f4552058a5c22030641ef79cee92c61f54 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Tue, 15 Mar 2022 21:14:14 -0700 Subject: f2fs: introduce gc_urgent_mid mode We need a mid level of gc urgent mode to do GC forcibly in a period of given gc_urgent_sleep_time, but not like using greedy GC approach and switching to SSR mode such as gc urgent high mode. This can be used for more aggressive periodic storage clean up. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 17 +++++++++++------ fs/f2fs/debug.c | 4 +++- fs/f2fs/f2fs.h | 4 ++++ fs/f2fs/gc.c | 3 +++ fs/f2fs/sysfs.c | 7 +++++++ 5 files changed, 28 insertions(+), 7 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 5a5f3c5445f6..9b583dd0298b 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -297,11 +297,16 @@ Description: Shows current reserved blocks in system, it may be temporarily What: /sys/fs/f2fs//gc_urgent Date: August 2017 Contact: "Jaegeuk Kim" -Description: Do background GC aggressively when set. When gc_urgent = 1, - background thread starts to do GC by given gc_urgent_sleep_time - interval. 
When gc_urgent = 2, F2FS will lower the bar of - checking idle in order to process outstanding discard commands - and GC a little bit aggressively. It is set to 0 by default. +Description: Do background GC aggressively when set. Set to 0 by default. + gc urgent high(1): does GC forcibly in a period of given + gc_urgent_sleep_time and ignores I/O idling check. uses greedy + GC approach and turns SSR mode on. + gc urgent low(2): lowers the bar of checking I/O idling in + order to process outstanding discard commands and GC a + little bit aggressively. uses cost benefit GC approach. + gc urgent mid(3): does GC forcibly in a period of given + gc_urgent_sleep_time and executes a mid level of I/O idling check. + uses cost benefit GC approach. What: /sys/fs/f2fs//gc_urgent_sleep_time Date: August 2017 @@ -532,7 +537,7 @@ Date: July 2021 Contact: "Daeho Jeong" Description: Show how many segments have been reclaimed by GC during a specific GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy, - 3: GC idle AT, 4: GC urgent high, 5: GC urgent low) + 3: GC idle AT, 4: GC urgent high, 5: GC urgent low 6: GC urgent mid) You can re-initialize this value to "0". What: /sys/fs/f2fs//gc_segment_mode diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 6d26872c7364..fcdf253cd211 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -476,12 +476,14 @@ static int stat_show(struct seq_file *s, void *v) si->node_segs, si->bg_node_segs); seq_printf(s, " - Reclaimed segs : Normal (%d), Idle CB (%d), " "Idle Greedy (%d), Idle AT (%d), " - "Urgent High (%d), Urgent Low (%d)\n", + "Urgent High (%d), Urgent Mid (%d), " + "Urgent Low (%d)\n", si->sbi->gc_reclaimed_segs[GC_NORMAL], si->sbi->gc_reclaimed_segs[GC_IDLE_CB], si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY], si->sbi->gc_reclaimed_segs[GC_IDLE_AT], si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH], + si->sbi->gc_reclaimed_segs[GC_URGENT_MID], si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]); seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks, si->bg_data_blks + si->bg_node_blks); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index da729f53daa8..3016b6354a68 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1313,6 +1313,7 @@ enum { GC_IDLE_AT, GC_URGENT_HIGH, GC_URGENT_LOW, + GC_URGENT_MID, MAX_GC_MODE, }; @@ -2784,6 +2785,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) if (is_inflight_io(sbi, type)) return false; + if (sbi->gc_mode == GC_URGENT_MID) + return true; + if (sbi->gc_mode == GC_URGENT_LOW && (type == DISCARD_TIME || type == GC_TIME)) return true; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2d53ef121e76..ea5b93b689cd 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -103,7 +103,10 @@ static int gc_thread_func(void *data) sbi->gc_urgent_high_remaining--; } spin_unlock(&sbi->gc_urgent_high_lock); + } + if (sbi->gc_mode == GC_URGENT_HIGH || + sbi->gc_mode == GC_URGENT_MID) { wait_ms = gc_th->urgent_sleep_time; f2fs_down_write(&sbi->gc_lock); goto do_gc; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 47efcf233afd..fe29bcb70f46 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -468,6 +468,13 @@ out: } } else if (t == 2) { sbi->gc_mode = GC_URGENT_LOW; + } else if (t == 3) { + sbi->gc_mode = GC_URGENT_MID; + if (sbi->gc_thread) { + sbi->gc_thread->gc_wake = 1; + wake_up_interruptible_all( + &sbi->gc_thread->gc_wait_queue_head); + } } else { return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From c86868bbc22be9487d10d3c6336dd8ccb49e8a62 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Mar 2022 16:33:15 +0800 Subject: f2fs: 
initialize sbi->gc_mode explicitly sbi->gc_mode needs to be initialized to GC_NORMAL explicitly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4b570b5c2674..9176597fdf94 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3579,6 +3579,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->gc_mode = GC_NORMAL; sbi->next_victim_seg[BG_GC] = NULL_SEGNO; sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; -- cgit v1.2.3-59-g8ed1b From 9b56adcf525522e9ffa52471260298d91fc1d395 Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Fri, 18 Mar 2022 09:23:04 +0800 Subject: f2fs: fix compressed file start atomic write may cause data corruption When a compressed file has blocks, f2fs_ioc_start_atomic_write will succeed, but the compressed flag will remain on the inode. Writing a partial compressed cluster and then committing the atomic write will cause data corruption. This is the reproduction process: Step 1: create a compressed file, write 64K of data, call fsync(); the blocks are written as a compressed cluster. Step 2: ioctl(F2FS_IOC_START_ATOMIC_WRITE) --- this should fail, but it does not. write page 0 and page 3. ioctl(F2FS_IOC_COMMIT_ATOMIC_WRITE) -- pages 0 and 3 are written as a normal file. Step 3: drop caches. read pages 0-4 -- Since page 0 has a valid block address, the cluster is read as non-compressed, so pages 1 and 2 will be filled with compressed data or zeros. The root cause is that, after commit 7eab7a696827 ("f2fs: compress: remove unneeded read when rewrite whole cluster"), in step 2 f2fs_write_begin() only sets the target page dirty, and in f2fs_commit_inmem_pages() we write partial raw pages into the compressed cluster, corrupting the compressed cluster layout.
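For reference, the user-visible flow from the reproduction steps looks roughly like the hedged userspace sketch below (the file path is made up; the ioctl numbers are assumed to come from the f2fs uapi header on kernels that ship <linux/f2fs.h>). With this fix, the start ioctl is expected to fail for a file that is still compressed, rather than silently proceeding:

/* Hedged sketch; /mnt/f2fs/compressed_file is a hypothetical path. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/f2fs.h>

int main(void)
{
	int fd = open("/mnt/f2fs/compressed_file", O_RDWR);

	if (fd < 0)
		return 1;

	/* Expected to fail (e.g. EINVAL) on a still-compressed file after the fix. */
	if (ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE) < 0)
		perror("F2FS_IOC_START_ATOMIC_WRITE");
	else
		ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);

	close(fd);
	return 0;
}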
Fixes: 4c8ff7095bef ("f2fs: support data compression") Fixes: 7eab7a696827 ("f2fs: compress: remove unneeded read when rewrite whole cluster") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Fengnan Chang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/file.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b09f401f8960..5675af1b6916 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3363,7 +3363,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, *fsdata = NULL; - if (len == PAGE_SIZE) + if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) goto repeat; ret = f2fs_prepare_compress_overwrite(inode, pagep, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7049be29bc2e..68ddf4c7ca64 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2009,7 +2009,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); - f2fs_disable_compressed_file(inode); + if (!f2fs_disable_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } if (f2fs_is_atomic_file(inode)) { if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) -- cgit v1.2.3-59-g8ed1b From 98e92867b99761979f6d4c155b7d5a5b523412a3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 18 Mar 2022 17:02:30 +0800 Subject: f2fs: use aggressive GC policy during f2fs_disable_checkpoint() Let's enable GC_URGENT_HIGH mode during f2fs_disable_checkpoint(), so that we can use SSR allocator for GCed data/node persistence, it can improve the performance due to it avoiding migration of data/node locates in selected target segment of SSR allocator. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9176597fdf94..caceb80b87ec 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2080,6 +2080,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) { unsigned int s_flags = sbi->sb->s_flags; struct cp_control cpc; + unsigned int gc_mode; int err = 0; int ret; block_t unusable; @@ -2092,6 +2093,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); + gc_mode = sbi->gc_mode; + sbi->gc_mode = GC_URGENT_HIGH; + while (!f2fs_time_over(sbi, DISABLE_TIME)) { f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); @@ -2129,6 +2133,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) out_unlock: f2fs_up_write(&sbi->gc_lock); restore_flag: + sbi->gc_mode = gc_mode; sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } -- cgit v1.2.3-59-g8ed1b From e60aeb2dee1a4c28591b8f97d4248787aed10769 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 18 Mar 2022 12:13:23 -0700 Subject: f2fs: make gc_urgent and gc_segment_mode sysfs node readable Changed a way of showing values of them to use strings. 
Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index fe29bcb70f46..fb950a13375f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -41,6 +41,16 @@ enum { ATGC_INFO, /* struct atgc_management */ }; +static const char *gc_mode_names[MAX_GC_MODE] = { + "GC_NORMAL", + "GC_IDLE_CB", + "GC_IDLE_GREEDY", + "GC_IDLE_AT", + "GC_URGENT_HIGH", + "GC_URGENT_LOW", + "GC_URGENT_MID" +}; + struct f2fs_attr { struct attribute attr; ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); @@ -316,8 +326,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, return sysfs_emit(buf, "%u\n", sbi->compr_new_inode); #endif + if (!strcmp(a->attr.name, "gc_urgent")) + return sysfs_emit(buf, "%s\n", + gc_mode_names[sbi->gc_mode]); + if (!strcmp(a->attr.name, "gc_segment_mode")) - return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode); + return sysfs_emit(buf, "%s\n", + gc_mode_names[sbi->gc_segment_mode]); if (!strcmp(a->attr.name, "gc_reclaimed_segments")) { return sysfs_emit(buf, "%u\n", -- cgit v1.2.3-59-g8ed1b From 5b5b4f85b01604389f7a0f11ef180a725bf0e2d4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 21 Mar 2022 23:22:11 +0800 Subject: f2fs: fix to do sanity check on .cp_pack_total_block_count As bughunter reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215709 f2fs may hang when mounting a fuzzed image, the dmesg shows as below: __filemap_get_folio+0x3a9/0x590 pagecache_get_page+0x18/0x60 __get_meta_page+0x95/0x460 [f2fs] get_checkpoint_version+0x2a/0x1e0 [f2fs] validate_checkpoint+0x8e/0x2a0 [f2fs] f2fs_get_valid_checkpoint+0xd0/0x620 [f2fs] f2fs_fill_super+0xc01/0x1d40 [f2fs] mount_bdev+0x18a/0x1c0 f2fs_mount+0x15/0x20 [f2fs] legacy_get_tree+0x28/0x50 vfs_get_tree+0x27/0xc0 path_mount+0x480/0xaa0 do_mount+0x7c/0xa0 __x64_sys_mount+0x8b/0xe0 do_syscall_64+0x38/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae The root cause is that the cp_pack_total_block_count field in the checkpoint was fuzzed to one, so both cp pack blocks resolve to the same block address; reading the latter cp pack block then blocks on a page lock that is already held from reading the previous cp pack block. Fix it by adding a sanity check on cp_pack_total_block_count.
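To see the hang numerically: with cp_pack_total_block_count fuzzed to 1, the address of the second cp block is cp_addr + (1 - 1) = cp_addr, i.e. the very block whose page is still locked from the first read, so the mount thread waits on a lock it already holds. A simplified sketch of the bound the patch enforces (hypothetical helper and constant, not the f2fs function):

/* Sketch only; DEMO_CP_PACKS stands in for F2FS_CP_PACKS. */
#include <stdbool.h>

#define DEMO_CP_PACKS	2	/* stand-in: the two cp pages at the ends of a pack */

static bool cp_pack_count_valid(unsigned int cp_blocks,
				unsigned int blocks_per_seg)
{
	/* count must exceed the bare cp pages and fit within one segment */
	return cp_blocks > DEMO_CP_PACKS && cp_blocks <= blocks_per_seg;
}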
Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 871eee35a32f..aba1b8a1ce66 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -875,6 +875,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, struct page *cp_page_1 = NULL, *cp_page_2 = NULL; struct f2fs_checkpoint *cp_block = NULL; unsigned long long cur_version = 0, pre_version = 0; + unsigned int cp_blocks; int err; err = get_checkpoint_version(sbi, cp_addr, &cp_block, @@ -882,15 +883,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (err) return NULL; - if (le32_to_cpu(cp_block->cp_pack_total_block_count) > - sbi->blocks_per_seg) { + cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); + + if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; - cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_addr += cp_blocks - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) -- cgit v1.2.3-59-g8ed1b