From 787c7b8cb3c5196f77e4682e0b1c71375e74822c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Oct 2015 09:13:04 +0800 Subject: f2fs: report error of f2fs_create_root_stats f2fs_create_root_stats can fail due to no memory, report it to user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++++-- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/super.c | 6 +++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 478e5d54154f..b0966f3b1c9a 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -406,20 +406,23 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) kfree(si); } -void __init f2fs_create_root_stats(void) +int __init f2fs_create_root_stats(void) { struct dentry *file; f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); if (!f2fs_debugfs_root) - return; + return -ENOMEM; file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, &stat_fops); if (!file) { debugfs_remove(f2fs_debugfs_root); f2fs_debugfs_root = NULL; + return -ENOMEM; } + + return 0; } void f2fs_destroy_root_stats(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9db5500d63d9..3f1570c4fcf0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1987,7 +1987,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) int f2fs_build_stats(struct f2fs_sb_info *); void f2fs_destroy_stats(struct f2fs_sb_info *); -void __init f2fs_create_root_stats(void); +int __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_cp_count(si) @@ -2015,7 +2015,7 @@ void f2fs_destroy_root_stats(void); static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } -static inline void __init f2fs_create_root_stats(void) { } +static inline int __init f2fs_create_root_stats(void) { return 0; } static inline void f2fs_destroy_root_stats(void) { } #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c 
index 3a65e0132352..67864ab376c8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1478,10 +1478,14 @@ static int __init init_f2fs_fs(void) err = register_filesystem(&f2fs_fs_type); if (err) goto free_shrinker; - f2fs_create_root_stats(); + err = f2fs_create_root_stats(); + if (err) + goto free_filesystem; f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); return 0; +free_filesystem: + unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); free_crypto: -- cgit v1.2.3-59-g8ed1b From 2da3e027461ab0148384b02bd5905f1a7b335dff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 Oct 2015 17:56:14 +0800 Subject: f2fs: commit atomic written page in LFS mode We should always commit atomic written pages in LFS mode; otherwise data will become corrupted if we encounter a sudden power cut after only some of the pages have been committed in IPU mode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 972eab7ac071..0cc8de2839c4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1083,6 +1083,7 @@ int do_write_data_page(struct f2fs_io_info *fio) */ if (unlikely(fio->blk_addr != NEW_ADDR && !is_cold_data(page) && + !IS_ATOMIC_WRITTEN_PAGE(page) && need_inplace_update(inode))) { rewrite_data_page(fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); -- cgit v1.2.3-59-g8ed1b From d323d005ac4a2d413128267af76bb9d71f7303da Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 27 Oct 2015 09:53:45 +0800 Subject: f2fs: support file defragment This patch introduces a new ioctl, F2FS_IOC_DEFRAGMENT, to support defragmenting a specified range of a regular file. This ioctl is intended for a very limited workload: if the user expects high sequential read performance on a randomly written file, this interface can be used for defragmentation, after which the file is laid out as contiguously as possible on the device. 
Meanwhile, it has side-effect, it will make holes in segments where blocks located originally, so it's better to trigger GC to eliminate fragment in segments. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +- fs/f2fs/f2fs.h | 8 +++ fs/f2fs/file.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 208 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0cc8de2839c4..c3e1ffa0c8d6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -566,7 +566,7 @@ out: * b. do not use extent cache for better performance * c. give the block addresses to blockdev */ -static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, +int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag) { unsigned int maxblocks = map->m_len; @@ -1355,6 +1355,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; + /* skip writing during file defragment */ + if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG)) + goto skip_write; + /* during POR, we don't need to trigger writepage at all. 
*/ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto skip_write; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f1570c4fcf0..b01ad514fbd8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -234,6 +234,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) +#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) #define F2FS_IOC_SET_ENCRYPTION_POLICY \ _IOR('f', 19, struct f2fs_encryption_policy) @@ -260,6 +261,11 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS #endif +struct f2fs_defragment { + u64 start; + u64 len; +}; + /* * For INODE and NODE manager */ @@ -1416,6 +1422,7 @@ enum { FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ + FI_DO_DEFRAG, /* indicate defragment is running */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1847,6 +1854,7 @@ struct page *find_data_page(struct inode *, pgoff_t); struct page *get_lock_data_page(struct inode *, pgoff_t, bool); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct f2fs_io_info *); +int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); int f2fs_release_page(struct page *, gfp_t); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a197215ad52b..2f392982c597 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1646,6 +1646,199 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) return 0; } +static int f2fs_defragment_range(struct f2fs_sb_info *sbi, + struct file 
*filp, + struct f2fs_defragment *range) +{ + struct inode *inode = file_inode(filp); + struct f2fs_map_blocks map; + struct extent_info ei; + pgoff_t pg_start, pg_end; + unsigned int blk_per_seg = 1 << sbi->log_blocks_per_seg; + unsigned int total = 0, sec_num; + unsigned int pages_per_sec = sbi->segs_per_sec * + (1 << sbi->log_blocks_per_seg); + block_t blk_end = 0; + bool fragmented = false; + int err; + + /* if in-place-update policy is enabled, don't waste time here */ + if (need_inplace_update(inode)) + return -EINVAL; + + pg_start = range->start >> PAGE_CACHE_SHIFT; + pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT; + + f2fs_balance_fs(sbi); + + mutex_lock(&inode->i_mutex); + + /* writeback all dirty pages in the range */ + err = filemap_write_and_wait_range(inode->i_mapping, range->start, + range->start + range->len); + if (err) + goto out; + + /* + * lookup mapping info in extent cache, skip defragmenting if physical + * block addresses are continuous. + */ + if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) { + if (ei.fofs + ei.len >= pg_end) + goto out; + } + + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; + + /* + * lookup mapping info in dnode page cache, skip defragmenting if all + * physical block addresses are continuous even if there are hole(s) + * in logical blocks. 
+ */ + while (map.m_lblk < pg_end) { + map.m_flags = 0; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + if (err) + goto out; + + if (!(map.m_flags & F2FS_MAP_FLAGS)) { + map.m_lblk++; + map.m_len--; + continue; + } + + if (blk_end && blk_end != map.m_pblk) { + fragmented = true; + break; + } + blk_end = map.m_pblk + map.m_len; + + map.m_lblk += map.m_len; + map.m_len = pg_end - map.m_lblk; + } + + if (!fragmented) + goto out; + + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; + + sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec; + + /* + * make sure there are enough free section for LFS allocation, this can + * avoid defragment running in SSR mode when free section are allocated + * intensively + */ + if (has_not_enough_free_secs(sbi, sec_num)) { + err = -EAGAIN; + goto out; + } + + while (map.m_lblk < pg_end) { + pgoff_t idx; + int cnt = 0; + +do_map: + map.m_flags = 0; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + if (err) + goto clear_out; + + if (!(map.m_flags & F2FS_MAP_FLAGS)) { + map.m_lblk++; + continue; + } + + set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + + idx = map.m_lblk; + while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { + struct page *page; + + page = get_lock_data_page(inode, idx, true); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto clear_out; + } + + set_page_dirty(page); + f2fs_put_page(page, 1); + + idx++; + cnt++; + total++; + } + + map.m_lblk = idx; + map.m_len = pg_end - idx; + + if (idx < pg_end && cnt < blk_per_seg) + goto do_map; + + clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + + err = filemap_fdatawrite(inode->i_mapping); + if (err) + goto out; + } +clear_out: + clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); +out: + mutex_unlock(&inode->i_mutex); + if (!err) + range->len = (u64)total << PAGE_CACHE_SHIFT; + return err; +} + +static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = 
F2FS_I_SB(inode); + struct f2fs_defragment range; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + err = mnt_want_write_file(filp); + if (err) + return err; + + if (f2fs_readonly(sbi->sb)) { + err = -EROFS; + goto out; + } + + if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, + sizeof(range))) { + err = -EFAULT; + goto out; + } + + /* verify alignment of offset & size */ + if (range.start & (F2FS_BLKSIZE - 1) || + range.len & (F2FS_BLKSIZE - 1)) { + err = -EINVAL; + goto out; + } + + err = f2fs_defragment_range(sbi, filp, &range); + if (err < 0) + goto out; + + if (copy_to_user((struct f2fs_defragment __user *)arg, &range, + sizeof(range))) + err = -EFAULT; +out: + mnt_drop_write_file(filp); + return err; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -1679,6 +1872,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_gc(filp, arg); case F2FS_IOC_WRITE_CHECKPOINT: return f2fs_ioc_write_checkpoint(filp, arg); + case F2FS_IOC_DEFRAGMENT: + return f2fs_ioc_defragment(filp, arg); default: return -ENOTTY; } -- cgit v1.2.3-59-g8ed1b From 4bb9998d388b48dc0a4128bd1f4d85f09ec3b705 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 16 Nov 2015 20:46:28 +0900 Subject: Doc: f2fs: Fix typos in Documentation/filesystems/f2fs.txt This patch fix some typos in Documentation/filesystems/f2fs.txt Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b102b436563e..ad10494aa224 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -102,7 +102,7 @@ background_gc=%s Turn on/off cleaning operations, namely garbage collection, triggered in background when I/O 
subsystem is idle. If background_gc=on, it will turn on the garbage collection and if background_gc=off, garbage collection - will be truned off. If background_gc=sync, it will turn + will be turned off. If background_gc=sync, it will turn on synchronous garbage collection running in background. Default value for this option is on. So garbage collection is on by default. @@ -145,7 +145,7 @@ extent_cache Enable an extent cache based on rb-tree, it can cache as many as extent which map between contiguous logical address and physical address per inode, resulting in increasing the cache hit ratio. Set by default. -noextent_cache Diable an extent cache based on rb-tree explicitly, see +noextent_cache Disable an extent cache based on rb-tree explicitly, see the above extent_cache mount option. noinline_data Disable the inline data feature, inline data feature is enabled by default. @@ -192,7 +192,7 @@ Files in /sys/fs/f2fs/ policy for garbage collection. Setting gc_idle = 0 (default) will disable this option. Setting gc_idle = 1 will select the Cost Benefit approach - & setting gc_idle = 2 will select the greedy aproach. + & setting gc_idle = 2 will select the greedy approach. reclaim_segments This parameter controls the number of prefree segments to be reclaimed. If the number of prefree @@ -298,7 +298,7 @@ The dump.f2fs shows the information of specific inode and dumps SSA and SIT to file. Each file is dump_ssa and dump_sit. The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. -It shows on-disk inode information reconized by a given inode number, and is +It shows on-disk inode information recognized by a given inode number, and is able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and ./dump_sit respectively. 
-- cgit v1.2.3-59-g8ed1b From 29ba108d9ba44600961418a352871eb967d68c20 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Nov 2015 20:38:25 +0800 Subject: f2fs: fix memory leak of kobject in error path of fill_super f2fs_sb_info::s_kobj should be released in error path of fill_super, otherwise it will lead to memory leak. This bug was found by kmemleak: dmesg: kmemleak: 2 new suspected memory leaks (see /sys/kernel/debug/kmemleak) cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800838dc358 (size 8): comm "mount", pid 4154, jiffies 4297482839 (age 1911.412s) hex dump (first 8 bytes): 7a 72 61 6d 31 00 ff ff zram1... backtrace: [] kmemleak_alloc+0x28/0x50 [] __kmalloc_track_caller+0xef/0x1c0 [] kstrdup+0x45/0x80 [] kstrdup_const+0x28/0x30 [] kvasprintf_const+0x63/0xa0 [] kobject_set_name_vargs+0x3c/0xa0 [] kobject_add_varg+0x25/0x60 [] kobject_init_and_add+0x53/0x70 [] f2fs_fill_super+0x9d9/0xc40 [f2fs] [] mount_bdev+0x192/0x1d0 [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x43/0x170 [] vfs_kern_mount+0x76/0x160 [] do_mount+0x258/0xdc0 [] SyS_mount+0x7b/0xc0 [] entry_SYSCALL_64_fastpath+0x12/0x6f ... Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 67864ab376c8..bd7e9c6c42c8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1361,6 +1361,8 @@ try_onemore: free_kobj: kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); free_proc: if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); -- cgit v1.2.3-59-g8ed1b From 04ef4b626c324a7529c80ffc45787b274a6fa68a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Nov 2015 18:44:20 +0800 Subject: f2fs: fix to enable missing ioctl interfaces in ->compat_ioctl In 64-bit kernel f2fs can supports 32-bit ioctl system call by identifying encoded code which is converted from 32-bit one to 64-bit one in ->compat_ioctl. 
When we introduced new interfaces in ->ioctl, we forgot to enable them in ->compat_ioctl, so enable them for fixing. Signed-off-by: Chao Yu [Jaegeuk Kim: fix wrongly added spaces together] Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 +++-- fs/f2fs/file.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b01ad514fbd8..904384427ca6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -257,8 +257,9 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, /* * ioctl commands in 32 bit emulation */ -#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION #endif struct f2fs_defragment { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2f392982c597..3f1026caf807 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1901,6 +1901,22 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC32_SETFLAGS: cmd = F2FS_IOC_SETFLAGS; break; + case F2FS_IOC32_GETVERSION: + cmd = F2FS_IOC_GETVERSION; + break; + case F2FS_IOC_START_ATOMIC_WRITE: + case F2FS_IOC_COMMIT_ATOMIC_WRITE: + case F2FS_IOC_START_VOLATILE_WRITE: + case F2FS_IOC_RELEASE_VOLATILE_WRITE: + case F2FS_IOC_ABORT_VOLATILE_WRITE: + case F2FS_IOC_SHUTDOWN: + case F2FS_IOC_SET_ENCRYPTION_POLICY: + case F2FS_IOC_GET_ENCRYPTION_PWSALT: + case F2FS_IOC_GET_ENCRYPTION_POLICY: + case F2FS_IOC_GARBAGE_COLLECT: + case F2FS_IOC_WRITE_CHECKPOINT: + case F2FS_IOC_DEFRAGMENT: + break; default: return -ENOIOCTLCMD; } -- cgit v1.2.3-59-g8ed1b From eb7e813cc791735f2428202d5249a8fd769df1f3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Nov 2015 18:45:07 +0800 Subject: f2fs: fix to remove directory inode from dirty list If last dirty dentry page was writebacked in reclaim path, we should remove its directory 
inode from global dirty list to avoid unnecessary flush for this inode when doing checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c3e1ffa0c8d6..6c689e9a86a3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1182,8 +1182,10 @@ out: unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); - if (wbc->for_reclaim) + if (wbc->for_reclaim) { f2fs_submit_merged_bio(sbi, DATA, WRITE); + remove_dirty_dir_inode(inode); + } return 0; redirty_out: -- cgit v1.2.3-59-g8ed1b From 692223d132067ef2c392adec6f1324d581496212 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Thu, 12 Nov 2015 08:43:04 +0800 Subject: f2fs: optimize __find_rev_next_bit 1. Skip __reverse_ulong if the bitmap is empty. 2. Reduce branches and codes. According to my test, the performance of this new version is 5% higher on an empty bitmap of 64bytes, and remains about the same in the worst scenario. Signed-off-by: Fan li Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 46 ++++++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f77b3258454a..efbf6b5f1dc3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -86,6 +86,7 @@ static inline unsigned long __reverse_ffs(unsigned long word) /* * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because * f2fs_set_bit makes MSB and LSB reversed in a byte. + * @size must be integral times of unsigned long. 
* Example: * MSB <--> LSB * f2fs_set_bit(0, bitmap) => 1000 0000 @@ -95,47 +96,36 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long result = size; unsigned long tmp; if (offset >= size) return size; - size -= result; + size -= (offset & ~(BITS_PER_LONG - 1)); offset %= BITS_PER_LONG; - if (!offset) - goto aligned; - tmp = __reverse_ulong((unsigned char *)p); - tmp &= ~0UL >> offset; - - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; + while (1) { + if (*p == 0) + goto pass; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - p++; -aligned: - while (size & ~(BITS_PER_LONG-1)) { tmp = __reverse_ulong((unsigned char *)p); + + tmp &= ~0UL >> offset; + if (size < BITS_PER_LONG) + tmp &= (~0UL << (BITS_PER_LONG - size)); if (tmp) - goto found_middle; - result += BITS_PER_LONG; + goto found; +pass: + if (size <= BITS_PER_LONG) + break; size -= BITS_PER_LONG; + offset = 0; p++; } - if (!size) - return result; - - tmp = __reverse_ulong((unsigned char *)p); -found_first: - tmp &= (~0UL << (BITS_PER_LONG - size)); - if (!tmp) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __reverse_ffs(tmp); + return result; +found: + return result - size + __reverse_ffs(tmp); } static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, -- cgit v1.2.3-59-g8ed1b From f478f43fa0d8f38537848d298980955244afdaee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 13 Nov 2015 18:27:35 +0800 Subject: f2fs: clear page uptodate when dropping cache for atomic write We should clear uptodate flag for all pages atomic written when we drop them, otherwise before these cached pages were reclaimed or invalidated eventually, we will see invalid data when hitting them again. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index efbf6b5f1dc3..ed2c5dec7526 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -247,6 +247,7 @@ int commit_inmem_pages(struct inode *inode, bool abort) submit_bio = true; } } else { + ClearPageUptodate(cur->page); trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); } set_page_private(cur->page, 0); -- cgit v1.2.3-59-g8ed1b From 57b62d29ad5b384775974973087d47755a8c6fcc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Nov 2015 16:09:07 +0800 Subject: f2fs: fix to report error in f2fs_readdir get_lock_data_page in f2fs_readdir can fail for many reasons (e.g. no memory or an I/O error), so it's better to report this kind of error to the user rather than ignore it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7c1678ba8f92..9de898d2ddff 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -855,8 +855,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) for (; n < npages; n++) { dentry_page = get_lock_data_page(inode, n, false); - if (IS_ERR(dentry_page)) - continue; + if (IS_ERR(dentry_page)) { + err = PTR_ERR(dentry_page); + if (err == -ENOENT) + continue; + else + goto out; + } dentry_blk = kmap(dentry_page); -- cgit v1.2.3-59-g8ed1b From 760de7914e27781abb44564449c761ea4440f982 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Nov 2015 16:26:44 -0800 Subject: f2fs: avoid deadlock in f2fs_shrink_extent_tree While handling extent trees, we can enter a reclaim path at any time. If that path tries to release some extent nodes in the same extent tree, write_lock(&et->lock) would hang. In order to avoid the deadlock, we can just skip it. 
Note that if it is an unreferenced tree, we should get write_lock(&et->lock) successfully and release all of the nodes therein. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 7ddba812e11b..de063f2b2384 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -615,9 +615,10 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) for (i = 0; i < found; i++) { struct extent_tree *et = treevec[i]; - write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et, false); - write_unlock(&et->lock); + if (write_trylock(&et->lock)) { + node_cnt += __free_extent_tree(sbi, et, false); + write_unlock(&et->lock); + } if (node_cnt + tree_cnt >= nr_shrink) goto unlock_out; -- cgit v1.2.3-59-g8ed1b From 807b1e1c8e08452948495b1a9985ab46d329e5c2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 Dec 2015 14:14:40 -0800 Subject: f2fs: do not recover from previous remained wrong dnodes If the device does not support discard, some obsolete dnodes can be recovered by roll-forward. This patch enhances the recovery flow. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index cbf74f47cce8..fad010faa859 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -168,6 +168,32 @@ static void recover_inode(struct inode *inode, struct page *page) ino_of_node(page), name); } +static bool is_same_inode(struct inode *inode, struct page *ipage) +{ + struct f2fs_inode *ri = F2FS_INODE(ipage); + struct timespec disk; + + if (!IS_INODE(ipage)) + return true; + + disk.tv_sec = le64_to_cpu(ri->i_ctime); + disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); + if (timespec_compare(&inode->i_ctime, &disk) > 0) + return false; + + disk.tv_sec = le64_to_cpu(ri->i_atime); + disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec); + if (timespec_compare(&inode->i_atime, &disk) > 0) + return false; + + disk.tv_sec = le64_to_cpu(ri->i_mtime); + disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); + if (timespec_compare(&inode->i_mtime, &disk) > 0) + return false; + + return true; +} + static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) { unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); @@ -197,7 +223,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) goto next; entry = get_fsync_inode(head, ino_of_node(page)); - if (!entry) { + if (entry) { + if (!is_same_inode(entry->inode, page)) + goto next; + } else { if (IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) -- cgit v1.2.3-59-g8ed1b From e9837bc2a4a407ee366143cf721ee77154ac051e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:41:50 +0800 Subject: f2fs: clean up error path in f2fs_readdir No logic changes, just clean up the error path. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9de898d2ddff..6554fd5fce88 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -867,18 +867,15 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); - if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) - goto stop; + if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) { + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + break; + } ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); - dentry_page = NULL; - } -stop: - if (dentry_page && !IS_ERR(dentry_page)) { - kunmap(dentry_page); - f2fs_put_page(dentry_page, 1); } out: f2fs_fname_crypto_free_buffer(&fstr); -- cgit v1.2.3-59-g8ed1b From 855639decaa7ba5f356d6928c744a0ae1977c134 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:42:54 +0800 Subject: f2fs: clean up code with __has_cursum_space Clean up codes in lookup_journal_in_cursum() with __has_cursum_space(). 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ed2c5dec7526..74c474821e5a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1740,13 +1740,13 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, if (le32_to_cpu(nid_in_journal(sum, i)) == val) return i; } - if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) + if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL)) return update_nats_in_cursum(sum, 1); } else if (type == SIT_JOURNAL) { for (i = 0; i < sits_in_cursum(sum); i++) if (le32_to_cpu(segno_in_journal(sum, i)) == val) return i; - if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) + if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL)) return update_sits_in_cursum(sum, 1); } return -1; -- cgit v1.2.3-59-g8ed1b From b7973f2378c619d0e17a075f13350bd58a9ebe3d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:43:59 +0800 Subject: f2fs: clean up argument of recover_data In recover_data, value of argument 'type' will be CURSEG_WARM_NODE all the time, remove it for cleanup. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fad010faa859..7fcb6e49deff 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -488,8 +488,7 @@ out: return err; } -static int recover_data(struct f2fs_sb_info *sbi, - struct list_head *head, int type) +static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head) { unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; @@ -498,7 +497,7 @@ static int recover_data(struct f2fs_sb_info *sbi, block_t blkaddr; /* get node pages in the current segment */ - curseg = CURSEG_I(sbi, type); + curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); while (1) { @@ -585,7 +584,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) need_writecp = true; /* step #2: recover data */ - err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); + err = recover_data(sbi, &inode_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); out: -- cgit v1.2.3-59-g8ed1b From 9006f2c93fe5cc450bc0d3a4924b46393f165b4a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:47:33 +0800 Subject: f2fs: kill f2fs_drop_largest_extent For direct IO, f2fs only allocate new address for the block which is not exist in the disk before, its mapping info should not exist in extent cache previously, so here we do not need to call f2fs_drop_largest_extent to drop related cache. Due to no more callers for f2fs_drop_largest_extent now, kill it. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ---- fs/f2fs/extent_cache.c | 8 -------- fs/f2fs/f2fs.h | 1 - 3 files changed, 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6c689e9a86a3..90a2ffea875b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -494,10 +494,6 @@ alloc: if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)) i_size_write(dn->inode, ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)); - - /* direct IO doesn't use extent cache to maximize the performance */ - f2fs_drop_largest_extent(dn->inode, fofs); - return 0; } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index de063f2b2384..e86e9f1e0733 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -164,14 +164,6 @@ static void __drop_largest_extent(struct inode *inode, largest->len = 0; } -void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs) -{ - if (!f2fs_may_extent_tree(inode)) - return; - - __drop_largest_extent(inode, fofs, 1); -} - void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 904384427ca6..0831db2f4b3a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2078,7 +2078,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *); * extent_cache.c */ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); -void f2fs_drop_largest_extent(struct inode *, pgoff_t); void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); unsigned int f2fs_destroy_extent_node(struct inode *); void f2fs_destroy_extent_tree(struct inode *); -- cgit v1.2.3-59-g8ed1b From 3519e3f992995d46c200134cfbf84c61b7a01f4c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:56:52 +0800 Subject: f2fs: use sbi->blocks_per_seg to avoid unnecessary calculation Use sbi->blocks_per_seg directly to avoid unnecessary calculation when using 1 << sbi->log_blocks_per_seg. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/file.c | 5 ++--- fs/f2fs/gc.c | 4 ++-- fs/f2fs/node.h | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b0966f3b1c9a..8ce2fe3f65ab 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -105,7 +105,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) bimodal = 0; total_vblocks = 0; - blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); + blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0831db2f4b3a..0052ae8bea3f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1099,8 +1099,7 @@ static inline int get_dirty_pages(struct inode *inode) static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) { - unsigned int pages_per_sec = sbi->segs_per_sec * - (1 << sbi->log_blocks_per_seg); + unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; return ((get_pages(sbi, block_type) + pages_per_sec - 1) >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3f1026caf807..96e4e048a8a1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1654,10 +1654,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_map_blocks map; struct extent_info ei; pgoff_t pg_start, pg_end; - unsigned int blk_per_seg = 1 << sbi->log_blocks_per_seg; + unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; - unsigned int pages_per_sec = sbi->segs_per_sec * - (1 << sbi->log_blocks_per_seg); + unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg; block_t blk_end = 0; bool fragmented = false; int err; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 
fedbf67a0842..ce350c44b5cf 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -173,9 +173,9 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, { /* SSR allocates in a segment unit */ if (p->alloc_mode == SSR) - return 1 << sbi->log_blocks_per_seg; + return sbi->blocks_per_seg; if (p->gc_mode == GC_GREEDY) - return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; + return sbi->blocks_per_seg * p->ofs_unit; else if (p->gc_mode == GC_CB) return UINT_MAX; else /* No other gc_mode */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e4fffd2d98c4..2de759a7746f 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -183,7 +183,7 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) block_addr = (pgoff_t)(nm_i->nat_blkaddr + (seg_off << sbi->log_blocks_per_seg << 1) + - (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); + (block_off & (sbi->blocks_per_seg - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) block_addr += sbi->blocks_per_seg; -- cgit v1.2.3-59-g8ed1b From 0cab80ee0c9e17337468c4c0f96786ccbca693d9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:36:16 +0800 Subject: f2fs: fix to convert inline inode in ->setattr In commit 3c4541452748 ("f2fs: do not trim preallocated blocks when truncating after i_size"), in order to follow the regulation: "truncate(x) where x > i_size will not trim all blocks past i_size." like other file systems, in ->setattr we invoked truncate_setsize instead of f2fs_truncate to avoid unneeded block trimming in such case, but forgot to call f2fs_convert_inline_inode keep consistency of inline data conversion rule. This patch fixes to convert inline data if necessary. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 96e4e048a8a1..a018ed327713 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -686,6 +686,14 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) * larger than i_size. */ truncate_setsize(inode, attr->ia_size); + + /* should convert inline inode here */ + if (f2fs_has_inline_data(inode) && + !f2fs_may_inline_data(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } inode->i_mtime = inode->i_ctime = CURRENT_TIME; } } -- cgit v1.2.3-59-g8ed1b From 80609448cd63b700a37427e423c201fc5e16e95a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 4 Dec 2015 16:51:13 -0800 Subject: f2fs: enhance the bit operation for SSR This patch enhances the existing bit operation when f2fs allocates SSR blocks. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 50 ++++++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 74c474821e5a..5fa519f02860 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -132,47 +132,37 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long result = size; unsigned long tmp; if (offset >= size) return size; - size -= result; + size -= (offset & ~(BITS_PER_LONG - 1)); offset %= BITS_PER_LONG; - if (!offset) - goto aligned; - - tmp = __reverse_ulong((unsigned char *)p); - tmp |= ~((~0UL << offset) >> offset); - - if (size < BITS_PER_LONG) - goto found_first; - if (tmp != ~0UL) - goto found_middle; - - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - p++; -aligned: - while (size & ~(BITS_PER_LONG - 1)) { + + while (1) { + if (*p == ~0UL) + goto pass; + 
tmp = __reverse_ulong((unsigned char *)p); + + if (offset) + tmp |= ~0UL << (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + tmp |= ~0UL >> size; if (tmp != ~0UL) - goto found_middle; - result += BITS_PER_LONG; + goto found; +pass: + if (size <= BITS_PER_LONG) + break; size -= BITS_PER_LONG; + offset = 0; p++; } - if (!size) - return result; - - tmp = __reverse_ulong((unsigned char *)p); -found_first: - tmp |= ~(~0UL << (BITS_PER_LONG - size)); - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + __reverse_ffz(tmp); + return result; +found: + return result - size + __reverse_ffz(tmp); } void register_inmem_page(struct inode *inode, struct page *page) -- cgit v1.2.3-59-g8ed1b From 5d909cdbbba244ecb4c2dfc4dc3e3bc29529fb05 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 7 Dec 2015 10:16:58 -0800 Subject: f2fs: refactor f2fs_commit_super Previously, f2fs_commit_super hacks the bh->blocknr to write the broken alternate superblock. Instead of it, we should use the correct logic to retrieve its buffer head with locking it appropriately. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bd7e9c6c42c8..dbf16ade0e9a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1099,27 +1099,35 @@ out: int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *sbh = sbi->raw_super_buf; - sector_t block = sbh->b_blocknr; + struct buffer_head *bh; int err; /* write back-up superblock first */ - sbh->b_blocknr = block ? 0 : 1; - mark_buffer_dirty(sbh); - err = sync_dirty_buffer(sbh); + bh = sb_getblk(sbi->sb, sbh->b_blocknr ? 
0 : 1); + if (!bh) + return -EIO; - sbh->b_blocknr = block; + lock_buffer(bh); + memcpy(bh->b_data, sbh->b_data, sbh->b_size); + WARN_ON(sbh->b_size != F2FS_BLKSIZE); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + unlock_buffer(bh); + + /* it's rare case, we can do fua all the time */ + err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); + brelse(bh); /* if we are in recovery path, skip writing valid superblock */ if (recover || err) - goto out; + return err; /* write current valid superblock */ - mark_buffer_dirty(sbh); - err = sync_dirty_buffer(sbh); -out: - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - return err; + lock_buffer(sbh); + set_buffer_dirty(sbh); + unlock_buffer(sbh); + + return __sync_dirty_buffer(sbh, WRITE_FLUSH_FUA); } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) -- cgit v1.2.3-59-g8ed1b From ea212a4a7a432e0ecd0f0f53971b70172b3e7f96 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 7 Dec 2015 10:18:54 -0800 Subject: f2fs: use lock_buffer when changing superblock When modifying sb contents, we need to lock its buffer.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a018ed327713..294e71576cec 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1591,14 +1591,18 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) return err; /* update superblock with uuid */ + lock_buffer(sbi->raw_super_buf); generate_random_uuid(sbi->raw_super->encrypt_pw_salt); + unlock_buffer(sbi->raw_super_buf); err = f2fs_commit_super(sbi, false); mnt_drop_write_file(filp); if (err) { /* undo new data */ + lock_buffer(sbi->raw_super_buf); memset(sbi->raw_super->encrypt_pw_salt, 0, 16); + unlock_buffer(sbi->raw_super_buf); return err; } got_it: -- cgit v1.2.3-59-g8ed1b From e1c51b9f1df2f9efc2ec11488717e40cd12015f9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 11 Dec 2015 16:08:22 +0800 Subject: f2fs: clean up node page updating flow If read_node_page returns LOCKED_PAGE, in its caller it's better to a) skip unneeded 'Update' flag and mapping info verification; b) check nid value stored in footer structure of node page.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7bcbc6e9c40d..d842b199cd02 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1063,12 +1063,13 @@ repeat: if (err < 0) { f2fs_put_page(page, 1); return ERR_PTR(err); - } else if (err != LOCKED_PAGE) { - lock_page(page); + } else if (err == LOCKED_PAGE) { + goto page_hit; } - if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { - ClearPageUptodate(page); + lock_page(page); + + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -1076,6 +1077,8 @@ repeat: f2fs_put_page(page, 1); goto repeat; } +page_hit: + f2fs_bug_on(sbi, nid != nid_of_node(page)); return page; } @@ -1114,24 +1117,25 @@ repeat: end = start + MAX_RA_NODE; end = min(end, NIDS_PER_BLOCK); for (i = start + 1; i < end; i++) { - nid = get_nid(parent, i, false); - if (!nid) + nid_t tnid = get_nid(parent, i, false); + if (!tnid) continue; - ra_node_page(sbi, nid); + ra_node_page(sbi, tnid); } blk_finish_plug(&plug); lock_page(page); + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } page_hit: - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } + f2fs_bug_on(sbi, nid != nid_of_node(page)); return page; } -- cgit v1.2.3-59-g8ed1b From d8fe4f0e74cb27e79b2b500ca6eae9f9b02748b4 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Mon, 14 Dec 2015 13:34:00 +0800 Subject: f2fs: write only the pages in range during defragment @lend of filemap_write_and_wait_range is supposed to be a "offset in bytes where the range ends (inclusive)". Subtract 1 to avoid writing an extra page. 
Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 294e71576cec..5fac4f259965 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1686,7 +1686,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, /* writeback all dirty pages in the range */ err = filemap_write_and_wait_range(inode->i_mapping, range->start, - range->start + range->len); + range->start + range->len - 1); if (err) goto out; -- cgit v1.2.3-59-g8ed1b From e8931582bdaf4d4a6ee7e520ef0ccd481713a42a Mon Sep 17 00:00:00 2001 From: Fan Li Date: Mon, 14 Dec 2015 15:26:04 +0800 Subject: f2fs: fix to update variable correctly when skip a unmapped block map.m_len should be reduced after skipping a block Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5fac4f259965..9949d0f332c2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1759,6 +1759,7 @@ do_map: if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk++; + map.m_len--; continue; } -- cgit v1.2.3-59-g8ed1b From 9a59b62fd88196844cee5fff851bee2cfd7afb6e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 09:58:18 +0800 Subject: f2fs: do more integrity verification for superblock Do more sanity checks for superblock during ->mount.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dbf16ade0e9a..a6f2beda22b9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -918,6 +918,79 @@ static loff_t max_file_size(unsigned bits) return result; } +static inline bool sanity_check_area_boundary(struct super_block *sb, + struct f2fs_super_block *raw_super) +{ + u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); + u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); + u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr); + u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr); + u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); + u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); + u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt); + u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit); + u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat); + u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa); + u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main); + u32 segment_count = le32_to_cpu(raw_super->segment_count); + u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + + if (segment0_blkaddr != cp_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Mismatch start address, segment0(%u) cp_blkaddr(%u)", + segment0_blkaddr, cp_blkaddr); + return true; + } + + if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) != + sit_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Wrong CP boundary, start(%u) end(%u) blocks(%u)", + cp_blkaddr, sit_blkaddr, + segment_count_ckpt << log_blocks_per_seg); + return true; + } + + if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) != + nat_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", + sit_blkaddr, nat_blkaddr, + segment_count_sit << log_blocks_per_seg); + return 
true; + } + + if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) != + ssa_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", + nat_blkaddr, ssa_blkaddr, + segment_count_nat << log_blocks_per_seg); + return true; + } + + if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) != + main_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", + ssa_blkaddr, main_blkaddr, + segment_count_ssa << log_blocks_per_seg); + return true; + } + + if (main_blkaddr + (segment_count_main << log_blocks_per_seg) != + segment0_blkaddr + (segment_count << log_blocks_per_seg)) { + f2fs_msg(sb, KERN_INFO, + "Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)", + main_blkaddr, + segment0_blkaddr + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); + return true; + } + + return false; +} + static int sanity_check_raw_super(struct super_block *sb, struct f2fs_super_block *raw_super) { @@ -947,6 +1020,14 @@ static int sanity_check_raw_super(struct super_block *sb, return 1; } + /* check log blocks per segment */ + if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { + f2fs_msg(sb, KERN_INFO, + "Invalid log blocks per segment (%u)\n", + le32_to_cpu(raw_super->log_blocks_per_seg)); + return 1; + } + /* Currently, support 512/1024/2048/4096 bytes sector size */ if (le32_to_cpu(raw_super->log_sectorsize) > F2FS_MAX_LOG_SECTOR_SIZE || @@ -965,6 +1046,23 @@ static int sanity_check_raw_super(struct super_block *sb, le32_to_cpu(raw_super->log_sectorsize)); return 1; } + + /* check reserved ino info */ + if (le32_to_cpu(raw_super->node_ino) != 1 || + le32_to_cpu(raw_super->meta_ino) != 2 || + le32_to_cpu(raw_super->root_ino) != 3) { + f2fs_msg(sb, KERN_INFO, + "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", + le32_to_cpu(raw_super->node_ino), + le32_to_cpu(raw_super->meta_ino), + le32_to_cpu(raw_super->root_ino)); + return 1; + } + + /* check CP/SIT/NAT/SSA/MAIN_AREA area 
boundary */ + if (sanity_check_area_boundary(sb, raw_super)) + return 1; + return 0; } -- cgit v1.2.3-59-g8ed1b From a11fac3776fc4db6a9fa1104b1d0477809c677e0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 14 Dec 2015 18:58:42 -0800 Subject: f2fs: add symbol to avoid any confusion with tools This patch adds MAX_VOLUME_NAME to sync with f2fs-tools. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 25c6324a0dd0..e59c3be92106 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -51,6 +51,7 @@ #define MAX_ACTIVE_DATA_LOGS 8 #define VERSION_LEN 256 +#define MAX_VOLUME_NAME 512 /* * For superblock @@ -84,7 +85,7 @@ struct f2fs_super_block { __le32 node_ino; /* node inode number */ __le32 meta_ino; /* meta inode number */ __u8 uuid[16]; /* 128-bit uuid for volume */ - __le16 volume_name[512]; /* volume name */ + __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */ __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ __le32 cp_payload; -- cgit v1.2.3-59-g8ed1b From a49324f127dec918f5a3b3f145d0bf2fb81f4588 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 13:29:47 +0800 Subject: f2fs: rename {add,remove,release}_dirty_inode to {add,remove,release}_ino_entry remove_dirty_dir_inode will be renamed to remove_dirty_inode as a generic function in following patch for removing directory/regular/symlink inode in global dirty list. Here rename ino management related functions for readability, also in order to avoid name conflict. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++---- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/file.c | 4 ++-- fs/f2fs/inode.c | 4 ++-- fs/f2fs/super.c | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f661d80474be..b839f5f3385c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -410,13 +410,13 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) spin_unlock(&im->ino_lock); } -void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) +void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* add new dirty ino entry into list */ __add_ino_entry(sbi, ino, type); } -void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) +void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* remove dirty ino entry from list */ __remove_ino_entry(sbi, ino, type); @@ -434,7 +434,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) return e ? 
true : false; } -void release_dirty_inode(struct f2fs_sb_info *sbi) +void release_ino_entry(struct f2fs_sb_info *sbi) { struct ino_entry *e, *tmp; int i; @@ -1081,7 +1081,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, discard_blk); - release_dirty_inode(sbi); + release_ino_entry(sbi); if (unlikely(f2fs_cp_error(sbi))) return; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0052ae8bea3f..ee8bcbf34f44 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1820,9 +1820,9 @@ bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); -void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); -void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); -void release_dirty_inode(struct f2fs_sb_info *); +void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); +void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); +void release_ino_entry(struct f2fs_sb_info *); bool exist_written_data(struct f2fs_sb_info *, nid_t, int); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9949d0f332c2..c6d909e9661e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -275,10 +275,10 @@ sync_nodes: goto out; /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, ino, APPEND_INO); + remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, ino, UPDATE_INO); + remove_ino_entry(sbi, ino, UPDATE_INO); clear_inode_flag(fi, FI_UPDATE_WRITE); ret = f2fs_issue_flush(sbi); out: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 97e20decacb4..3d2fe595d078 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -357,9 
+357,9 @@ no_delete: if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); if (is_inode_flag_set(fi, FI_APPEND_WRITE)) - add_dirty_inode(sbi, inode->i_ino, APPEND_INO); + add_ino_entry(sbi, inode->i_ino, APPEND_INO); if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) - add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); + add_ino_entry(sbi, inode->i_ino, UPDATE_INO); if (is_inode_flag_set(fi, FI_FREE_NID)) { if (err && err != -ENOENT) alloc_nid_done(sbi, inode->i_ino); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6f2beda22b9..9ee7144c5367 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -548,7 +548,7 @@ static void f2fs_put_super(struct super_block *sb) * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. */ - release_dirty_inode(sbi); + release_ino_entry(sbi); release_discard_addrs(sbi); f2fs_leave_shrinker(sbi); -- cgit v1.2.3-59-g8ed1b From 2710fd7e00b4f77dbe807efaf546bed00b62e65e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 13:30:45 +0800 Subject: f2fs: introduce dirty list node in inode info Add a new dirt list node member in inode info for linking the inode to global dirty list in superblock, instead of old implementation which allocate slab cache memory as an entry to inode. It avoids memory pressure due to slab cache allocation, and also makes codes more clean. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 54 +++++++++++++++++----------------------------------- fs/f2fs/debug.c | 1 - fs/f2fs/f2fs.h | 10 ++-------- fs/f2fs/super.c | 1 + 4 files changed, 20 insertions(+), 46 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b839f5f3385c..1aca402cab9c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -722,25 +722,23 @@ fail_no_cp: return -EINVAL; } -static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) +static void __add_dirty_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); - if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) - return -EEXIST; + if (is_inode_flag_set(fi, FI_DIRTY_DIR)) + return; - set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); - F2FS_I(inode)->dirty_dir = new; - list_add_tail(&new->list, &sbi->dir_inode_list); + set_inode_flag(fi, FI_DIRTY_DIR); + list_add_tail(&fi->dirty_list, &sbi->dir_inode_list); stat_inc_dirty_dir(sbi); - return 0; + return; } void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode_entry *new; - int ret = 0; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) @@ -751,17 +749,11 @@ void update_dirty_page(struct inode *inode, struct page *page) goto out; } - new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - new->inode = inode; - INIT_LIST_HEAD(&new->list); - spin_lock(&sbi->dir_inode_lock); - ret = __add_dirty_inode(inode, new); + __add_dirty_inode(inode); inode_inc_dirty_pages(inode); spin_unlock(&sbi->dir_inode_lock); - if (ret) - kmem_cache_free(inode_entry_slab, new); out: SetPagePrivate(page); f2fs_trace_pid(page); @@ -770,25 +762,16 @@ out: void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode_entry *new = - f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - int 
ret = 0; - - new->inode = inode; - INIT_LIST_HEAD(&new->list); spin_lock(&sbi->dir_inode_lock); - ret = __add_dirty_inode(inode, new); + __add_dirty_inode(inode); spin_unlock(&sbi->dir_inode_lock); - - if (ret) - kmem_cache_free(inode_entry_slab, new); } void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode_entry *entry; + struct f2fs_inode_info *fi = F2FS_I(inode); if (!S_ISDIR(inode->i_mode)) return; @@ -800,17 +783,14 @@ void remove_dirty_dir_inode(struct inode *inode) return; } - entry = F2FS_I(inode)->dirty_dir; - list_del(&entry->list); - F2FS_I(inode)->dirty_dir = NULL; - clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); + list_del_init(&fi->dirty_list); + clear_inode_flag(fi, FI_DIRTY_DIR); stat_dec_dirty_dir(sbi); spin_unlock(&sbi->dir_inode_lock); - kmem_cache_free(inode_entry_slab, entry); /* Only from the recovery routine */ - if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { - clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); + if (is_inode_flag_set(fi, FI_DELAY_IPUT)) { + clear_inode_flag(fi, FI_DELAY_IPUT); iput(inode); } } @@ -818,8 +798,8 @@ void remove_dirty_dir_inode(struct inode *inode) void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; - struct inode_entry *entry; struct inode *inode; + struct f2fs_inode_info *fi; retry: if (unlikely(f2fs_cp_error(sbi))) return; @@ -831,8 +811,8 @@ retry: spin_unlock(&sbi->dir_inode_lock); return; } - entry = list_entry(head->next, struct inode_entry, list); - inode = igrab(entry->inode); + fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); + inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->dir_inode_lock); if (inode) { filemap_fdatawrite(inode->i_mapping); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8ce2fe3f65ab..f4a7b9e9416d 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -189,7 +189,6 @@ get_cache: si->cache_mem += NM_I(sbi)->dirty_nat_cnt * sizeof(struct nat_entry_set); si->cache_mem += 
si->inmem_pages * sizeof(struct inmem_pages); - si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ee8bcbf34f44..21048edb72cb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -158,13 +158,7 @@ struct ino_entry { nid_t ino; /* inode number */ }; -/* - * for the list of directory inodes or gc inodes. - * NOTE: there are two slab users for this structure, if we add/modify/delete - * fields in structure for one of slab users, it may affect fields or size of - * other one, in this condition, it's better to split both of slab and related - * data structure. - */ +/* for the list of inodes to be GCed */ struct inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ @@ -441,8 +435,8 @@ struct f2fs_inode_info { unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ - struct inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct list_head dirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9ee7144c5367..4031f8ed9d24 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -432,6 +432,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_current_depth = 1; fi->i_advise = 0; init_rwsem(&fi->i_sem); + INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); -- cgit v1.2.3-59-g8ed1b From 6ad7609a183a250f1a346c7edfcbeaa30a29cfcc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 13:31:40 +0800 Subject: f2fs: introduce __remove_dirty_inode 
Introduce __remove_dirty_inode to clean up codes in remove_dirty_dir_inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1aca402cab9c..a4392f06f733 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -736,6 +736,20 @@ static void __add_dirty_inode(struct inode *inode) return; } +static void __remove_dirty_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + + if (get_dirty_pages(inode) || + !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) + return; + + list_del_init(&fi->dirty_list); + clear_inode_flag(fi, FI_DIRTY_DIR); + stat_dec_dirty_dir(sbi); +} + void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -777,15 +791,7 @@ void remove_dirty_dir_inode(struct inode *inode) return; spin_lock(&sbi->dir_inode_lock); - if (get_dirty_pages(inode) || - !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { - spin_unlock(&sbi->dir_inode_lock); - return; - } - - list_del_init(&fi->dirty_list); - clear_inode_flag(fi, FI_DIRTY_DIR); - stat_dec_dirty_dir(sbi); + __remove_dirty_inode(inode); spin_unlock(&sbi->dir_inode_lock); /* Only from the recovery routine */ -- cgit v1.2.3-59-g8ed1b From a1c1e9b74ff380176bf3862c764061e0a7efd9bb Mon Sep 17 00:00:00 2001 From: Fan Li Date: Tue, 15 Dec 2015 17:02:41 +0800 Subject: f2fs: fix to reset variable correctlly f2fs_map_blocks will set m_flags and m_len to 0, so we don't need to reset m_flags ourselves, but have to reset m_len to correct value before use it again. 
Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c6d909e9661e..1f5892f38018 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1700,7 +1700,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } map.m_lblk = pg_start; - map.m_len = pg_end - pg_start; /* * lookup mapping info in dnode page cache, skip defragmenting if all @@ -1708,14 +1707,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * in logical blocks. */ while (map.m_lblk < pg_end) { - map.m_flags = 0; + map.m_len = pg_end - map.m_lblk; err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); if (err) goto out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk++; - map.m_len--; continue; } @@ -1726,7 +1724,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, blk_end = map.m_pblk + map.m_len; map.m_lblk += map.m_len; - map.m_len = pg_end - map.m_lblk; } if (!fragmented) @@ -1752,14 +1749,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int cnt = 0; do_map: - map.m_flags = 0; + map.m_len = pg_end - map.m_lblk; err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); if (err) goto clear_out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk++; - map.m_len--; continue; } @@ -1784,7 +1780,6 @@ do_map: } map.m_lblk = idx; - map.m_len = pg_end - idx; if (idx < pg_end && cnt < blk_per_seg) goto do_map; -- cgit v1.2.3-59-g8ed1b From b39f0de23d8f22253d441b3b68414e9a1d027cf6 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 15 Dec 2015 17:17:20 +0800 Subject: f2fs: backup raw_super in sbi f2fs use fields of f2fs_super_block struct directly in a grabbed buffer. Once the buffer happen to be destroyed (e.g. through dd), it may bring in unpredictable effect on f2fs. This patch fixes to allocate additional buffer to store datas of super block rather than using grabbed block buffer directly. 
Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4031f8ed9d24..6dfe0d32ad88 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -568,6 +568,7 @@ static void f2fs_put_super(struct super_block *sb) sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); + kfree(sbi->raw_super); kfree(sbi); } @@ -1139,6 +1140,9 @@ static int read_raw_super_block(struct super_block *sb, struct f2fs_super_block *super; int err = 0; + super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL); + if (!super) + return -ENOMEM; retry: buffer = sb_bread(sb, block); if (!buffer) { @@ -1154,8 +1158,7 @@ retry: } } - super = (struct f2fs_super_block *) - ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET); + memcpy(super, buffer->b_data + F2FS_SUPER_OFFSET, sizeof(*super)); /* sanity checking of raw super */ if (sanity_check_raw_super(sb, super)) { @@ -1189,14 +1192,17 @@ retry: out: /* No valid superblock */ - if (!*raw_super) + if (!*raw_super) { + kfree(super); return err; + } return 0; } int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { + struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi); struct buffer_head *sbh = sbi->raw_super_buf; struct buffer_head *bh; int err; @@ -1207,7 +1213,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return -EIO; lock_buffer(bh); - memcpy(bh->b_data, sbh->b_data, sbh->b_size); + memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); WARN_ON(sbh->b_size != F2FS_BLKSIZE); set_buffer_uptodate(bh); set_buffer_dirty(bh); @@ -1223,6 +1229,10 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) /* write current valid superblock */ lock_buffer(sbh); + if (memcmp(sbh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super))) { + f2fs_msg(sbi->sb, KERN_INFO, "Write modified valid superblock"); + memcpy(sbh->b_data + 
F2FS_SUPER_OFFSET, super, sizeof(*super)); + } set_buffer_dirty(sbh); unlock_buffer(sbh); @@ -1497,6 +1507,7 @@ free_options: kfree(options); free_sb_buf: brelse(raw_super_buf); + kfree(raw_super); free_sbi: kfree(sbi); -- cgit v1.2.3-59-g8ed1b From e8240f656d4d5d718ce8cf6b4ea266d6719ef547 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 17:19:26 +0800 Subject: f2fs: don't grab super block buffer header all the time We have already got one copy of valid super block in memory, do not grab buffer header of super block all the time. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 8 ++---- fs/f2fs/super.c | 81 +++++++++++++++++++++++++-------------------------------- 3 files changed, 38 insertions(+), 53 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 21048edb72cb..37cf04b3ff37 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -715,8 +715,8 @@ enum { struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ - struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ + int valid_super_block; /* valid super block no */ int s_flag; /* flags for sbi */ /* for node-related operations */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1f5892f38018..7f8ca47be0af 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1591,20 +1591,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) return err; /* update superblock with uuid */ - lock_buffer(sbi->raw_super_buf); generate_random_uuid(sbi->raw_super->encrypt_pw_salt); - unlock_buffer(sbi->raw_super_buf); err = f2fs_commit_super(sbi, false); - - mnt_drop_write_file(filp); if (err) { /* undo new data */ - lock_buffer(sbi->raw_super_buf); memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - unlock_buffer(sbi->raw_super_buf); + mnt_drop_write_file(filp); return err; } + 
mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6dfe0d32ad88..8c8b4673a5db 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -567,7 +567,6 @@ static void f2fs_put_super(struct super_block *sb) wait_for_completion(&sbi->s_kobj_unregister); sb->s_fs_info = NULL; - brelse(sbi->raw_super_buf); kfree(sbi->raw_super); kfree(sbi); } @@ -1132,65 +1131,53 @@ static void init_sb_info(struct f2fs_sb_info *sbi) */ static int read_raw_super_block(struct super_block *sb, struct f2fs_super_block **raw_super, - struct buffer_head **raw_super_buf, - int *recovery) + int *valid_super_block, int *recovery) { int block = 0; - struct buffer_head *buffer; - struct f2fs_super_block *super; + struct buffer_head *bh; + struct f2fs_super_block *super, *buf; int err = 0; super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL); if (!super) return -ENOMEM; retry: - buffer = sb_bread(sb, block); - if (!buffer) { + bh = sb_bread(sb, block); + if (!bh) { *recovery = 1; f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", block + 1); - if (block == 0) { - block++; - goto retry; - } else { - err = -EIO; - goto out; - } + err = -EIO; + goto next; } - memcpy(super, buffer->b_data + F2FS_SUPER_OFFSET, sizeof(*super)); + buf = (struct f2fs_super_block *)(bh->b_data + F2FS_SUPER_OFFSET); /* sanity checking of raw super */ - if (sanity_check_raw_super(sb, super)) { - brelse(buffer); + if (sanity_check_raw_super(sb, buf)) { + brelse(bh); *recovery = 1; f2fs_msg(sb, KERN_ERR, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - if (block == 0) { - block++; - goto retry; - } else { - err = -EINVAL; - goto out; - } + err = -EINVAL; + goto next; } if (!*raw_super) { - *raw_super_buf = buffer; + memcpy(super, buf, sizeof(*super)); + *valid_super_block = block; *raw_super = super; - } else { - /* already have a valid superblock */ - brelse(buffer); } + brelse(bh); 
+next: /* check the validity of the second superblock */ if (block == 0) { block++; goto retry; } -out: /* No valid superblock */ if (!*raw_super) { kfree(super); @@ -1203,18 +1190,16 @@ out: int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi); - struct buffer_head *sbh = sbi->raw_super_buf; struct buffer_head *bh; int err; /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbh->b_blocknr ? 0 : 1); + bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) return -EIO; lock_buffer(bh); memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - WARN_ON(sbh->b_size != F2FS_BLKSIZE); set_buffer_uptodate(bh); set_buffer_dirty(bh); unlock_buffer(bh); @@ -1227,33 +1212,37 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) if (recover || err) return err; + bh = sb_getblk(sbi->sb, sbi->valid_super_block); + if (!bh) + return -EIO; + /* write current valid superblock */ - lock_buffer(sbh); - if (memcmp(sbh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super))) { - f2fs_msg(sbi->sb, KERN_INFO, "Write modified valid superblock"); - memcpy(sbh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - } - set_buffer_dirty(sbh); - unlock_buffer(sbh); + lock_buffer(bh); + memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + unlock_buffer(bh); - return __sync_dirty_buffer(sbh, WRITE_FLUSH_FUA); + err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); + brelse(bh); + + return err; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; - struct buffer_head *raw_super_buf; struct inode *root; long err; bool retry = true, need_fsck = false; char *options = NULL; - int recovery, i; + int recovery, i, valid_super_block; try_onemore: err = -EINVAL; raw_super = NULL; - raw_super_buf = NULL; + valid_super_block = -1; recovery = 0; /* allocate memory 
for f2fs-specific super block info */ @@ -1267,7 +1256,8 @@ try_onemore: goto free_sbi; } - err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery); + err = read_raw_super_block(sb, &raw_super, &valid_super_block, + &recovery); if (err) goto free_sbi; @@ -1300,7 +1290,7 @@ try_onemore: /* init f2fs-specific super block info */ sbi->sb = sb; sbi->raw_super = raw_super; - sbi->raw_super_buf = raw_super_buf; + sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); @@ -1506,7 +1496,6 @@ free_meta_inode: free_options: kfree(options); free_sb_buf: - brelse(raw_super_buf); kfree(raw_super); free_sbi: kfree(sbi); -- cgit v1.2.3-59-g8ed1b From 55d1cdb25a815ba92a917ae579c27cc3ffb9a57d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 15 Dec 2015 16:07:14 -0800 Subject: f2fs: relocate tracepoint of write_checkpoint It needs to relocate its location to see exact trace logs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a4392f06f733..5008b872f316 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1130,9 +1130,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* do checkpoint periodically */ sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: mutex_unlock(&sbi->cp_mutex); - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); } void init_ino_entry_info(struct f2fs_sb_info *sbi) -- cgit v1.2.3-59-g8ed1b From b3980910f746d885111db7252f664600de2a5ea3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Dec 2015 13:19:35 +0800 Subject: f2fs: introduce __f2fs_commit_super Introduce __f2fs_commit_super to include duplicated codes in f2fs_commit_super for cleanup. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c8b4673a5db..bc1a8cd38bc8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1187,14 +1187,13 @@ next: return 0; } -int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) +int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block) { struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi); struct buffer_head *bh; int err; - /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0 : 1); + bh = sb_getblk(sbi->sb, block); if (!bh) return -EIO; @@ -1208,25 +1207,22 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); brelse(bh); + return err; +} + +int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) +{ + int err; + + /* write back-up superblock first */ + err = __f2fs_commit_super(sbi, sbi->valid_super_block ? 0 : 1); + /* if we are in recovery path, skip writing valid superblock */ if (recover || err) return err; - bh = sb_getblk(sbi->sb, sbi->valid_super_block); - if (!bh) - return -EIO; - /* write current valid superblock */ - lock_buffer(bh); - memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_uptodate(bh); - set_buffer_dirty(bh); - unlock_buffer(bh); - - err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); - brelse(bh); - - return err; + return __f2fs_commit_super(sbi, sbi->valid_super_block); } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) -- cgit v1.2.3-59-g8ed1b From c227f912732f204c0ec4a577ba812401ac4672af Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Dec 2015 13:09:20 +0800 Subject: f2fs: record dirty status of regular/symlink inode Maintain regular/symlink inode which has dirty pages in global dirty list and record their total dirty pages count like the way of handling directory inode. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 66 ++++++++++++++++++++++++++-------------------------- fs/f2fs/data.c | 4 ++-- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 27 +++++++++++++-------- fs/f2fs/inode.c | 2 +- fs/f2fs/super.c | 6 +++-- 6 files changed, 58 insertions(+), 49 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5008b872f316..a037bbd89dc6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -722,53 +722,51 @@ fail_no_cp: return -EINVAL; } -static void __add_dirty_inode(struct inode *inode) +static void __add_dirty_inode(struct inode *inode, enum inode_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; - if (is_inode_flag_set(fi, FI_DIRTY_DIR)) + if (is_inode_flag_set(fi, flag)) return; - set_inode_flag(fi, FI_DIRTY_DIR); - list_add_tail(&fi->dirty_list, &sbi->dir_inode_list); - stat_inc_dirty_dir(sbi); - return; + set_inode_flag(fi, flag); + list_add_tail(&fi->dirty_list, &sbi->inode_list[type]); + if (type == DIR_INODE) + stat_inc_dirty_dir(sbi); } -static void __remove_dirty_inode(struct inode *inode) +static void __remove_dirty_inode(struct inode *inode, enum inode_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; if (get_dirty_pages(inode) || - !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) + !is_inode_flag_set(F2FS_I(inode), flag)) return; list_del_init(&fi->dirty_list); - clear_inode_flag(fi, FI_DIRTY_DIR); - stat_dec_dirty_dir(sbi); + clear_inode_flag(fi, flag); + if (type == DIR_INODE) + stat_dec_dirty_dir(sbi); } void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + enum inode_type type = S_ISDIR(inode->i_mode) ? 
DIR_INODE : FILE_INODE; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return; - if (!S_ISDIR(inode->i_mode)) { - inode_inc_dirty_pages(inode); - goto out; - } - - spin_lock(&sbi->dir_inode_lock); - __add_dirty_inode(inode); + spin_lock(&sbi->inode_lock[type]); + __add_dirty_inode(inode, type); inode_inc_dirty_pages(inode); - spin_unlock(&sbi->dir_inode_lock); + spin_unlock(&sbi->inode_lock[type]); -out: SetPagePrivate(page); f2fs_trace_pid(page); } @@ -777,22 +775,24 @@ void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - spin_lock(&sbi->dir_inode_lock); - __add_dirty_inode(inode); - spin_unlock(&sbi->dir_inode_lock); + spin_lock(&sbi->inode_lock[DIR_INODE]); + __add_dirty_inode(inode, DIR_INODE); + spin_unlock(&sbi->inode_lock[DIR_INODE]); } -void remove_dirty_dir_inode(struct inode *inode) +void remove_dirty_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + enum inode_type type = S_ISDIR(inode->i_mode) ? 
DIR_INODE : FILE_INODE; - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && + !S_ISLNK(inode->i_mode)) return; - spin_lock(&sbi->dir_inode_lock); - __remove_dirty_inode(inode); - spin_unlock(&sbi->dir_inode_lock); + spin_lock(&sbi->inode_lock[type]); + __remove_dirty_inode(inode, type); + spin_unlock(&sbi->inode_lock[type]); /* Only from the recovery routine */ if (is_inode_flag_set(fi, FI_DELAY_IPUT)) { @@ -801,7 +801,7 @@ void remove_dirty_dir_inode(struct inode *inode) } } -void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) +void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) { struct list_head *head; struct inode *inode; @@ -810,16 +810,16 @@ retry: if (unlikely(f2fs_cp_error(sbi))) return; - spin_lock(&sbi->dir_inode_lock); + spin_lock(&sbi->inode_lock[type]); - head = &sbi->dir_inode_list; + head = &sbi->inode_list[type]; if (list_empty(head)) { - spin_unlock(&sbi->dir_inode_lock); + spin_unlock(&sbi->inode_lock[type]); return; } fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); inode = igrab(&fi->vfs_inode); - spin_unlock(&sbi->dir_inode_lock); + spin_unlock(&sbi->inode_lock[type]); if (inode) { filemap_fdatawrite(inode->i_mapping); iput(inode); @@ -854,7 +854,7 @@ retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - sync_dirty_dir_inodes(sbi); + sync_dirty_inodes(sbi, DIR_INODE); if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto out; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 90a2ffea875b..292a06cbea07 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1180,7 +1180,7 @@ out: f2fs_balance_fs(sbi); if (wbc->for_reclaim) { f2fs_submit_merged_bio(sbi, DATA, WRITE); - remove_dirty_dir_inode(inode); + remove_dirty_inode(inode); } return 0; @@ -1372,7 +1372,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, if (locked) mutex_unlock(&sbi->writepages); - remove_dirty_dir_inode(inode); + 
remove_dirty_inode(inode); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return ret; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 6554fd5fce88..3da58265c0d4 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -444,7 +444,7 @@ error: /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ truncate_inode_pages(&inode->i_data, 0); truncate_blocks(inode, 0, false); - remove_dirty_dir_inode(inode); + remove_dirty_inode(inode); remove_inode_page(inode); return ERR_PTR(err); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 37cf04b3ff37..03a2b86a28ba 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -648,6 +648,7 @@ struct f2fs_sm_info { enum count_type { F2FS_WRITEBACK, F2FS_DIRTY_DENTS, + F2FS_DIRTY_DATA, F2FS_DIRTY_NODES, F2FS_DIRTY_META, F2FS_INMEM_PAGES, @@ -696,6 +697,12 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ }; +enum inode_type { + DIR_INODE, /* for dirty dir inode */ + FILE_INODE, /* for dirty regular/symlink inode */ + NR_INODE_TYPE, +}; + /* for inner inode cache management */ struct inode_management { struct radix_tree_root ino_root; /* ino entry array */ @@ -745,9 +752,9 @@ struct f2fs_sb_info { /* for orphan inode, use 0'th array */ unsigned int max_orphans; /* max orphan inodes */ - /* for directory inode management */ - struct list_head dir_inode_list; /* dir inode list */ - spinlock_t dir_inode_lock; /* for dir inode list lock */ + /* for inode management */ + struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */ + spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */ /* for extent tree cache */ struct radix_tree_root extent_tree_root;/* cache extent cache entries */ @@ -1060,8 +1067,8 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) static inline void inode_inc_dirty_pages(struct inode *inode) { atomic_inc(&F2FS_I(inode)->dirty_pages); - if (S_ISDIR(inode->i_mode)) - inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); + 
inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1076,9 +1083,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode) return; atomic_dec(&F2FS_I(inode)->dirty_pages); - - if (S_ISDIR(inode->i_mode)) - dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); + dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) @@ -1417,6 +1423,7 @@ enum { FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_DO_DEFRAG, /* indicate defragment is running */ + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1826,8 +1833,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); void update_dirty_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); -void remove_dirty_dir_inode(struct inode *); -void sync_dirty_dir_inodes(struct f2fs_sb_info *); +void remove_dirty_inode(struct inode *); +void sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); void init_ino_entry_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 3d2fe595d078..ec3fb32c4726 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -327,7 +327,7 @@ void f2fs_evict_inode(struct inode *inode) goto out_clear; f2fs_bug_on(sbi, get_dirty_pages(inode)); - remove_dirty_dir_inode(inode); + remove_dirty_inode(inode); f2fs_destroy_extent_tree(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bc1a8cd38bc8..5b596d6a8d24 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1339,8 +1339,10 @@ try_onemore: 
le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; sbi->alloc_valid_block_count = 0; - INIT_LIST_HEAD(&sbi->dir_inode_list); - spin_lock_init(&sbi->dir_inode_lock); + for (i = 0; i < NR_INODE_TYPE; i++) { + INIT_LIST_HEAD(&sbi->inode_list[i]); + spin_lock_init(&sbi->inode_lock[i]); + } init_extent_cache_info(sbi); -- cgit v1.2.3-59-g8ed1b From 343f40f0a70eb7cee9cc8d6fcfbb3917252a5245 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Dec 2015 13:12:16 +0800 Subject: f2fs: introduce new option for controlling data flush Add a new option 'data_flush' to enable data flush functionality. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 7 +++++++ 3 files changed, 10 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index ad10494aa224..e1c9f0849da6 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -149,6 +149,8 @@ noextent_cache Disable an extent cache based on rb-tree explicitly, see the above extent_cache mount option. noinline_data Disable the inline data feature, inline data feature is enabled by default. +data_flush Enable data flushing before checkpoint in order to + persist data of regular and symlink. 
================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 03a2b86a28ba..b1fb8f73fe42 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -54,6 +54,7 @@ #define F2FS_MOUNT_FASTBOOT 0x00001000 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 +#define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5b596d6a8d24..c3070c149c0e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -67,6 +67,7 @@ enum { Opt_extent_cache, Opt_noextent_cache, Opt_noinline_data, + Opt_data_flush, Opt_err, }; @@ -91,6 +92,7 @@ static match_table_t f2fs_tokens = { {Opt_extent_cache, "extent_cache"}, {Opt_noextent_cache, "noextent_cache"}, {Opt_noinline_data, "noinline_data"}, + {Opt_data_flush, "data_flush"}, {Opt_err, NULL}, }; @@ -406,6 +408,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_noinline_data: clear_opt(sbi, INLINE_DATA); break; + case Opt_data_flush: + set_opt(sbi, DATA_FLUSH); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -687,6 +692,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",extent_cache"); else seq_puts(seq, ",noextent_cache"); + if (test_opt(sbi, DATA_FLUSH)) + seq_puts(seq, ",data_flush"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; -- cgit v1.2.3-59-g8ed1b From 33fbd5100de6d0a87f354ecf5ec0486ba01a6da7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Dec 2015 17:14:44 +0800 Subject: f2fs: stat dirty regular/symlink inodes Add to stat dirty regular and symlink inode for showing in debugfs. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++----- fs/f2fs/debug.c | 6 +++++- fs/f2fs/f2fs.h | 13 +++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a037bbd89dc6..53044ea8bb5d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -733,13 +733,11 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type) set_inode_flag(fi, flag); list_add_tail(&fi->dirty_list, &sbi->inode_list[type]); - if (type == DIR_INODE) - stat_inc_dirty_dir(sbi); + stat_inc_dirty_inode(sbi, type); } static void __remove_dirty_inode(struct inode *inode, enum inode_type type) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; @@ -749,8 +747,7 @@ static void __remove_dirty_inode(struct inode *inode, enum inode_type type) list_del_init(&fi->dirty_list); clear_inode_flag(fi, flag); - if (type == DIR_INODE) - stat_dec_dirty_dir(sbi); + stat_dec_dirty_inode(F2FS_I_SB(inode), type); } void update_dirty_page(struct inode *inode, struct page *page) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index f4a7b9e9416d..bb307e642fdd 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -42,8 +42,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); - si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); + si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; + si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; @@ -298,6 +300,8 @@ static int stat_show(struct 
seq_file *s, void *v) si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - datas: %4d in files:%4d\n", + si->ndirty_data, si->ndirty_files); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b1fb8f73fe42..19beabefd839 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -818,7 +818,7 @@ struct f2fs_sb_info { atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ - unsigned int n_dirty_dirs; /* # of dir inodes */ + unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ @@ -1888,7 +1888,8 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, ext_node; - int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; + int ndirty_node, ndirty_meta; + int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; int bg_gc, inmem_pages, wb_pages; @@ -1921,8 +1922,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_count(si) ((si)->cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) -#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) -#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) +#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) +#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) #define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) #define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree)) #define 
stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest)) @@ -2003,8 +2004,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_cp_count(si) #define stat_inc_call_count(si) #define stat_inc_bggc_count(si) -#define stat_inc_dirty_dir(sbi) -#define stat_dec_dirty_dir(sbi) +#define stat_inc_dirty_inode(sbi, type) +#define stat_dec_dirty_inode(sbi, type) #define stat_inc_total_hit(sb) #define stat_inc_rbtree_node_hit(sb) #define stat_inc_largest_node_hit(sbi) -- cgit v1.2.3-59-g8ed1b From 36b35a0dbe904a06e94154f29db0d0e218420c98 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Dec 2015 17:13:28 +0800 Subject: f2fs: support data flush in background Previously, when finishing a checkpoint, we persisted all fs meta info including the meta inode, node inode, and dentry pages of directory inodes, so after a sudden power cut, f2fs can recover from the last checkpoint with the full directory structure. But during checkpoint, we didn't flush dirty pages of regular and symlink inodes, so such dirty data still in memory will be lost at the moment of power-off. In order to reduce the chance of losing data, this patch gives f2fs_balance_fs_bg the ability to flush data. It will try to flush user data before starting a checkpoint, so user data written after the last checkpoint, which may not have been fsynced, can be saved. When we mount with the data_flush option, after every period of cp_interval seconds (configurable in sysfs: /sys/fs/f2fs/device/cp_interval), user data can be flushed to the device once f2fs_balance_fs_bg is called in the kworker thread or gc thread.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5fa519f02860..c2474509e5de 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -291,8 +291,11 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, NAT_ENTRIES) || excess_prefree_segs(sbi) || !available_free_memory(sbi, INO_ENTRIES) || - jiffies > sbi->cp_expires) + jiffies > sbi->cp_expires) { + if (test_opt(sbi, DATA_FLUSH)) + sync_dirty_inodes(sbi, FILE_INODE); f2fs_sync_fs(sbi->sb, true); + } } static int issue_flush_thread(void *data) -- cgit v1.2.3-59-g8ed1b From 7df3a4318d07ba520b4a8eddad29e9ac748b0a19 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Thu, 17 Dec 2015 13:20:59 +0800 Subject: f2fs: optimize the flow of f2fs_map_blocks check map->m_len right after it changes to avoid excess call to update dnode_of_data. Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 69 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 292a06cbea07..e34b1bdfc995 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -573,6 +573,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int err = 0, ofs = 1; struct extent_info ei; bool allocated = false; + block_t blkaddr; map->m_len = 0; map->m_flags = 0; @@ -636,6 +637,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, pgofs++; get_next: + if (map->m_len >= maxblocks) + goto sync_out; + if (dn.ofs_in_node >= end_offset) { if (allocated) sync_inode_page(&dn); @@ -653,44 +657,43 @@ get_next: end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } - if (maxblocks > map->m_len) { - block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); - if (blkaddr == NEW_ADDR 
|| blkaddr == NULL_ADDR) { - if (create) { - if (unlikely(f2fs_cp_error(sbi))) { - err = -EIO; - goto sync_out; - } - err = __allocate_data_block(&dn); - if (err) - goto sync_out; - allocated = true; - map->m_flags |= F2FS_MAP_NEW; - blkaddr = dn.data_blkaddr; - } else { - /* - * we only merge preallocated unwritten blocks - * for fiemap. - */ - if (flag != F2FS_GET_BLOCK_FIEMAP || - blkaddr != NEW_ADDR) - goto sync_out; + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { + if (create) { + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto sync_out; } + err = __allocate_data_block(&dn); + if (err) + goto sync_out; + allocated = true; + map->m_flags |= F2FS_MAP_NEW; + blkaddr = dn.data_blkaddr; + } else { + /* + * we only merge preallocated unwritten blocks + * for fiemap. + */ + if (flag != F2FS_GET_BLOCK_FIEMAP || + blkaddr != NEW_ADDR) + goto sync_out; } + } - /* Give more consecutive addresses for the readahead */ - if ((map->m_pblk != NEW_ADDR && - blkaddr == (map->m_pblk + ofs)) || - (map->m_pblk == NEW_ADDR && - blkaddr == NEW_ADDR)) { - ofs++; - dn.ofs_in_node++; - pgofs++; - map->m_len++; - goto get_next; - } + /* Give more consecutive addresses for the readahead */ + if ((map->m_pblk != NEW_ADDR && + blkaddr == (map->m_pblk + ofs)) || + (map->m_pblk == NEW_ADDR && + blkaddr == NEW_ADDR)) { + ofs++; + dn.ofs_in_node++; + pgofs++; + map->m_len++; + goto get_next; } + sync_out: if (allocated) sync_inode_page(&dn); -- cgit v1.2.3-59-g8ed1b From 4cf185379b7504d640c9dd72f959f081b25f6ea2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Dec 2015 17:17:16 +0800 Subject: f2fs: add a tracepoint for sync_dirty_inodes This patch adds a tracepoint for sync_dirty_inodes. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++++ include/trace/events/f2fs.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 53044ea8bb5d..fdd43f71d2c6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -803,6 +803,11 @@ void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) struct list_head *head; struct inode *inode; struct f2fs_inode_info *fi; + bool is_dir = (type == DIR_INODE); + + trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir, + get_pages(sbi, is_dir ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); retry: if (unlikely(f2fs_cp_error(sbi))) return; @@ -812,6 +817,9 @@ retry: head = &sbi->inode_list[type]; if (list_empty(head)) { spin_unlock(&sbi->inode_lock[type]); + trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir, + get_pages(sbi, is_dir ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); return; } fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 00b4a6308249..a1b488809f06 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1265,6 +1265,44 @@ TRACE_EVENT(f2fs_destroy_extent_tree, __entry->node_cnt) ); +DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, + + TP_PROTO(struct super_block *sb, int type, int count), + + TP_ARGS(sb, type, count), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, count) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->type = type; + __entry->count = count; + ), + + TP_printk("dev = (%d,%d), %s, dirty count = %d", + show_dev(__entry), + show_file_type(__entry->type), + __entry->count) +); + +DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter, + + TP_PROTO(struct super_block *sb, int type, int count), + + TP_ARGS(sb, type, count) +); + +DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit, + + 
TP_PROTO(struct super_block *sb, int type, int count), + + TP_ARGS(sb, type, count) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3-59-g8ed1b From 7441ccef339f87abc27afc4ccfc24c014d7360c9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 21 Dec 2015 19:20:15 -0800 Subject: f2fs: use atomic variable for total_extent_tree It would be better to use atomic variable for total_extent_tree. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 5 +++-- fs/f2fs/extent_cache.c | 8 ++++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 3 ++- fs/f2fs/shrinker.c | 3 ++- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index bb307e642fdd..ed5dfcc8886f 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -38,7 +38,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree); si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; si->total_ext = atomic64_read(&sbi->total_hit_ext); - si->ext_tree = sbi->total_ext_tree; + si->ext_tree = atomic_read(&sbi->total_ext_tree); si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); @@ -193,7 +193,8 @@ get_cache: si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); - si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); + si->cache_mem += atomic_read(&sbi->total_ext_tree) * + sizeof(struct extent_tree); si->cache_mem += atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index e86e9f1e0733..0e97d6af9885 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -70,7 +70,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) rwlock_init(&et->lock); 
atomic_set(&et->refcount, 0); et->count = 0; - sbi->total_ext_tree++; + atomic_inc(&sbi->total_ext_tree); } atomic_inc(&et->refcount); up_write(&sbi->extent_tree_lock); @@ -570,7 +570,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) radix_tree_delete(root, et->ino); kmem_cache_free(extent_tree_slab, et); - sbi->total_ext_tree--; + atomic_dec(&sbi->total_ext_tree); tree_cnt++; if (node_cnt + tree_cnt >= nr_shrink) @@ -663,7 +663,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); - sbi->total_ext_tree--; + atomic_dec(&sbi->total_ext_tree); up_write(&sbi->extent_tree_lock); F2FS_I(inode)->extent_tree = NULL; @@ -715,7 +715,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi) init_rwsem(&sbi->extent_tree_lock); INIT_LIST_HEAD(&sbi->extent_list); spin_lock_init(&sbi->extent_lock); - sbi->total_ext_tree = 0; + atomic_set(&sbi->total_ext_tree, 0); atomic_set(&sbi->total_ext_node, 0); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 19beabefd839..a7f619182cec 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -762,7 +762,7 @@ struct f2fs_sb_info { struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ struct list_head extent_list; /* lru list for shrinker */ spinlock_t extent_lock; /* locking extent lru list */ - int total_ext_tree; /* extent tree count */ + atomic_t total_ext_tree; /* extent tree count */ atomic_t total_ext_node; /* extent info count */ /* basic filesystem units */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d842b199cd02..6cc8ac7e185a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -65,7 +65,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == EXTENT_CACHE) { - mem_size = 
(sbi->total_ext_tree * sizeof(struct extent_tree) + + mem_size = (atomic_read(&sbi->total_ext_tree) * + sizeof(struct extent_tree) + atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index da0d8e0b55a5..a11e099cbddc 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -32,7 +32,8 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) { - return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node); + return atomic_read(&sbi->total_ext_tree) + + atomic_read(&sbi->total_ext_node); } unsigned long f2fs_shrink_count(struct shrinker *shrink, -- cgit v1.2.3-59-g8ed1b From 74fd8d9927ef08db30a85f131a124152aeba66c7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 21 Dec 2015 19:25:50 -0800 Subject: f2fs: speed up shrinking extent tree entries If there are no candidates for shrinking slab entries, we don't need to traverse any trees at all.
Reviewed-by: Chao Yu [Jaegeuk Kim: fix missing initialization reported by Yunlei He] Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 14 ++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/shrinker.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0e97d6af9885..5305a29f91a3 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -71,6 +71,8 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) atomic_set(&et->refcount, 0); et->count = 0; atomic_inc(&sbi->total_ext_tree); + } else { + atomic_dec(&sbi->total_zombie_tree); } atomic_inc(&et->refcount); up_write(&sbi->extent_tree_lock); @@ -547,10 +549,14 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) unsigned int found; unsigned int node_cnt = 0, tree_cnt = 0; int remained; + bool do_free = false; if (!test_opt(sbi, EXTENT_CACHE)) return 0; + if (!atomic_read(&sbi->total_zombie_tree)) + goto free_node; + if (!down_write_trylock(&sbi->extent_tree_lock)) goto out; @@ -571,6 +577,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) radix_tree_delete(root, et->ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&sbi->total_ext_tree); + atomic_dec(&sbi->total_zombie_tree); tree_cnt++; if (node_cnt + tree_cnt >= nr_shrink) @@ -580,6 +587,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) } up_write(&sbi->extent_tree_lock); +free_node: /* 2. remove LRU extent entries */ if (!down_write_trylock(&sbi->extent_tree_lock)) goto out; @@ -591,9 +599,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) if (!remained--) break; list_del_init(&en->list); + do_free = true; } spin_unlock(&sbi->extent_lock); + if (do_free == false) + goto unlock_out; + /* * reset ino for searching victims from beginning of global extent tree. 
*/ @@ -651,6 +663,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (inode->i_nlink && !is_bad_inode(inode) && et->count) { atomic_dec(&et->refcount); + atomic_inc(&sbi->total_zombie_tree); return; } @@ -716,6 +729,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->extent_list); spin_lock_init(&sbi->extent_lock); atomic_set(&sbi->total_ext_tree, 0); + atomic_set(&sbi->total_zombie_tree, 0); atomic_set(&sbi->total_ext_node, 0); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a7f619182cec..90fb970e2b98 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -763,6 +763,7 @@ struct f2fs_sb_info { struct list_head extent_list; /* lru list for shrinker */ spinlock_t extent_lock; /* locking extent lru list */ atomic_t total_ext_tree; /* extent tree count */ + atomic_t total_zombie_tree; /* extent zombie tree count */ atomic_t total_ext_node; /* extent info count */ /* basic filesystem units */ diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index a11e099cbddc..93606f281bf9 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -32,7 +32,7 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) { - return atomic_read(&sbi->total_ext_tree) + + return atomic_read(&sbi->total_zombie_tree) + atomic_read(&sbi->total_ext_node); } -- cgit v1.2.3-59-g8ed1b From b9d777b85ff1ff79a1173190317b25bebc404ab4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Dec 2015 11:09:35 -0800 Subject: f2fs: check inline_data flag at converting time We can check inode's inline_data flag when calling to convert it. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 +++----- fs/f2fs/file.c | 58 ++++++++++++++++++++++---------------------------------- fs/f2fs/inline.c | 3 +++ 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e34b1bdfc995..cf0c9dda0365 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1573,11 +1573,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int err; /* we don't need to use inline_data strictly */ - if (f2fs_has_inline_data(inode)) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } + err = f2fs_convert_inline_inode(inode); + if (err) + return err; if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7f8ca47be0af..f2effe18d3c5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -418,19 +418,18 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); + int err; if (f2fs_encrypted_inode(inode)) { - int err = f2fs_get_encryption_info(inode); + err = f2fs_get_encryption_info(inode); if (err) return 0; } /* we don't need to use inline_data strictly */ - if (f2fs_has_inline_data(inode)) { - int err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } + err = f2fs_convert_inline_inode(inode); + if (err) + return err; file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; @@ -604,7 +603,7 @@ int f2fs_truncate(struct inode *inode, bool lock) trace_f2fs_truncate(inode); /* we should check inline_data size */ - if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { + if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); if (err) return err; @@ -688,8 +687,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) truncate_setsize(inode, attr->ia_size); /* should convert inline inode here */ - if 
(f2fs_has_inline_data(inode) && - !f2fs_may_inline_data(inode)) { + if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); if (err) return err; @@ -786,13 +784,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) { pgoff_t pg_start, pg_end; loff_t off_start, off_end; - int ret = 0; + int ret; - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -951,11 +947,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(F2FS_I_SB(inode)); - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; pg_start = offset >> PAGE_CACHE_SHIFT; pg_end = (offset + len) >> PAGE_CACHE_SHIFT; @@ -1001,11 +995,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, f2fs_balance_fs(sbi); - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) @@ -1114,11 +1106,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi); - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) @@ -1168,11 +1158,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = 
f2fs_convert_inline_inode(inode); + if (ret) + return ret; pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index bda7126466c0..8090854dd29c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -177,6 +177,9 @@ int f2fs_convert_inline_inode(struct inode *inode) struct page *ipage, *page; int err = 0; + if (!f2fs_has_inline_data(inode)) + return 0; + page = grab_cache_page(inode->i_mapping, 0); if (!page) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 00623e6bcf40b03b39f612cef9a744453cf3e2a8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Dec 2015 11:56:08 -0800 Subject: f2fs: avoid unnecessary f2fs_gc for dir operations The f2fs_balance_fs doesn't need to cover f2fs_new_inode or f2fs_find_entry works. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2c32110f9fc0..4e27c5c4b05c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -128,8 +128,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -142,6 +140,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -288,12 +288,13 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) int err = -ENOENT; trace_f2fs_unlink_enter(dir, dentry); - f2fs_balance_fs(sbi); de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) goto fail; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) { @@ -341,8 +342,6 @@ static int f2fs_symlink(struct 
inode *dir, struct dentry *dentry, if (len > dir->i_sb->s_blocksize) return -ENAMETOOLONG; - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -353,6 +352,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -433,8 +434,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -444,6 +443,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + f2fs_balance_fs(sbi); + set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -481,8 +482,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -490,6 +489,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -516,9 +517,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; - if (!whiteout) - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -532,6 +530,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &f2fs_dblock_aops; } + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) @@ 
-604,8 +604,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } - f2fs_balance_fs(sbi); - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) goto out; @@ -635,6 +633,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) goto out_whiteout; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); @@ -666,6 +666,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(old_inode); update_inode_page(new_inode); } else { + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(new_dentry, old_inode); @@ -763,8 +765,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode))) return -EPERM; - f2fs_balance_fs(sbi); - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) goto out; @@ -807,6 +807,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_new_dir; } + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name); -- cgit v1.2.3-59-g8ed1b From 93bae099eaa0ae784fbe4d9eddcdc54fb5812466 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Dec 2015 12:59:54 -0800 Subject: f2fs: record node block allocation in dnode_of_data This patch introduces recording node block allocation in dnode_of_data. This information helps to figure out whether any node block is allocated during specific file operations. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cf0c9dda0365..a7a9a05d012a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -226,6 +226,7 @@ void set_data_blkaddr(struct dnode_of_data *dn) addr_array = blkaddr_in_node(rn); addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); set_page_dirty(node_page); + dn->node_changed = true; } int reserve_new_block(struct dnode_of_data *dn) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 90fb970e2b98..3e4a60da408f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -546,6 +546,7 @@ struct dnode_of_data { nid_t nid; /* node id of the direct node block */ unsigned int ofs_in_node; /* data offset in the node page */ bool inode_page_locked; /* inode page is locked or not */ + bool node_changed; /* is node block changed */ block_t data_blkaddr; /* block address of the node block */ }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6cc8ac7e185a..341de5d2353b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -542,6 +542,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) set_nid(parent, offset[i - 1], nids[i], i == 1); alloc_nid_done(sbi, nids[i]); + dn->node_changed = true; done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { npage[i] = get_node_page_ra(parent, offset[i - 1]); @@ -678,6 +679,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (ret < 0) goto out_err; set_nid(page, i, 0, false); + dn->node_changed = true; } } else { child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; @@ -691,6 +693,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); if (ret == (NIDS_PER_BLOCK + 1)) { set_nid(page, i, 0, false); + dn->node_changed = true; child_nofs += ret; } else if (ret < 0 && ret != -ENOENT) { goto out_err; @@ 
-752,6 +755,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, if (err < 0) goto fail; set_nid(pages[idx], i, 0, false); + dn->node_changed = true; } if (offset[idx + 1] == 0) { @@ -1153,6 +1157,7 @@ void sync_inode_page(struct dnode_of_data *dn) } else { update_inode_page(dn->inode); } + dn->node_changed = true; } int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, -- cgit v1.2.3-59-g8ed1b From 3104af35eb6a2452ccc9912997e7728777100de2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Dec 2015 17:11:43 +0800 Subject: f2fs: reduce covered region of sbi->cp_rwsem in f2fs_map_blocks Only cover sbi->cp_rwsem on one dnode page's allocation and modification instead of multiple's in f2fs_map_blocks, it can reduce the covered region of cp_rwsem, then we can avoid potential long time delay for concurrent checkpointer. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a7a9a05d012a..82ecaa30fd77 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -590,7 +590,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } if (create) - f2fs_lock_op(F2FS_I_SB(inode)); + f2fs_lock_op(sbi); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -647,6 +647,11 @@ get_next: allocated = false; f2fs_put_dnode(&dn); + if (create) { + f2fs_unlock_op(sbi); + f2fs_lock_op(sbi); + } + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, pgofs, mode); if (err) { @@ -702,7 +707,7 @@ put_out: f2fs_put_dnode(&dn); unlock_out: if (create) - f2fs_unlock_op(F2FS_I_SB(inode)); + f2fs_unlock_op(sbi); out: trace_f2fs_map_blocks(inode, map, err); return err; -- cgit v1.2.3-59-g8ed1b From 2a3407607028f7c780f1c20faa4e922bf631d340 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Dec 2015 13:23:35 -0800 Subject: f2fs: call f2fs_balance_fs only when node was changed If user 
tries to update or read data, we don't need to call f2fs_balance_fs which triggers f2fs_gc, which increases unnecessary long latency. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 26 ++++++++++++++++++++++---- fs/f2fs/file.c | 26 +++++++++----------------- fs/f2fs/inline.c | 4 ++++ 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 82ecaa30fd77..958d8261b258 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -509,7 +509,6 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, u64 end_offset; while (len) { - f2fs_balance_fs(sbi); f2fs_lock_op(sbi); /* When reading holes, we need its node page */ @@ -542,6 +541,9 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + + if (dn.node_changed) + f2fs_balance_fs(sbi); } return; @@ -551,6 +553,8 @@ sync_out: f2fs_put_dnode(&dn); out: f2fs_unlock_op(sbi); + if (dn.node_changed) + f2fs_balance_fs(sbi); return; } @@ -649,6 +653,8 @@ get_next: if (create) { f2fs_unlock_op(sbi); + if (dn.node_changed) + f2fs_balance_fs(sbi); f2fs_lock_op(sbi); } @@ -706,8 +712,11 @@ sync_out: put_out: f2fs_put_dnode(&dn); unlock_out: - if (create) + if (create) { f2fs_unlock_op(sbi); + if (dn.node_changed) + f2fs_balance_fs(sbi); + } out: trace_f2fs_map_blocks(inode, map, err); return err; @@ -1415,8 +1424,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); - f2fs_balance_fs(sbi); - /* * We should check this at this moment to avoid deadlock on inode page * and #0 page. 
The locking rule for inline_data conversion should be: @@ -1466,6 +1473,17 @@ put_next: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + if (dn.node_changed && has_not_enough_free_secs(sbi, 0)) { + unlock_page(page); + f2fs_balance_fs(sbi); + lock_page(page); + if (page->mapping != mapping) { + /* The page got truncated from under us */ + f2fs_put_page(page, 1); + goto repeat; + } + } + f2fs_wait_on_page_writeback(page, DATA); /* wait for GCed encrypted page writeback */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f2effe18d3c5..888ce4765779 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -40,8 +40,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct dnode_of_data dn; int err; - f2fs_balance_fs(sbi); - sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -57,6 +55,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + if (dn.node_changed) + f2fs_balance_fs(sbi); + file_update_time(vma->vm_file); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || @@ -233,9 +234,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } go_write: - /* guarantee free sections for fsync */ - f2fs_balance_fs(sbi); - /* * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. 
@@ -267,6 +265,8 @@ sync_nodes: if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); + + f2fs_balance_fs(sbi); goto sync_nodes; } @@ -945,8 +945,6 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode)); - ret = f2fs_convert_inline_inode(inode); if (ret) return ret; @@ -993,8 +991,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; - f2fs_balance_fs(sbi); - ret = f2fs_convert_inline_inode(inode); if (ret) return ret; @@ -1104,12 +1100,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) return -EINVAL; - f2fs_balance_fs(sbi); - ret = f2fs_convert_inline_inode(inode); if (ret) return ret; + f2fs_balance_fs(sbi); + ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) return ret; @@ -1152,8 +1148,6 @@ static int expand_inode_data(struct inode *inode, loff_t offset, loff_t off_start, off_end; int ret = 0; - f2fs_balance_fs(sbi); - ret = inode_newsize_ok(inode, (len + offset)); if (ret) return ret; @@ -1162,6 +1156,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; + f2fs_balance_fs(sbi); + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -1349,8 +1345,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; - f2fs_balance_fs(F2FS_I_SB(inode)); - if (f2fs_is_atomic_file(inode)) return 0; @@ -1437,8 +1431,6 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) if (ret) return ret; - f2fs_balance_fs(F2FS_I_SB(inode)); - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); commit_inmem_pages(inode, true); 
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8090854dd29c..c24e5d93720d 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -202,6 +202,10 @@ out: f2fs_unlock_op(sbi); f2fs_put_page(page, 1); + + if (dn.node_changed) + f2fs_balance_fs(sbi); + return err; } -- cgit v1.2.3-59-g8ed1b From c34f42e2cb2d27650549306de5ff36839e9177d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Dec 2015 17:50:30 +0800 Subject: f2fs: report error of do_checkpoint do_checkpoint and write_checkpoint can fail due to reasons like triggering in a readonly fs or encountering IO error of storage device. So it's better to report such error info to user, let user be aware of failure of doing checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 29 +++++++++++++++++++---------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 5 +++-- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 5 +++-- fs/f2fs/super.c | 5 +++-- 6 files changed, 30 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fdd43f71d2c6..9cdb16197351 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -910,7 +910,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) finish_wait(&sbi->cp_wait, &wait); } -static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -936,7 +936,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) while (get_pages(sbi, F2FS_DIRTY_META)) { sync_meta_pages(sbi, META, LONG_MAX); if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; } next_free_nid(sbi, &last_nid); @@ -1021,7 +1021,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* need to wait for end_io results */ wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) - 
return; + return -EIO; /* write out checkpoint buffer at block 0 */ update_meta_page(sbi, ckpt, start_blk++); @@ -1049,7 +1049,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); @@ -1075,19 +1075,22 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) release_ino_entry(sbi); if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; clear_prefree_segments(sbi, cpc); clear_sbi_flag(sbi, SBI_IS_DIRTY); + + return 0; } /* * We guarantee that this checkpoint procedure will not fail. */ -void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; + int err = 0; mutex_lock(&sbi->cp_mutex); @@ -1095,14 +1098,19 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || (cpc->reason == CP_DISCARD && !sbi->discard_blks))) goto out; - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; goto out; - if (f2fs_readonly(sbi->sb)) + } + if (f2fs_readonly(sbi->sb)) { + err = -EROFS; goto out; + } trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); - if (block_operations(sbi)) + err = block_operations(sbi); + if (err) goto out; trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); @@ -1124,7 +1132,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ - do_checkpoint(sbi, cpc); + err = do_checkpoint(sbi, cpc); unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); @@ -1138,6 +1146,7 @@ void write_checkpoint(struct 
f2fs_sb_info *sbi, struct cp_control *cpc) trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: mutex_unlock(&sbi->cp_mutex); + return err; } void init_ino_entry_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3e4a60da408f..79345e7ce6bb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1838,7 +1838,7 @@ void update_dirty_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); void remove_dirty_inode(struct inode *); void sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); -void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); +int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); void init_ino_entry_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); void destroy_checkpoint_caches(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 888ce4765779..780db8bd2451 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1618,6 +1618,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct cp_control cpc; + int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1628,10 +1629,10 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) cpc.reason = __get_cp_reason(sbi); mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); - return 0; + return err; } static int f2fs_defragment_range(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7fcb6e49deff..589b20b8677b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -623,7 +623,7 @@ out: .reason = CP_RECOVERY, }; mutex_unlock(&sbi->cp_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); } else { mutex_unlock(&sbi->cp_mutex); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c2474509e5de..a3474bad5770 100644 --- 
a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1118,6 +1118,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; unsigned int start_segno, end_segno; struct cp_control cpc; + int err = 0; if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -1148,12 +1149,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) sbi->segs_per_sec) - 1, end_segno); mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); - return 0; + return err; } static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c3070c149c0e..597b533634e0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -579,6 +579,7 @@ static void f2fs_put_super(struct super_block *sb) int f2fs_sync_fs(struct super_block *sb, int sync) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + int err = 0; trace_f2fs_sync_fs(sb, sync); @@ -588,14 +589,14 @@ int f2fs_sync_fs(struct super_block *sb, int sync) cpc.reason = __get_cp_reason(sbi); mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } else { f2fs_balance_fs(sbi); } f2fs_trace_ios(NULL, 1); - return 0; + return err; } static int f2fs_freeze(struct super_block *sb) -- cgit v1.2.3-59-g8ed1b From fba48a8b14f405afc5c80a93ed64a12607dd52c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Dec 2015 17:51:35 +0800 Subject: f2fs: don't convert inline inode when inline_data option is disable If the inline_data option is disabled, when truncating an inline inode whose size does not exceed the maximum inline size, we should not convert the inline inode to a regular one, to avoid the overhead of synchronizing conversion.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 3 --- fs/f2fs/namei.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c24e5d93720d..5ffbd169b719 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -16,9 +16,6 @@ bool f2fs_may_inline_data(struct inode *inode) { - if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) - return false; - if (f2fs_is_atomic_file(inode)) return false; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4e27c5c4b05c..e439f32d31e6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -60,7 +60,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); - if (f2fs_may_inline_data(inode)) + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); -- cgit v1.2.3-59-g8ed1b From 2aadac085cf0ca3e0295988d4d1dbdeafc15a9f6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Dec 2015 11:55:18 -0800 Subject: f2fs: introduce prepare_write_begin to clean up This patch adds prepare_write_begin to clean f2fs_write_begin. The major role of this function is to convert any inline_data and allocate or find block address. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 92 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 958d8261b258..d4839fc2b4ca 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1410,6 +1410,51 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) } } +static int prepare_write_begin(struct f2fs_sb_info *sbi, + struct page *page, loff_t pos, unsigned len, + block_t *blk_addr, bool *node_changed) +{ + struct inode *inode = page->mapping->host; + pgoff_t index = page->index; + struct dnode_of_data dn; + struct page *ipage; + int err = 0; + + f2fs_lock_op(sbi); + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto unlock_out; + } + + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) { + if (pos + len <= MAX_INLINE_DATA) { + read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); + goto done; + } else { + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto err_out; + } + } + err = f2fs_get_block(&dn, index); +done: + /* convert_inline_page can make node_changed */ + *blk_addr = dn.data_blkaddr; + *node_changed = dn.node_changed; +err_out: + f2fs_put_dnode(&dn); +unlock_out: + f2fs_unlock_op(sbi); + return err; +} + static int f2fs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1417,9 +1462,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page = NULL; - struct page *ipage; pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; - struct dnode_of_data dn; + bool need_balance = false; + block_t blkaddr = NULL_ADDR; int err = 0; 
trace_f2fs_write_begin(inode, pos, len, flags); @@ -1443,37 +1488,12 @@ repeat: *pagep = page; - f2fs_lock_op(sbi); - - /* check inline_data */ - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto unlock_fail; - } - - set_new_dnode(&dn, inode, ipage, ipage, 0); - - if (f2fs_has_inline_data(inode)) { - if (pos + len <= MAX_INLINE_DATA) { - read_inline_data(page, ipage); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); - sync_inode_page(&dn); - goto put_next; - } - err = f2fs_convert_inline_page(&dn, page); - if (err) - goto put_fail; - } - - err = f2fs_get_block(&dn, index); + err = prepare_write_begin(sbi, page, pos, len, + &blkaddr, &need_balance); if (err) - goto put_fail; -put_next: - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + goto fail; - if (dn.node_changed && has_not_enough_free_secs(sbi, 0)) { + if (need_balance && has_not_enough_free_secs(sbi, 0)) { unlock_page(page); f2fs_balance_fs(sbi); lock_page(page); @@ -1488,7 +1508,7 @@ put_next: /* wait for GCed encrypted page writeback */ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) - f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); + f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); if (len == PAGE_CACHE_SIZE) goto out_update; @@ -1504,14 +1524,14 @@ put_next: goto out_update; } - if (dn.data_blkaddr == NEW_ADDR) { + if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, .rw = READ_SYNC, - .blk_addr = dn.data_blkaddr, + .blk_addr = blkaddr, .page = page, .encrypted_page = NULL, }; @@ -1542,10 +1562,6 @@ out_clear: clear_cold_data(page); return 0; -put_fail: - f2fs_put_dnode(&dn); -unlock_fail: - f2fs_unlock_op(sbi); fail: f2fs_put_page(page, 1); f2fs_write_failed(mapping, pos + len); -- cgit v1.2.3-59-g8ed1b From 4aa69d5667914dd0844d98ad84804b79a4845fa3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Dec 2015 14:17:47 -0800 Subject: f2fs: return early 
when trying to read null nid If get_node_page() gets zero nid, we can return early without getting a wrong page. For example, get_dnode_of_data() can try to do that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 341de5d2353b..929265d20c32 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1059,6 +1059,10 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) { struct page *page; int err; + + if (!nid) + return ERR_PTR(-ENOENT); + f2fs_bug_on(sbi, check_nid_range(sbi, nid)); repeat: page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) -- cgit v1.2.3-59-g8ed1b From b4d07a3e1a6e783132be7506aeb171dc5728f077 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Dec 2015 13:48:58 -0800 Subject: f2fs: avoid f2fs_lock_op in f2fs_write_begin If f2fs_write_begin is to update data, we can bypass calling f2fs_lock_op() in order to avoid the checkpoint latency in the write syscall. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d4839fc2b4ca..f2a023edfc1d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1418,10 +1418,16 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, pgoff_t index = page->index; struct dnode_of_data dn; struct page *ipage; + bool locked = false; + struct extent_info ei; int err = 0; - f2fs_lock_op(sbi); - + if (f2fs_has_inline_data(inode) || + (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { + f2fs_lock_op(sbi); + locked = true; + } +restart: /* check inline_data */ ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { @@ -1436,22 +1442,42 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, read_inline_data(page, ipage); set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); sync_inode_page(&dn); - goto done; } else { err = f2fs_convert_inline_page(&dn, page); if (err) - goto 
err_out; + goto out; + if (dn.data_blkaddr == NULL_ADDR) + err = f2fs_get_block(&dn, index); + } + } else if (locked) { + err = f2fs_get_block(&dn, index); + } else { + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + } else { + bool restart = false; + + /* hole case */ + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err || (!err && dn.data_blkaddr == NULL_ADDR)) + restart = true; + if (restart) { + f2fs_put_dnode(&dn); + f2fs_lock_op(sbi); + locked = true; + goto restart; + } } } - err = f2fs_get_block(&dn, index); -done: + /* convert_inline_page can make node_changed */ *blk_addr = dn.data_blkaddr; *node_changed = dn.node_changed; -err_out: +out: f2fs_put_dnode(&dn); unlock_out: - f2fs_unlock_op(sbi); + if (locked) + f2fs_unlock_op(sbi); return err; } -- cgit v1.2.3-59-g8ed1b From 06d6f2263913029ccd6199fd10e7dca525d348b1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Dec 2015 14:56:09 -0800 Subject: f2fs: declare static function The __f2fs_commit_super is static. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 597b533634e0..75704d9caae2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1195,7 +1195,7 @@ next: return 0; } -int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block) +static int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block) { struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi); struct buffer_head *bh; -- cgit v1.2.3-59-g8ed1b From d53841740fd7feec170339203b198020ff100c58 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Dec 2015 18:03:29 +0800 Subject: f2fs: add missing f2fs_balance_fs in __recover_dot_dentries __recover_dot_dentries will try to grab free space in storage, so add the missing f2fs_balance_fs call here.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e439f32d31e6..fb41c8082696 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -214,6 +214,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) struct page *page; int err = 0; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); de = f2fs_find_entry(dir, &dot, &page); -- cgit v1.2.3-59-g8ed1b From 6d5a1495eebd441216dc96913a4270100b26e104 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Dec 2015 18:04:56 +0800 Subject: f2fs: let user being aware of IO error Sometimes we stay silent when an IO error occurs in the lower-layer device, so the user will not receive any error return value for some operations, even though the operation did not actually succeed. This should be avoided, so this patch reports such errors to the user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 17 +++++++---------- fs/f2fs/data.c | 23 +++++++++++++---------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 4 +++- fs/f2fs/gc.c | 4 +++- fs/f2fs/node.c | 5 +++++ 6 files changed, 32 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9cdb16197351..6b89ac69b7e4 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -798,7 +798,7 @@ void remove_dirty_inode(struct inode *inode) } } -void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) +int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) { struct list_head *head; struct inode *inode; @@ -810,7 +810,7 @@ void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); retry: if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; spin_lock(&sbi->inode_lock[type]); @@ -820,7 +820,7 @@ retry: trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir, get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); - return; + return 0; } fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); inode = igrab(&fi->vfs_inode); @@ -859,11 +859,9 @@ retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - sync_dirty_inodes(sbi, DIR_INODE); - if (unlikely(f2fs_cp_error(sbi))) { - err = -EIO; + err = sync_dirty_inodes(sbi, DIR_INODE); + if (err) goto out; - } goto retry_flush_dents; } @@ -876,10 +874,9 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - sync_node_pages(sbi, 0, &wbc); - if (unlikely(f2fs_cp_error(sbi))) { + err = sync_node_pages(sbi, 0, &wbc); + if (err) { f2fs_unlock_all(sbi); - err = -EIO; goto out; } goto retry_flush_nodes; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f2a023edfc1d..5c43b2d606ec 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -498,7 +498,7 @@ alloc: return 0; } -static void __allocate_data_blocks(struct inode *inode, loff_t offset, +static int __allocate_data_blocks(struct inode *inode, loff_t offset, size_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -507,13 +507,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, u64 len = F2FS_BYTES_TO_BLK(count); bool allocated; u64 end_offset; + int err = 0; while (len) { f2fs_lock_op(sbi); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, start, ALLOC_NODE)) + err = get_dnode_of_data(&dn, start, ALLOC_NODE); + if (err) goto out; allocated = false; @@ -522,12 +524,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, while (dn.ofs_in_node < end_offset && len) { block_t blkaddr; - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; goto sync_out; + } blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { - if (__allocate_data_block(&dn)) + 
err = __allocate_data_block(&dn); + if (err) goto sync_out; allocated = true; } @@ -545,7 +550,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, if (dn.node_changed) f2fs_balance_fs(sbi); } - return; + return err; sync_out: if (allocated) @@ -555,7 +560,7 @@ out: f2fs_unlock_op(sbi); if (dn.node_changed) f2fs_balance_fs(sbi); - return; + return err; } /* @@ -1653,11 +1658,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (iov_iter_rw(iter) == WRITE) { - __allocate_data_blocks(inode, offset, count); - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { - err = -EIO; + err = __allocate_data_blocks(inode, offset, count); + if (err) goto out; - } } err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 79345e7ce6bb..3406e9966064 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1837,7 +1837,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *); void update_dirty_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); void remove_dirty_inode(struct inode *); -void sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); +int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); void init_ino_entry_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 780db8bd2451..2d87a3cf6768 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -259,8 +259,10 @@ sync_nodes: sync_node_pages(sbi, ino, &wbc); /* if cp_error was enabled, we should avoid infinite loop */ - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + ret = -EIO; goto out; + } if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ce350c44b5cf..c09be339569c 100644 --- a/fs/f2fs/gc.c 
+++ b/fs/f2fs/gc.c @@ -832,8 +832,10 @@ gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + ret = -EIO; goto stop; + } if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) { gc_type = FG_GC; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 929265d20c32..94d9753f8c53 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1189,6 +1189,11 @@ next_step: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + if (unlikely(f2fs_cp_error(sbi))) { + pagevec_release(&pvec); + return -EIO; + } + /* * flushing sequence with step: * 0. indirect nodes -- cgit v1.2.3-59-g8ed1b From 9a950d52b7f0e1c64c2cc70d350562fb18c8b451 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Sat, 26 Dec 2015 18:07:41 +0800 Subject: f2fs: fix bugs and simplify codes of f2fs_fiemap fix bugs: 1. len could be updated incorrectly when start+len is beyond isize. 2. If there is a hole consisting of more than two blocks, it could fail to add FIEMAP_EXTENT_LAST flag for the last extent. 3. If there is an extent beyond isize, when we search extents in a range that ends at isize, it will also return the extent beyond isize, which is outside the range. 
Signed-off-by: Fan li Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 80 ++++++++++++++++++++-------------------------------------- 1 file changed, 27 insertions(+), 53 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5c43b2d606ec..d67c599510d9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -783,7 +783,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t isize = i_size_read(inode); u64 logical = 0, phys = 0, size = 0; u32 flags = 0; - bool past_eof = false, whole_file = false; int ret = 0; ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); @@ -797,17 +796,18 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } mutex_lock(&inode->i_mutex); + if (start >= isize) + goto out; - if (len >= isize) { - whole_file = true; - len = isize; - } + if (start + len > isize) + len = isize - start; if (logical_to_blk(inode, len) == 0) len = blk_to_logical(inode, 1); start_blk = logical_to_blk(inode, start); last_blk = logical_to_blk(inode, start + len - 1); + next: memset(&map_bh, 0, sizeof(struct buffer_head)); map_bh.b_size = len; @@ -819,59 +819,33 @@ next: /* HOLE */ if (!buffer_mapped(&map_bh)) { - start_blk++; - - if (!past_eof && blk_to_logical(inode, start_blk) >= isize) - past_eof = 1; - - if (past_eof && size) { - flags |= FIEMAP_EXTENT_LAST; - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, flags); - } else if (size) { - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, flags); - size = 0; - } + /* Go through holes util pass the EOF */ + if (blk_to_logical(inode, start_blk++) < isize) + goto prep_next; + /* Found a hole beyond isize means no more extents. + * Note that the premise is that filesystems don't + * punch holes beyond isize and keep size unchanged. 
+ */ + flags |= FIEMAP_EXTENT_LAST; + } - /* if we have holes up to/past EOF then we're done */ - if (start_blk > last_blk || past_eof || ret) - goto out; - } else { - if (start_blk > last_blk && !whole_file) { - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, flags); - goto out; - } + if (size) + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); - /* - * if size != 0 then we know we already have an extent - * to add, so add it. - */ - if (size) { - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, flags); - if (ret) - goto out; - } + if (start_blk > last_blk || ret) + goto out; - logical = blk_to_logical(inode, start_blk); - phys = blk_to_logical(inode, map_bh.b_blocknr); - size = map_bh.b_size; - flags = 0; - if (buffer_unwritten(&map_bh)) - flags = FIEMAP_EXTENT_UNWRITTEN; + logical = blk_to_logical(inode, start_blk); + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = map_bh.b_size; + flags = 0; + if (buffer_unwritten(&map_bh)) + flags = FIEMAP_EXTENT_UNWRITTEN; - start_blk += logical_to_blk(inode, size); + start_blk += logical_to_blk(inode, size); - /* - * If we are past the EOF, then we need to make sure as - * soon as we find a hole that the last extent we found - * is marked with FIEMAP_EXTENT_LAST - */ - if (!past_eof && logical + size >= isize) - past_eof = true; - } +prep_next: cond_resched(); if (fatal_signal_pending(current)) ret = -EINTR; -- cgit v1.2.3-59-g8ed1b From 179448bfe4cd201e98e728391c6b01b25c849fe8 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 28 Dec 2015 21:48:32 +0800 Subject: f2fs: add a max block check for get_data_block_bmap This patch adds a max block check for get_data_block_bmap. Trinity test program will send a block number as parameter into ioctl_fibmap, which will be used in get_node_path(), when the block number large than f2fs max blocks, it will trigger kernel bug. 
Signed-off-by: Yunlei He Signed-off-by: Xue Liu [Jaegeuk Kim: fix missing condition, pointed by Chao Yu] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d67c599510d9..6fbfc70ac8a0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -761,6 +761,10 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, static int get_data_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { + /* Block number less than F2FS MAX BLOCKS */ + if (unlikely(iblock >= max_file_size(0))) + return -EFBIG; + return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_BMAP); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3406e9966064..e04b2be6cd64 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1726,6 +1726,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) * super.c */ int f2fs_commit_super(struct f2fs_sb_info *, bool); +loff_t max_file_size(unsigned bits); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 75704d9caae2..a2e3a8f893ed 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -907,7 +907,7 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static loff_t max_file_size(unsigned bits) +loff_t max_file_size(unsigned bits) { loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; -- cgit v1.2.3-59-g8ed1b From e96248bb45d42375b23e1c083ec5a55151503e82 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Dec 2015 18:11:32 +0800 Subject: f2fs: clean up f2fs_ioc_write_checkpoint Use f2fs_sync_fs to clean up codes in f2fs_ioc_write_checkpoint. 
Signed-off-by: Chao Yu [Jaegeuk Kim: remove unused err variable] Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2d87a3cf6768..91f576a7903c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1619,8 +1619,6 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct cp_control cpc; - int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1628,13 +1626,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; - cpc.reason = __get_cp_reason(sbi); - - mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); - - return err; + return f2fs_sync_fs(sbi->sb, 1); } static int f2fs_defragment_range(struct f2fs_sb_info *sbi, -- cgit v1.2.3-59-g8ed1b From 8dc0d6a11e7d985dd466ce0a8c71eaea50dd7cc6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Dec 2015 16:13:09 -0800 Subject: f2fs: early check broken symlink length in the encrypted case If link is broken, its len is zero, and we don't need to move forward. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index fb41c8082696..6c4a94310b54 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -931,7 +931,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook { struct page *cpage = NULL; char *caddr, *paddr = NULL; - struct f2fs_str cstr; + struct f2fs_str cstr = FSTR_INIT(NULL, 0); struct f2fs_str pstr = FSTR_INIT(NULL, 0); struct inode *inode = d_inode(dentry); struct f2fs_encrypted_symlink_data *sd; @@ -952,6 +952,12 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook /* Symlink is encrypted */ sd = (struct f2fs_encrypted_symlink_data *)caddr; cstr.len = le16_to_cpu(sd->len); + + /* this is broken symlink case */ + if (unlikely(cstr.len == 0)) { + res = -ENOENT; + goto errout; + } cstr.name = kmalloc(cstr.len, GFP_NOFS); if (!cstr.name) { res = -ENOMEM; @@ -960,7 +966,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook memcpy(cstr.name, sd->encrypted_path, cstr.len); /* this is broken symlink case */ - if (cstr.name[0] == 0 && cstr.len == 0) { + if (unlikely(cstr.name[0] == 0)) { res = -ENOENT; goto errout; } -- cgit v1.2.3-59-g8ed1b From 819d9153d4c87329910a4cb01198610cd24ec62d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 28 Dec 2015 13:48:11 -0800 Subject: f2fs: use i_size_read to get i_size We need to use i_size_read() to get inode->i_size. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6fbfc70ac8a0..14b40a9db5b3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1386,10 +1386,11 @@ skip_write: static void f2fs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; + loff_t i_size = i_size_read(inode); - if (to > inode->i_size) { - truncate_pagecache(inode, inode->i_size); - truncate_blocks(inode, inode->i_size, true); + if (to > i_size) { + truncate_pagecache(inode, i_size); + truncate_blocks(inode, i_size, true); } } -- cgit v1.2.3-59-g8ed1b From ed3d12561a731b99b58c6c95151291cebf0b3feb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 28 Dec 2015 11:39:06 -0800 Subject: f2fs: load largest extent all the time Otherwise, we can get mismatched largest extent information. One example is: 1. mount f2fs w/ extent_cache 2. make a small extent 3. umount 4. mount f2fs w/o extent_cache 5. update the largest extent 6. umount 7. mount f2fs w/ extent_cache 8. 
get the old extent made by #2 Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 18 +++++++++++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 3 ++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 5305a29f91a3..b37184f720e8 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -166,20 +166,27 @@ static void __drop_largest_extent(struct inode *inode, largest->len = 0; } -void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +/* return true, if inode page is changed */ +bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; struct extent_node *en; struct extent_info ei; - if (!f2fs_may_extent_tree(inode)) - return; + if (!f2fs_may_extent_tree(inode)) { + /* drop largest extent */ + if (i_ext && i_ext->len) { + i_ext->len = 0; + return true; + } + return false; + } et = __grab_extent_tree(inode); - if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) - return; + if (!i_ext || !i_ext->len) + return false; set_extent_info(&ei, le32_to_cpu(i_ext->fofs), le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); @@ -196,6 +203,7 @@ void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) } out: write_unlock(&et->lock); + return false; } static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e04b2be6cd64..a3395088e0f0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2083,7 +2083,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *); * extent_cache.c */ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); -void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); +bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); unsigned int f2fs_destroy_extent_node(struct inode *); void f2fs_destroy_extent_tree(struct inode *); bool f2fs_lookup_extent_cache(struct 
inode *, pgoff_t, struct extent_info *); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ec3fb32c4726..e95500802daa 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -138,7 +138,8 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - f2fs_init_extent_tree(inode, &ri->i_ext); + if (f2fs_init_extent_tree(inode, &ri->i_ext)) + set_page_dirty(node_page); get_inline_info(fi, ri); -- cgit v1.2.3-59-g8ed1b From 4e0d836d5fb26d2cdbb75b0d16d98bef6b798490 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 30 Dec 2015 17:40:31 +0800 Subject: f2fs: fix to skip recovering dot dentries in a readonly fs If filesystem is readonly, leave user message info instead of recovering inline dot inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6c4a94310b54..a629af5cb0ce 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -214,6 +214,13 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) struct page *page; int err = 0; + if (f2fs_readonly(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_INFO, + "skip recovering inline_dots inode (ino:%lu, pino:%u) " + "in readonly mountpoint", dir->i_ino, pino); + return 0; + } + f2fs_balance_fs(sbi); f2fs_lock_op(sbi); -- cgit v1.2.3-59-g8ed1b From 732d56489f21c04f7bf60c675f7d152c9239a09c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 29 Dec 2015 15:46:33 -0800 Subject: f2fs: fix f2fs_ioc_abort_volatile_write There are two rules to handle aborting volatile or atomic writes. 1. drop atomic writes - we don't need to keep any stale db data. 2. write journal data - we should keep the journal data with fsync for db recovery. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 91f576a7903c..b04ab40ddc73 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1433,9 +1433,14 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) if (ret) return ret; - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - commit_inmem_pages(inode, true); + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + commit_inmem_pages(inode, true); + } + if (f2fs_is_volatile_file(inode)) { + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); + } mnt_drop_write_file(filp); return ret; -- cgit v1.2.3-59-g8ed1b From 1f6fa26199bb164157fbf81f850df1991d10c959 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 10:28:52 -0800 Subject: f2fs: remove f2fs_bug_on in terms of max_depth There is no report of this bug_on case, but if a malicious attacker changed this field intentionally, we can just reset it to the MAX value.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3da58265c0d4..29bb8dd76a46 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -172,8 +172,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, namehash = f2fs_dentry_hash(&name); - f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); - nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); @@ -238,6 +236,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, goto out; max_depth = F2FS_I(dir)->i_current_depth; + if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { + f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING, + "Corrupted max_depth of %lu: %u", + dir->i_ino, max_depth); + max_depth = MAX_DIR_HASH_DEPTH; + F2FS_I(dir)->i_current_depth = max_depth; + mark_inode_dirty(dir); + } for (level = 0; level < max_depth; level++) { de = find_in_level(dir, level, &fname, res_page); -- cgit v1.2.3-59-g8ed1b From 8d4ea29b6426470456ee9daee64bac55a3b13289 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 13:08:02 -0800 Subject: f2fs: write pending bios when cp_error is set When testing ioc_shutdown, put_super is able to be hanged by waiting for writebacking pages as follows. INFO: task umount:2723 blocked for more than 120 seconds. Tainted: G O 4.4.0-rc3+ #8 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. umount D ffff88000859f9d8 0 2723 2110 0x00000000 ffff88000859f9d8 0000000000000000 0000000000000000 ffffffff81e11540 ffff880078c225c0 ffff8800085a0000 ffff88007fc17440 7fffffffffffffff ffffffff818239f0 ffff88000859fb48 ffff88000859f9f0 ffffffff8182310c Call Trace: [] ? bit_wait+0x50/0x50 [] schedule+0x3c/0x90 [] schedule_timeout+0x2d9/0x430 [] ? mark_held_locks+0x6f/0xa0 [] ? ktime_get+0x7d/0x140 [] ? bit_wait+0x50/0x50 [] ? kvm_clock_get_cycles+0x25/0x30 [] ? ktime_get+0xac/0x140 [] ? 
bit_wait+0x50/0x50 [] io_schedule_timeout+0xa4/0x110 [] bit_wait_io+0x35/0x50 [] __wait_on_bit+0x5d/0x90 [] wait_on_page_bit+0xcb/0xf0 [] ? autoremove_wake_function+0x40/0x40 [] truncate_inode_pages_range+0x4bc/0x840 [] truncate_inode_pages_final+0x4d/0x60 [] f2fs_evict_inode+0x75/0x400 [f2fs] [] evict+0xbc/0x190 [] iput+0x229/0x2c0 [] f2fs_put_super+0x105/0x1a0 [f2fs] [] generic_shutdown_super+0x6a/0xf0 [] kill_block_super+0x27/0x70 [] kill_f2fs_super+0x20/0x30 [f2fs] [] deactivate_locked_super+0x43/0x70 [] deactivate_super+0x5c/0x60 [] cleanup_mnt+0x3f/0x90 [] __cleanup_mnt+0x12/0x20 [] task_work_run+0x73/0xa0 [] exit_to_usermode_loop+0xcc/0xd0 [] syscall_return_slowpath+0xcc/0xe0 [] int_ret_from_sys_call+0x25/0x9f Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6b89ac69b7e4..5dbafd5e83d9 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); - if (wbc->for_reclaim) + if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_bio(sbi, META, WRITE); return 0; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 14b40a9db5b3..4851e84d0283 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1179,7 +1179,7 @@ out: unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); - if (wbc->for_reclaim) { + if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_inode(inode); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94d9753f8c53..669c44ef9303 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1368,7 +1368,7 @@ static int f2fs_write_node_page(struct page *page, up_read(&sbi->node_write); unlock_page(page); - if (wbc->for_reclaim) + if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) 
f2fs_submit_merged_bio(sbi, NODE, WRITE); return 0; -- cgit v1.2.3-59-g8ed1b From c46a155bdf3c8877719aa63d1bf1d6e79e2a9764 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 13:49:17 -0800 Subject: f2fs: use IPU for fdatasync This patch fixes missing IPU condition when fdatasync is called. With this patch, fdatasync is able to avoid additional node writes for recovery. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b04ab40ddc73..e3d32f6b4b4f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -202,7 +202,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_f2fs_sync_file_enter(inode); /* if fdatasync is triggered, let's do in-place-update */ - if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) + if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) set_inode_flag(fi, FI_NEED_IPU); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); clear_inode_flag(fi, FI_NEED_IPU); -- cgit v1.2.3-59-g8ed1b From c00ba5548500a6f5dfd3c0e0300b338b584018ba Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 15:24:14 -0800 Subject: f2fs: monitor zombie_tree count This patch adds an entry to show the number of zombie extent_tree. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 5 +++-- fs/f2fs/f2fs.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ed5dfcc8886f..b73e8e133c8b 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,6 +39,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; si->total_ext = atomic64_read(&sbi->total_hit_ext); si->ext_tree = atomic_read(&sbi->total_ext_tree); + si->zombie_tree = atomic_read(&sbi->total_zombie_tree); si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); @@ -292,8 +293,8 @@ static int stat_show(struct seq_file *s, void *v) !si->total_ext ? 0 : div64_u64(si->hit_total * 100, si->total_ext), si->hit_total, si->total_ext); - seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n", - si->ext_tree, si->ext_node); + seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", + si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - inmem: %4d, wb: %4d\n", si->inmem_pages, si->wb_pages); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a3395088e0f0..d81bf5a43714 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1890,7 +1890,7 @@ struct f2fs_stat_info { int main_area_segs, main_area_sections, main_area_zones; unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; - int ext_tree, ext_node; + int ext_tree, zombie_tree, ext_node; int ndirty_node, ndirty_meta; int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files; int nats, dirty_nats, sits, dirty_sits, fnids; -- cgit v1.2.3-59-g8ed1b From 137d09f002df7d4e52513d75f8910945a6c1bb08 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 15:02:16 -0800 Subject: f2fs: introduce zombie list for fast shrinking extent trees This patch removes refcount, and 
instead, adds zombie_list to shrink directly without radix tree traverse. Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 49 +++++++++++++++++++++---------------------------- fs/f2fs/f2fs.h | 3 ++- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index b37184f720e8..4dee2be9a648 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -68,13 +68,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) et->root = RB_ROOT; et->cached_en = NULL; rwlock_init(&et->lock); - atomic_set(&et->refcount, 0); + INIT_LIST_HEAD(&et->list); et->count = 0; atomic_inc(&sbi->total_ext_tree); } else { atomic_dec(&sbi->total_zombie_tree); + list_del_init(&et->list); } - atomic_inc(&et->refcount); up_write(&sbi->extent_tree_lock); /* never died until evict_inode */ @@ -551,9 +551,9 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) { struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; + struct extent_tree *et, *next; struct extent_node *en, *tmp; unsigned long ino = F2FS_ROOT_INO(sbi); - struct radix_tree_root *root = &sbi->extent_tree_root; unsigned int found; unsigned int node_cnt = 0, tree_cnt = 0; int remained; @@ -569,29 +569,20 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) goto out; /* 1. 
remove unreferenced extent tree */ - while ((found = radix_tree_gang_lookup(root, - (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { - unsigned i; - - ino = treevec[found - 1]->ino + 1; - for (i = 0; i < found; i++) { - struct extent_tree *et = treevec[i]; - - if (!atomic_read(&et->refcount)) { - write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et, true); - write_unlock(&et->lock); + list_for_each_entry_safe(et, next, &sbi->zombie_list, list) { + write_lock(&et->lock); + node_cnt += __free_extent_tree(sbi, et, true); + write_unlock(&et->lock); - radix_tree_delete(root, et->ino); - kmem_cache_free(extent_tree_slab, et); - atomic_dec(&sbi->total_ext_tree); - atomic_dec(&sbi->total_zombie_tree); - tree_cnt++; + list_del_init(&et->list); + radix_tree_delete(&sbi->extent_tree_root, et->ino); + kmem_cache_free(extent_tree_slab, et); + atomic_dec(&sbi->total_ext_tree); + atomic_dec(&sbi->total_zombie_tree); + tree_cnt++; - if (node_cnt + tree_cnt >= nr_shrink) - goto unlock_out; - } - } + if (node_cnt + tree_cnt >= nr_shrink) + goto unlock_out; } up_write(&sbi->extent_tree_lock); @@ -619,7 +610,7 @@ free_node: */ ino = F2FS_ROOT_INO(sbi); - while ((found = radix_tree_gang_lookup(root, + while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { unsigned i; @@ -670,8 +661,10 @@ void f2fs_destroy_extent_tree(struct inode *inode) return; if (inode->i_nlink && !is_bad_inode(inode) && et->count) { - atomic_dec(&et->refcount); + down_write(&sbi->extent_tree_lock); + list_add_tail(&et->list, &sbi->zombie_list); atomic_inc(&sbi->total_zombie_tree); + up_write(&sbi->extent_tree_lock); return; } @@ -680,8 +673,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) /* delete extent tree entry in radix tree */ down_write(&sbi->extent_tree_lock); - atomic_dec(&et->refcount); - f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); + f2fs_bug_on(sbi, et->count); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); 
kmem_cache_free(extent_tree_slab, et); atomic_dec(&sbi->total_ext_tree); @@ -737,6 +729,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->extent_list); spin_lock_init(&sbi->extent_lock); atomic_set(&sbi->total_ext_tree, 0); + INIT_LIST_HEAD(&sbi->zombie_list); atomic_set(&sbi->total_zombie_tree, 0); atomic_set(&sbi->total_ext_node, 0); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d81bf5a43714..e2990c978661 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -359,8 +359,8 @@ struct extent_tree { struct rb_root root; /* root of extent info rb-tree */ struct extent_node *cached_en; /* recently accessed extent node */ struct extent_info largest; /* largested extent info */ + struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ - atomic_t refcount; /* reference count of rb-tree */ unsigned int count; /* # of extent node in rb-tree*/ }; @@ -764,6 +764,7 @@ struct f2fs_sb_info { struct list_head extent_list; /* lru list for shrinker */ spinlock_t extent_lock; /* locking extent lru list */ atomic_t total_ext_tree; /* extent tree count */ + struct list_head zombie_list; /* extent zombie tree list */ atomic_t total_zombie_tree; /* extent zombie tree count */ atomic_t total_ext_node; /* extent info count */ -- cgit v1.2.3-59-g8ed1b From 3a9e6433a367211a172cb7b4d5b727c720bd0de0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Dec 2015 18:20:10 +0800 Subject: f2fs crypto: check CONFIG_F2FS_FS_XATTR for encrypted symlink Add missed CONFIG_F2FS_FS_XATTR for encrypted symlink inode in order to avoid unneeded registry of ->{get,set,remove}xattr. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a629af5cb0ce..0d61a6864ab1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1016,10 +1016,12 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = { .put_link = kfree_put_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, +#ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = f2fs_listxattr, .removexattr = generic_removexattr, +#endif }; #endif -- cgit v1.2.3-59-g8ed1b From e0afc4d6d0d3e7e5a99f691bc64ae7c74bea790e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Dec 2015 14:35:37 +0800 Subject: f2fs: introduce max_file_blocks in sbi Introduce max_file_blocks in sbi to store max block index of file in f2fs, it could be used to avoid unneeded calculation of max block index in runtime. Signed-off-by: Chao Yu [Jaegeuk Kim: fix overflow of sbi->max_file_blocks] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4851e84d0283..89a978c57da9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -762,7 +762,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { /* Block number less than F2FS MAX BLOCKS */ - if (unlikely(iblock >= max_file_size(0))) + if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks)) return -EFBIG; return __get_data_block(inode, iblock, bh_result, create, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e2990c978661..882babaa678e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -783,6 +783,7 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ unsigned int total_valid_inode_count; /* valid inode 
count */ + loff_t max_file_blocks; /* max block index of file */ int active_logs; /* # of active logs */ int dir_level; /* directory level */ @@ -1727,7 +1728,6 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) * super.c */ int f2fs_commit_super(struct f2fs_sb_info *, bool); -loff_t max_file_size(unsigned bits); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a2e3a8f893ed..0bbd756821a7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -907,7 +907,7 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -loff_t max_file_size(unsigned bits) +static loff_t max_file_blocks(void) { loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; @@ -923,7 +923,6 @@ loff_t max_file_size(unsigned bits) leaf_count *= NIDS_PER_BLOCK; result += leaf_count; - result <<= bits; return result; } @@ -1278,7 +1277,9 @@ try_onemore: if (err) goto free_options; - sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); + sbi->max_file_blocks = max_file_blocks(); + sb->s_maxbytes = sbi->max_file_blocks << + le32_to_cpu(raw_super->log_blocksize); sb->s_max_links = F2FS_LINK_MAX; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); -- cgit v1.2.3-59-g8ed1b From a51311938e14c17f5a94d30baac9d7bec71f5858 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 2 Jan 2016 09:19:41 -0800 Subject: f2fs: cover more area with nat_tree_lock There was a subtle bug on nat cache management which incurs wrong nid allocation or wrong block addresses when try_to_free_nats is triggered heavily. This patch enlarges the previous coverage of nat_tree_lock to avoid data race. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 669c44ef9303..4dab09f141b7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -262,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, { struct nat_entry *e; - down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); node_info_from_raw_nat(&e->ni, ne); } - up_write(&nm_i->nat_tree_lock); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -380,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + down_write(&nm_i->nat_tree_lock); + /* Check current segment summary */ mutex_lock(&curseg->curseg_mutex); i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); @@ -400,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) cache: /* cache nat entry */ cache_nat_entry(NM_I(sbi), nid, &ne); + up_write(&nm_i->nat_tree_lock); } /* @@ -1459,13 +1460,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (build) { /* do not add allocated nids */ - down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); - if (ne && - (!get_nat_flag(ne, IS_CHECKPOINTED) || + if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1551,6 +1549,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi) ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); + down_read(&nm_i->nat_tree_lock); + while (1) { struct page *page = get_current_nat_page(sbi, nid); @@ -1579,6 +1579,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) remove_free_nid(nm_i, nid); } mutex_unlock(&curseg->curseg_mutex); + 
up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -1861,14 +1862,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) raw_ne = nat_in_journal(sum, i); - down_write(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (!ne) { ne = grab_nat_entry(nm_i, nid); node_info_from_raw_nat(&ne->ni, &raw_ne); } __set_nat_cache_dirty(nm_i, ne); - up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1902,7 +1901,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; struct page *page = NULL; - struct f2fs_nm_info *nm_i = NM_I(sbi); /* * there are two steps to flush nat entries: @@ -1939,12 +1937,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, raw_ne = &nat_blk->entries[nid - start_nid]; } raw_nat_from_node_info(raw_ne, &ne->ni); - - down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - up_write(&NM_I(sbi)->nat_tree_lock); - if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); } @@ -1956,9 +1950,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, set->entry_cnt); - down_write(&nm_i->nat_tree_lock); radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - up_write(&nm_i->nat_tree_lock); kmem_cache_free(nat_entry_set_slab, set); } @@ -1978,6 +1970,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!nm_i->dirty_nat_cnt) return; + + down_write(&nm_i->nat_tree_lock); + /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1986,7 +1981,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_set(nm_i, set_idx, 
SETVEC_SIZE, setvec))) { unsigned idx; @@ -1995,12 +1989,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) __adjust_nat_entry_set(setvec[idx], &sets, MAX_NAT_JENTRIES(sum)); } - up_write(&nm_i->nat_tree_lock); /* flush dirty nats in nat entry set */ list_for_each_entry_safe(set, tmp, &sets, set_list) __flush_nat_entry_set(sbi, set); + up_write(&nm_i->nat_tree_lock); + f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } -- cgit v1.2.3-59-g8ed1b From 957efb0c2144cc5ff1795f43bf2d2ca430eaa227 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 2 Jan 2016 09:23:27 -0800 Subject: Revert "f2fs: check the node block address of newly allocated nid" Original issue is fixed by: f2fs: cover more area with nat_tree_lock This reverts commit 24928634f81b1592e83b37dcd89ed45c28f12feb. --- fs/f2fs/node.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4dab09f141b7..6d5f548d2090 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1602,8 +1602,6 @@ retry: /* We should not use stale free nids created by build_free_nids */ if (nm_i->fcnt && !on_build_free_nids(nm_i)) { - struct node_info ni; - f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); list_for_each_entry(i, &nm_i->free_nid_list, list) if (i->state == NID_NEW) @@ -1614,13 +1612,6 @@ retry: i->state = NID_ALLOC; nm_i->fcnt--; spin_unlock(&nm_i->free_nid_list_lock); - - /* check nid is allocated already */ - get_node_info(sbi, *nid, &ni); - if (ni.blk_addr != NULL_ADDR) { - alloc_nid_done(sbi, *nid); - goto retry; - } return true; } spin_unlock(&nm_i->free_nid_list_lock); -- cgit v1.2.3-59-g8ed1b From de1475cc53b2d6442443dcf5d66ed0fc50ed3c7e Mon Sep 17 00:00:00 2001 From: Fan Li Date: Mon, 4 Jan 2016 15:56:50 +0800 Subject: f2fs: read isize while holding i_mutex in fiemap make sure the isize we read doesn't change during the process. 
Signed-off-by: Fan li Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 89a978c57da9..ac5bea0f5f09 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -784,7 +784,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, { struct buffer_head map_bh; sector_t start_blk, last_blk; - loff_t isize = i_size_read(inode); + loff_t isize; u64 logical = 0, phys = 0, size = 0; u32 flags = 0; int ret = 0; @@ -800,6 +800,8 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } mutex_lock(&inode->i_mutex); + + isize = i_size_read(inode); if (start >= isize) goto out; -- cgit v1.2.3-59-g8ed1b From e84587250ab7e38b7d85e93a8c317e065e5c0a1f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Jan 2016 20:13:37 +0800 Subject: f2fs: check node id earily when readaheading node page Add node id check in ra_node_page and get_node_page_ra like get_node_page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6d5f548d2090..c1ddf3d88dd9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1041,6 +1041,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) struct page *apage; int err; + if (!nid) + return; + f2fs_bug_on(sbi, check_nid_range(sbi, nid)); + apage = find_get_page(NODE_MAPPING(sbi), nid); if (apage && PageUptodate(apage)) { f2fs_put_page(apage, 0); @@ -1108,6 +1112,7 @@ struct page *get_node_page_ra(struct page *parent, int start) nid = get_nid(parent, start, false); if (!nid) return ERR_PTR(-ENOENT); + f2fs_bug_on(sbi, check_nid_range(sbi, nid)); repeat: page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) @@ -1127,9 +1132,9 @@ repeat: end = start + MAX_RA_NODE; end = min(end, NIDS_PER_BLOCK); for (i = start + 1; i < end; i++) { - nid_t tnid = get_nid(parent, i, false); - if (!tnid) - 
continue; + nid_t tnid; + + tnid = get_nid(parent, i, false); ra_node_page(sbi, tnid); } -- cgit v1.2.3-59-g8ed1b From 0e022ea8fc49ed9c72ab9dcd9ca96414dc026184 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 5 Jan 2016 16:52:46 +0800 Subject: f2fs: introduce __get_node_page to reuse common code There is duplicated code between get_node_page and get_node_page_ra; introduce __get_node_page to include the common parts of these two, and export get_node_page and get_node_page_ra by reusing __get_node_page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 88 +++++++++++++++++++++++----------------------------------- 1 file changed, 35 insertions(+), 53 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c1ddf3d88dd9..5a2d800f4abc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1060,56 +1060,35 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) f2fs_put_page(apage, err ? 1 : 0); } -struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +/* + * readahead MAX_RA_NODE number of node pages.
+ */ +void ra_node_pages(struct page *parent, int start) { - struct page *page; - int err; + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); + struct blk_plug plug; + int i, end; + nid_t nid; - if (!nid) - return ERR_PTR(-ENOENT); - f2fs_bug_on(sbi, check_nid_range(sbi, nid)); -repeat: - page = grab_cache_page(NODE_MAPPING(sbi), nid); - if (!page) - return ERR_PTR(-ENOMEM); + blk_start_plug(&plug); - err = read_node_page(page, READ_SYNC); - if (err < 0) { - f2fs_put_page(page, 1); - return ERR_PTR(err); - } else if (err == LOCKED_PAGE) { - goto page_hit; + /* Then, try readahead for siblings of the desired node */ + end = start + MAX_RA_NODE; + end = min(end, NIDS_PER_BLOCK); + for (i = start; i < end; i++) { + nid = get_nid(parent, i, false); + ra_node_page(sbi, nid); } - lock_page(page); - - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { - f2fs_put_page(page, 1); - goto repeat; - } -page_hit: - f2fs_bug_on(sbi, nid != nid_of_node(page)); - return page; + blk_finish_plug(&plug); } -/* - * Return a locked page for the desired node page. - * And, readahead MAX_RA_NODE number of node pages. - */ -struct page *get_node_page_ra(struct page *parent, int start) +struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, + struct page *parent, int start) { - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); - struct blk_plug plug; struct page *page; - int err, i, end; - nid_t nid; + int err; - /* First, try getting the desired direct node. 
*/ - nid = get_nid(parent, start, false); if (!nid) return ERR_PTR(-ENOENT); f2fs_bug_on(sbi, check_nid_range(sbi, nid)); @@ -1126,21 +1105,11 @@ repeat: goto page_hit; } - blk_start_plug(&plug); - - /* Then, try readahead for siblings of the desired node */ - end = start + MAX_RA_NODE; - end = min(end, NIDS_PER_BLOCK); - for (i = start + 1; i < end; i++) { - nid_t tnid; - - tnid = get_nid(parent, i, false); - ra_node_page(sbi, tnid); - } - - blk_finish_plug(&plug); + if (parent) + ra_node_pages(parent, start + 1); lock_page(page); + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); @@ -1154,6 +1123,19 @@ page_hit: return page; } +struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +{ + return __get_node_page(sbi, nid, NULL, 0); +} + +struct page *get_node_page_ra(struct page *parent, int start) +{ + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); + nid_t nid = get_nid(parent, start, false); + + return __get_node_page(sbi, nid, parent, start); +} + void sync_inode_page(struct dnode_of_data *dn) { if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { -- cgit v1.2.3-59-g8ed1b From 7612118ae8cdd36cbd74d873855d70252d2d49e3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 1 Jan 2016 22:03:47 -0800 Subject: f2fs: check the page status filled from disk After reading a page, we need to check whether there is any error. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ac5bea0f5f09..77c3bbb9bee0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -413,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode, struct page *page; struct dnode_of_data dn; int err; -repeat: + page = f2fs_grab_cache_page(mapping, index, true); if (!page) { /* @@ -442,12 +442,11 @@ repeat: } else { f2fs_put_page(page, 1); - page = get_read_data_page(inode, index, READ_SYNC, true); + /* if ipage exists, blkaddr should be NEW_ADDR */ + f2fs_bug_on(F2FS_I_SB(inode), ipage); + page = get_lock_data_page(inode, index, true); if (IS_ERR(page)) - goto repeat; - - /* wait for read completion */ - lock_page(page); + return page; } got_it: if (new_i_size && i_size_read(inode) < -- cgit v1.2.3-59-g8ed1b From 12719ae14e57980ebf0a7baa63bc80494c76b192 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jan 2016 13:23:12 -0800 Subject: f2fs: avoid unnecessary f2fs_balance_fs calls Only when node page is newly dirtied, it needs to check whether we need to do f2fs_gc. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/inode.c | 19 ++++++++++--------- fs/f2fs/node.c | 26 ++++++++++++++------------ fs/f2fs/node.h | 4 ++-- fs/f2fs/super.c | 2 -- 6 files changed, 30 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 77c3bbb9bee0..3cf86fda8138 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -225,8 +225,8 @@ void set_data_blkaddr(struct dnode_of_data *dn) /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); - set_page_dirty(node_page); - dn->node_changed = true; + if (set_page_dirty(node_page)) + dn->node_changed = true; } int reserve_new_block(struct dnode_of_data *dn) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 882babaa678e..461b32923c14 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1674,8 +1674,8 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); void f2fs_set_inode_flags(struct inode *); struct inode *f2fs_iget(struct super_block *, unsigned long); int try_to_free_nats(struct f2fs_sb_info *, int); -void update_inode(struct inode *, struct page *); -void update_inode_page(struct inode *); +int update_inode(struct inode *, struct page *); +int update_inode_page(struct inode *); int f2fs_write_inode(struct inode *, struct writeback_control *); void f2fs_evict_inode(struct inode *); void handle_failed_inode(struct inode *); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e95500802daa..cabc1ff108a1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -222,7 +222,7 @@ bad_inode: return ERR_PTR(ret); } -void update_inode(struct inode *inode, struct page *node_page) +int update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; @@ -260,15 +260,16 @@ void update_inode(struct inode *inode, struct page *node_page) __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); - set_page_dirty(node_page); - 
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); + + return set_page_dirty(node_page); } -void update_inode_page(struct inode *inode) +int update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *node_page; + int ret = 0; retry: node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) { @@ -279,10 +280,11 @@ retry: } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi); } - return; + return 0; } - update_inode(inode, node_page); + ret = update_inode(inode, node_page); f2fs_put_page(node_page, 1); + return ret; } int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -300,9 +302,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. */ - update_inode_page(inode); - - f2fs_balance_fs(sbi); + if (update_inode_page(inode)) + f2fs_balance_fs(sbi); return 0; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5a2d800f4abc..c091b757bda6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -543,7 +543,6 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) set_nid(parent, offset[i - 1], nids[i], i == 1); alloc_nid_done(sbi, nids[i]); - dn->node_changed = true; done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { npage[i] = get_node_page_ra(parent, offset[i - 1]); @@ -679,8 +678,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, ret = truncate_dnode(&rdn); if (ret < 0) goto out_err; - set_nid(page, i, 0, false); - dn->node_changed = true; + if (set_nid(page, i, 0, false)) + dn->node_changed = true; } } else { child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; @@ -693,8 +692,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, rdn.nid = child_nid; ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); if (ret == (NIDS_PER_BLOCK + 1)) { - 
set_nid(page, i, 0, false); - dn->node_changed = true; + if (set_nid(page, i, 0, false)) + dn->node_changed = true; child_nofs += ret; } else if (ret < 0 && ret != -ENOENT) { goto out_err; @@ -755,8 +754,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, err = truncate_dnode(dn); if (err < 0) goto fail; - set_nid(pages[idx], i, 0, false); - dn->node_changed = true; + if (set_nid(pages[idx], i, 0, false)) + dn->node_changed = true; } if (offset[idx + 1] == 0) { @@ -981,7 +980,8 @@ struct page *new_node_page(struct dnode_of_data *dn, fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); SetPageUptodate(page); - set_page_dirty(page); + if (set_page_dirty(page)) + dn->node_changed = true; if (f2fs_has_xattr_block(ofs)) F2FS_I(dn->inode)->i_xattr_nid = dn->nid; @@ -1138,18 +1138,20 @@ struct page *get_node_page_ra(struct page *parent, int start) void sync_inode_page(struct dnode_of_data *dn) { + int ret = 0; + if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { - update_inode(dn->inode, dn->node_page); + ret = update_inode(dn->inode, dn->node_page); } else if (dn->inode_page) { if (!dn->inode_page_locked) lock_page(dn->inode_page); - update_inode(dn->inode, dn->inode_page); + ret = update_inode(dn->inode, dn->inode_page); if (!dn->inode_page_locked) unlock_page(dn->inode_page); } else { - update_inode_page(dn->inode); + ret = update_inode_page(dn->inode); } - dn->node_changed = true; + dn->node_changed = ret ? 
true: false; } int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 2de759a7746f..d4d1f636fe1c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -317,7 +317,7 @@ static inline bool IS_DNODE(struct page *node_page) return true; } -static inline void set_nid(struct page *p, int off, nid_t nid, bool i) +static inline int set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); @@ -327,7 +327,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); else rn->in.nid[off] = cpu_to_le32(nid); - set_page_dirty(p); + return set_page_dirty(p); } static inline nid_t get_nid(struct page *p, int off, bool i) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0bbd756821a7..f5cc790646e2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -591,8 +591,6 @@ int f2fs_sync_fs(struct super_block *sb, int sync) mutex_lock(&sbi->gc_mutex); err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); - } else { - f2fs_balance_fs(sbi); } f2fs_trace_ios(NULL, 1); -- cgit v1.2.3-59-g8ed1b From 2a4b8e9fab9cea45d90179d9ee8e718c5ed26457 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jan 2016 13:52:34 -0800 Subject: f2fs: remove redundant calls This patch removes redundant calls. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e3d32f6b4b4f..69bc65fd862c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -267,8 +267,6 @@ sync_nodes: if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); - - f2fs_balance_fs(sbi); goto sync_nodes; } @@ -484,7 +482,6 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) F2FS_I(dn->inode)) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); - set_page_dirty(dn->node_page); sync_inode_page(dn); } dn->ofs_in_node = ofs; -- cgit v1.2.3-59-g8ed1b From 2c4db1a6f6b42e2a9fb611cbbeb71a3a9a358ee0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jan 2016 14:15:04 -0800 Subject: f2fs: clean up f2fs_balance_fs This patch adds one parameter to clean up all the callers of f2fs_balance_fs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 17 ++++++----------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 17 ++++++++--------- fs/f2fs/inline.c | 3 +-- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 22 +++++++++++----------- fs/f2fs/segment.c | 6 ++++-- fs/f2fs/xattr.c | 2 +- 8 files changed, 33 insertions(+), 38 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3cf86fda8138..6fae75ddae6d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -546,8 +546,7 @@ static int __allocate_data_blocks(struct inode *inode, loff_t offset, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); } return err; @@ -557,8 +556,7 @@ sync_out: f2fs_put_dnode(&dn); out: f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); return err; } @@ -657,8 +655,7 @@ get_next: if (create) { f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); f2fs_lock_op(sbi); } @@ -718,8 
+715,7 @@ put_out: unlock_out: if (create) { f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); } out: trace_f2fs_map_blocks(inode, map, err); @@ -1178,8 +1174,7 @@ out: if (err) ClearPageUptodate(page); unlock_page(page); - if (need_balance_fs) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, need_balance_fs); if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_inode(inode); @@ -1506,7 +1501,7 @@ repeat: if (need_balance && has_not_enough_free_secs(sbi, 0)) { unlock_page(page); - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); lock_page(page); if (page->mapping != mapping) { /* The page got truncated from under us */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 461b32923c14..412865482a0b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1780,7 +1780,7 @@ void destroy_node_manager_caches(void); */ void register_inmem_page(struct inode *, struct page *); int commit_inmem_pages(struct inode *, bool); -void f2fs_balance_fs(struct f2fs_sb_info *); +void f2fs_balance_fs(struct f2fs_sb_info *, bool); void f2fs_balance_fs_bg(struct f2fs_sb_info *); int f2fs_issue_flush(struct f2fs_sb_info *); int create_flush_cmd_control(struct f2fs_sb_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 69bc65fd862c..ff06827aa369 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -55,8 +55,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); file_update_time(vma->vm_file); lock_page(page); @@ -677,7 +676,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) err = f2fs_truncate(inode, true); if (err) return err; - f2fs_balance_fs(F2FS_I_SB(inode)); + f2fs_balance_fs(F2FS_I_SB(inode), true); } else { /* * do not trim all blocks after i_size if target size is @@ -732,7 +731,7 @@ static int fill_zero(struct 
inode *inode, pgoff_t index, if (!len) return 0; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); page = get_new_data_page(inode, NULL, index, false); @@ -818,7 +817,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t blk_start, blk_end; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT; blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT; @@ -921,7 +920,7 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) int ret = 0; for (; end < nrpages; start++, end++) { - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); ret = __exchange_data_block(inode, end, start, true); f2fs_unlock_op(sbi); @@ -1103,7 +1102,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) @@ -1155,7 +1154,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -1653,7 +1652,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, pg_start = range->start >> PAGE_CACHE_SHIFT; pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); mutex_lock(&inode->i_mutex); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5ffbd169b719..c3f0b7d4cfca 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -200,8 +200,7 @@ out: f2fs_put_page(page, 1); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); return err; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cabc1ff108a1..2ac4b780e8b4 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c 
@@ -303,7 +303,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * during the urgent cleaning time when runing out of free sections. */ if (update_inode_page(inode)) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); return 0; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d61a6864ab1..53d6227f5581 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -140,7 +140,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -172,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, !f2fs_is_child_context_consistent_with_parent(dir, inode)) return -EPERM; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); inode->i_ctime = CURRENT_TIME; ihold(inode); @@ -221,7 +221,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) return 0; } - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); @@ -302,7 +302,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (!de) goto fail; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); @@ -361,7 +361,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -452,7 +452,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); @@ -498,7 +498,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, 
init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -539,7 +539,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &f2fs_dblock_aops; } - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); @@ -642,7 +642,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) goto out_whiteout; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); @@ -675,7 +675,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(old_inode); update_inode_page(new_inode); } else { - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); @@ -816,7 +816,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_new_dir; } - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a3474bad5770..c7bbc915d962 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -213,7 +213,7 @@ int commit_inmem_pages(struct inode *inode, bool abort) * inode becomes free by iget_locked in f2fs_iget. */ if (!abort) { - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); } @@ -262,8 +262,10 @@ int commit_inmem_pages(struct inode *inode, bool abort) * This function balances dirty node and dentry pages. * In addition, it controls garbage collection. */ -void f2fs_balance_fs(struct f2fs_sb_info *sbi) +void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { + if (!need) + return; /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. 
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 862368a32e53..822a8af89c12 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -609,7 +609,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, if (ipage) return __f2fs_setxattr(inode, index, name, value, size, ipage, flags); - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); /* protect xattr_ver */ -- cgit v1.2.3-59-g8ed1b From da5af127a1a17bac121c6889c88cc90f8a278a84 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Jan 2016 20:19:27 +0800 Subject: f2fs: recognize encrypted data in f2fs_fiemap This patch teaches f2fs_fiemap to recognize encrypted data. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6fae75ddae6d..a3bce12b0cce 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -830,9 +830,13 @@ next: flags |= FIEMAP_EXTENT_LAST; } - if (size) + if (size) { + if (f2fs_encrypted_inode(inode)) + flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; + ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); + } if (start_blk > last_blk || ret) goto out; -- cgit v1.2.3-59-g8ed1b From 68e353851002dc07555b067a0baff1cc2f709c04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Jan 2016 20:22:52 +0800 Subject: f2fs: use atomic type for node count in extent tree 1. rename field in struct extent_tree from count to node_cnt for readability. 2. use atomic type for node_cnt. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 17 +++++++++-------- fs/f2fs/f2fs.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 4dee2be9a648..9febbc622bf5 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -36,7 +36,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, rb_link_node(&en->rb_node, parent, p); rb_insert_color(&en->rb_node, &et->root); - et->count++; + atomic_inc(&et->node_cnt); atomic_inc(&sbi->total_ext_node); return en; } @@ -45,7 +45,7 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { rb_erase(&en->rb_node, &et->root); - et->count--; + atomic_dec(&et->node_cnt); atomic_dec(&sbi->total_ext_node); if (et->cached_en == en) @@ -69,7 +69,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) et->cached_en = NULL; rwlock_init(&et->lock); INIT_LIST_HEAD(&et->list); - et->count = 0; + atomic_set(&et->node_cnt, 0); atomic_inc(&sbi->total_ext_tree); } else { atomic_dec(&sbi->total_zombie_tree); @@ -133,7 +133,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, { struct rb_node *node, *next; struct extent_node *en; - unsigned int count = et->count; + unsigned int count = atomic_read(&et->node_cnt); node = rb_first(&et->root); while (node) { @@ -154,7 +154,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, node = next; } - return count - et->count; + return count - atomic_read(&et->node_cnt); } static void __drop_largest_extent(struct inode *inode, @@ -192,7 +192,7 @@ bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); write_lock(&et->lock); - if (et->count) + if (atomic_read(&et->node_cnt)) goto out; en = __init_extent_tree(sbi, et, &ei); @@ -660,7 +660,8 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (!et) 
return; - if (inode->i_nlink && !is_bad_inode(inode) && et->count) { + if (inode->i_nlink && !is_bad_inode(inode) && + atomic_read(&et->node_cnt)) { down_write(&sbi->extent_tree_lock); list_add_tail(&et->list, &sbi->zombie_list); atomic_inc(&sbi->total_zombie_tree); @@ -673,7 +674,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) /* delete extent tree entry in radix tree */ down_write(&sbi->extent_tree_lock); - f2fs_bug_on(sbi, et->count); + f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&sbi->total_ext_tree); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 412865482a0b..ae0007df6c2c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -361,7 +361,7 @@ struct extent_tree { struct extent_info largest; /* largested extent info */ struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ - unsigned int count; /* # of extent node in rb-tree*/ + atomic_t node_cnt; /* # of extent node in rb-tree*/ }; /* -- cgit v1.2.3-59-g8ed1b From 9b72a388f5867f4a31113a41d24bbf1026611d7b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Jan 2016 20:24:00 +0800 Subject: f2fs: skip releasing nodes in childless extent tree If there are no nodes in the extent tree, let's skip the releasing step to avoid any overhead of grabbing/releasing the extent tree lock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 9febbc622bf5..ccd5c636d3fe 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -570,9 +570,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) /* 1. 
remove unreferenced extent tree */ list_for_each_entry_safe(et, next, &sbi->zombie_list, list) { - write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et, true); - write_unlock(&et->lock); + if (atomic_read(&et->node_cnt)) { + write_lock(&et->lock); + node_cnt += __free_extent_tree(sbi, et, true); + write_unlock(&et->lock); + } list_del_init(&et->list); radix_tree_delete(&sbi->extent_tree_root, et->ino); @@ -618,6 +620,9 @@ free_node: for (i = 0; i < found; i++) { struct extent_tree *et = treevec[i]; + if (!atomic_read(&et->node_cnt)) + continue; + if (write_trylock(&et->lock)) { node_cnt += __free_extent_tree(sbi, et, false); write_unlock(&et->lock); @@ -641,7 +646,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode) struct extent_tree *et = F2FS_I(inode)->extent_tree; unsigned int node_cnt = 0; - if (!et) + if (!et || !atomic_read(&et->node_cnt)) return 0; write_lock(&et->lock); -- cgit v1.2.3-59-g8ed1b From 6beceb5427aa8731f958d2484e0fd8ff21d604dc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Jan 2016 15:51:50 -0800 Subject: f2fs: introduce time and interval facility This patch adds time and interval arrays to store some timing variables. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 21 ++++++++++++++++++++- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 7 +++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5dbafd5e83d9..3842af954cd5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1139,7 +1139,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) "checkpoint: version = %llx", ckpt_ver); /* do checkpoint periodically */ - sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval); + f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: mutex_unlock(&sbi->cp_mutex); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ae0007df6c2c..5bbb6a407e79 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -721,6 +721,11 @@ enum { SBI_POR_DOING, /* recovery is doing or not */ }; +enum { + CP_TIME, + MAX_TIME, +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -747,7 +752,8 @@ struct f2fs_sb_info { struct rw_semaphore node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; - long cp_expires, cp_interval; /* next expected periodic cp */ + unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ + long interval_time[MAX_TIME]; /* to store thresholds */ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ @@ -837,6 +843,19 @@ struct f2fs_sb_info { unsigned int shrinker_run_no; }; +static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) +{ + sbi->last_time[type] = jiffies; +} + +static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) +{ + struct timespec ts = {sbi->interval_time[type], 0}; + unsigned long interval = timespec_to_jiffies(&ts); + + return time_after(jiffies, sbi->last_time[type] + interval); +} + /* * Inline 
functions */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c7bbc915d962..fed23d5a7b34 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -293,7 +293,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, NAT_ENTRIES) || excess_prefree_segs(sbi) || !available_free_memory(sbi, INO_ENTRIES) || - jiffies > sbi->cp_expires) { + f2fs_time_over(sbi, CP_TIME)) { if (test_opt(sbi, DATA_FLUSH)) sync_dirty_inodes(sbi, FILE_INODE); f2fs_sync_fs(sbi->sb, true); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f5cc790646e2..787047f59c00 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -218,7 +218,7 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -1122,7 +1122,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; - sbi->cp_interval = DEF_CP_INTERVAL; + sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); INIT_LIST_HEAD(&sbi->s_list); @@ -1467,8 +1467,7 @@ try_onemore: f2fs_commit_super(sbi, true); } - sbi->cp_expires = round_jiffies_up(jiffies); - + f2fs_update_time(sbi, CP_TIME); return 0; free_kobj: -- cgit v1.2.3-59-g8ed1b From d0239e1bf5204d602281f93c01d46bcf3531098d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Jan 2016 16:57:48 -0800 Subject: f2fs: detect idle time depending on user behavior This patch adds last time that user requested filesystem operations. This information is used to detect whether system is idle or not later. 
Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/data.c | 1 + fs/f2fs/dir.c | 4 ++++ fs/f2fs/f2fs.h | 15 +++++++++++++++ fs/f2fs/file.c | 12 ++++++++++++ fs/f2fs/gc.c | 1 - fs/f2fs/gc.h | 8 -------- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 4 ++++ fs/f2fs/xattr.c | 1 + 10 files changed, 44 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 0345f2d1c727..e5200f354abf 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -87,6 +87,12 @@ Contact: "Jaegeuk Kim" Description: Controls the checkpoint timing. +What: /sys/fs/f2fs//idle_interval +Date: January 2016 +Contact: "Jaegeuk Kim" +Description: + Controls the idle timing. + What: /sys/fs/f2fs//ra_nid_pages Date: October 2015 Contact: "Chao Yu" diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a3bce12b0cce..ac9e7c6aac74 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1596,6 +1596,7 @@ static int f2fs_write_end(struct file *file, } f2fs_put_page(page, 1); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 29bb8dd76a46..faa7495e2d7e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -636,6 +636,7 @@ fail: f2fs_put_page(dentry_page, 1); out: f2fs_fname_free_filename(&fname); + f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; } @@ -657,6 +658,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); fail: up_write(&F2FS_I(inode)->i_sem); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } @@ -701,6 +703,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; + f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); + if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); diff --git a/fs/f2fs/f2fs.h 
b/fs/f2fs/f2fs.h index 5bbb6a407e79..4331b9fe6f27 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) @@ -126,6 +127,7 @@ enum { #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define DEF_CP_INTERVAL 60 /* 60 secs */ +#define DEF_IDLE_INTERVAL 120 /* 2 mins */ struct cp_control { int reason; @@ -723,6 +725,7 @@ enum { enum { CP_TIME, + REQ_TIME, MAX_TIME, }; @@ -856,6 +859,18 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) return time_after(jiffies, sbi->last_time[type] + interval); } +static inline bool is_idle(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + struct request_list *rl = &q->root_rl; + + if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) + return 0; + + return f2fs_time_over(sbi, REQ_TIME); +} + /* * Inline functions */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ff06827aa369..3d43857e9892 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -96,6 +96,7 @@ mapped: clear_cold_data(page); out: sb_end_pagefault(inode->i_sb); + f2fs_update_time(sbi, REQ_TIME); return block_page_mkwrite_return(err); } @@ -280,6 +281,7 @@ flush_out: remove_ino_entry(sbi, ino, UPDATE_INO); clear_inode_flag(fi, FI_UPDATE_WRITE); ret = f2fs_issue_flush(sbi); + f2fs_update_time(sbi, REQ_TIME); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); f2fs_trace_ios(NULL, 1); @@ -485,6 +487,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) } dn->ofs_in_node = ofs; + f2fs_update_time(sbi, REQ_TIME); trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, dn->ofs_in_node, nr_free); return nr_free; @@ -1236,6 +1239,7 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); + 
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } out: @@ -1351,6 +1355,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) return ret; set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return 0; } @@ -1398,6 +1404,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) return ret; set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return 0; } @@ -1439,6 +1446,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) } mnt_drop_write_file(filp); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return ret; } @@ -1478,6 +1486,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) default: return -EINVAL; } + f2fs_update_time(sbi, REQ_TIME); return 0; } @@ -1508,6 +1517,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return 0; } @@ -1531,6 +1541,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) sizeof(policy))) return -EFAULT; + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return f2fs_process_policy(&policy, inode); #else return -EOPNOTSUPP; @@ -1807,6 +1818,7 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) } err = f2fs_defragment_range(sbi, filp, &range); + f2fs_update_time(sbi, REQ_TIME); if (err < 0) goto out; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c09be339569c..f610c2a9bdde 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "f2fs.h" #include "node.h" diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index b4a65be9f7d3..a993967dcdb9 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -100,11 +100,3 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) return true; return false; } - -static inline int is_idle(struct f2fs_sb_info *sbi) -{ - 
struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->root_rl; - return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); -} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fed23d5a7b34..d8ad1abfa4fd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -293,7 +293,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, NAT_ENTRIES) || excess_prefree_segs(sbi) || !available_free_memory(sbi, INO_ENTRIES) || - f2fs_time_over(sbi, CP_TIME)) { + (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { if (test_opt(sbi, DATA_FLUSH)) sync_dirty_inodes(sbi, FILE_INODE); f2fs_sync_fs(sbi->sb, true); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 787047f59c00..3bf990b80026 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -219,6 +219,7 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -237,6 +238,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), ATTR_LIST(cp_interval), + ATTR_LIST(idle_interval), NULL, }; @@ -1123,6 +1125,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; + sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); INIT_LIST_HEAD(&sbi->s_list); @@ -1468,6 +1471,7 @@ try_onemore: } f2fs_update_time(sbi, CP_TIME); + f2fs_update_time(sbi, REQ_TIME); return 0; free_kobj: diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 822a8af89c12..0108f487cc8e 100644 --- a/fs/f2fs/xattr.c +++ 
b/fs/f2fs/xattr.c @@ -618,5 +618,6 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); + f2fs_update_time(sbi, REQ_TIME); return err; } -- cgit v1.2.3-59-g8ed1b From 42190d2a8663f3e181894dc4e37a1af06aab2cbb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Jan 2016 13:45:17 -0800 Subject: f2fs: monitor the number of background checkpoint This patch adds a counter that shows the number of background checkpoints. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 ++- fs/f2fs/f2fs.h | 4 +++- fs/f2fs/segment.c | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b73e8e133c8b..48f2ae9452ef 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -272,7 +272,8 @@ static int stat_show(struct seq_file *s, void *v) si->dirty_count); seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", si->prefree_count, si->free_segs, si->free_secs); - seq_printf(s, "CP calls: %d\n", si->cp_count); + seq_printf(s, "CP calls: %d (BG: %d)\n", + si->cp_count, si->bg_cp_count); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4331b9fe6f27..2c0e478cefb4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1937,7 +1937,7 @@ struct f2fs_stat_info { int util_free, util_valid, util_invalid; int rsvd_segs, overp_segs; int dirty_count, node_pages, meta_pages; - int prefree_count, call_count, cp_count; + int prefree_count, call_count, cp_count, bg_cp_count; int tot_segs, node_segs, data_segs, free_segs, free_secs; int bg_node_segs, bg_data_segs; int tot_blks, data_blks, node_blks; @@ -1958,6 +1958,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) } #define stat_inc_cp_count(si) ((si)->cp_count++) +#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) 
((sbi)->bg_gc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -2040,6 +2041,7 @@ int __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_cp_count(si) +#define stat_inc_bg_cp_count(si) #define stat_inc_call_count(si) #define stat_inc_bggc_count(si) #define stat_inc_dirty_inode(sbi, type) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d8ad1abfa4fd..5904a411c86f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -297,6 +297,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (test_opt(sbi, DATA_FLUSH)) sync_dirty_inodes(sbi, FILE_INODE); f2fs_sync_fs(sbi->sb, true); + stat_inc_bg_cp_count(sbi->stat_info); } } -- cgit v1.2.3-59-g8ed1b From 1663cae48ce3ce991c0e3f1a6fbdbd57f3dce9af Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Jan 2016 16:14:08 -0800 Subject: f2fs: fix wrong memory condition check This patch fixes the wrong decision made in available_free_memory. The return value is already set to false, so the branch below only needs to handle the true condition. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c091b757bda6..342597a5897f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -71,8 +71,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else { - if (sbi->sb->s_bdi->wb.dirty_exceeded) - return false; + if (!sbi->sb->s_bdi->wb.dirty_exceeded) + return true; } return res; } -- cgit v1.2.3-59-g8ed1b From 447135a86659c646017b8e707c1243c186bf2dff Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Jan 2016 17:08:38 -0800 Subject: f2fs: should unset atomic flag after successful commit If there is an error during commit, we should keep the flag in order to abort it. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3d43857e9892..18ddb1e5182a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1378,8 +1378,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (f2fs_is_atomic_file(inode)) { clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); ret = commit_inmem_pages(inode, false); - if (ret) + if (ret) { + set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); goto err_out; + } } ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); -- cgit v1.2.3-59-g8ed1b