From 31867b23d7d1ee3535136c6a410a6cf56f666bfc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Dec 2018 11:00:38 -0800 Subject: f2fs: wait on atomic writes to count F2FS_CP_WB_DATA Otherwise, we can get wrong counts incurring checkpoint hang. IO_W (CP: -24, Data: 24, Flush: ( 0 0 1), Discard: ( 0 0)) Thread A Thread B - f2fs_write_data_pages - __write_data_page - f2fs_submit_page_write - inc_page_count(F2FS_WB_DATA) type is F2FS_WB_DATA due to file is non-atomic one - f2fs_ioc_start_atomic_write - set_inode_flag(FI_ATOMIC_FILE) - f2fs_write_end_io - dec_page_count(F2FS_WB_CP_DATA) type is F2FS_WB_DATA due to file becomes atomic one Cc: Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bba56b39dcc5..ae2b45e75847 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1750,10 +1750,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (!get_dirty_pages(inode)) - goto skip_flush; - - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, + /* + * Should wait end_io to count F2FS_WB_CP_DATA correctly by + * f2fs_is_atomic_file. + */ + if (get_dirty_pages(inode)) + f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); @@ -1761,7 +1763,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } -skip_flush: + set_inode_flag(inode, FI_ATOMIC_FILE); clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); -- cgit v1.2.3-59-g8ed1b From 7c77bf7de1574ac7a31a2b76f4927404307d13e7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 1 Jan 2019 00:11:30 -0800 Subject: f2fs: don't access node/meta inode mapping after iput This fixes wrong access of address spaces of node and meta inodes after iput. Fixes: 60aa4d5536ab ("f2fs: fix use-after-free issue when accessing sbi->stat_info") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 19 ++++++++++++------- fs/f2fs/super.c | 5 +++++ 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ebcc121920ba..503fde8349e6 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -96,8 +96,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_secs = free_sections(sbi); si->prefree_count = prefree_segments(sbi); si->dirty_count = dirty_segments(sbi); - si->node_pages = NODE_MAPPING(sbi)->nrpages; - si->meta_pages = META_MAPPING(sbi)->nrpages; + if (sbi->node_inode) + si->node_pages = NODE_MAPPING(sbi)->nrpages; + if (sbi->meta_inode) + si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; si->sits = MAIN_SEGS(sbi); @@ -175,7 +177,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi) static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned npages; int i; if (si->base_mem) @@ -258,10 +259,14 @@ get_cache: sizeof(struct extent_node); si->page_mem = 0; - npages = NODE_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; - npages = META_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + if (sbi->node_inode) { + unsigned npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } + if (sbi->meta_inode) { + unsigned npages = META_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } } static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c46a1d4318d4..14f033e1ab42 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1075,7 +1075,10 @@ static void f2fs_put_super(struct super_block *sb) f2fs_bug_on(sbi, sbi->fsync_node_num); iput(sbi->node_inode); + sbi->node_inode = NULL; + iput(sbi->meta_inode); + sbi->meta_inode = NULL; /* * iput() can update stat information, if f2fs_write_checkpoint() @@ -3410,6 +3413,7 @@ free_node_inode: f2fs_release_ino_entry(sbi, true); truncate_inode_pages_final(NODE_MAPPING(sbi)); iput(sbi->node_inode); + sbi->node_inode = NULL; free_stats: f2fs_destroy_stats(sbi); free_nm: @@ -3422,6 +3426,7 @@ free_devices: free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); + sbi->meta_inode = NULL; free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_percpu: -- cgit v1.2.3-59-g8ed1b From f365c6cc85b1d2348d73bf327258874fcc7ac161 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 1 Jan 2019 21:33:11 +0800 Subject: f2fs: change error code to -ENOMEM from -EINVAL The error case of failing allocating memory should return -ENOMEM. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 14f033e1ab42..bc32a1035f65 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -269,7 +269,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, if (!qname) { f2fs_msg(sb, KERN_ERR, "Not enough memory for storing quotafile name"); - return -EINVAL; + return -ENOMEM; } if (F2FS_OPTION(sbi).s_qf_names[qtype]) { if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) -- cgit v1.2.3-59-g8ed1b From 8e11403876b1e29d290ea42bb20bf45ef927ef6e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 4 Jan 2019 01:38:29 +0000 Subject: f2fs: remove set but not used variable 'err' Fixes gcc '-Wunused-but-set-variable' warning: fs/f2fs/data.c: In function 'f2fs_dio_submit_bio': fs/f2fs/data.c:2585:6: warning: variable 'err' set but not used [-Wunused-but-set-variable] Signed-off-by: YueHaibing Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f91d8630c9a2..4e402add2e60 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2582,14 +2582,11 @@ static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode, { struct f2fs_private_dio *dio; bool write = (bio_op(bio) == REQ_OP_WRITE); - int err; dio = f2fs_kzalloc(F2FS_I_SB(inode), sizeof(struct f2fs_private_dio), GFP_NOFS); - if (!dio) { - err = -ENOMEM; + if (!dio) goto out; - } dio->inode = inode; dio->orig_end_io = bio->bi_end_io; -- cgit v1.2.3-59-g8ed1b From 36c5733f9570e9052a6437012be8034085f25ab7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Jan 2019 17:39:53 +0800 Subject: f2fs: check inject_rate validity during configuring Type of inject_rate is unsigned int, let's check new value's validity during configuring. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 0575edbe3ed6..02d4012a9183 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -222,6 +222,8 @@ out: #ifdef CONFIG_F2FS_FAULT_INJECTION if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) return -EINVAL; + if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX) + return -EINVAL; #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); -- cgit v1.2.3-59-g8ed1b From 5d539245cb18afa8943cbf2be0cdc033e49f3a4a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 Jan 2019 17:19:08 -0800 Subject: f2fs: export FS_NOCOW_FL flag to user This exports pin_file status to user. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 12fabd6735dd..9286ec381453 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2301,11 +2301,12 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) #define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ +#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */ #define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define F2FS_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */ +#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */ #define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */ /* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ae2b45e75847..0d461321edfc 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1651,6 +1651,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) flags |= F2FS_ENCRYPT_FL; if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) flags |= F2FS_INLINE_DATA_FL; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + flags |= F2FS_NOCOW_FL; flags &= F2FS_FL_USER_VISIBLE; -- cgit v1.2.3-59-g8ed1b From c20e57b32d817d35271c4b205cb6ab80d8d93aeb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Jan 2019 14:26:18 +0100 Subject: f2fs: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Jaegeuk Kim Cc: Chao Yu Cc: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Greg Kroah-Hartman Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 20 +++----------------- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/super.c | 5 +---- 3 files changed, 6 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 503fde8349e6..99e9a5c37b71 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -511,30 +511,16 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) kvfree(si); } -int __init f2fs_create_root_stats(void) +void __init f2fs_create_root_stats(void) { - struct dentry *file; - f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); - if (!f2fs_debugfs_root) - return -ENOMEM; - file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, - NULL, &stat_fops); - if (!file) { - debugfs_remove(f2fs_debugfs_root); - f2fs_debugfs_root = NULL; - return -ENOMEM; - } - - return 0; + debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, + &stat_fops); } void f2fs_destroy_root_stats(void) { - if (!f2fs_debugfs_root) - return; - debugfs_remove_recursive(f2fs_debugfs_root); f2fs_debugfs_root = NULL; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9286ec381453..7df41cd1eb35 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3329,7 +3329,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); -int __init f2fs_create_root_stats(void); +void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_cp_count(si) do { } while (0) @@ -3367,7 +3367,7 @@ void f2fs_destroy_root_stats(void); static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } -static inline int __init f2fs_create_root_stats(void) { return 0; } +static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bc32a1035f65..ea514acede36 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3550,9 +3550,7 @@ static int __init init_f2fs_fs(void) err = register_filesystem(&f2fs_fs_type); if (err) goto free_shrinker; - err = f2fs_create_root_stats(); - if (err) - goto free_filesystem; + f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) goto free_root_stats; @@ -3560,7 +3558,6 @@ static int __init init_f2fs_fs(void) free_root_stats: f2fs_destroy_root_stats(); -free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); -- cgit v1.2.3-59-g8ed1b From ddf06b753a8573eb0323d91efeaffe397f7b673d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Jan 2019 10:21:24 +0800 Subject: f2fs: fix to trigger fsck if dirent.name_len is zero While traversing dirents in f2fs_fill_dentries(), if bitmap is valid, filename length should not be zero, otherwise, directory structure consistency could be corrupted, in this case, let's print related info and set SBI_NEED_FSCK to trigger fsck for repairing. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 50d0d36280fa..926166528cd4 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -800,6 +800,10 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (de->name_len == 0) { bit_pos++; ctx->pos = start_pos + bit_pos; + printk_ratelimited( + "%s, invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, le32_to_cpu(de->ino)); + set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } -- cgit v1.2.3-59-g8ed1b From 720db068634c91553a8e1d9a0fcd8c7050e06d2b Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 7 Jan 2019 15:02:34 +0800 Subject: f2fs: check if file namelen exceeds max value Dentry bitmap is not enough to detect incorrect dentries. So this patch also checks the namelen value of a dentry. Signed-off-by: Gong Chen Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 926166528cd4..ba7535399d95 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -814,7 +814,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, /* check memory boundary before moving forward */ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - if (unlikely(bit_pos > d->max)) { + if (unlikely(bit_pos > d->max || + le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) { f2fs_msg(sbi->sb, KERN_WARNING, "%s: corrupted namelen=%d, run fsck to fix.", __func__, le16_to_cpu(de->name_len)); -- cgit v1.2.3-59-g8ed1b From 2f84babfe5eb270e13d54afebc26b6d19f49cba2 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 14 Jan 2019 22:05:14 +0800 Subject: f2fs: add brackets for macros Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7df41cd1eb35..f71b44f21ffb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -254,7 +254,7 @@ struct discard_entry { /* max discard pend list number */ #define MAX_PLIST_NUM 512 #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ - (MAX_PLIST_NUM - 1) : (blk_num - 1)) + (MAX_PLIST_NUM - 1) : ((blk_num) - 1)) enum { D_PREP, /* initial */ @@ -2763,9 +2763,9 @@ static inline int get_inline_xattr_addrs(struct inode *inode) #define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr)) #define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \ - ((offsetof(typeof(*f2fs_inode), field) + \ + ((offsetof(typeof(*(f2fs_inode)), field) + \ sizeof((f2fs_inode)->field)) \ - <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \ + <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) { @@ -2794,8 +2794,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) -#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ - (!is_read_io(fio->op) || fio->is_meta)) +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \ + (!is_read_io((fio)->op) || (fio)->is_meta)) bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); -- cgit v1.2.3-59-g8ed1b From ac92985864e187a1735502f6a02f54eaa655b2aa Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 15 Jan 2019 20:02:15 +0000 Subject: f2fs: UBSAN: set boolean value iostat_enable correctly When setting /sys/fs/f2fs//iostat_enable with non-bool value, UBSAN reports the following warning. [ 7562.295484] ================================================================================ [ 7562.296531] UBSAN: Undefined behaviour in fs/f2fs/f2fs.h:2776:10 [ 7562.297651] load of value 64 is not a valid value for type '_Bool' [ 7562.298642] CPU: 1 PID: 7487 Comm: dd Not tainted 4.20.0-rc4+ #79 [ 7562.298653] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 7562.298662] Call Trace: [ 7562.298760] dump_stack+0x46/0x5b [ 7562.298811] ubsan_epilogue+0x9/0x40 [ 7562.298830] __ubsan_handle_load_invalid_value+0x72/0x90 [ 7562.298863] f2fs_file_write_iter+0x29f/0x3f0 [ 7562.298905] __vfs_write+0x115/0x160 [ 7562.298922] vfs_write+0xa7/0x190 [ 7562.298934] ksys_write+0x50/0xc0 [ 7562.298973] do_syscall_64+0x4a/0xe0 [ 7562.298992] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 7562.299001] RIP: 0033:0x7fa45ec19c00 [ 7562.299004] Code: 73 01 c3 48 8b 0d 88 92 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d dd eb 2c 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ce 8f 01 00 48 89 04 24 [ 7562.299044] RSP: 002b:00007ffca52b49e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 7562.299052] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa45ec19c00 [ 7562.299059] RDX: 0000000000000400 RSI: 000000000093f000 RDI: 0000000000000001 [ 7562.299065] RBP: 000000000093f000 R08: 0000000000000004 R09: 0000000000000000 [ 7562.299071] R10: 00007ffca52b47b0 R11: 0000000000000246 R12: 0000000000000400 [ 7562.299077] R13: 000000000093f000 R14: 000000000093f400 R15: 0000000000000000 [ 7562.299091] ================================================================================ So, if iostat_enable is enabled, set its value as true. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 02d4012a9183..5b83e1d66cb3 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -280,10 +280,16 @@ out: return count; } - *ui = t; - if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) - f2fs_reset_iostat(sbi); + if (!strcmp(a->attr.name, "iostat_enable")) { + sbi->iostat_enable = !!t; + if (!sbi->iostat_enable) + f2fs_reset_iostat(sbi); + return count; + } + + *ui = (unsigned int)t; + return count; } -- cgit v1.2.3-59-g8ed1b From f9aa52a8cbe09fe25244d59c29660bbe635df613 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Jan 2019 09:51:28 +0800 Subject: f2fs: fix to initialize variable to avoid UBSAN/smatch warning As Dan Carpenter as below: The patch df634f444ee9: "f2fs: use rb_*_cached friends" from Oct 4, 2018, leads to the following static checker warning: fs/f2fs/extent_cache.c:606 f2fs_update_extent_tree_range() error: uninitialized symbol 'leftmost'. And also Eric Biggers, and Kyungtae Kim reported, there is an UBSAN warning described as below: We report a bug in linux-4.20.2: "UBSAN: Undefined behaviour in fs/f2fs/extent_cache.c" kernel config: https://kt0755.github.io/etc/config_v4.20_stable repro: https://kt0755.github.io/etc/repro.4a3e7.c (f2fs is mounted on /mnt/f2fs/) This arose in f2fs_update_extent_tree_range (fs/f2fs/extent_cache.c:605). It seems that, for some reason, its last argument became "24" although that was supposed to be bool type. ========================================= UBSAN: Undefined behaviour in fs/f2fs/extent_cache.c:605:4 load of value 24 is not a valid value for type '_Bool' CPU: 0 PID: 6774 Comm: syz-executor5 Not tainted 4.20.2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xb1/0x118 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x94 lib/ubsan.c:159 __ubsan_handle_load_invalid_value+0x17a/0x1be lib/ubsan.c:457 f2fs_update_extent_tree_range+0x1d4a/0x1d50 fs/f2fs/extent_cache.c:605 f2fs_update_extent_cache+0x2b6/0x350 fs/f2fs/extent_cache.c:804 f2fs_update_data_blkaddr+0x61/0x70 fs/f2fs/data.c:656 f2fs_outplace_write_data+0x1d6/0x4b0 fs/f2fs/segment.c:3140 f2fs_convert_inline_page+0x86d/0x2060 fs/f2fs/inline.c:163 f2fs_convert_inline_inode+0x6b5/0xad0 fs/f2fs/inline.c:208 f2fs_preallocate_blocks+0x78b/0xb00 fs/f2fs/data.c:982 f2fs_file_write_iter+0x31b/0xf40 fs/f2fs/file.c:3062 call_write_iter include/linux/fs.h:1857 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x538/0x6e0 fs/read_write.c:487 vfs_write+0x1b3/0x520 fs/read_write.c:549 ksys_write+0xde/0x1c0 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x7e/0xc0 fs/read_write.c:607 do_syscall_64+0xbe/0x4f0 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4497b9 Code: e8 8c 9f 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 9b 6b fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f1ea15edc68 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007f1ea15ee6cc RCX: 00000000004497b9 RDX: 0000000000001000 RSI: 0000000020000140 RDI: 0000000000000013 RBP: 000000000071bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000bb50 R14: 00000000006f4bf0 R15: 00007f1ea15ee700 ========================================= As I checked, this uninitialized variable won't cause extent cache corruption, but in order to avoid such kind of warning of both UBSAN and smatch, fix to initialize related variable. Reported-by: Dan Carpenter Reported-by: Eric Biggers Reported-by: Kyungtae Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 1cb0fcc67d2d..caf77fe8ac07 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -506,7 +506,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, unsigned int end = fofs + len; unsigned int pos = (unsigned int)fofs; bool updated = false; - bool leftmost; + bool leftmost = false; if (!et) return; -- cgit v1.2.3-59-g8ed1b From 2010987365ab3b381df20bb6f175b59551cfe67d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 10 Jan 2019 16:40:12 +0800 Subject: f2fs: fix to set sbi dirty correctly In order to record direct IO count, we add two additional type in enum count_type: F2FS_DIO_{WRITE,READ}, but those IO won't dirty filesystem metadata, so we don't need to set filesystem dirty in inc_page_count(), fix it. Fixes: 02b16d0a34a1 ("f2fs: add to account direct IO") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f71b44f21ffb..0f564883e078 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1799,13 +1799,12 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { atomic_inc(&sbi->nr_pages[count_type]); - if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA || - count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE || - count_type == F2FS_RD_META) - return; - - set_sbi_flag(sbi, SBI_IS_DIRTY); + if (count_type == F2FS_DIRTY_DENTS || + count_type == F2FS_DIRTY_NODES || + count_type == F2FS_DIRTY_META || + count_type == F2FS_DIRTY_QDATA || + count_type == F2FS_DIRTY_IMETA) + set_sbi_flag(sbi, SBI_IS_DIRTY); } static inline void inode_inc_dirty_pages(struct inode *inode) -- cgit v1.2.3-59-g8ed1b From 03f2c02d8be76d2080c174cd65d2dfcbbf783708 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 14 Jan 2019 10:42:11 -0800 Subject: f2fs: run discard jobs when put_super When we umount f2fs, we need to avoid long delay due to discard commands, which is actually taking tens of seconds, if storage is very slow on UNMAP. So, this patch introduces timeout-based work on it. By default, let me give 5 seconds for discard. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++ fs/f2fs/f2fs.h | 5 ++++- fs/f2fs/segment.c | 11 ++++++++++- fs/f2fs/super.c | 4 +++- fs/f2fs/sysfs.c | 3 +++ 5 files changed, 27 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index a7ce33199457..91822ce25831 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -86,6 +86,13 @@ Description: The unit size is one block, now only support configuring in range of [1, 512]. +What: /sys/fs/f2fs//umount_discard_timeout +Date: January 2019 +Contact: "Jaegeuk Kim" +Description: + Set timeout to issue discard commands during umount. + Default: 5 secs + What: /sys/fs/f2fs//max_victim_search Date: January 2014 Contact: "Jaegeuk Kim" diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0f564883e078..6b6ec5600089 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -191,6 +191,7 @@ enum { #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ #define DEF_DISABLE_INTERVAL 5 /* 5 secs */ +#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */ struct cp_control { int reason; @@ -310,6 +311,7 @@ struct discard_policy { bool sync; /* submit discard with REQ_SYNC flag */ bool ordered; /* issue discard by lba order */ unsigned int granularity; /* discard granularity */ + int timeout; /* discard timeout for put_super */ }; struct discard_cmd_control { @@ -1110,6 +1112,7 @@ enum { DISCARD_TIME, GC_TIME, DISABLE_TIME, + UMOUNT_DISCARD_TIMEOUT, MAX_TIME, }; @@ -3006,7 +3009,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9b79056d705d..5b2b9be6f28d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1037,6 +1037,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->timeout = 0; if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; @@ -1424,7 +1425,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, int i, issued = 0; bool io_interrupted = false; + if (dpolicy->timeout != 0) + f2fs_update_time(sbi, dpolicy->timeout); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (dpolicy->timeout != 0 && + f2fs_time_over(sbi, dpolicy->timeout)) + break; + if (i + 1 < dpolicy->granularity) break; @@ -1611,7 +1619,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super */ -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_policy dpolicy; @@ -1619,6 +1627,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); + dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT; __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ea514acede36..24efd76ca151 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1048,7 +1048,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - dropped = f2fs_wait_discard_bios(sbi); + dropped = f2fs_issue_discard_timeout(sbi); if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && !sbi->discard_blks && !dropped) { @@ -2706,6 +2706,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL; + sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = + DEF_UMOUNT_DISCARD_TIMEOUT; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 5b83e1d66cb3..18c627e8fc90 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -426,6 +426,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, interval_time[DISCARD_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, + umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); @@ -483,6 +485,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(idle_interval), ATTR_LIST(discard_idle_interval), ATTR_LIST(gc_idle_interval), + ATTR_LIST(umount_discard_timeout), ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), -- cgit v1.2.3-59-g8ed1b From db610a640eeeb268c36a4558414f28e1c269433e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Jan 2019 17:48:38 -0800 Subject: f2fs: add quick mode of checkpoint=disable for QA This mode returns mount() quickly with EAGAIN. We can trigger this by shutdown(F2FS_GOING_DOWN_NEED_FSCK). Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 6 +++--- fs/f2fs/segment.c | 3 +++ fs/f2fs/super.c | 5 +++++ include/linux/f2fs_fs.h | 1 + 6 files changed, 19 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f955cd3e0677..622dca707752 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1259,6 +1259,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK)) + __set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); else diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6b6ec5600089..fe95abb05d40 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -191,6 +191,7 @@ enum { #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ #define DEF_DISABLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */ #define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */ struct cp_control { @@ -1101,6 +1102,7 @@ enum { SBI_IS_SHUTDOWN, /* shutdown by ioctl */ SBI_IS_RECOVERED, /* recovered orphan/data */ SBI_CP_DISABLED, /* CP was disabled last mount */ + SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */ SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0d461321edfc..fe6f92fbba38 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1972,11 +1972,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) break; case F2FS_GOING_DOWN_NEED_FSCK: set_sbi_flag(sbi, SBI_NEED_FSCK); + set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + set_sbi_flag(sbi, SBI_IS_DIRTY); /* do checkpoint only */ ret = f2fs_sync_fs(sb, 1); - if (ret) - goto out; - break; + goto out; default: ret = -EINVAL; goto out; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5b2b9be6f28d..342b720fb4db 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -868,6 +868,9 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) if (holes[DATA] > ovp || holes[NODE] > ovp) return -EAGAIN; + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && + dirty_segments(sbi) > overprovision_segments(sbi)) + return -EAGAIN; return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 24efd76ca151..5e1f8573a17f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3205,6 +3205,10 @@ try_onemore: if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) { + set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL; + } /* Initialize device list */ err = f2fs_scan_devices(sbi); @@ -3392,6 +3396,7 @@ skip_recovery: cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); + clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); return 0; free_meta: diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d7711048ef93..d6befe1f9dc7 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -116,6 +116,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_DISABLED_QUICK_FLAG 0x00002000 #define CP_DISABLED_FLAG 0x00001000 #define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 #define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 -- cgit v1.2.3-59-g8ed1b From b86232536c3ec068b0960ffc62c61a872412b2b7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jan 2019 09:12:13 -0800 Subject: f2fs: try to keep CP_TRIMMED_FLAG after successful umount If every discard were issued successfully, we can avoid further discard. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 342b720fb4db..fdd8cd21522f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1063,6 +1063,8 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_UMOUNT) { dpolicy->max_requests = UINT_MAX; dpolicy->io_aware = false; + /* we need to issue all to keep CP_TRIMMED_FLAG */ + dpolicy->granularity = 1; } } -- cgit v1.2.3-59-g8ed1b From b460866d27089ffaf915a5a3340b4d8c9f079d33 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jan 2019 10:26:39 -0800 Subject: f2fs: don't wake up too frequently, if there is lots of IOs Otherwise, it wakes up discard thread which will sleep again by busy IOs in a loop. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index a77f76f528b6..5c7ed0442d6e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -865,7 +865,7 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) } } mutex_unlock(&dcc->cmd_lock); - if (!wakeup) + if (!wakeup || !is_idle(sbi, DISCARD_TIME)) return; wake_up: dcc->discard_wake = 1; -- cgit v1.2.3-59-g8ed1b From 11ac8ef8d8c575409dcce3bcf1dda3c9f8c7f5e9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jan 2019 12:05:25 -0800 Subject: f2fs: avoid null pointer exception in dcc_info If dcc_info is not set yet, we can get null pointer panic. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fe95abb05d40..8c928cd72b61 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2161,10 +2161,17 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || get_pages(sbi, F2FS_WB_CP_DATA) || get_pages(sbi, F2FS_DIO_READ) || - get_pages(sbi, F2FS_DIO_WRITE) || - atomic_read(&SM_I(sbi)->dcc_info->queued_discard) || - atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + get_pages(sbi, F2FS_DIO_WRITE)) return false; + + if (SM_I(sbi) && SM_I(sbi)->dcc_info && + atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) + return false; + + if (SM_I(sbi) && SM_I(sbi)->fcc_info && + atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + return false; + return f2fs_time_over(sbi, type); } -- cgit v1.2.3-59-g8ed1b From 0e0667b625cf64243df83171bff61f9d350b9ca5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 27 Jan 2019 17:59:53 -0800 Subject: f2fs: flush quota blocks after turnning it off After quota_off, we'll get some dirty blocks. If put_super don't have a chance to flush them by checkpoint, it causes NULL pointer exception in end_io after iput(node_inode). (e.g., by checkpoint=disable) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5e1f8573a17f..7694cb350734 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2026,6 +2026,12 @@ void f2fs_quota_off_umount(struct super_block *sb) set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } + /* + * In case of checkpoint=disable, we must flush quota blocks. + * This can cause NULL exception for node_inode in end_io, since + * put_super already dropped it. + */ + sync_filesystem(sb); } static void f2fs_truncate_quota_inode_pages(struct super_block *sb) -- cgit v1.2.3-59-g8ed1b From 812a95977fd2f0d1f220c716a98a7f22e22f488d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Jan 2019 14:04:33 -0800 Subject: f2fs: sync filesystem after roll-forward recovery Some works after roll-forward recovery can get an error which will release all the data structures. Let's flush them in order to make it clean. One possible corruption came from: [ 90.400500] list_del corruption. prev->next should be ffffffed1f566208, but was (null) [ 90.675349] Call trace: [ 90.677869] __list_del_entry_valid+0x94/0xb4 [ 90.682351] remove_dirty_inode+0xac/0x114 [ 90.686563] __f2fs_write_data_pages+0x6a8/0x6c8 [ 90.691302] f2fs_write_data_pages+0x40/0x4c [ 90.695695] do_writepages+0x80/0xf0 [ 90.699372] __writeback_single_inode+0xdc/0x4ac [ 90.704113] writeback_sb_inodes+0x280/0x440 [ 90.708501] wb_writeback+0x1b8/0x3d0 [ 90.712267] wb_workfn+0x1a8/0x4d4 [ 90.715765] process_one_work+0x1c0/0x3d4 [ 90.719883] worker_thread+0x224/0x344 [ 90.723739] kthread+0x120/0x130 [ 90.727055] ret_from_fork+0x10/0x18 Reported-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++-- fs/f2fs/node.c | 4 +++- fs/f2fs/super.c | 44 +++++++++++++++++++++++++++++++++----------- 3 files changed, 39 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 622dca707752..03fea4efd64b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* collect a number of dirty meta pages and write together */ - if (wbc->for_kupdate || - get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_META) < + nr_pages_to_skip(sbi, META)) goto skip_write; /* if locked failed, cp will flush dirty pages instead */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4f450e573312..f6ff84e29749 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, f2fs_balance_fs_bg(sbi); /* collect a number of dirty node pages and write together */ - if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_NODES) < + nr_pages_to_skip(sbi, NODE)) goto skip_write; if (wbc->sync_mode == WB_SYNC_ALL) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7694cb350734..384d1c248857 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1458,9 +1458,16 @@ static int f2fs_enable_quotas(struct super_block *sb); static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) { + unsigned int s_flags = sbi->sb->s_flags; struct cp_control cpc; - int err; + int err = 0; + int ret; + if (s_flags & SB_RDONLY) { + f2fs_msg(sbi->sb, KERN_ERR, + "checkpoint=disable on readonly fs"); + return -EINVAL; + } sbi->sb->s_flags |= SB_ACTIVE; f2fs_update_time(sbi, DISABLE_TIME); @@ -1468,18 +1475,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) while (!f2fs_time_over(sbi, DISABLE_TIME)) { mutex_lock(&sbi->gc_mutex); err = f2fs_gc(sbi, true, false, NULL_SEGNO); - if (err == -ENODATA) + if (err == -ENODATA) { + err = 0; break; + } if (err && err != -EAGAIN) - return err; + break; } - err = sync_filesystem(sbi->sb); - if (err) - return err; + ret = sync_filesystem(sbi->sb); + if (ret || err) { + err = ret ? ret: err; + goto restore_flag; + } - if (f2fs_disable_cp_again(sbi)) - return -EAGAIN; + if (f2fs_disable_cp_again(sbi)) { + err = -EAGAIN; + goto restore_flag; + } mutex_lock(&sbi->gc_mutex); cpc.reason = CP_PAUSE; @@ -1488,7 +1501,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) sbi->unusable_block_count = 0; mutex_unlock(&sbi->gc_mutex); - return 0; +restore_flag: + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + return err; } static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) @@ -3369,7 +3384,7 @@ skip_recovery: if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); if (err) - goto free_meta; + goto sync_free_meta; } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { f2fs_enable_checkpoint(sbi); } @@ -3382,7 +3397,7 @@ skip_recovery: /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) - goto free_meta; + goto sync_free_meta; } kvfree(options); @@ -3405,6 +3420,11 @@ skip_recovery: clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); return 0; +sync_free_meta: + /* safe to flush all the data */ + sync_filesystem(sbi->sb); + retry = false; + free_meta: #ifdef CONFIG_QUOTA f2fs_truncate_quota_inode_pages(sb); @@ -3418,6 +3438,8 @@ free_meta: * falls into an infinite loop in f2fs_sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); + /* evict some inodes being cached by GC */ + evict_inodes(sb); f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); -- cgit v1.2.3-59-g8ed1b From eecfa42716e63a99c4d1053b137aa10a3b94c0da Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 25 Jan 2019 20:11:39 +0800 Subject: f2fs: use xattr_prefix to wrap up Let's use xattr_prefix instead of open code. No logic changes. Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 18d5ffbc5e8c..fa620d31ea5f 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -538,7 +538,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (!handler || (handler->list && !handler->list(dentry))) continue; - prefix = handler->prefix ?: handler->name; + prefix = xattr_prefix(handler); prefix_len = strlen(prefix); size = prefix_len + entry->e_name_len + 1; if (buffer) { -- cgit v1.2.3-59-g8ed1b From a0770e13c8da83bdb64738c0209ab02dd3cfff8b Mon Sep 17 00:00:00 2001 From: zhengliang Date: Mon, 4 Mar 2019 09:32:25 +0800 Subject: f2fs: fix to data block override node segment by mistake v4: Rearrange the previous three versions. The following scenario could lead to data block override by mistake. TASK A | TASK kworker | TASK B | TASK C | | | open | | | write | | | close | | | | f2fs_write_data_pages | | | f2fs_write_cache_pages | | | f2fs_outplace_write_data | | | f2fs_allocate_data_block (get block in seg S, | | | S is full, and only | | | have this valid data | | | block) | | | allocate_segment | | | locate_dirty_segment (mark S as PRE) | | | f2fs_submit_page_write (submit but is not | | | written on dev) | | unlink | | | iput_final | | | f2fs_drop_inode | | | f2fs_truncate | | | (not evict) | | | | | write_checkpoint | | | flush merged bio but not wait file data writeback | | | set_prefree_as_free (mark S as FREE) | | | | update NODE/DATA | | | allocate_segment (select S) | writeback done | | So we need to guarantee io complete before truncate inode in f2fs_drop_inode. Reviewed-by: Chao Yu Signed-off-by: Zheng Liang Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 384d1c248857..e382be2f10f9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -915,6 +915,10 @@ static int f2fs_drop_inode(struct inode *inode) sb_start_intwrite(inode->i_sb); f2fs_i_size_write(inode, 0); + f2fs_submit_merged_write_cond(F2FS_I_SB(inode), + inode, NULL, 0, DATA); + truncate_inode_pages_final(inode->i_mapping); + if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode); -- cgit v1.2.3-59-g8ed1b From 025cdb166c1e6a142fdec46bcd0873e00f0be0fd Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 23 Jan 2019 15:49:44 +0800 Subject: f2fs: jump to label 'free_node_inode' when failing from d_make_root() When sb->s_root is NULL dput() will do nothing, so jump to label 'free_node_inode' instead of lable 'free_root_inode' when failing from d_make_root(). Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e382be2f10f9..83bbe7424fc1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3322,7 +3322,7 @@ try_onemore: sb->s_root = d_make_root(root); /* allocate root dentry */ if (!sb->s_root) { err = -ENOMEM; - goto free_root_inode; + goto free_node_inode; } err = f2fs_register_sysfs(sbi); -- cgit v1.2.3-59-g8ed1b From c42d28ce3e16dbd88e575c0acfda96d221dae2c9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 2 Feb 2019 17:33:01 +0800 Subject: f2fs: fix potential data inconsistence of checkpoint Previously, we changed lock from cp_rwsem to node_change, it solved the deadlock issue which was caused by below race condition: Thread A Thread B - f2fs_setattr - f2fs_lock_op -- read_lock - dquot_transfer - __dquot_transfer - dquot_acquire - commit_dqblk - f2fs_quota_write - f2fs_write_begin - f2fs_write_failed - write_checkpoint - block_operations - f2fs_lock_all -- write_lock - f2fs_truncate_blocks - f2fs_lock_op -- read_lock But it breaks the sematics of cp_rwsem, in other callers like: - f2fs_file_write_iter -> f2fs_write_begin -> f2fs_write_failed - f2fs_direct_IO -> f2fs_write_failed We allow to truncate dnode w/o cp_rwsem held, result in incorrect sit bitmap update, which can cause further data corruption. So this patch reverts previous fix implementation, and try to fix deadlock by skipping calling f2fs_truncate_blocks() in f2fs_write_failed() only for quota file, and keep the preallocated data/node in the tail of quota file, we can expecte that the preallocated space can be used to store quota info latter soon. Fixes: af033b2aa8a8 ("f2fs: guarantee journalled quota data by checkpoint") Signed-off-by: Gao Xiang Signed-off-by: Sheng Yong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/file.c | 14 ++++++-------- fs/f2fs/inline.c | 4 ++-- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4e402add2e60..e099babf85bd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2312,7 +2312,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); - f2fs_truncate_blocks(inode, i_size, true, true); + if (!IS_NOQUOTA(inode)) + f2fs_truncate_blocks(inode, i_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8c928cd72b61..4665bff1bf55 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2843,8 +2843,7 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void f2fs_truncate_data_blocks(struct dnode_of_data *dn); -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, - bool buf_write); +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); int f2fs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fe6f92fbba38..b8f5d1208619 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -589,8 +589,7 @@ truncate_out: return 0; } -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, - bool buf_write) +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -598,7 +597,6 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, int count = 0, err = 0; struct page *ipage; bool truncate_page = false; - int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; trace_f2fs_truncate_blocks_enter(inode, from); @@ -608,7 +606,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, goto free_partial; if (lock) - __do_map_lock(sbi, flag, true); + f2fs_lock_op(sbi); ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { @@ -646,7 +644,7 @@ free_next: err = f2fs_truncate_inode_blocks(inode, free_from); out: if (lock) - __do_map_lock(sbi, flag, false); + f2fs_unlock_op(sbi); free_partial: /* lastly zero out the first data page */ if (!err) @@ -681,7 +679,7 @@ int f2fs_truncate(struct inode *inode) return err; } - err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); + err = f2fs_truncate_blocks(inode, i_size_read(inode), true); if (err) return err; @@ -1262,7 +1260,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) new_size = i_size_read(inode) - len; truncate_pagecache(inode, new_size); - ret = f2fs_truncate_blocks(inode, new_size, true, false); + ret = f2fs_truncate_blocks(inode, new_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); @@ -1447,7 +1445,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); down_write(&F2FS_I(inode)->i_mmap_sem); - ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); + ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) return ret; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d636cbcf68f2..a1381d05956b 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -298,7 +298,7 @@ process_inline: clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (f2fs_truncate_blocks(inode, 0, false, false)) + if (f2fs_truncate_blocks(inode, 0, false)) return false; goto process_inline; } @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) return 0; punch_dentry_pages: truncate_inode_pages(&dir->i_data, 0); - f2fs_truncate_blocks(dir, 0, false, false); + f2fs_truncate_blocks(dir, 0, false); f2fs_remove_dirty_inode(dir); return err; } -- cgit v1.2.3-59-g8ed1b From 9083977dabf3833298ddcd40dee28687f1e6b483 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 4 Feb 2019 13:36:53 +0530 Subject: f2fs: do not use mutex lock in atomic context Fix below warning coming because of using mutex lock in atomic context. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:98 in_atomic(): 1, irqs_disabled(): 0, pid: 585, name: sh Preemption disabled at: __radix_tree_preload+0x28/0x130 Call trace: dump_backtrace+0x0/0x2b4 show_stack+0x20/0x28 dump_stack+0xa8/0xe0 ___might_sleep+0x144/0x194 __might_sleep+0x58/0x8c mutex_lock+0x2c/0x48 f2fs_trace_pid+0x88/0x14c f2fs_set_node_page_dirty+0xd0/0x184 Do not use f2fs_radix_tree_insert() to avoid doing cond_resched() with spin_lock() acquired. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/trace.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index ce2a5eb210b6..d0ab533a9ce8 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -14,7 +14,7 @@ #include "trace.h" static RADIX_TREE(pids, GFP_ATOMIC); -static struct mutex pids_lock; +static spinlock_t pids_lock; static struct last_io_info last_io; static inline void __print_last_io(void) @@ -58,23 +58,29 @@ void f2fs_trace_pid(struct page *page) set_page_private(page, (unsigned long)pid); +retry: if (radix_tree_preload(GFP_NOFS)) return; - mutex_lock(&pids_lock); + spin_lock(&pids_lock); p = radix_tree_lookup(&pids, pid); if (p == current) goto out; if (p) radix_tree_delete(&pids, pid); - f2fs_radix_tree_insert(&pids, pid, current); + if (radix_tree_insert(&pids, pid, current)) { + spin_unlock(&pids_lock); + radix_tree_preload_end(); + cond_resched(); + goto retry; + } trace_printk("%3x:%3x %4x %-16s\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), pid, current->comm); out: - mutex_unlock(&pids_lock); + spin_unlock(&pids_lock); radix_tree_preload_end(); } @@ -119,7 +125,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) void f2fs_build_trace_ios(void) { - mutex_init(&pids_lock); + spin_lock_init(&pids_lock); } #define PIDVEC_SIZE 128 @@ -147,7 +153,7 @@ void f2fs_destroy_trace_ios(void) pid_t next_pid = 0; unsigned int found; - mutex_lock(&pids_lock); + spin_lock(&pids_lock); while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { unsigned idx; @@ -155,5 +161,5 @@ void f2fs_destroy_trace_ios(void) for (idx = 0; idx < found; idx++) radix_tree_delete(&pids, pid[idx]); } - mutex_unlock(&pids_lock); + spin_unlock(&pids_lock); } -- cgit v1.2.3-59-g8ed1b From 500e0b28ecd3c5aade98f3c3a339d18dcb166bb6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 15 Feb 2019 00:08:25 +0800 Subject: f2fs: fix to check inline_xattr_size boundary correctly We use below condition to check inline_xattr_size boundary: if (!F2FS_OPTION(sbi).inline_xattr_size || F2FS_OPTION(sbi).inline_xattr_size >= DEF_ADDRS_PER_INODE - F2FS_TOTAL_EXTRA_ATTR_SIZE - DEF_INLINE_RESERVED_SIZE - DEF_MIN_INLINE_SIZE) There is there problems in that check: - we should allow inline_xattr_size equaling to min size of inline {data,dentry} area. - F2FS_TOTAL_EXTRA_ATTR_SIZE and inline_xattr_size are based on different size unit, previous one is 4 bytes, latter one is 1 bytes. - DEF_MIN_INLINE_SIZE only indicate min size of inline data area, however, we need to consider min size of inline dentry area as well, minimal inline dentry should at least contain two entries: '.' and '..', so that min inline_dentry size is 40 bytes. .bitmap 1 * 1 = 1 .reserved 1 * 1 = 1 .dentry 11 * 2 = 22 .filename 8 * 2 = 16 total 40 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/super.c | 13 +++++++------ include/linux/f2fs_fs.h | 13 +++++++------ 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4665bff1bf55..f1f0d2810852 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -459,7 +459,6 @@ struct f2fs_flush_device { /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 -#define DEF_MIN_INLINE_SIZE 1 static inline int get_extra_isize(struct inode *inode); static inline int get_inline_xattr_addrs(struct inode *inode); #define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 83bbe7424fc1..be8be445c6ed 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -834,12 +834,13 @@ static int parse_options(struct super_block *sb, char *options) "set with inline_xattr option"); return -EINVAL; } - if (!F2FS_OPTION(sbi).inline_xattr_size || - F2FS_OPTION(sbi).inline_xattr_size >= - DEF_ADDRS_PER_INODE - - F2FS_TOTAL_EXTRA_ATTR_SIZE - - DEF_INLINE_RESERVED_SIZE - - DEF_MIN_INLINE_SIZE) { + if (F2FS_OPTION(sbi).inline_xattr_size < + sizeof(struct f2fs_xattr_header) / sizeof(__le32) || + F2FS_OPTION(sbi).inline_xattr_size > + DEF_ADDRS_PER_INODE - + F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - + DEF_INLINE_RESERVED_SIZE - + MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) { f2fs_msg(sb, KERN_ERR, "inline xattr size is out of range"); return -EINVAL; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8d57aaee8166..666db8eb71e0 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -490,12 +490,12 @@ typedef __le32 f2fs_hash_t; /* * space utilization of regular dentry and inline dentry (w/o extra reservation) - * regular dentry inline dentry - * bitmap 1 * 27 = 27 1 * 23 = 23 - * reserved 1 * 3 = 3 1 * 7 = 7 - * dentry 11 * 214 = 2354 11 * 182 = 2002 - * filename 8 * 214 = 1712 8 * 182 = 1456 - * total 4096 3488 + * regular dentry inline dentry (def) inline dentry (min) + * bitmap 1 * 27 = 27 1 * 23 = 23 1 * 1 = 1 + * reserved 1 * 3 = 3 1 * 7 = 7 1 * 1 = 1 + * dentry 11 * 214 = 2354 11 * 182 = 2002 11 * 2 = 22 + * filename 8 * 214 = 1712 8 * 182 = 1456 8 * 2 = 16 + * total 4096 3488 40 * * Note: there are more reserved space in inline dentry than in regular * dentry, when converting inline dentry we should handle this carefully. @@ -507,6 +507,7 @@ typedef __le32 f2fs_hash_t; #define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) +#define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */ /* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ struct f2fs_dir_entry { -- cgit v1.2.3-59-g8ed1b From 6d52e135c8a8b2063e0a9fe1f12c06e9208a941c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 15 Feb 2019 00:16:15 +0800 Subject: f2fs: don't allow negative ->write_io_size_bits As Dan reported: "We put an upper bound on ->write_io_size_bits but we don't have a lower bound." So let's add lower bound check for ->write_io_size_bits in parse_options(). [We don't allow configuring ->write_io_size_bits to zero, since at least we need to fill one dummy page for aligned IO.] Reported-by: Dan Carpenter Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index be8be445c6ed..230845221c74 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -586,7 +586,7 @@ static int parse_options(struct super_block *sb, char *options) case Opt_io_size_bits: if (args->from && match_int(args, &arg)) return -EINVAL; - if (arg > __ilog2_u32(BIO_MAX_PAGES)) { + if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { f2fs_msg(sb, KERN_WARNING, "Not support %d, larger than %d", 1 << arg, BIO_MAX_PAGES); -- cgit v1.2.3-59-g8ed1b From fb40d618b03978b7cc5820697894461f4a2af98b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 5 Feb 2019 07:59:57 -0800 Subject: f2fs: don't clear CP_QUOTA_NEED_FSCK_FLAG If we met this once, let fsck.f2fs clear this only. Note that, this addresses all the subtle fault injection test. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 03fea4efd64b..c65a1e8e1e95 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1267,8 +1267,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); - else - __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + /* + * TODO: we count on fsck.f2fs to clear this flag until we figure out + * missing cases which clear it incorrectly. + */ if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); -- cgit v1.2.3-59-g8ed1b From 0af725fcb77a148b7da249fa01f98ceeaa149eec Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 Feb 2019 19:04:38 -0800 Subject: f2fs: fix wrong #endif We have to cover whole headerfile with last #endif. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f1f0d2810852..19cbf17550c7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3626,8 +3626,6 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, #define f2fs_build_fault_attr(sbi, rate, type) do { } while (0) #endif -#endif - static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) { #ifdef CONFIG_QUOTA @@ -3640,3 +3638,5 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) #endif return false; } + +#endif -- cgit v1.2.3-59-g8ed1b From bc73a4b2414f2914fa895747166312b1527a97bb Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 19 Feb 2019 10:31:52 +0800 Subject: f2fs: silence VM_WARN_ON_ONCE in mempool_alloc Note that __GFP_ZERO is not supported for mempool_alloc, which also documented in the mempool_alloc comments. Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e099babf85bd..65c9586b2952 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -299,9 +299,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, for (; start < F2FS_IO_SIZE(sbi); start++) { struct page *page = mempool_alloc(sbi->write_io_dummy, - GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL); + GFP_NOIO | __GFP_NOFAIL); f2fs_bug_on(sbi, !page); + zero_user_segment(page, 0, PAGE_SIZE); SetPagePrivate(page); set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE); lock_page(page); -- cgit v1.2.3-59-g8ed1b From aa2c8c43e4a5c242f5b0331c8b7a941b85f9435a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 19 Feb 2019 16:23:53 +0800 Subject: f2fs: fix to retry fill_super only if recovery failed With current retry mechanism in f2fs_fill_super, first fill_super fails due to no memory, then second fill_super runs w/o recovery, if we succeed, we may lose fsynced data, it doesn't make sense. Let's retry fill_super only if it occurs non-ENOMEM error during recovery. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 230845221c74..45121190a2ba 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3053,10 +3053,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct f2fs_super_block *raw_super; struct inode *root; int err; - bool retry = true, need_fsck = false; + bool skip_recovery = false, need_fsck = false; char *options = NULL; int recovery, i, valid_super_block; struct curseg_info *seg_i; + int retry_cnt = 1; try_onemore: err = -EINVAL; @@ -3345,7 +3346,7 @@ try_onemore: goto free_meta; if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) - goto skip_recovery; + goto reset_checkpoint; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -3362,11 +3363,13 @@ try_onemore: if (need_fsck) set_sbi_flag(sbi, SBI_NEED_FSCK); - if (!retry) - goto skip_recovery; + if (skip_recovery) + goto reset_checkpoint; err = f2fs_recover_fsync_data(sbi, false); if (err < 0) { + if (err != -ENOMEM) + skip_recovery = true; need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%d", err); @@ -3382,7 +3385,7 @@ try_onemore: goto free_meta; } } -skip_recovery: +reset_checkpoint: /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); @@ -3428,7 +3431,7 @@ skip_recovery: sync_free_meta: /* safe to flush all the data */ sync_filesystem(sbi->sb); - retry = false; + retry_cnt = 0; free_meta: #ifdef CONFIG_QUOTA @@ -3488,8 +3491,8 @@ free_sbi: kvfree(sbi); /* give only one another chance */ - if (retry) { - retry = false; + if (retry_cnt > 0 && skip_recovery) { + retry_cnt--; shrink_dcache_sb(sb); goto try_onemore; } -- cgit v1.2.3-59-g8ed1b From dc37910d4c63296513c5795187d020ad9793822b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 19 Feb 2019 17:08:18 +0800 Subject: f2fs: make fault injection covering __submit_flush_wait() This patch changes to allow failure of f2fs_bio_alloc() in __submit_flush_wait(), which can simulate flush error in checkpoint() for covering more error paths. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fdd8cd21522f..2537893e9466 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -542,9 +542,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct block_device *bdev) { - struct bio *bio = f2fs_bio_alloc(sbi, 0, true); + struct bio *bio; int ret; + bio = f2fs_bio_alloc(sbi, 0, false); + if (!bio) + return -ENOMEM; + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; bio_set_dev(bio, bdev); ret = submit_bio_wait(bio); -- cgit v1.2.3-59-g8ed1b From 6492a335fd8084fa894beb0c4182de439a12e8d5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 21 Feb 2019 20:37:14 +0800 Subject: f2fs: fix encrypted page memory leak For IPU path of f2fs_do_write_data_page(), in its error path, we need to release encrypted page and fscrypt context, otherwise it will cause memory leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 65c9586b2952..3f3becd46362 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1861,8 +1861,13 @@ got_it: if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); err = f2fs_inplace_write_data(fio); - if (err && PageWriteback(page)) - end_page_writeback(page); + if (err) { + if (f2fs_encrypted_file(inode)) + fscrypt_pullback_bio_page(&fio->encrypted_page, + true); + if (PageWriteback(page)) + end_page_writeback(page); + } trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); return err; -- cgit v1.2.3-59-g8ed1b From e46f6bd82c831d20f9b6c149076cf5d2c443d638 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 21 Feb 2019 20:40:13 +0800 Subject: f2fs: fix to update iostat correctly in IPU path In error path of IPU, we didn't account iostat correctly, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2537893e9466..d74359325da6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3182,10 +3182,10 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); - if (!err) + if (!err) { update_device_state(fio); - - f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + } return err; } -- cgit v1.2.3-59-g8ed1b From 613f3dcdf0d8ffdb3e5d820f6d69076905efddc3 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 21 Feb 2019 12:57:35 +0800 Subject: f2fs: no need to take page lock in readdir VFS will take inode_lock for readdir, therefore no need to take page lock in readdir at all just as the majority of other generic filesystems. This patch improves concurrency since .iterate_shared was introduced to VFS years ago. Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index ba7535399d95..103f3686a045 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -896,7 +896,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - dentry_page = f2fs_get_lock_data_page(inode, n, false); + dentry_page = f2fs_find_data_page(inode, n); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); if (err == -ENOENT) { @@ -914,11 +914,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); if (err) { - f2fs_put_page(dentry_page, 1); + f2fs_put_page(dentry_page, 0); break; } - f2fs_put_page(dentry_page, 1); + f2fs_put_page(dentry_page, 0); } out_free: fscrypt_fname_free_buffer(&fstr); -- cgit v1.2.3-59-g8ed1b From 428e3bcf07696ff252b7ff3ff7711c2b9bbdb908 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 25 Feb 2019 09:46:45 -0800 Subject: f2fs: give random value to i_generation This follows to give random number to i_generation along with commit 232530680290b ("ext4: improve smp scalability for inode generation") This can be used for DUN for UFS HW encryption. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 -- fs/f2fs/namei.c | 3 ++- fs/f2fs/super.c | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 19cbf17550c7..3007759dd2dd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1242,8 +1242,6 @@ struct f2fs_sb_info { unsigned int nquota_files; /* # of quota sysfile */ - u32 s_next_generation; /* for NFS support */ - /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 62d9829f3a6a..f218d9424d8b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); F2FS_I(inode)->i_crtime = inode->i_mtime; - inode->i_generation = sbi->s_next_generation++; + inode->i_generation = prandom_u32(); if (S_ISDIR(inode->i_mode)) F2FS_I(inode)->i_current_depth = 1; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 45121190a2ba..42eb5c86330a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3129,7 +3129,6 @@ try_onemore: sb->s_maxbytes = sbi->max_file_blocks << le32_to_cpu(raw_super->log_blocksize); sb->s_max_links = F2FS_LINK_MAX; - get_random_bytes(&sbi->s_next_generation, sizeof(u32)); #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; -- cgit v1.2.3-59-g8ed1b From ca597bddedd94906cd761d8be6a3ad21292725de Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 23 Feb 2019 09:48:27 +0800 Subject: f2fs: fix to dirty inode for i_mode recovery As Seulbae Kim reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202637 We didn't recover permission field correctly after sudden power-cut, the reason is in setattr we didn't add inode into global dirty list once i_mode is changed, so latter checkpoint triggered by fsync will not flush last i_mode into disk, result in this problem, fix it. Reported-by: Seulbae Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b8f5d1208619..3a8c8eb0f549 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -766,7 +766,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int err; - bool size_changed = false; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; @@ -841,8 +840,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) down_write(&F2FS_I(inode)->i_sem); F2FS_I(inode)->last_disk_size = i_size_read(inode); up_write(&F2FS_I(inode)->i_sem); - - size_changed = true; } __setattr_copy(inode, attr); @@ -856,7 +853,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } /* file size may changed here */ - f2fs_mark_inode_dirty_sync(inode, size_changed); + f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ f2fs_balance_fs(F2FS_I_SB(inode), true); -- cgit v1.2.3-59-g8ed1b From 48432984d718c95cf13e26d487c2d1b697c3c01f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 Feb 2019 17:11:03 +0800 Subject: f2fs: fix to avoid deadlock of atomic file operations Thread A Thread B - __fput - f2fs_release_file - drop_inmem_pages - mutex_lock(&fi->inmem_lock) - __revoke_inmem_pages - lock_page(page) - open - f2fs_setattr - truncate_setsize - truncate_inode_pages_range - lock_page(page) - truncate_cleanup_page - f2fs_invalidate_page - drop_inmem_page - mutex_lock(&fi->inmem_lock); We may encounter above ABBA deadlock as reported by Kyungtae Kim: I'm reporting a bug in linux-4.17.19: "INFO: task hung in drop_inmem_page" (no reproducer) I think this might be somehow related to the following: https://groups.google.com/forum/#!searchin/syzkaller-bugs/INFO$3A$20task$20hung$20in$20%7Csort:date/syzkaller-bugs/c6soBTrdaIo/AjAzPeIzCgAJ ========================================= INFO: task syz-executor7:10822 blocked for more than 120 seconds. Not tainted 4.17.19 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor7 D27024 10822 6346 0x00000004 Call Trace: context_switch kernel/sched/core.c:2867 [inline] __schedule+0x721/0x1e60 kernel/sched/core.c:3515 schedule+0x88/0x1c0 kernel/sched/core.c:3559 schedule_preempt_disabled+0x18/0x30 kernel/sched/core.c:3617 __mutex_lock_common kernel/locking/mutex.c:833 [inline] __mutex_lock+0x5bd/0x1410 kernel/locking/mutex.c:893 mutex_lock_nested+0x1b/0x20 kernel/locking/mutex.c:908 drop_inmem_page+0xcb/0x810 fs/f2fs/segment.c:327 f2fs_invalidate_page+0x337/0x5e0 fs/f2fs/data.c:2401 do_invalidatepage mm/truncate.c:165 [inline] truncate_cleanup_page+0x261/0x330 mm/truncate.c:187 truncate_inode_pages_range+0x552/0x1610 mm/truncate.c:367 truncate_inode_pages mm/truncate.c:478 [inline] truncate_pagecache+0x6d/0x90 mm/truncate.c:801 truncate_setsize+0x81/0xa0 mm/truncate.c:826 f2fs_setattr+0x44f/0x1270 fs/f2fs/file.c:781 notify_change+0xa62/0xe80 fs/attr.c:313 do_truncate+0x12e/0x1e0 fs/open.c:63 do_last fs/namei.c:2955 [inline] path_openat+0x2042/0x29f0 fs/namei.c:3505 do_filp_open+0x1bd/0x2c0 fs/namei.c:3540 do_sys_open+0x35e/0x4e0 fs/open.c:1101 __do_sys_open fs/open.c:1119 [inline] __se_sys_open fs/open.c:1114 [inline] __x64_sys_open+0x89/0xc0 fs/open.c:1114 do_syscall_64+0xc4/0x4e0 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4497b9 RSP: 002b:00007f734e459c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 RAX: ffffffffffffffda RBX: 00007f734e45a6cc RCX: 00000000004497b9 RDX: 0000000000000104 RSI: 00000000000a8280 RDI: 0000000020000080 RBP: 000000000071bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000007230 R14: 00000000006f02d0 R15: 00007f734e45a700 INFO: task syz-executor7:10858 blocked for more than 120 seconds. Not tainted 4.17.19 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor7 D28880 10858 6346 0x00000004 Call Trace: context_switch kernel/sched/core.c:2867 [inline] __schedule+0x721/0x1e60 kernel/sched/core.c:3515 schedule+0x88/0x1c0 kernel/sched/core.c:3559 __rwsem_down_write_failed_common kernel/locking/rwsem-xadd.c:565 [inline] rwsem_down_write_failed+0x5e6/0xc90 kernel/locking/rwsem-xadd.c:594 call_rwsem_down_write_failed+0x17/0x30 arch/x86/lib/rwsem.S:117 __down_write arch/x86/include/asm/rwsem.h:142 [inline] down_write+0x58/0xa0 kernel/locking/rwsem.c:72 inode_lock include/linux/fs.h:713 [inline] do_truncate+0x120/0x1e0 fs/open.c:61 do_last fs/namei.c:2955 [inline] path_openat+0x2042/0x29f0 fs/namei.c:3505 do_filp_open+0x1bd/0x2c0 fs/namei.c:3540 do_sys_open+0x35e/0x4e0 fs/open.c:1101 __do_sys_open fs/open.c:1119 [inline] __se_sys_open fs/open.c:1114 [inline] __x64_sys_open+0x89/0xc0 fs/open.c:1114 do_syscall_64+0xc4/0x4e0 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4497b9 RSP: 002b:00007f734e3b4c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 RAX: ffffffffffffffda RBX: 00007f734e3b56cc RCX: 00000000004497b9 RDX: 0000000000000104 RSI: 00000000000a8280 RDI: 0000000020000080 RBP: 000000000071c238 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000007230 R14: 00000000006f02d0 R15: 00007f734e3b5700 INFO: task syz-executor5:10829 blocked for more than 120 seconds. Not tainted 4.17.19 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor5 D28760 10829 6308 0x80000002 Call Trace: context_switch kernel/sched/core.c:2867 [inline] __schedule+0x721/0x1e60 kernel/sched/core.c:3515 schedule+0x88/0x1c0 kernel/sched/core.c:3559 io_schedule+0x21/0x80 kernel/sched/core.c:5179 wait_on_page_bit_common mm/filemap.c:1100 [inline] __lock_page+0x2b5/0x390 mm/filemap.c:1273 lock_page include/linux/pagemap.h:483 [inline] __revoke_inmem_pages+0xb35/0x11c0 fs/f2fs/segment.c:231 drop_inmem_pages+0xa3/0x3e0 fs/f2fs/segment.c:306 f2fs_release_file+0x2c7/0x330 fs/f2fs/file.c:1556 __fput+0x2c7/0x780 fs/file_table.c:209 ____fput+0x1a/0x20 fs/file_table.c:243 task_work_run+0x151/0x1d0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x8ba/0x30a0 kernel/exit.c:865 do_group_exit+0x13b/0x3a0 kernel/exit.c:968 get_signal+0x6bb/0x1650 kernel/signal.c:2482 do_signal+0x84/0x1b70 arch/x86/kernel/signal.c:810 exit_to_usermode_loop+0x155/0x190 arch/x86/entry/common.c:162 prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline] syscall_return_slowpath arch/x86/entry/common.c:265 [inline] do_syscall_64+0x445/0x4e0 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4497b9 RSP: 002b:00007f1c68e74ce8 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: fffffffffffffe00 RBX: 000000000071bf80 RCX: 00000000004497b9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000071bf80 RBP: 000000000071bf80 R08: 0000000000000000 R09: 000000000071bf58 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f1c68e759c0 R15: 00007f1c68e75700 This patch tries to use trylock_page to mitigate such deadlock condition for fix. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d74359325da6..e730c334abba 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -215,7 +215,8 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) } static int __revoke_inmem_pages(struct inode *inode, - struct list_head *head, bool drop, bool recover) + struct list_head *head, bool drop, bool recover, + bool trylock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct inmem_pages *cur, *tmp; @@ -227,7 +228,16 @@ static int __revoke_inmem_pages(struct inode *inode, if (drop) trace_f2fs_commit_inmem_page(page, INMEM_DROP); - lock_page(page); + if (trylock) { + /* + * to avoid deadlock in between page lock and + * inmem_lock. + */ + if (!trylock_page(page)) + continue; + } else { + lock_page(page); + } f2fs_wait_on_page_writeback(page, DATA, true, true); @@ -318,13 +328,19 @@ void f2fs_drop_inmem_pages(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - mutex_lock(&fi->inmem_lock); - __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - if (!list_empty(&fi->inmem_ilist)) - list_del_init(&fi->inmem_ilist); - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); - mutex_unlock(&fi->inmem_lock); + while (!list_empty(&fi->inmem_pages)) { + mutex_lock(&fi->inmem_lock); + __revoke_inmem_pages(inode, &fi->inmem_pages, + true, false, true); + + if (list_empty(&fi->inmem_pages)) { + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + } + mutex_unlock(&fi->inmem_lock); + } clear_inode_flag(inode, FI_ATOMIC_FILE); fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; @@ -429,12 +445,15 @@ retry: * recovery or rewrite & commit last transaction. For other * error number, revoking was done by filesystem itself. */ - err = __revoke_inmem_pages(inode, &revoke_list, false, true); + err = __revoke_inmem_pages(inode, &revoke_list, + false, true, false); /* drop all uncommitted pages */ - __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + __revoke_inmem_pages(inode, &fi->inmem_pages, + true, false, false); } else { - __revoke_inmem_pages(inode, &revoke_list, false, false); + __revoke_inmem_pages(inode, &revoke_list, + false, false, false); } return err; -- cgit v1.2.3-59-g8ed1b From 559e87c497a820e132995ad56c8361509e5410da Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 26 Feb 2019 19:01:15 +0800 Subject: f2fs: trace f2fs_ioc_shutdown This patch supports to trace f2fs_ioc_shutdown. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +++ include/trace/events/f2fs.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3a8c8eb0f549..807a97ad2430 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1987,6 +1987,9 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) out: if (in != F2FS_GOING_DOWN_FULLSYNC) mnt_drop_write_file(filp); + + trace_f2fs_shutdown(sbi, in, ret); + return ret; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 8a28a2b1be74..0cb746fe6ae1 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -149,6 +149,14 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { CP_SPEC_LOG_NUM, "log type is 2" }, \ { CP_RECOVER_DIR, "dir needs recovery" }) +#define show_shutdown_mode(type) \ + __print_symbolic(type, \ + { F2FS_GOING_DOWN_FULLSYNC, "full sync" }, \ + { F2FS_GOING_DOWN_METASYNC, "meta sync" }, \ + { F2FS_GOING_DOWN_NOSYNC, "no sync" }, \ + { F2FS_GOING_DOWN_METAFLUSH, "meta flush" }, \ + { F2FS_GOING_DOWN_NEED_FSCK, "need fsck" }) + struct f2fs_sb_info; struct f2fs_io_info; struct extent_info; @@ -1619,6 +1627,30 @@ DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit, TP_ARGS(sb, type, count) ); +TRACE_EVENT(f2fs_shutdown, + + TP_PROTO(struct f2fs_sb_info *sbi, unsigned int mode, int ret), + + TP_ARGS(sbi, mode, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, mode) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->mode = mode; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), mode: %s, ret:%d", + show_dev(__entry->dev), + show_shutdown_mode(__entry->mode), + __entry->ret) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3-59-g8ed1b From 2a6a7e722e7a78d774ce02b847c5b183a3ff2672 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 5 Mar 2019 17:52:33 +0800 Subject: f2fs: fix to use kvfree instead of kzfree As Jiqun Li reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202747 System can panic due to using wrong allocate/free function pair in xattr interface: - use kvmalloc to allocate memory - use kzfree to free memory Let's fix to use kvfree instead of kzfree, BTW, we are safe to get rid of kzfree, since there is no such confidential data stored as xattr, we don't need to zero it before free memory. Fixes: 5222595d093e ("f2fs: use kvmalloc, if kmalloc is failed") Reported-by: Jiqun Li Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index fa620d31ea5f..ca47224d78ab 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -340,7 +340,7 @@ check: *base_addr = txattr_addr; return 0; out: - kzfree(txattr_addr); + kvfree(txattr_addr); return err; } @@ -383,7 +383,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, *base_addr = txattr_addr; return 0; fail: - kzfree(txattr_addr); + kvfree(txattr_addr); return err; } @@ -510,7 +510,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, } error = size; out: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -556,7 +556,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) } error = buffer_size - rest; cleanup: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -687,7 +687,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (!error && S_ISDIR(inode->i_mode)) set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); exit: - kzfree(base_addr); + kvfree(base_addr); return error; } -- cgit v1.2.3-59-g8ed1b From 25720cc05e492467099a2d4d21a50f6ee8555cfd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Mar 2019 16:18:33 +0800 Subject: f2fs: remove wrong comment in f2fs_invalidate_page() Since 8c242db9b8c0 ("f2fs: fix stale ATOMIC_WRITTEN_PAGE private pointer"), we've started to not skip clear private flag for atomic_write page truncation, so removing old wrong comment in f2fs_invalidate_page(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3f3becd46362..2510e935301e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2711,7 +2711,6 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, clear_cold_data(page); - /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) return f2fs_drop_inmem_page(inode, page); -- cgit v1.2.3-59-g8ed1b From 240a59156d9bcfabceddb66be449e7b32fb5dc4a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Mar 2019 17:30:59 +0800 Subject: f2fs: fix to add refcount once page is tagged PG_private As Gao Xiang reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202749 f2fs may skip pageout() due to incorrect page reference count. The problem here is that MM defined the rule [1] very clearly that once page was set with PG_private flag, we should increment the refcount in that page, also main flows like pageout(), migrate_page() will assume there is one additional page reference count if page_has_private() returns true. But currently, f2fs won't add/del refcount when changing PG_private flag. Anyway, f2fs should follow MM's rule to make MM's related flows running as expected. [1] https://lore.kernel.org/lkml/2b19b3c4-2bc4-15fa-15cc-27a13e5c7af1@aol.com/ Reported-by: Gao Xiang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 21 ++++++++------------- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 21 +++++++++++++++++++++ fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 9 +++------ 6 files changed, 36 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c65a1e8e1e95..a98e1b02279e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -406,7 +406,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } @@ -957,7 +957,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2510e935301e..fa6318549af5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2714,8 +2714,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, if (IS_ATOMIC_WRITTEN_PAGE(page)) return f2fs_drop_inmem_page(inode, page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); } int f2fs_release_page(struct page *page, gfp_t wait) @@ -2729,8 +2728,7 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 0; clear_cold_data(page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); return 1; } @@ -2798,12 +2796,8 @@ int f2fs_migrate_page(struct address_space *mapping, return -EAGAIN; } - /* - * A reference is expected if PagePrivate set when move mapping, - * however F2FS breaks this for maintaining dirty page counts when - * truncating pages. So here adjusting the 'extra_count' make it work. - */ - extra_count = (atomic_written ? 1 : 0) - page_has_private(page); + /* one extra reference was held for atomic_write page */ + extra_count = atomic_written ? 1 : 0; rc = migrate_page_move_mapping(mapping, newpage, page, mode, extra_count); if (rc != MIGRATEPAGE_SUCCESS) { @@ -2824,9 +2818,10 @@ int f2fs_migrate_page(struct address_space *mapping, get_page(newpage); } - if (PagePrivate(page)) - SetPagePrivate(newpage); - set_page_private(newpage, page_private(page)); + if (PagePrivate(page)) { + f2fs_set_page_private(newpage, page_private(page)); + f2fs_clear_page_private(page); + } if (mode != MIGRATE_SYNC_NO_COPY) migrate_page_copy(newpage, page); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 103f3686a045..fb647e58edb5 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -728,7 +728,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, !f2fs_truncate_hole(dir, page->index, page->index + 1)) { f2fs_clear_page_cache_dirty_tag(page); clear_page_dirty_for_io(page); - ClearPagePrivate(page); + f2fs_clear_page_private(page); ClearPageUptodate(page); clear_cold_data(page); inode_dec_dirty_pages(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3007759dd2dd..a165f7870004 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2835,6 +2835,27 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, return true; } +static inline void f2fs_set_page_private(struct page *page, + unsigned long data) +{ + if (PagePrivate(page)) + return; + + get_page(page); + SetPagePrivate(page); + set_page_private(page, data); +} + +static inline void f2fs_clear_page_private(struct page *page) +{ + if (!PagePrivate(page)) + return; + + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 0); +} + /* * file.c */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f6ff84e29749..3f99ab288695 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1961,7 +1961,7 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e730c334abba..aa7fe79b62b2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -191,8 +191,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) f2fs_trace_pid(page); - set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); - SetPagePrivate(page); + f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); @@ -280,8 +279,7 @@ next: ClearPageUptodate(page); clear_cold_data(page); } - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); f2fs_put_page(page, 1); list_del(&cur->list); @@ -370,8 +368,7 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page) kmem_cache_free(inmem_entry_slab, cur); ClearPageUptodate(page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); f2fs_put_page(page, 0); trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); -- cgit v1.2.3-59-g8ed1b From 86109c9064daf2ac44ef7b4f1eeb039260351e9c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Mar 2019 17:31:30 +0800 Subject: f2fs: don't trigger read IO for beyond EOF page In f2fs_mpage_readpages(), if page is beyond EOF, we should just zero out it, but previously, before checking previous mapping info, we missed to check filesize boundary, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index fa6318549af5..912fdcc2dbb8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1551,6 +1551,9 @@ static int f2fs_mpage_readpages(struct address_space *mapping, if (last_block > last_block_in_file) last_block = last_block_in_file; + /* just zeroing out page which is beyond EOF */ + if (block_in_file >= last_block) + goto zero_out; /* * Map blocks using the previous result first. */ @@ -1563,16 +1566,11 @@ static int f2fs_mpage_readpages(struct address_space *mapping, * Then do more f2fs_map_blocks() calls until we are * done with this page. */ - map.m_flags = 0; + map.m_lblk = block_in_file; + map.m_len = last_block - block_in_file; - if (block_in_file < last_block) { - map.m_lblk = block_in_file; - map.m_len = last_block - block_in_file; - - if (f2fs_map_blocks(inode, &map, 0, - F2FS_GET_BLOCK_DEFAULT)) - goto set_error_page; - } + if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT)) + goto set_error_page; got_it: if ((map.m_flags & F2FS_MAP_MAPPED)) { block_nr = map.m_pblk + block_in_file - map.m_lblk; @@ -1587,6 +1585,7 @@ got_it: DATA_GENERIC)) goto set_error_page; } else { +zero_out: zero_user_segment(page, 0, PAGE_SIZE); if (!PageUptodate(page)) SetPageUptodate(page); -- cgit v1.2.3-59-g8ed1b From 70db5b04cbe19e5ae7e85ada2d3e82bcfdf90352 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Mar 2019 11:49:53 -0700 Subject: f2fs: give some messages for inline_xattr_size This patch adds some kernel messages when user sets wrong inline_xattr_size. Fixes: 500e0b28ecd3 ("f2fs: fix to check inline_xattr_size boundary correctly") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 42eb5c86330a..324938ec95f3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -821,6 +821,8 @@ static int parse_options(struct super_block *sb, char *options) } if (test_opt(sbi, INLINE_XATTR_SIZE)) { + int min_size, max_size; + if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_msg(sb, KERN_ERR, @@ -834,15 +836,18 @@ static int parse_options(struct super_block *sb, char *options) "set with inline_xattr option"); return -EINVAL; } - if (F2FS_OPTION(sbi).inline_xattr_size < - sizeof(struct f2fs_xattr_header) / sizeof(__le32) || - F2FS_OPTION(sbi).inline_xattr_size > - DEF_ADDRS_PER_INODE - + + min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32); + max_size = DEF_ADDRS_PER_INODE - F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - DEF_INLINE_RESERVED_SIZE - - MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) { + MIN_INLINE_DENTRY_SIZE / sizeof(__le32); + + if (F2FS_OPTION(sbi).inline_xattr_size < min_size || + F2FS_OPTION(sbi).inline_xattr_size > max_size) { f2fs_msg(sb, KERN_ERR, - "inline xattr size is out of range"); + "inline xattr size is out of range: %d ~ %d", + min_size, max_size); return -EINVAL; } } -- cgit v1.2.3-59-g8ed1b From dd6c89b5f2b93ceced4111e7b69d4efd8c312713 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 4 Mar 2019 17:19:04 +0800 Subject: f2fs: fix to do sanity check with inode.i_inline_xattr_size As Paul Bandha reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202709 When I run the poc on the mounted f2fs img I get a buffer overflow in read_inline_xattr due to there being no sanity check on the value of i_inline_xattr_size. I created the img by just modifying the value of i_inline_xattr_size in the inode: i_name [test1.txt] i_ext: fofs:0 blkaddr:0 len:0 i_extra_isize [0x 18 : 24] i_inline_xattr_size [0x ffff : 65535] i_addr[ofs] [0x 0 : 0] mkdir /mnt/f2fs mount ./f2fs1.img /mnt/f2fs gcc poc.c -o poc ./poc int main() { int y = syscall(SYS_listxattr, "/mnt/f2fs/test1.txt", NULL, 0); printf("ret %d", y); printf("errno: %d\n", errno); } BUG: KASAN: slab-out-of-bounds in read_inline_xattr+0x18f/0x260 Read of size 262140 at addr ffff88011035efd8 by task f2fs1poc/3263 CPU: 0 PID: 3263 Comm: f2fs1poc Not tainted 4.18.0-custom #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x83/0x250 kasan_report+0x213/0x350 memcpy+0x1f/0x50 read_inline_xattr+0x18f/0x260 read_all_xattrs+0xba/0x190 f2fs_listxattr+0x9d/0x3f0 listxattr+0xb2/0xd0 path_listxattr+0x93/0xe0 do_syscall_64+0x9d/0x220 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Let's add sanity check for inode.i_inline_xattr_size during f2fs_iget() to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 15 +++++++++++++++ fs/f2fs/super.c | 5 +---- fs/f2fs/xattr.h | 6 ++++++ 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index bec52961630b..d44e6de70cb3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -14,6 +14,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "xattr.h" #include @@ -248,6 +249,20 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (f2fs_has_extra_attr(inode) && + f2fs_sb_has_flexible_inline_xattr(sbi) && + f2fs_has_inline_xattr(inode) && + (!fi->i_inline_xattr_size || + fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) has corrupted " + "i_inline_xattr_size: %d, max: %zu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MAX_INLINE_XATTR_SIZE); + return false; + } + if (F2FS_I(inode)->extent_tree) { struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 324938ec95f3..21750df930b3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -838,10 +838,7 @@ static int parse_options(struct super_block *sb, char *options) } min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32); - max_size = DEF_ADDRS_PER_INODE - - F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - - DEF_INLINE_RESERVED_SIZE - - MIN_INLINE_DENTRY_SIZE / sizeof(__le32); + max_size = MAX_INLINE_XATTR_SIZE; if (F2FS_OPTION(sbi).inline_xattr_size < min_size || F2FS_OPTION(sbi).inline_xattr_size > max_size) { diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 67db134da0f5..9172ee082ca8 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -78,6 +78,12 @@ struct f2fs_xattr_entry { sizeof(struct f2fs_xattr_header) - \ sizeof(struct f2fs_xattr_entry)) +#define MAX_INLINE_XATTR_SIZE \ + (DEF_ADDRS_PER_INODE - \ + F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \ + DEF_INLINE_RESERVED_SIZE - \ + MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) + /* * On-disk structure of f2fs_xattr * We use inline xattrs space + 1 block for xattr. -- cgit v1.2.3-59-g8ed1b From 2c28aba8b2e2a51749fa66e01b68e1cd5b53e022 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 5 Mar 2019 19:32:26 +0800 Subject: f2fs: fix to adapt small inline xattr space in __find_inline_xattr() With below testcase, we will fail to find existed xattr entry: 1. mkfs.f2fs -O extra_attr -O flexible_inline_xattr /dev/zram0 2. mount -t f2fs -o inline_xattr_size=1 /dev/zram0 /mnt/f2fs/ 3. touch /mnt/f2fs/file 4. setfattr -n "user.name" -v 0 /mnt/f2fs/file 5. getfattr -n "user.name" /mnt/f2fs/file /mnt/f2fs/file: user.name: No such attribute The reason is for inode which has very small inline xattr size, __find_inline_xattr() will fail to traverse any entry due to first entry may not be loaded from xattr node yet, later, we may skip to check entire xattr datas in __find_xattr(), result in such wrong condition. This patch adds condition to check such case to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index ca47224d78ab..848a785abe25 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -224,11 +224,11 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, { struct f2fs_xattr_entry *entry; unsigned int inline_size = inline_xattr_size(inode); + void *max_addr = base_addr + inline_size; list_for_each_xattr(entry, base_addr) { - if ((void *)entry + sizeof(__u32) > base_addr + inline_size || - (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) > - base_addr + inline_size) { + if ((void *)entry + sizeof(__u32) > max_addr || + (void *)XATTR_NEXT_ENTRY(entry) > max_addr) { *last_addr = entry; return NULL; } @@ -239,6 +239,13 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, if (!memcmp(entry->e_name, name, len)) break; } + + /* inline xattr header or entry across max inline xattr size */ + if (IS_XATTR_LAST_ENTRY(entry) && + (void *)entry + sizeof(__u32) > max_addr) { + *last_addr = entry; + return NULL; + } return entry; } -- cgit v1.2.3-59-g8ed1b From aadcef64b22f668c1a107b86d3521d9cac915c24 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Mar 2019 15:44:27 +0800 Subject: f2fs: fix to avoid deadlock in f2fs_read_inline_dir() As Jiqun Li reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202883 sometimes, dead lock when make system call SYS_getdents64 with fsync() is called by another process. monkey running on android9.0 1. task 9785 held sbi->cp_rwsem and waiting lock_page() 2. task 10349 held mm_sem and waiting sbi->cp_rwsem 3. task 9709 held lock_page() and waiting mm_sem so this is a dead lock scenario. task stack is show by crash tools as following crash_arm64> bt ffffffc03c354080 PID: 9785 TASK: ffffffc03c354080 CPU: 1 COMMAND: "RxIoScheduler-3" >> #7 [ffffffc01b50fac0] __lock_page at ffffff80081b11e8 crash-arm64> bt 10349 PID: 10349 TASK: ffffffc018b83080 CPU: 1 COMMAND: "BUGLY_ASYNC_UPL" >> #3 [ffffffc01f8cfa40] rwsem_down_read_failed at ffffff8008a93afc PC: 00000033 LR: 00000000 SP: 00000000 PSTATE: ffffffffffffffff crash-arm64> bt 9709 PID: 9709 TASK: ffffffc03e7f3080 CPU: 1 COMMAND: "IntentService[A" >> #3 [ffffffc001e67850] rwsem_down_read_failed at ffffff8008a93afc >> #8 [ffffffc001e67b80] el1_ia at ffffff8008084fc4 PC: ffffff8008274114 [compat_filldir64+120] LR: ffffff80083584d4 [f2fs_fill_dentries+448] SP: ffffffc001e67b80 PSTATE: 80400145 X29: ffffffc001e67b80 X28: 0000000000000000 X27: 000000000000001a X26: 00000000000093d7 X25: ffffffc070d52480 X24: 0000000000000008 X23: 0000000000000028 X22: 00000000d43dfd60 X21: ffffffc001e67e90 X20: 0000000000000011 X19: ffffff80093a4000 X18: 0000000000000000 X17: 0000000000000000 X16: 0000000000000000 X15: 0000000000000000 X14: ffffffffffffffff X13: 0000000000000008 X12: 0101010101010101 X11: 7f7f7f7f7f7f7f7f X10: 6a6a6a6a6a6a6a6a X9: 7f7f7f7f7f7f7f7f X8: 0000000080808000 X7: ffffff800827409c X6: 0000000080808000 X5: 0000000000000008 X4: 00000000000093d7 X3: 000000000000001a X2: 0000000000000011 X1: ffffffc070d52480 X0: 0000000000800238 >> #9 [ffffffc001e67be0] f2fs_fill_dentries at ffffff80083584d0 PC: 0000003c LR: 00000000 SP: 00000000 PSTATE: 000000d9 X12: f48a02ff X11: d4678960 X10: d43dfc00 X9: d4678ae4 X8: 00000058 X7: d4678994 X6: d43de800 X5: 000000d9 X4: d43dfc0c X3: d43dfc10 X2: d46799c8 X1: 00000000 X0: 00001068 Below potential deadlock will happen between three threads: Thread A Thread B Thread C - f2fs_do_sync_file - f2fs_write_checkpoint - down_write(&sbi->node_change) -- 1) - do_page_fault - down_write(&mm->mmap_sem) -- 2) - do_wp_page - f2fs_vm_page_mkwrite - getdents64 - f2fs_read_inline_dir - lock_page -- 3) - f2fs_sync_node_pages - lock_page -- 3) - __do_map_lock - down_read(&sbi->node_change) -- 1) - f2fs_fill_dentries - dir_emit - compat_filldir64 - do_page_fault - down_read(&mm->mmap_sem) -- 2) Since f2fs_readdir is protected by inode.i_rwsem, there should not be any updates in inode page, we're safe to lookup dents in inode page without its lock held, so taking off the lock to improve concurrency of readdir and avoid potential deadlock. Reported-by: Jiqun Li Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a1381d05956b..bb6a152310ef 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -659,6 +659,12 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (IS_ERR(ipage)) return PTR_ERR(ipage); + /* + * f2fs_readdir was protected by inode.i_rwsem, it is safe to access + * ipage without page's lock held. + */ + unlock_page(ipage); + inline_dentry = inline_data_addr(inode, ipage); make_dentry_ptr_inline(inode, &d, inline_dentry); @@ -667,7 +673,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (!err) ctx->pos = d.max; - f2fs_put_page(ipage, 1); + f2fs_put_page(ipage, 0); return err < 0 ? err : 0; } -- cgit v1.2.3-59-g8ed1b From aff7b628ac2d58616b74789389ebb1e987081f49 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Mar 2019 16:15:08 -0700 Subject: f2fs: set pin_file under CAP_SYS_ADMIN Android uses pin_file for uncrypt during OTA, and that should be managed by CAP_SYS_ADMIN only. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 807a97ad2430..012815d816e6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2873,8 +2873,8 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) __u32 pin; int ret = 0; - if (!inode_owner_or_capable(inode)) - return -EACCES; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; if (get_user(pin, (__u32 __user *)arg)) return -EFAULT; -- cgit v1.2.3-59-g8ed1b