From c5bca38d2edc8a8030a8f1b99115480046c5bd7d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 6 Jun 2022 15:32:41 -0700 Subject: f2fs: use the updated test_dummy_encryption helper functions Switch f2fs over to the functions that are replacing fscrypt_set_test_dummy_encryption(). Since f2fs hasn't been converted to the new mount API yet, this doesn't really provide a benefit for f2fs. But it allows fscrypt_set_test_dummy_encryption() to be removed. Also take the opportunity to eliminate an #ifdef. Reviewed-by: Chao Yu Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 37221e94e5ef..3112fe92f934 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -492,9 +493,19 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb, bool is_remount) { struct f2fs_sb_info *sbi = F2FS_SB(sb); -#ifdef CONFIG_FS_ENCRYPTION + struct fs_parameter param = { + .type = fs_value_is_string, + .string = arg->from ? arg->from : "", + }; + struct fscrypt_dummy_policy *policy = + &F2FS_OPTION(sbi).dummy_enc_policy; int err; + if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) { + f2fs_warn(sbi, "test_dummy_encryption option not supported"); + return -EINVAL; + } + if (!f2fs_sb_has_encrypt(sbi)) { f2fs_err(sbi, "Encrypt feature is off"); return -EINVAL; @@ -506,12 +517,12 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb, * needed to allow it to be set or changed during remount. We do allow * it to be specified during remount, but only if there is no change. */ - if (is_remount && !F2FS_OPTION(sbi).dummy_enc_policy.policy) { + if (is_remount && !fscrypt_is_dummy_policy_set(policy)) { f2fs_warn(sbi, "Can't set test_dummy_encryption on remount"); return -EINVAL; } - err = fscrypt_set_test_dummy_encryption( - sb, arg->from, &F2FS_OPTION(sbi).dummy_enc_policy); + + err = fscrypt_parse_test_dummy_encryption(¶m, policy); if (err) { if (err == -EEXIST) f2fs_warn(sbi, @@ -524,12 +535,14 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb, opt, err); return -EINVAL; } + err = fscrypt_add_test_dummy_key(sb, policy); + if (err) { + f2fs_warn(sbi, "Error adding test dummy encryption key [%d]", + err); + return err; + } f2fs_warn(sbi, "Test dummy encryption mode enabled"); return 0; -#else - f2fs_warn(sbi, "test_dummy_encryption option not supported"); - return -EINVAL; -#endif } #ifdef CONFIG_F2FS_FS_COMPRESSION -- cgit v1.2.3-59-g8ed1b From 7859e97f62202e81129a0d667153b463fad6513e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 11 Jun 2022 10:55:43 -0700 Subject: f2fs: do not skip updating inode when retrying to flush node page Let's try to flush dirty inode again to improve subtle i_blocks mismatch. 
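For illustration, the option handling in the f2fs_set_test_dummy_encryption() hunk above boils down to the sketch below. This is a simplified restatement rather than the literal f2fs code: the encrypt-feature check, the remount restriction and the f2fs_warn() reporting are all omitted.

/*
 * Simplified sketch of the new flow: wrap the mount-option string in a
 * struct fs_parameter, parse it into a dummy policy, then load the
 * matching test key.  Error reporting is left out on purpose.
 */
static int set_dummy_encryption_sketch(struct super_block *sb, char *arg,
				       struct fscrypt_dummy_policy *policy)
{
	struct fs_parameter param = {
		.type	= fs_value_is_string,
		.string	= arg ? arg : "",
	};
	int err;

	err = fscrypt_parse_test_dummy_encryption(&param, policy);
	if (err)
		return err;
	return fscrypt_add_test_dummy_key(sb, policy);
}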
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cf6f7fc83c08..095a634436e3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1945,7 +1945,6 @@ next_step: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; bool submitted = false; - bool may_dirty = true; /* give a priority to WB_SYNC threads */ if (atomic_read(&sbi->wb_sync_req[NODE]) && @@ -1998,11 +1997,8 @@ continue_unlock: } /* flush dirty inode */ - if (IS_INODE(page) && may_dirty) { - may_dirty = false; - if (flush_dirty_inode(page)) - goto lock_node; - } + if (IS_INODE(page) && flush_dirty_inode(page)) + goto lock_node; write_node: f2fs_wait_on_page_writeback(page, NODE, true, true); -- cgit v1.2.3-59-g8ed1b From a4a0e16dbf77582c4f58ab472229dd071b5c4260 Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Tue, 31 May 2022 09:16:56 +0800 Subject: f2fs: optimize error handling in redirty_blocks Current error handling is at risk of page leaks. However, we dot't seek any failure scenarios, just use f2fs_bug_on. Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bd14cef1b08f..2d1114b0ceef 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3903,10 +3903,10 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) for (i = 0; i < page_len; i++, redirty_idx++) { page = find_lock_page(mapping, redirty_idx); - if (!page) { - ret = -ENOMEM; - break; - } + + /* It will never fail, when page has pinned above */ + f2fs_bug_on(F2FS_I_SB(inode), !page); + set_page_dirty(page); f2fs_put_page(page, 1); f2fs_put_page(page, 0); -- cgit v1.2.3-59-g8ed1b From 29be7ec3df7a3ed95584d39aa4014df0cd056991 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 19 Jun 2022 01:51:55 +0800 Subject: f2fs: initialize page_array_entry slab only if compression feature is on Otherwise, in image which doesn't support compression feature, page_array_entry will be initialized w/o use. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 24824cd96f36..fa237e5c7173 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1908,6 +1908,9 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; char slab_name[32]; + if (!f2fs_sb_has_compression(sbi)) + return 0; + sprintf(slab_name, "f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev)); sbi->page_array_slab_size = sizeof(struct page *) << -- cgit v1.2.3-59-g8ed1b From 7a8fc586180d8c57db5cc1e2acb32bb9986f642b Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 20 Jun 2022 10:38:42 -0700 Subject: f2fs: introduce memory mode Introduce memory mode to supports "normal" and "low" memory modes. "low" mode is to support low memory devices. Because of the nature of low memory devices, in this mode, f2fs will try to save memory sometimes by sacrificing performance. "normal" mode is the default mode and same as before. 
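As a purely hypothetical illustration of how the new mode is meant to be consumed (the real consumers arrive in later patches), a memory-sensitive tunable would simply branch on the f2fs_low_mem_mode() helper added below; the CACHE_BUDGET_* names are invented for this sketch.

/*
 * Hypothetical consumer of memory=low; CACHE_BUDGET_LOW/NORMAL are
 * made-up constants for illustration only.
 */
static unsigned int pick_cache_budget(struct f2fs_sb_info *sbi)
{
	if (f2fs_low_mem_mode(sbi))
		return CACHE_BUDGET_LOW;	/* save memory, accept slower paths */
	return CACHE_BUDGET_NORMAL;
}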
Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 5 +++++ fs/f2fs/f2fs.h | 13 +++++++++++++ fs/f2fs/super.c | 24 ++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index ad8dc8c040a2..2965601e21bb 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -336,6 +336,11 @@ discard_unit=%s Control discard unit, the argument can be "block", "segment" default, it is helpful for large sized SMR or ZNS devices to reduce memory cost by getting rid of fs metadata supports small discard. +memory=%s Control memory mode. This supports "normal" and "low" modes. + "low" mode is introduced to support low memory devices. + Because of the nature of low memory devices, in this mode, f2fs + will try to save memory sometimes by sacrificing performance. + "normal" mode is the default mode and same as before. ======================== ============================================================ Debugfs Entries diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d9bbecd008d2..fea97093d927 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -159,6 +159,7 @@ struct f2fs_mount_info { int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ int bggc_mode; /* bggc mode: off, on or sync */ + int memory_mode; /* memory mode */ int discard_unit; /* * discard command's offset/size should * be aligned to this unit: block, @@ -1360,6 +1361,13 @@ enum { DISCARD_UNIT_SECTION, /* basic discard unit is section */ }; +enum { + MEMORY_MODE_NORMAL, /* memory mode for normal devices */ + MEMORY_MODE_LOW, /* memory mode for low memry devices */ +}; + + + static inline int f2fs_test_bit(unsigned int nr, char *addr); static inline void f2fs_set_bit(unsigned int nr, char *addr); static inline void f2fs_clear_bit(unsigned int nr, char *addr); @@ -4398,6 +4406,11 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } +static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; +} + static inline bool f2fs_may_compress(struct inode *inode) { if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3112fe92f934..cf9cf24f9b56 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -160,6 +160,7 @@ enum { Opt_gc_merge, Opt_nogc_merge, Opt_discard_unit, + Opt_memory_mode, Opt_err, }; @@ -236,6 +237,7 @@ static match_table_t f2fs_tokens = { {Opt_gc_merge, "gc_merge"}, {Opt_nogc_merge, "nogc_merge"}, {Opt_discard_unit, "discard_unit=%s"}, + {Opt_memory_mode, "memory=%s"}, {Opt_err, NULL}, }; @@ -1235,6 +1237,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } kfree(name); break; + case Opt_memory_mode: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (!strcmp(name, "normal")) { + F2FS_OPTION(sbi).memory_mode = + MEMORY_MODE_NORMAL; + } else if (!strcmp(name, "low")) { + F2FS_OPTION(sbi).memory_mode = + MEMORY_MODE_LOW; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -2006,6 +2024,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) seq_printf(seq, ",discard_unit=%s", "section"); + if (F2FS_OPTION(sbi).memory_mode == 
MEMORY_MODE_NORMAL) + seq_printf(seq, ",memory=%s", "normal"); + else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW) + seq_printf(seq, ",memory=%s", "low"); + return 0; } @@ -2027,6 +2050,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_ext_cnt = 0; F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; + F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; sbi->sb->s_flags &= ~SB_INLINECRYPT; -- cgit v1.2.3-59-g8ed1b From 14de5fc3ddf3dee89068a54a4d137fc17c2c971c Mon Sep 17 00:00:00 2001 From: duguowei Date: Mon, 20 Jun 2022 21:39:45 +0800 Subject: f2fs: remove redundant code for gc condition Remove the redundant code and use local variant as the argument directly. Make it more human-readable. Signed-off-by: duguowei [Jaegeuk Kim: make code neat] Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 3fe145e8e594..19b956c2d697 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -120,15 +120,13 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) return free_blks - ovp_blks; } -static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi) +static inline block_t limit_invalid_user_blocks(block_t user_block_count) { - return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100; + return (long)(user_block_count * LIMIT_INVALID_BLOCK) / 100; } -static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) +static inline block_t limit_free_user_blocks(block_t reclaimable_user_blocks) { - block_t reclaimable_user_blocks = sbi->user_block_count - - written_block_count(sbi); return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; } @@ -163,15 +161,16 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) { - block_t invalid_user_blocks = sbi->user_block_count - - written_block_count(sbi); + block_t user_block_count = sbi->user_block_count; + block_t invalid_user_blocks = user_block_count - + written_block_count(sbi); /* * Background GC is triggered with the following conditions. * 1. There are a number of invalid blocks. * 2. There is not enough free space. */ - if (invalid_user_blocks > limit_invalid_user_blocks(sbi) && - free_user_blocks(sbi) < limit_free_user_blocks(sbi)) - return true; - return false; + return (invalid_user_blocks > + limit_invalid_user_blocks(user_block_count) && + free_user_blocks(sbi) < + limit_free_user_blocks(invalid_user_blocks)); } -- cgit v1.2.3-59-g8ed1b From b771aadc6e4c221a468fe4a2dfcfffec01a06722 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 Jun 2022 10:57:24 -0700 Subject: f2fs: enforce single zone capacity In order to simplify the complicated per-zone capacity, let's support only one capacity for entire zoned device. 
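Conceptually (a sketch, not part of the patch), "one capacity for the entire zoned device" means the unusable tail of every section becomes a single per-superblock constant instead of a per-zone array, so the usable size of a section reduces to:

/*
 * Sketch only: with a single zone capacity, the usable block count of a
 * section no longer depends on which zone the section lives in.
 */
static block_t usable_blks_in_sec_sketch(unsigned int segs_per_sec,
					 unsigned int blocks_per_seg,
					 block_t unusable_blocks_per_sec)
{
	return (block_t)segs_per_sec * blocks_per_seg - unusable_blocks_per_sec;
}

This is exactly what the new CAP_BLKS_PER_SEC() macro in the hunks below encodes.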
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 19 ++++++------------- fs/f2fs/segment.h | 3 +++ fs/f2fs/super.c | 33 ++++++++++++--------------------- 4 files changed, 22 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fea97093d927..fcb1460e0890 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1235,7 +1235,6 @@ struct f2fs_dev_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int nr_blkz; /* Total number of zones */ unsigned long *blkz_seq; /* Bitmap indicating sequential zones */ - block_t *zone_capacity_blocks; /* Array of zone capacity in blks */ #endif }; @@ -1672,6 +1671,7 @@ struct f2fs_sb_info { unsigned int meta_ino_num; /* meta inode number*/ unsigned int log_blocks_per_seg; /* log2 blocks per segment */ unsigned int blocks_per_seg; /* blocks per segment */ + unsigned int unusable_blocks_per_sec; /* unusable blocks per section */ unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 874c1b9c41a2..447b03579049 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4895,7 +4895,7 @@ static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno, static inline unsigned int f2fs_usable_zone_segs_in_sec( struct f2fs_sb_info *sbi, unsigned int segno) { - unsigned int dev_idx, zone_idx, unusable_segs_in_sec; + unsigned int dev_idx, zone_idx; dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno)); zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx); @@ -4904,18 +4904,12 @@ static inline unsigned int f2fs_usable_zone_segs_in_sec( if (is_conv_zone(sbi, zone_idx, dev_idx)) return sbi->segs_per_sec; - /* - * If the zone_capacity_blocks array is NULL, then zone capacity - * is equal to the zone size for all zones - */ - if (!FDEV(dev_idx).zone_capacity_blocks) + if (!sbi->unusable_blocks_per_sec) return sbi->segs_per_sec; /* Get the segment count beyond zone capacity block */ - unusable_segs_in_sec = (sbi->blocks_per_blkz - - FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >> - sbi->log_blocks_per_seg; - return sbi->segs_per_sec - unusable_segs_in_sec; + return sbi->segs_per_sec - (sbi->unusable_blocks_per_sec >> + sbi->log_blocks_per_seg); } /* @@ -4944,12 +4938,11 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( if (is_conv_zone(sbi, zone_idx, dev_idx)) return sbi->blocks_per_seg; - if (!FDEV(dev_idx).zone_capacity_blocks) + if (!sbi->unusable_blocks_per_sec) return sbi->blocks_per_seg; sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); - sec_cap_blkaddr = sec_start_blkaddr + - FDEV(dev_idx).zone_capacity_blocks[zone_idx]; + sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi); /* * If segment starts before zone capacity and spans beyond diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 3f277dfcb131..813a892cd979 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -101,6 +101,9 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) +#define CAP_BLKS_PER_SEC(sbi) \ + ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \ + (sbi)->unusable_blocks_per_sec) #define GET_SEC_FROM_SEG(sbi, segno) \ (((segno) == -1) ? 
-1: (segno) / (sbi)->segs_per_sec) #define GET_SEG_FROM_SEC(sbi, secno) \ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cf9cf24f9b56..faf9a767d05a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1522,7 +1522,6 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) blkdev_put(FDEV(i).bdev, FMODE_EXCL); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); - kfree(FDEV(i).zone_capacity_blocks); #endif } kvfree(sbi->devs); @@ -3673,24 +3672,29 @@ err_valid_block: #ifdef CONFIG_BLK_DEV_ZONED struct f2fs_report_zones_args { + struct f2fs_sb_info *sbi; struct f2fs_dev_info *dev; - bool zone_cap_mismatch; }; static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct f2fs_report_zones_args *rz_args = data; + block_t unusable_blocks = (zone->len - zone->capacity) >> + F2FS_LOG_SECTORS_PER_BLOCK; if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return 0; set_bit(idx, rz_args->dev->blkz_seq); - rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >> - F2FS_LOG_SECTORS_PER_BLOCK; - if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch) - rz_args->zone_cap_mismatch = true; - + if (!rz_args->sbi->unusable_blocks_per_sec) { + rz_args->sbi->unusable_blocks_per_sec = unusable_blocks; + return 0; + } + if (rz_args->sbi->unusable_blocks_per_sec != unusable_blocks) { + f2fs_err(rz_args->sbi, "F2FS supports single zone capacity\n"); + return -EINVAL; + } return 0; } @@ -3731,26 +3735,13 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (!FDEV(devi).blkz_seq) return -ENOMEM; - /* Get block zones type and zone-capacity */ - FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi, - FDEV(devi).nr_blkz * sizeof(block_t), - GFP_KERNEL); - if (!FDEV(devi).zone_capacity_blocks) - return -ENOMEM; - + rep_zone_arg.sbi = sbi; rep_zone_arg.dev = &FDEV(devi); - rep_zone_arg.zone_cap_mismatch = false; ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb, &rep_zone_arg); if (ret < 0) return ret; - - if (!rep_zone_arg.zone_cap_mismatch) { - kfree(FDEV(devi).zone_capacity_blocks); - FDEV(devi).zone_capacity_blocks = NULL; - } - return 0; } #endif -- cgit v1.2.3-59-g8ed1b From 074b5ea2900ea8e40f8e7a3fd37e0a55ad3d5874 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 Jun 2022 11:03:57 -0700 Subject: f2fs: adjust zone capacity when considering valid block count This patch fixes counting unusable blocks set by zone capacity when checking the valid block count in a section. 
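A quick way to see the bug being fixed (sketch; the actual conversions follow): on a device whose zone capacity is smaller than the zone size, a section is "full" once it holds CAP_BLKS_PER_SEC(sbi) valid blocks, so any comparison against the raw BLKS_PER_SEC(sbi) can never become true.

/*
 * Sketch: "full section" tests must use the capped size, otherwise they
 * never trigger on zone-capacity devices.
 */
static bool section_is_full(struct f2fs_sb_info *sbi, block_t valid_blocks)
{
	return valid_blocks == CAP_BLKS_PER_SEC(sbi);	/* not BLKS_PER_SEC(sbi) */
}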
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 2 +- fs/f2fs/file.c | 6 +++--- fs/f2fs/gc.c | 4 ++-- fs/f2fs/segment.c | 7 +++---- fs/f2fs/segment.h | 8 ++++---- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index c92625ef16d0..c01471573977 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,7 +39,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi) bimodal = 0; total_vblocks = 0; - blks_per_sec = BLKS_PER_SEC(sbi); + blks_per_sec = CAP_BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { vblocks = get_valid_blocks(sbi, segno, true); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2d1114b0ceef..0f29af7876a6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1681,7 +1681,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, return 0; if (f2fs_is_pinned_file(inode)) { - block_t sec_blks = BLKS_PER_SEC(sbi); + block_t sec_blks = CAP_BLKS_PER_SEC(sbi); block_t sec_len = roundup(map.m_len, sec_blks); map.m_len = sec_blks; @@ -2432,7 +2432,7 @@ do_more: ret = -EAGAIN; goto out; } - range->start += BLKS_PER_SEC(sbi); + range->start += CAP_BLKS_PER_SEC(sbi); if (range->start <= end) goto do_more; out: @@ -2557,7 +2557,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto out; } - sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi)); + sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi)); /* * make sure there are enough free section for LFS allocation, this can diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d5fb426e0747..c38bdaf831af 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -487,7 +487,7 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi, unsigned long long age, u, accu; unsigned long long max_mtime = sit_i->dirty_max_mtime; unsigned long long min_mtime = sit_i->dirty_min_mtime; - unsigned int sec_blocks = BLKS_PER_SEC(sbi); + unsigned int sec_blocks = CAP_BLKS_PER_SEC(sbi); unsigned int vblocks; unsigned int dirty_threshold = max(am->max_candidate_count, am->candidate_ratio * @@ -1487,7 +1487,7 @@ next_step: */ if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || (!force_migrate && get_valid_blocks(sbi, segno, true) == - BLKS_PER_SEC(sbi))) + CAP_BLKS_PER_SEC(sbi))) return submitted; if (check_valid_map(sbi, segno, off) == 0) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 447b03579049..ce571c0d7126 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -728,7 +728,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, get_valid_blocks(sbi, segno, true); f2fs_bug_on(sbi, unlikely(!valid_blocks || - valid_blocks == BLKS_PER_SEC(sbi))); + valid_blocks == CAP_BLKS_PER_SEC(sbi))); if (!IS_CURSEC(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); @@ -764,7 +764,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); if (!valid_blocks || - valid_blocks == BLKS_PER_SEC(sbi)) { + valid_blocks == CAP_BLKS_PER_SEC(sbi)) { clear_bit(secno, dirty_i->dirty_secmap); return; } @@ -4483,7 +4483,6 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno = 0, offset = 0, secno; block_t valid_blocks, usable_blks_in_seg; - block_t blks_per_sec = BLKS_PER_SEC(sbi); while (1) { /* find dirty segment based on free segmap */ @@ -4512,7 +4511,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) valid_blocks = 
get_valid_blocks(sbi, segno, true); secno = GET_SEC_FROM_SEG(sbi, segno); - if (!valid_blocks || valid_blocks == blks_per_sec) + if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi)) continue; if (IS_CURSEC(sbi, secno)) continue; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 813a892cd979..d1d63766f2c7 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -612,10 +612,10 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, get_pages(sbi, F2FS_DIRTY_DENTS) + get_pages(sbi, F2FS_DIRTY_IMETA); unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); - unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi); - unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi); - unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi); - unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi); + unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi); + unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi); + unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi); + unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int free, need_lower, need_upper; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) -- cgit v1.2.3-59-g8ed1b From 8e0f54a70e9b0c88d6375933d281d631b7248153 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 Jun 2022 15:49:47 -0700 Subject: f2fs: add a sysfs entry to show zone capacity This patch adds a sysfs entry showing the unusable space in a section made by zone capacity. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/sysfs.c | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 9b583dd0298b..22c1efd49773 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -580,3 +580,9 @@ Date: January 2022 Contact: "Jaegeuk Kim" Description: Controls max # of node block writes to be used for roll forward recovery. This can limit the roll forward recovery time. + +What: /sys/fs/f2fs//unusable_blocks_per_sec +Date: June 2022 +Contact: "Jaegeuk Kim" +Description: Shows the number of unusable blocks in a section which was defined by + the zone capacity reported by underlying zoned device. 
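For completeness, a minimal user-space reader of the new node could look like the sketch below; "<disk>" stands for the device name exactly as in the ABI entry above, and the program is illustrative rather than part of the patch.

/* Illustrative only: print /sys/fs/f2fs/<disk>/unusable_blocks_per_sec. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/fs/f2fs/<disk>/unusable_blocks_per_sec", "r");
	unsigned int unusable;

	if (!f)
		return 1;
	if (fscanf(f, "%u", &unusable) == 1)
		printf("unusable blocks per section: %u\n", unusable);
	fclose(f);
	return 0;
}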
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 4c50aedd5144..6eeefe60a7af 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -713,6 +713,11 @@ static struct f2fs_attr f2fs_attr_##_name = { \ .offset = _offset \ } +#define F2FS_RO_ATTR(struct_type, struct_name, name, elname) \ + F2FS_ATTR_OFFSET(struct_type, name, 0444, \ + f2fs_sbi_show, NULL, \ + offsetof(struct struct_name, elname)) + #define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ F2FS_ATTR_OFFSET(struct_type, name, 0644, \ f2fs_sbi_show, f2fs_sbi_store, \ @@ -811,6 +816,8 @@ F2FS_FEATURE_RO_ATTR(encrypted_casefold); #endif /* CONFIG_FS_ENCRYPTION */ #ifdef CONFIG_BLK_DEV_ZONED F2FS_FEATURE_RO_ATTR(block_zoned); +F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, unusable_blocks_per_sec, + unusable_blocks_per_sec); #endif F2FS_FEATURE_RO_ATTR(atomic_write); F2FS_FEATURE_RO_ATTR(extra_attr); @@ -919,6 +926,9 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(moved_blocks_background), ATTR_LIST(avg_vblocks), #endif +#ifdef CONFIG_BLK_DEV_ZONED + ATTR_LIST(unusable_blocks_per_sec), +#endif #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compr_written_block), ATTR_LIST(compr_saved_block), -- cgit v1.2.3-59-g8ed1b From 67ca06872eb02944b4c6f92cffa9242e92c63109 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Jul 2022 14:30:15 +0800 Subject: f2fs: fix to invalidate META_MAPPING before DIO write Quoted from commit e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write") " Encrypted pages during GC are read and cached in META_MAPPING. However, due to cached pages in META_MAPPING, there is an issue where newly written pages are lost by IPU or DIO writes. Thread A - f2fs_gc() Thread B /* phase 3 */ down_write(i_gc_rwsem) ra_data_block() ---- (a) up_write(i_gc_rwsem) f2fs_direct_IO() : - down_read(i_gc_rwsem) - __blockdev_direct_io() - get_data_block_dio_write() - f2fs_dio_submit_bio() ---- (b) - up_read(i_gc_rwsem) /* phase 4 */ down_write(i_gc_rwsem) move_data_block() ---- (c) up_write(i_gc_rwsem) (a) In phase 3 of f2fs_gc(), up-to-date page is read from storage and cached in META_MAPPING. (b) In thread B, writing new data by IPU or DIO write on same blkaddr as read in (a). cached page in META_MAPPING become out-dated. (c) In phase 4 of f2fs_gc(), out-dated page in META_MAPPING is copied to new blkaddr. In conclusion, the newly written data in (b) is lost. To address this issue, invalidating pages in META_MAPPING before IPU or DIO write. " In previous commit, we missed to cover extent cache hit case, and passed wrong value for parameter @end of invalidate_mapping_pages(), fix both issues. 
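Put differently (a condensed sketch of the two fixes; the hunks follow), every DIO mapping path must drop all cached META_MAPPING pages covering the extent about to be overwritten, and the @end argument of invalidate_mapping_pages() is inclusive, so it has to be the last block of the extent rather than the first.

/*
 * Sketch of the intended behaviour: invalidate the whole mapped extent,
 * not just the first block (previously @end was passed as m_pblk).
 */
static void drop_gc_read_cache(struct f2fs_sb_info *sbi,
			       block_t start_blk, unsigned int len)
{
	invalidate_mapping_pages(META_MAPPING(sbi), start_blk,
				 start_blk + len - 1);
}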
Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC") Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write") Cc: Hyeong-Jun Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7fcbcf979737..f2a272613477 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1463,9 +1463,12 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, *map->m_next_extent = pgofs + map->m_len; /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO) + if (flag == F2FS_GET_BLOCK_DIO) { f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); + invalidate_mapping_pages(META_MAPPING(sbi), + map->m_pblk, map->m_pblk + map->m_len - 1); + } if (map->m_multidev_dio) { block_t blk_addr = map->m_pblk; @@ -1682,7 +1685,7 @@ sync_out: f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); invalidate_mapping_pages(META_MAPPING(sbi), - map->m_pblk, map->m_pblk); + map->m_pblk, map->m_pblk + map->m_len - 1); if (map->m_multidev_dio) { block_t blk_addr = map->m_pblk; -- cgit v1.2.3-59-g8ed1b From 1dd55358efc4f74e49fa9e1e97a03804852a4c20 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sun, 17 Jul 2022 10:36:13 +0200 Subject: f2fs: Delete f2fs_copy_page() and replace with memcpy_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit f2fs_copy_page() is a wrapper around two kmap() + one memcpy() from/to the mapped pages. It unnecessarily duplicates a kernel API and it makes use of kmap(), which is being deprecated in favor of kmap_local_page(). Two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap’s pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. With kmap_local_page() the mappings are per thread, CPU local, can take page faults, and can be called from any context (including interrupts). It is faster than kmap() in kernels with HIGHMEM enabled. Therefore, its use in __clone_blkaddrs() is safe and should be preferred. Delete f2fs_copy_page() and use a plain memcpy_page() in the only one site calling the removed function. memcpy_page() avoids open coding two kmap_local_page() + one memcpy() between the two kernel virtual addresses. Suggested-by: Christoph Hellwig Suggested-by: Ira Weiny Signed-off-by: Fabio M. 
De Francesco Reviewed-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 10 ---------- fs/f2fs/file.c | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fcb1460e0890..6d9f7d577c8a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2704,16 +2704,6 @@ static inline struct page *f2fs_pagecache_get_page( return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); } -static inline void f2fs_copy_page(struct page *src, struct page *dst) -{ - char *src_kaddr = kmap(src); - char *dst_kaddr = kmap(dst); - - memcpy(dst_kaddr, src_kaddr, PAGE_SIZE); - kunmap(dst); - kunmap(src); -} - static inline void f2fs_put_page(struct page *page, int unlock) { if (!page) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0f29af7876a6..eb43852752a6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1278,7 +1278,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(psrc, 1); return PTR_ERR(pdst); } - f2fs_copy_page(psrc, pdst); + memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); set_page_dirty(pdst); f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); -- cgit v1.2.3-59-g8ed1b From 7165841d578e0592848e09dc9d131aa30be44e1b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Jul 2022 11:17:15 +0800 Subject: f2fs: fix to check inline_data during compressed inode conversion When converting inode to compressed one via ioctl, it needs to check inline_data, since inline_data flag and compressed flag are incompatible. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6d9f7d577c8a..116d491b346d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4404,7 +4404,7 @@ static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) static inline bool f2fs_may_compress(struct inode *inode) { if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || - f2fs_is_atomic_file(inode)) + f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode)) return false; return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); } -- cgit v1.2.3-59-g8ed1b From a8634ccf5d626506100bb753327e36e6c6b3df70 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Thu, 7 Jul 2022 17:09:24 +0800 Subject: f2fs: allow compression of files without blocks Files created by truncate(1) have a size but no blocks, so they can be allowed to enable compression. Signed-off-by: Chao Liu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index eb43852752a6..6b1b030830ca 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1879,7 +1879,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (iflags & F2FS_COMPR_FL) { if (!f2fs_may_compress(inode)) return -EINVAL; - if (S_ISREG(inode->i_mode) && inode->i_size) + if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) return -EINVAL; set_compress_context(inode); -- cgit v1.2.3-59-g8ed1b From 0d5b9d8156396bbe1c982708b38ab9e188c45ec9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Jul 2022 23:26:43 +0800 Subject: f2fs: invalidate meta pages only for post_read required inode After commit e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write"), invalidate_mapping_pages() will be called to avoid race condition in between IPU/DIO and readahead for GC. 
However, readahead flow is only used for post_read required inode, so this patch adds check condition to avoids unnecessary page cache invalidating for non-post_read inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++------ fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 9 ++++++++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f2a272613477..de140afc42c1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1463,12 +1463,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, *map->m_next_extent = pgofs + map->m_len; /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO) { + if (flag == F2FS_GET_BLOCK_DIO) f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); - invalidate_mapping_pages(META_MAPPING(sbi), - map->m_pblk, map->m_pblk + map->m_len - 1); - } if (map->m_multidev_dio) { block_t blk_addr = map->m_pblk; @@ -1684,8 +1681,6 @@ sync_out: */ f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); - invalidate_mapping_pages(META_MAPPING(sbi), - map->m_pblk, map->m_pblk + map->m_len - 1); if (map->m_multidev_dio) { block_t blk_addr = map->m_pblk; @@ -2733,6 +2728,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .submitted = false, .compr_blocks = compr_blocks, .need_lock = LOCK_RETRY, + .post_read = f2fs_post_read_required(inode), .io_type = io_type, .io_wbc = wbc, .bio = bio, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 116d491b346d..d0f428aef34b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1199,6 +1199,7 @@ struct f2fs_io_info { bool retry; /* need to reallocate block address */ int compr_blocks; /* # of compressed block addresses */ bool encrypted; /* indicate file is encrypted */ + bool post_read; /* require post read */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ce571c0d7126..12b0edc48f7b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3433,7 +3433,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) goto drop_bio; } - invalidate_mapping_pages(META_MAPPING(sbi), + if (fio->post_read) + invalidate_mapping_pages(META_MAPPING(sbi), fio->new_blkaddr, fio->new_blkaddr); stat_inc_inplace_blocks(fio->sbi); @@ -3616,10 +3617,16 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t i; + if (!f2fs_post_read_required(inode)) + return; + for (i = 0; i < len; i++) f2fs_wait_on_block_writeback(inode, blkaddr + i); + + invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1); } static int read_compacted_summaries(struct f2fs_sb_info *sbi) -- cgit v1.2.3-59-g8ed1b From 1adaa71ea9bff3a0ca0b37eabbb7f89225b595cb Mon Sep 17 00:00:00 2001 From: qixiaoyu1 Date: Mon, 18 Jul 2022 11:28:40 +0800 Subject: f2fs: don't bother wait_ms by foreground gc f2fs_gc returns -EINVAL via f2fs_balance_fs when there is enough free secs after write checkpoint, but with gc_merge enabled, it will cause the sleep time of gc thread to be set to no_gc_sleep_time even if there are many dirty segments can be selected. 
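Condensed, the behavioural change described above is tiny (sketch; the real hunk follows): a failed victim selection should lengthen the sleep interval only for background GC, because a foreground/gc_merge wakeup may fail simply because enough free sections already exist.

/* Sketch of the gc-thread decision after the fix. */
if (f2fs_gc(sbi, &gc_control) && !foreground)
	wait_ms = gc_th->no_gc_sleep_time;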
Signed-off-by: qixiaoyu1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c38bdaf831af..6da21d405ce1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -150,8 +150,11 @@ do_gc: gc_control.nr_free_secs = foreground ? 1 : 0; /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, &gc_control)) - wait_ms = gc_th->no_gc_sleep_time; + if (f2fs_gc(sbi, &gc_control)) { + /* don't bother wait_ms by foreground gc */ + if (!foreground) + wait_ms = gc_th->no_gc_sleep_time; + } if (foreground) wake_up_all(&gc_th->fggc_wq); -- cgit v1.2.3-59-g8ed1b From f8e2f32bcde5945e8f8dbb8714178c24d221366b Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 18 Jul 2022 16:02:48 -0700 Subject: f2fs: introduce sysfs atomic write statistics introduce the below 4 new sysfs node for atomic write statistics. - current_atomic_write: the total current atomic write block count, which is not committed yet. - peak_atomic_write: the peak value of total current atomic write block count after boot. - committed_atomic_block: the accumulated total committed atomic write block count after boot. - revoked_atomic_block: the accumulated total revoked atomic write block count after boot. Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 24 +++++++++++++++++ fs/f2fs/data.c | 7 ++--- fs/f2fs/f2fs.h | 30 +++++++++++++++++++++ fs/f2fs/file.c | 1 + fs/f2fs/segment.c | 6 +++++ fs/f2fs/super.c | 1 + fs/f2fs/sysfs.c | 46 +++++++++++++++++++++++++++++++++ 7 files changed, 112 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 22c1efd49773..083ac2d63eef 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -586,3 +586,27 @@ Date: June 2022 Contact: "Jaegeuk Kim" Description: Shows the number of unusable blocks in a section which was defined by the zone capacity reported by underlying zoned device. + +What: /sys/fs/f2fs//current_atomic_write +Date: July 2022 +Contact: "Daeho Jeong" +Description: Show the total current atomic write block count, which is not committed yet. + This is a read-only entry. + +What: /sys/fs/f2fs//peak_atomic_write +Date: July 2022 +Contact: "Daeho Jeong" +Description: Show the peak value of total current atomic write block count after boot. + If you write "0" here, you can initialize to "0". + +What: /sys/fs/f2fs//committed_atomic_block +Date: July 2022 +Contact: "Daeho Jeong" +Description: Show the accumulated total committed atomic write block count after boot. + If you write "0" here, you can initialize to "0". + +What: /sys/fs/f2fs//revoked_atomic_block +Date: July 2022 +Contact: "Daeho Jeong" +Description: Show the accumulated total revoked atomic write block count after boot. + If you write "0" here, you can initialize to "0". 
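The accounting behind these counters is small; as a simplified sketch (the real helpers are added to f2fs.h below, together with a per-inode count), the per-superblock "current" counter moves with every tracked block and the peak is updated opportunistically.

/*
 * Simplified sketch of the accounting added below; per-inode counting
 * and the release/commit/revoke paths are omitted.
 */
static void note_atomic_block(struct f2fs_sb_info *sbi)
{
	s64 cur = atomic64_inc_return(&sbi->current_atomic_write);

	if (cur > sbi->peak_atomic_write)
		sbi->peak_atomic_write = cur;
}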
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index de140afc42c1..b3dbe77f902b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3406,12 +3406,11 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, struct inode *cow_inode = F2FS_I(inode)->cow_inode; pgoff_t index = page->index; int err = 0; - block_t ori_blk_addr; + block_t ori_blk_addr = NULL_ADDR; /* If pos is beyond the end of file, reserve a new block in COW inode */ if ((pos & PAGE_MASK) >= i_size_read(inode)) - return __reserve_data_block(cow_inode, index, blk_addr, - node_changed); + goto reserve_block; /* Look for the block in COW inode first */ err = __find_data_block(cow_inode, index, blk_addr); @@ -3425,10 +3424,12 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, if (err) return err; +reserve_block: /* Finally, we should reserve a new block in COW inode for the update */ err = __reserve_data_block(cow_inode, index, blk_addr, node_changed); if (err) return err; + inc_atomic_write_cnt(inode); if (ori_blk_addr != NULL_ADDR) *blk_addr = ori_blk_addr; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d0f428aef34b..a19f4dfc85a5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -813,6 +813,8 @@ struct f2fs_inode_info { unsigned char i_compress_level; /* compress level (lz4hc,zstd) */ unsigned short i_compress_flag; /* compress flag */ unsigned int i_cluster_size; /* cluster size */ + + unsigned int atomic_write_cnt; }; static inline void get_extent_info(struct extent_info *ext, @@ -1813,6 +1815,12 @@ struct f2fs_sb_info { int max_fragment_chunk; /* max chunk size for block fragmentation mode */ int max_fragment_hole; /* max hole size for block fragmentation mode */ + /* For atomic write statistics */ + atomic64_t current_atomic_write; + s64 peak_atomic_write; + u64 committed_atomic_block; + u64 revoked_atomic_block; + #ifdef CONFIG_F2FS_FS_COMPRESSION struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ @@ -2427,6 +2435,28 @@ static inline void inode_dec_dirty_pages(struct inode *inode) dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); } +static inline void inc_atomic_write_cnt(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + u64 current_write; + + fi->atomic_write_cnt++; + atomic64_inc(&sbi->current_atomic_write); + current_write = atomic64_read(&sbi->current_atomic_write); + if (current_write > sbi->peak_atomic_write) + sbi->peak_atomic_write = current_write; +} + +static inline void release_atomic_write_cnt(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + + atomic64_sub(fi->atomic_write_cnt, &sbi->current_atomic_write); + fi->atomic_write_cnt = 0; +} + static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) { return atomic_read(&sbi->nr_pages[count_type]); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6b1b030830ca..9604b347fb29 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2075,6 +2075,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_update_time(sbi, REQ_TIME); fi->atomic_write_task = current; stat_update_max_atomic_write(inode); + fi->atomic_write_cnt = 0; out: inode_unlock(inode); mnt_drop_write_file(filp); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 12b0edc48f7b..51da73b27a45 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -196,6 +196,7 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) 
clear_inode_flag(fi->cow_inode, FI_ATOMIC_FILE); iput(fi->cow_inode); fi->cow_inode = NULL; + release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_FILE); spin_lock(&sbi->inode_lock[ATOMIC_FILE]); @@ -335,6 +336,11 @@ next: } out: + if (ret) + sbi->revoked_atomic_block += fi->atomic_write_cnt; + else + sbi->committed_atomic_block += fi->atomic_write_cnt; + __complete_revoke_list(inode, &revoke_list, ret ? true : false); return ret; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index faf9a767d05a..1c6203798fb4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3615,6 +3615,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; spin_lock_init(&sbi->gc_urgent_high_lock); + atomic64_set(&sbi->current_atomic_write, 0); sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 6eeefe60a7af..eba5fb1629d7 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -339,6 +339,21 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, sbi->gc_reclaimed_segs[sbi->gc_segment_mode]); } + if (!strcmp(a->attr.name, "current_atomic_write")) { + s64 current_write = atomic64_read(&sbi->current_atomic_write); + + return sysfs_emit(buf, "%lld\n", current_write); + } + + if (!strcmp(a->attr.name, "peak_atomic_write")) + return sysfs_emit(buf, "%lld\n", sbi->peak_atomic_write); + + if (!strcmp(a->attr.name, "committed_atomic_block")) + return sysfs_emit(buf, "%llu\n", sbi->committed_atomic_block); + + if (!strcmp(a->attr.name, "revoked_atomic_block")) + return sysfs_emit(buf, "%llu\n", sbi->revoked_atomic_block); + ui = (unsigned int *)(ptr + a->offset); return sprintf(buf, "%u\n", *ui); @@ -608,6 +623,27 @@ out: return count; } + if (!strcmp(a->attr.name, "peak_atomic_write")) { + if (t != 0) + return -EINVAL; + sbi->peak_atomic_write = 0; + return count; + } + + if (!strcmp(a->attr.name, "committed_atomic_block")) { + if (t != 0) + return -EINVAL; + sbi->committed_atomic_block = 0; + return count; + } + + if (!strcmp(a->attr.name, "revoked_atomic_block")) { + if (t != 0) + return -EINVAL; + sbi->revoked_atomic_block = 0; + return count; + } + *ui = (unsigned int)t; return count; @@ -855,6 +891,12 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole); +/* For atomic write */ +F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, current_atomic_write, current_atomic_write); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block); + #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_urgent_sleep_time), @@ -944,6 +986,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_reclaimed_segments), ATTR_LIST(max_fragment_chunk), ATTR_LIST(max_fragment_hole), + ATTR_LIST(current_atomic_write), + ATTR_LIST(peak_atomic_write), + ATTR_LIST(committed_atomic_block), + ATTR_LIST(revoked_atomic_block), NULL, }; ATTRIBUTE_GROUPS(f2fs); -- cgit v1.2.3-59-g8ed1b From 8ee236dcaa690d09ca612622e8bc8d09c302021d Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Mon, 25 Jul 2022 18:16:33 +0800 Subject: f2fs: fix to remove F2FS_COMPR_FL and tag F2FS_NOCOMP_FL at 
the same time If the inode has the compress flag, it will fail to use 'chattr -c +m' to remove its compress flag and tag no compress flag. However, the same command will be successful when executed again, as shown below: $ touch foo.txt $ chattr +c foo.txt $ chattr -c +m foo.txt chattr: Invalid argument while setting flags on foo.txt $ chattr -c +m foo.txt $ f2fs_io getflags foo.txt get a flag on foo.txt ret=0, flags=nocompression,inline_data Fix this by removing some checks in f2fs_setflags_common() that do not affect the original logic. I go through all the possible scenarios, and the results are as follows. Bold is the only thing that has changed. +---------------+-----------+-----------+----------+ | | file flags | + command +-----------+-----------+----------+ | | no flag | compr | nocompr | +---------------+-----------+-----------+----------+ | chattr +c | compr | compr | -EINVAL | | chattr -c | no flag | no flag | nocompr | | chattr +m | nocompr | -EINVAL | nocompr | | chattr -m | no flag | compr | no flag | | chattr +c +m | -EINVAL | -EINVAL | -EINVAL | | chattr +c -m | compr | compr | compr | | chattr -c +m | nocompr | *nocompr* | nocompr | | chattr -c -m | no flag | no flag | no flag | +---------------+-----------+-----------+----------+ Link: https://lore.kernel.org/linux-f2fs-devel/20220621064833.1079383-1-chaoliu719@gmail.com/ Fixes: 4c8ff7095bef ("f2fs: support data compression") Reviewed-by: Chao Yu Signed-off-by: Chao Liu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9604b347fb29..8098ed890e94 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1873,10 +1873,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (masked_flags & F2FS_COMPR_FL) { if (!f2fs_disable_compressed_file(inode)) return -EINVAL; - } - if (iflags & F2FS_NOCOMP_FL) - return -EINVAL; - if (iflags & F2FS_COMPR_FL) { + } else { if (!f2fs_may_compress(inode)) return -EINVAL; if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) @@ -1885,10 +1882,6 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) set_compress_context(inode); } } - if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { - if (masked_flags & F2FS_COMPR_FL) - return -EINVAL; - } fi->i_flags = iflags | (fi->i_flags & ~mask); f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && -- cgit v1.2.3-59-g8ed1b From 141170b759e03958f296033bb7001be62d1d363b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 Jul 2022 00:03:23 +0800 Subject: f2fs: fix to avoid use f2fs_bug_on() in f2fs_new_node_page() As Dipanjan Das reported, syzkaller found a f2fs bug as below: RIP: 0010:f2fs_new_node_page+0x19ac/0x1fc0 fs/f2fs/node.c:1295 Call Trace: write_all_xattrs fs/f2fs/xattr.c:487 [inline] __f2fs_setxattr+0xe76/0x2e10 fs/f2fs/xattr.c:743 f2fs_setxattr+0x233/0xab0 fs/f2fs/xattr.c:790 f2fs_xattr_generic_set+0x133/0x170 fs/f2fs/xattr.c:86 __vfs_setxattr+0x115/0x180 fs/xattr.c:182 __vfs_setxattr_noperm+0x125/0x5f0 fs/xattr.c:216 __vfs_setxattr_locked+0x1cf/0x260 fs/xattr.c:277 vfs_setxattr+0x13f/0x330 fs/xattr.c:303 setxattr+0x146/0x160 fs/xattr.c:611 path_setxattr+0x1a7/0x1d0 fs/xattr.c:630 __do_sys_lsetxattr fs/xattr.c:653 [inline] __se_sys_lsetxattr fs/xattr.c:649 [inline] __x64_sys_lsetxattr+0xbd/0x150 fs/xattr.c:649 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 NAT entry and nat bitmap can be 
inconsistent, e.g. one nid is free in nat bitmap, and blkaddr in its NAT entry is not NULL_ADDR, it may trigger BUG_ON() in f2fs_new_node_page(), fix it. Reported-by: Dipanjan Das Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 095a634436e3..d1928952d371 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1292,7 +1292,11 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) dec_valid_node_count(sbi, dn->inode, !ofs); goto fail; } - f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); + if (unlikely(new_ni.blk_addr != NULL_ADDR)) { + err = -EFSCORRUPTED; + set_sbi_flag(sbi, SBI_NEED_FSCK); + goto fail; + } #endif new_ni.nid = dn->nid; new_ni.ino = dn->inode->i_ino; -- cgit v1.2.3-59-g8ed1b From 7b01ad7f339e0a272ba840d0bafd5dfbb4e4d501 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 Jul 2022 00:04:17 +0800 Subject: f2fs: obsolete unused MAX_DISCARD_BLOCKS After commit a7eeb823854c ("f2fs: use bitmap in discard_entry"), MAX_DISCARD_BLOCKS became obsolete, remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a19f4dfc85a5..ecd870e5d6da 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -230,7 +230,6 @@ enum { #define CP_PAUSE 0x00000040 #define CP_RESIZE 0x00000080 -#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ #define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ -- cgit v1.2.3-59-g8ed1b From 09beadf289d6e300553e60d6e76f13c0427ecab3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 Jul 2022 22:51:05 +0800 Subject: f2fs: fix to do sanity check on segment type in build_sit_entries() As Wenqing Liu reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216285 RIP: 0010:memcpy_erms+0x6/0x10 f2fs_update_meta_page+0x84/0x570 [f2fs] change_curseg.constprop.0+0x159/0xbd0 [f2fs] f2fs_do_replace_block+0x5c7/0x18a0 [f2fs] f2fs_replace_block+0xeb/0x180 [f2fs] recover_data+0x1abd/0x6f50 [f2fs] f2fs_recover_fsync_data+0x12ce/0x3250 [f2fs] f2fs_fill_super+0x4459/0x6190 [f2fs] mount_bdev+0x2cf/0x3b0 legacy_get_tree+0xed/0x1d0 vfs_get_tree+0x81/0x2b0 path_mount+0x47e/0x19d0 do_mount+0xce/0xf0 __x64_sys_mount+0x12c/0x1a0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd The root cause is segment type is invalid, so in f2fs_do_replace_block(), f2fs accesses f2fs_sm_info::curseg_array with out-of-range segment type, result in accessing invalid curseg->sum_blk during memcpy in f2fs_update_meta_page(). Fix this by adding sanity check on segment type in build_sit_entries(). 
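For context, the guard being added amounts to the following (sketch; both call sites inside build_sit_entries() appear in the hunk below): the on-disk segment type must be range-checked before it can ever be used as an index into the current-segment array.

/*
 * Sketch of the sanity check: reject segment types f2fs could never have
 * written, instead of letting recovery index curseg_array[] with a
 * corrupted value later on.
 */
static int check_raw_seg_type(struct f2fs_sb_info *sbi, unsigned int segno,
			      unsigned char type)
{
	if (type >= NR_PERSISTENT_LOG) {
		f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
			 type, segno);
		return -EFSCORRUPTED;
	}
	return 0;
}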
Reported-by: Wenqing Liu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 51da73b27a45..7b1d2c29aba6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4375,6 +4375,12 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) return err; seg_info_from_raw_sit(se, &sit); + if (se->type >= NR_PERSISTENT_LOG) { + f2fs_err(sbi, "Invalid segment type: %u, segno: %u", + se->type, start); + return -EFSCORRUPTED; + } + sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; if (f2fs_block_unit_discard(sbi)) { @@ -4423,6 +4429,13 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) break; seg_info_from_raw_sit(se, &sit); + if (se->type >= NR_PERSISTENT_LOG) { + f2fs_err(sbi, "Invalid segment type: %u, segno: %u", + se->type, start); + err = -EFSCORRUPTED; + break; + } + sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; if (f2fs_block_unit_discard(sbi)) { -- cgit v1.2.3-59-g8ed1b From 23339e5752d01a4b5e122759b002cf896d26f6c1 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 1 Aug 2022 10:08:08 -0700 Subject: f2fs: revive F2FS_IOC_ABORT_VOLATILE_WRITE F2FS_IOC_ABORT_VOLATILE_WRITE was used to abort a atomic write before. However it was removed accidentally. So revive it by changing the name, since volatile write had gone. Signed-off-by: Daeho Jeong Fiexes: 7bc155fec5b3("f2fs: kill volatile write support") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 30 ++++++++++++++++++++++++++++-- include/uapi/linux/f2fs.h | 2 +- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8098ed890e94..29711e5b6983 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2109,6 +2109,31 @@ unlock_out: return ret; } +static int f2fs_ioc_abort_atomic_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); + int ret; + + if (!inode_owner_or_capable(mnt_userns, inode)) + return -EACCES; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + inode_lock(inode); + + if (f2fs_is_atomic_file(inode)) + f2fs_abort_atomic_write(inode, true); + + inode_unlock(inode); + + mnt_drop_write_file(filp); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return ret; +} + static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -4054,9 +4079,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_start_atomic_write(filp); case F2FS_IOC_COMMIT_ATOMIC_WRITE: return f2fs_ioc_commit_atomic_write(filp); + case F2FS_IOC_ABORT_ATOMIC_WRITE: + return f2fs_ioc_abort_atomic_write(filp); case F2FS_IOC_START_VOLATILE_WRITE: case F2FS_IOC_RELEASE_VOLATILE_WRITE: - case F2FS_IOC_ABORT_VOLATILE_WRITE: return -EOPNOTSUPP; case F2FS_IOC_SHUTDOWN: return f2fs_ioc_shutdown(filp, arg); @@ -4725,7 +4751,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_COMMIT_ATOMIC_WRITE: case F2FS_IOC_START_VOLATILE_WRITE: case F2FS_IOC_RELEASE_VOLATILE_WRITE: - case F2FS_IOC_ABORT_VOLATILE_WRITE: + case F2FS_IOC_ABORT_ATOMIC_WRITE: case F2FS_IOC_SHUTDOWN: case FITRIM: case FS_IOC_SET_ENCRYPTION_POLICY: diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 352a822d4370..3121d127d5aa 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -13,7 +13,7 @@ #define F2FS_IOC_COMMIT_ATOMIC_WRITE 
_IO(F2FS_IOCTL_MAGIC, 2) #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) -#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_ABORT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 5) #define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) #define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ -- cgit v1.2.3-59-g8ed1b From 4a2c5b7994960fac29cf8a3f4e62855bae1b27d4 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 1 Aug 2022 19:26:04 +0800 Subject: f2fs: fix null-ptr-deref in f2fs_get_dnode_of_data There is issue as follows when test f2fs atomic write: F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock F2FS-fs (loop0): invalid crc_offset: 0 F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=1, run fsck to fix. F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix. ================================================================== BUG: KASAN: null-ptr-deref in f2fs_get_dnode_of_data+0xac/0x16d0 Read of size 8 at addr 0000000000000028 by task rep/1990 CPU: 4 PID: 1990 Comm: rep Not tainted 5.19.0-rc6-next-20220715 #266 Call Trace: dump_stack_lvl+0x6e/0x91 print_report.cold+0x49a/0x6bb kasan_report+0xa8/0x130 f2fs_get_dnode_of_data+0xac/0x16d0 f2fs_do_write_data_page+0x2a5/0x1030 move_data_page+0x3c5/0xdf0 do_garbage_collect+0x2015/0x36c0 f2fs_gc+0x554/0x1d30 f2fs_balance_fs+0x7f5/0xda0 f2fs_write_single_data_page+0xb66/0xdc0 f2fs_write_cache_pages+0x716/0x1420 f2fs_write_data_pages+0x84f/0x9a0 do_writepages+0x130/0x3a0 filemap_fdatawrite_wbc+0x87/0xa0 file_write_and_wait_range+0x157/0x1c0 f2fs_do_sync_file+0x206/0x12d0 f2fs_sync_file+0x99/0xc0 vfs_fsync_range+0x75/0x140 f2fs_file_write_iter+0xd7b/0x1850 vfs_write+0x645/0x780 ksys_write+0xf1/0x1e0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd As 3db1de0e582c commit changed atomic write way which new a cow_inode for atomic write file, and also mark cow_inode as FI_ATOMIC_FILE. When f2fs_do_write_data_page write cow_inode will use cow_inode's cow_inode which is NULL. Then will trigger null-ptr-deref. To solve above issue, introduce FI_COW_FILE flag for COW inode. 
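The shape of the fix (a sketch; the hunks follow) is to stop tagging the in-memory COW inode as an atomic file and give it a flag of its own, so write paths that must treat COW data as hot can test the inode they are actually writing without ever touching cow_inode->cow_inode, which is NULL for the COW inode itself. The default branch below is simplified; the real temperature selection in __get_segment_type_6() considers more cases.

/*
 * Simplified sketch: hot/cold selection keys off FI_COW_FILE rather than
 * FI_ATOMIC_FILE, since only the COW inode is written through this path.
 */
static int pick_data_temp(struct inode *inode)
{
	if (f2fs_is_cow_file(inode))	/* was: f2fs_is_atomic_file(inode) */
		return CURSEG_HOT_DATA;
	return CURSEG_WARM_DATA;	/* simplified default */
}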
Fiexes: 3db1de0e582c("f2fs: change the current atomic write way") Signed-off-by: Ye Bin Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ++++++ fs/f2fs/file.c | 2 +- fs/f2fs/segment.c | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ecd870e5d6da..b2c7dae4740a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -757,6 +757,7 @@ enum { FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */ FI_COMPRESS_RELEASED, /* compressed blocks were released */ FI_ALIGNED_WRITE, /* enable aligned write */ + FI_COW_FILE, /* indicate COW file */ FI_MAX, /* max flag, never be used */ }; @@ -3236,6 +3237,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode) return is_inode_flag_set(inode, FI_ATOMIC_FILE); } +static inline bool f2fs_is_cow_file(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_COW_FILE); +} + static inline bool f2fs_is_first_block_written(struct inode *inode) { return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 29711e5b6983..d2159d6880d6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2061,7 +2061,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); set_inode_flag(inode, FI_ATOMIC_FILE); - set_inode_flag(fi->cow_inode, FI_ATOMIC_FILE); + set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); f2fs_up_write(&fi->i_gc_rwsem[WRITE]); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7b1d2c29aba6..f14c5a807312 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -193,7 +193,7 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (f2fs_is_atomic_file(inode)) { if (clean) truncate_inode_pages_final(inode->i_mapping); - clear_inode_flag(fi->cow_inode, FI_ATOMIC_FILE); + clear_inode_flag(fi->cow_inode, FI_COW_FILE); iput(fi->cow_inode); fi->cow_inode = NULL; release_atomic_write_cnt(inode); @@ -3172,7 +3172,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return CURSEG_COLD_DATA; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || - f2fs_is_atomic_file(inode)) + f2fs_is_cow_file(inode)) return CURSEG_HOT_DATA; return f2fs_rw_hint_to_seg_type(inode->i_write_hint); } else { -- cgit v1.2.3-59-g8ed1b From dbf8e63f48af48f3f0a069fc971c9826312dbfc1 Mon Sep 17 00:00:00 2001 From: Eunhee Rho Date: Mon, 1 Aug 2022 13:40:02 +0900 Subject: f2fs: remove device type check for direct IO To ensure serialized IOs, f2fs allows only LFS mode for zoned device. Remove redundant check for direct IO. Signed-off-by: Eunhee Rho Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b2c7dae4740a..4dc7da8347ce 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4502,12 +4502,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, /* disallow direct IO if any of devices has unaligned blksize */ if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) return true; - /* - * for blkzoned device, fallback direct IO to buffered IO, so - * all IOs can be serialized by log-structured write. 
- */ - if (f2fs_sb_has_blkzoned(sbi)) - return true; + if (f2fs_lfs_mode(sbi) && (rw == WRITE)) { if (block_unaligned_IO(inode, iocb, iter)) return true; -- cgit v1.2.3-59-g8ed1b From 912f0d6580e743eecf908f5566cc064b63c55908 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Aug 2022 20:33:54 -0700 Subject: f2fs: do not set compression bit if kernel doesn't support If kernel doesn't have CONFIG_F2FS_FS_COMPRESSION, a file having FS_COMPR_FL via ioctl(FS_IOC_SETFLAGS) is unaccessible due to f2fs_is_compress_backend_ready(). Let's avoid it. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/file.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4dc7da8347ce..d187bdc39b79 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4292,8 +4292,9 @@ static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode, unsigned int c_len) { } #endif -static inline void set_compress_context(struct inode *inode) +static inline int set_compress_context(struct inode *inode) { +#ifdef CONFIG_F2FS_FS_COMPRESSION struct f2fs_sb_info *sbi = F2FS_I_SB(inode); F2FS_I(inode)->i_compress_algorithm = @@ -4316,6 +4317,10 @@ static inline void set_compress_context(struct inode *inode) stat_inc_compr_inode(inode); inc_compr_inode_stat(inode); f2fs_mark_inode_dirty_sync(inode, true); + return 0; +#else + return -EOPNOTSUPP; +#endif } static inline bool f2fs_disable_compressed_file(struct inode *inode) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d2159d6880d6..67e5a6cd25e8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1878,8 +1878,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) return -EINVAL; if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) return -EINVAL; - - set_compress_context(inode); + if (set_compress_context(inode)) + return -EOPNOTSUPP; } } -- cgit v1.2.3-59-g8ed1b From 90be48bd9d29ece3965e5e8b21499b6db166e57b Mon Sep 17 00:00:00 2001 From: Jaewook Kim Date: Wed, 3 Aug 2022 17:53:58 +0900 Subject: f2fs: do not allow to decompress files have FI_COMPRESS_RELEASED If a file has FI_COMPRESS_RELEASED, all writes for it should not be allowed. However, as of now, in case of compress_mode=user, writes triggered by IOCTLs like F2FS_IOC_DE/COMPRESS_FILE are allowed unexpectly, which could crash that file. To fix it, let's do not allow F2FS_IOC_DE/COMPRESS_IOCTL if a file already has FI_COMPRESS_RELEASED flag. This is the reproduction process: 1. $ touch ./file 2. $ chattr +c ./file 3. $ dd if=/dev/random of=./file bs=4096 count=30 conv=notrunc 4. $ dd if=/dev/zero of=./file bs=4096 count=34 seek=30 conv=notrunc 5. $ sync 6. $ do_compress ./file ; call F2FS_IOC_COMPRESS_FILE 7. $ get_compr_blocks ./file ; call F2FS_IOC_GET_COMPRESS_BLOCKS 8. $ release ./file ; call F2FS_IOC_RELEASE_COMPRESS_BLOCKS 9. $ do_compress ./file ; call F2FS_IOC_COMPRESS_FILE again 10. $ get_compr_blocks ./file ; call F2FS_IOC_GET_COMPRESS_BLOCKS again This reproduction process is tested in 128kb cluster size. You can find compr_blocks has a negative value. 
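The numbered shell steps above map onto three ioctls; a compact C rendering of steps 6-10 is sketched below for clarity. It assumes <linux/f2fs.h> provides F2FS_IOC_COMPRESS_FILE, F2FS_IOC_GET_COMPRESS_BLOCKS and F2FS_IOC_RELEASE_COMPRESS_BLOCKS (the do_compress/get_compr_blocks/release helpers named in the steps are stand-ins for exactly these calls), that the two block-count ioctls return their count through a __u64 pointer argument as their _IOR definitions suggest, and that ./file is the compression-enabled file prepared in steps 1-5 on a 128KB-cluster image.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/f2fs.h>
#include <linux/types.h>

static void get_compr_blocks(int fd, const char *when)
{
	__u64 blocks = 0;

	if (ioctl(fd, F2FS_IOC_GET_COMPRESS_BLOCKS, &blocks) < 0)
		perror("F2FS_IOC_GET_COMPRESS_BLOCKS");
	else
		printf("compr_blocks %s: %llu\n", when,
		       (unsigned long long)blocks);
}

int main(void)
{
	__u64 released = 0;
	int fd = open("./file", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	ioctl(fd, F2FS_IOC_COMPRESS_FILE);			/* step 6 */
	get_compr_blocks(fd, "after compress");			/* step 7 */
	ioctl(fd, F2FS_IOC_RELEASE_COMPRESS_BLOCKS, &released);	/* step 8 */

	/*
	 * Step 9: with this patch applied, the second compress is rejected
	 * with -EINVAL because FI_COMPRESS_RELEASED is still set.
	 */
	if (ioctl(fd, F2FS_IOC_COMPRESS_FILE) < 0)
		perror("F2FS_IOC_COMPRESS_FILE");

	get_compr_blocks(fd, "after re-compress");		/* step 10 */
	close(fd);
	return 0;
}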
Fixes: 5fdb322ff2c2b ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE") Signed-off-by: Junbeom Yeom Signed-off-by: Sungjong Seo Signed-off-by: Youngjin Gil Signed-off-by: Jaewook Kim Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 67e5a6cd25e8..0c984718f3c5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3964,6 +3964,11 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) goto out; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + ret = -EINVAL; + goto out; + } + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; @@ -4031,6 +4036,11 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) goto out; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + ret = -EINVAL; + goto out; + } + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; -- cgit v1.2.3-59-g8ed1b From bff139b49d9f70c1ac5384aac94554846aa834de Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Tue, 2 Aug 2022 12:24:37 -0700 Subject: f2fs: handle decompress only post processing in softirq Now decompression is being handled in workqueue and it makes read I/O latency non-deterministic, because of the non-deterministic scheduling nature of workqueues. So, I made it handled in softirq context only if possible, not in low memory devices, since this modification will maintain decompresion related memory a little longer. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 203 +++++++++++++++++++++++++++++++++++------------------ fs/f2fs/data.c | 52 +++++++++----- fs/f2fs/f2fs.h | 17 +++-- 3 files changed, 179 insertions(+), 93 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index fa237e5c7173..086e6f74ce32 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -729,14 +729,19 @@ out: return ret; } -void f2fs_decompress_cluster(struct decompress_io_ctx *dic) +static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + bool pre_alloc); +static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + bool bypass_destroy_callback, bool pre_alloc); + +void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); struct f2fs_inode_info *fi = F2FS_I(dic->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; + bool bypass_callback = false; int ret; - int i; trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx, dic->cluster_size, fi->i_compress_algorithm); @@ -746,41 +751,10 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) goto out_end_io; } - dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); - if (!dic->tpages) { - ret = -ENOMEM; - goto out_end_io; - } - - for (i = 0; i < dic->cluster_size; i++) { - if (dic->rpages[i]) { - dic->tpages[i] = dic->rpages[i]; - continue; - } - - dic->tpages[i] = f2fs_compress_alloc_page(); - if (!dic->tpages[i]) { - ret = -ENOMEM; - goto out_end_io; - } - } - - if (cops->init_decompress_ctx) { - ret = cops->init_decompress_ctx(dic); - if (ret) - goto out_end_io; - } - - dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); - if (!dic->rbuf) { - ret = -ENOMEM; - goto out_destroy_decompress_ctx; - } - - dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages); - if (!dic->cbuf) { - ret = -ENOMEM; - goto out_vunmap_rbuf; + ret = 
f2fs_prepare_decomp_mem(dic, false); + if (ret) { + bypass_callback = true; + goto out_release; } dic->clen = le32_to_cpu(dic->cbuf->clen); @@ -788,7 +762,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) { ret = -EFSCORRUPTED; - goto out_vunmap_cbuf; + goto out_release; } ret = cops->decompress_pages(dic); @@ -809,17 +783,13 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic) } } -out_vunmap_cbuf: - vm_unmap_ram(dic->cbuf, dic->nr_cpages); -out_vunmap_rbuf: - vm_unmap_ram(dic->rbuf, dic->cluster_size); -out_destroy_decompress_ctx: - if (cops->destroy_decompress_ctx) - cops->destroy_decompress_ctx(dic); +out_release: + f2fs_release_decomp_mem(dic, bypass_callback, false); + out_end_io: trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx, dic->clen, ret); - f2fs_decompress_end_io(dic, ret); + f2fs_decompress_end_io(dic, ret, in_task); } /* @@ -829,7 +799,7 @@ out_end_io: * (or in the case of a failure, cleans up without actually decompressing). */ void f2fs_end_read_compressed_page(struct page *page, bool failed, - block_t blkaddr) + block_t blkaddr, bool in_task) { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); @@ -839,12 +809,12 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, if (failed) WRITE_ONCE(dic->failed, true); - else if (blkaddr) + else if (blkaddr && in_task) f2fs_cache_compressed_page(sbi, page, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) - f2fs_decompress_cluster(dic); + f2fs_decompress_cluster(dic, in_task); } static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index) @@ -1552,16 +1522,85 @@ destroy_out: return err; } -static void f2fs_free_dic(struct decompress_io_ctx *dic); +static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi, + bool pre_alloc) +{ + return pre_alloc ^ f2fs_low_mem_mode(sbi); +} + +static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + bool pre_alloc) +{ + const struct f2fs_compress_ops *cops = + f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + int i; + + if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + return 0; + + dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); + if (!dic->tpages) + return -ENOMEM; + + for (i = 0; i < dic->cluster_size; i++) { + if (dic->rpages[i]) { + dic->tpages[i] = dic->rpages[i]; + continue; + } + + dic->tpages[i] = f2fs_compress_alloc_page(); + if (!dic->tpages[i]) + return -ENOMEM; + } + + dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); + if (!dic->rbuf) + return -ENOMEM; + + dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages); + if (!dic->cbuf) + return -ENOMEM; + + if (cops->init_decompress_ctx) { + int ret = cops->init_decompress_ctx(dic); + + if (ret) + return ret; + } + + return 0; +} + +static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + bool bypass_destroy_callback, bool pre_alloc) +{ + const struct f2fs_compress_ops *cops = + f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + + if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + return; + + if (!bypass_destroy_callback && cops->destroy_decompress_ctx) + cops->destroy_decompress_ctx(dic); + + if (dic->cbuf) + vm_unmap_ram(dic->cbuf, dic->nr_cpages); + + if (dic->rbuf) + vm_unmap_ram(dic->rbuf, dic->cluster_size); +} + +static void f2fs_free_dic(struct decompress_io_ctx *dic, + bool bypass_destroy_callback); struct decompress_io_ctx *f2fs_alloc_dic(struct 
compress_ctx *cc) { struct decompress_io_ctx *dic; pgoff_t start_idx = start_idx_of_cluster(cc); - int i; + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + int i, ret; - dic = f2fs_kmem_cache_alloc(dic_entry_slab, GFP_F2FS_ZERO, - false, F2FS_I_SB(cc->inode)); + dic = f2fs_kmem_cache_alloc(dic_entry_slab, GFP_F2FS_ZERO, false, sbi); if (!dic) return ERR_PTR(-ENOMEM); @@ -1587,32 +1626,43 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->nr_rpages = cc->cluster_size; dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages); - if (!dic->cpages) + if (!dic->cpages) { + ret = -ENOMEM; goto out_free; + } for (i = 0; i < dic->nr_cpages; i++) { struct page *page; page = f2fs_compress_alloc_page(); - if (!page) + if (!page) { + ret = -ENOMEM; goto out_free; + } f2fs_set_compressed_page(page, cc->inode, start_idx + i + 1, dic); dic->cpages[i] = page; } + ret = f2fs_prepare_decomp_mem(dic, true); + if (ret) + goto out_free; + return dic; out_free: - f2fs_free_dic(dic); - return ERR_PTR(-ENOMEM); + f2fs_free_dic(dic, true); + return ERR_PTR(ret); } -static void f2fs_free_dic(struct decompress_io_ctx *dic) +static void f2fs_free_dic(struct decompress_io_ctx *dic, + bool bypass_destroy_callback) { int i; + f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); + if (dic->tpages) { for (i = 0; i < dic->cluster_size; i++) { if (dic->rpages[i]) @@ -1637,17 +1687,33 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic) kmem_cache_free(dic_entry_slab, dic); } -static void f2fs_put_dic(struct decompress_io_ctx *dic) +static void f2fs_late_free_dic(struct work_struct *work) +{ + struct decompress_io_ctx *dic = + container_of(work, struct decompress_io_ctx, free_work); + + f2fs_free_dic(dic, false); +} + +static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task) { - if (refcount_dec_and_test(&dic->refcnt)) - f2fs_free_dic(dic); + if (refcount_dec_and_test(&dic->refcnt)) { + if (in_task) { + f2fs_free_dic(dic, false); + } else { + INIT_WORK(&dic->free_work, f2fs_late_free_dic); + queue_work(F2FS_I_SB(dic->inode)->post_read_wq, + &dic->free_work); + } + } } /* * Update and unlock the cluster's pagecache pages, and release the reference to * the decompress_io_ctx that was being held for I/O completion. */ -static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) +static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task) { int i; @@ -1668,7 +1734,7 @@ static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) unlock_page(rpage); } - f2fs_put_dic(dic); + f2fs_put_dic(dic, in_task); } static void f2fs_verify_cluster(struct work_struct *work) @@ -1685,14 +1751,15 @@ static void f2fs_verify_cluster(struct work_struct *work) SetPageError(rpage); } - __f2fs_decompress_end_io(dic, false); + __f2fs_decompress_end_io(dic, false, true); } /* * This is called when a compressed cluster has been decompressed * (or failed to be read and/or decompressed). 
*/ -void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task) { if (!failed && dic->need_verity) { /* @@ -1704,7 +1771,7 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) INIT_WORK(&dic->verity_work, f2fs_verify_cluster); fsverity_enqueue_verify_work(&dic->verity_work); } else { - __f2fs_decompress_end_io(dic, failed); + __f2fs_decompress_end_io(dic, failed, in_task); } } @@ -1713,12 +1780,12 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) * * This is called when the page is no longer needed and can be freed. */ -void f2fs_put_page_dic(struct page *page) +void f2fs_put_page_dic(struct page *page, bool in_task) { struct decompress_io_ctx *dic = (struct decompress_io_ctx *)page_private(page); - f2fs_put_dic(dic); + f2fs_put_dic(dic, in_task); } /* diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b3dbe77f902b..f48eec8f93e2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -119,7 +119,7 @@ struct bio_post_read_ctx { block_t fs_blkaddr; }; -static void f2fs_finish_read_bio(struct bio *bio) +static void f2fs_finish_read_bio(struct bio *bio, bool in_task) { struct bio_vec *bv; struct bvec_iter_all iter_all; @@ -133,8 +133,9 @@ static void f2fs_finish_read_bio(struct bio *bio) if (f2fs_is_compressed_page(page)) { if (bio->bi_status) - f2fs_end_read_compressed_page(page, true, 0); - f2fs_put_page_dic(page); + f2fs_end_read_compressed_page(page, true, 0, + in_task); + f2fs_put_page_dic(page, in_task); continue; } @@ -191,7 +192,7 @@ static void f2fs_verify_bio(struct work_struct *work) fsverity_verify_bio(bio); } - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, true); } /* @@ -203,7 +204,7 @@ static void f2fs_verify_bio(struct work_struct *work) * can involve reading verity metadata pages from the file, and these verity * metadata pages may be encrypted and/or compressed. */ -static void f2fs_verify_and_finish_bio(struct bio *bio) +static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task) { struct bio_post_read_ctx *ctx = bio->bi_private; @@ -211,7 +212,7 @@ static void f2fs_verify_and_finish_bio(struct bio *bio) INIT_WORK(&ctx->work, f2fs_verify_bio); fsverity_enqueue_verify_work(&ctx->work); } else { - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, in_task); } } @@ -224,7 +225,8 @@ static void f2fs_verify_and_finish_bio(struct bio *bio) * that the bio includes at least one compressed page. The actual decompression * is done on a per-cluster basis, not a per-bio basis. */ -static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx) +static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, + bool in_task) { struct bio_vec *bv; struct bvec_iter_all iter_all; @@ -237,7 +239,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx) /* PG_error was set if decryption failed. 
*/ if (f2fs_is_compressed_page(page)) f2fs_end_read_compressed_page(page, PageError(page), - blkaddr); + blkaddr, in_task); else all_compressed = false; @@ -262,15 +264,16 @@ static void f2fs_post_read_work(struct work_struct *work) fscrypt_decrypt_bio(ctx->bio); if (ctx->enabled_steps & STEP_DECOMPRESS) - f2fs_handle_step_decompress(ctx); + f2fs_handle_step_decompress(ctx, true); - f2fs_verify_and_finish_bio(ctx->bio); + f2fs_verify_and_finish_bio(ctx->bio, true); } static void f2fs_read_end_io(struct bio *bio) { struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); struct bio_post_read_ctx *ctx; + bool intask = in_task(); iostat_update_and_unbind_ctx(bio, 0); ctx = bio->bi_private; @@ -281,16 +284,29 @@ static void f2fs_read_end_io(struct bio *bio) } if (bio->bi_status) { - f2fs_finish_read_bio(bio); + f2fs_finish_read_bio(bio, intask); return; } - if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) { - INIT_WORK(&ctx->work, f2fs_post_read_work); - queue_work(ctx->sbi->post_read_wq, &ctx->work); - } else { - f2fs_verify_and_finish_bio(bio); + if (ctx) { + unsigned int enabled_steps = ctx->enabled_steps & + (STEP_DECRYPT | STEP_DECOMPRESS); + + /* + * If we have only decompression step between decompression and + * decrypt, we don't need post processing for this. + */ + if (enabled_steps == STEP_DECOMPRESS && + !f2fs_low_mem_mode(sbi)) { + f2fs_handle_step_decompress(ctx, intask); + } else if (enabled_steps) { + INIT_WORK(&ctx->work, f2fs_post_read_work); + queue_work(ctx->sbi->post_read_wq, &ctx->work); + return; + } } + + f2fs_verify_and_finish_bio(bio, intask); } static void f2fs_write_end_io(struct bio *bio) @@ -2220,7 +2236,7 @@ skip_reading_dnode: if (f2fs_load_compressed_page(sbi, page, blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) - f2fs_decompress_cluster(dic); + f2fs_decompress_cluster(dic, true); continue; } @@ -2238,7 +2254,7 @@ submit_and_realloc: page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - f2fs_decompress_end_io(dic, ret); + f2fs_decompress_end_io(dic, ret, true); f2fs_put_dnode(&dn); *bio_ret = NULL; return ret; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d187bdc39b79..2f3c4eef89c2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1590,6 +1590,7 @@ struct decompress_io_ctx { void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ struct work_struct verity_work; /* work to verify the decompressed pages */ + struct work_struct free_work; /* work for late free this structure itself */ }; #define NULL_CLUSTER ((unsigned int)(~0)) @@ -4192,9 +4193,9 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); int f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); -void f2fs_decompress_cluster(struct decompress_io_ctx *dic); +void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); void f2fs_end_read_compressed_page(struct page *page, bool failed, - block_t blkaddr); + block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec, @@ -4213,8 +4214,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, bool is_readahead, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx 
*cc); -void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); -void f2fs_put_page_dic(struct page *page); +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, + bool in_task); +void f2fs_put_page_dic(struct page *page, bool in_task); unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); @@ -4260,13 +4262,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page) } static inline int f2fs_init_compress_mempool(void) { return 0; } static inline void f2fs_destroy_compress_mempool(void) { } -static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic) { } +static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic, + bool in_task) { } static inline void f2fs_end_read_compressed_page(struct page *page, - bool failed, block_t blkaddr) + bool failed, block_t blkaddr, bool in_task) { WARN_ON_ONCE(1); } -static inline void f2fs_put_page_dic(struct page *page) +static inline void f2fs_put_page_dic(struct page *page, bool in_task) { WARN_ON_ONCE(1); } -- cgit v1.2.3-59-g8ed1b From e53f8643474a32ddfcfd04e5b22613fdd2a92a52 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Aug 2022 21:38:21 +0800 Subject: f2fs: clean up f2fs_abort_atomic_write() f2fs_abort_atomic_write() has checked whether current inode is atomic_write one or not, it's redundant to check in its caller, remove it for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +++------ fs/f2fs/inode.c | 3 +-- fs/f2fs/segment.c | 27 ++++++++++++++------------- fs/f2fs/super.c | 3 +-- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0c984718f3c5..24f5b02c78e7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1822,8 +1822,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) atomic_read(&inode->i_writecount) != 1) return 0; - if (f2fs_is_atomic_file(inode)) - f2fs_abort_atomic_write(inode, true); + f2fs_abort_atomic_write(inode, true); return 0; } @@ -1837,8 +1836,7 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) * until all the writers close its file. Since this should be done * before dropping file lock, it needs to do in ->flush. 
*/ - if (f2fs_is_atomic_file(inode) && - F2FS_I(inode)->atomic_write_task == current) + if (F2FS_I(inode)->atomic_write_task == current) f2fs_abort_atomic_write(inode, true); return 0; } @@ -2124,8 +2122,7 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) inode_lock(inode); - if (f2fs_is_atomic_file(inode)) - f2fs_abort_atomic_write(inode, true); + f2fs_abort_atomic_write(inode, true); inode_unlock(inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index fc55f5bd1fcc..6d11c365d7b4 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -744,8 +744,7 @@ void f2fs_evict_inode(struct inode *inode) nid_t xnid = F2FS_I(inode)->i_xattr_nid; int err = 0; - if (f2fs_is_atomic_file(inode)) - f2fs_abort_atomic_write(inode, true); + f2fs_abort_atomic_write(inode, true); trace_f2fs_evict_inode(inode); truncate_inode_pages_final(&inode->i_data); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f14c5a807312..72ce00ebcede 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -190,19 +190,20 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - if (f2fs_is_atomic_file(inode)) { - if (clean) - truncate_inode_pages_final(inode->i_mapping); - clear_inode_flag(fi->cow_inode, FI_COW_FILE); - iput(fi->cow_inode); - fi->cow_inode = NULL; - release_atomic_write_cnt(inode); - clear_inode_flag(inode, FI_ATOMIC_FILE); - - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - sbi->atomic_files--; - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); - } + if (!f2fs_is_atomic_file(inode)) + return; + + if (clean) + truncate_inode_pages_final(inode->i_mapping); + clear_inode_flag(fi->cow_inode, FI_COW_FILE); + iput(fi->cow_inode); + fi->cow_inode = NULL; + release_atomic_write_cnt(inode); + clear_inode_flag(inode, FI_ATOMIC_FILE); + + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + sbi->atomic_files--; + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); } static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1c6203798fb4..1a0578a3f4e4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1411,8 +1411,7 @@ static int f2fs_drop_inode(struct inode *inode) atomic_inc(&inode->i_count); spin_unlock(&inode->i_lock); - if (f2fs_is_atomic_file(inode)) - f2fs_abort_atomic_write(inode, true); + f2fs_abort_atomic_write(inode, true); /* should remain fi->extent_tree for writepage */ f2fs_destroy_extent_node(inode); -- cgit v1.2.3-59-g8ed1b From 4f8219f8aa175d5a46703631abaae745592efe29 Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Sun, 31 Jul 2022 11:33:45 +0800 Subject: f2fs: introduce f2fs_all_cluster_page_ready When writing a whole cluster whose pages are all uptodate, there is no need to call f2fs_prepare_compress_overwrite; introduce f2fs_all_cluster_page_ready to detect this case and avoid it.
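To make the new check concrete before the diff that follows, here is a simplified userspace model of the predicate. It is only a sketch: struct fake_page and the fixed cluster size of 4 are stand-ins for the kernel's page and compress_ctx structures, and only the alignment, contiguity and uptodate checks of the real function are mirrored.

#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	unsigned long index;
	bool uptodate;
};

/*
 * Mirrors the shape of f2fs_all_cluster_page_ready(): the batch must still
 * hold a whole, index-contiguous cluster, and (when requested) every page
 * in it must already be uptodate.
 */
static bool all_cluster_pages_ready(const struct fake_page *pages, int index,
				    int nr_pages, int cluster_size,
				    bool uptodate)
{
	unsigned long pgidx = pages[index].index;
	int i = uptodate ? 0 : 1;

	/* A full-cluster overwrite must start on a cluster boundary. */
	if (uptodate && (pgidx % cluster_size))
		return false;

	if (nr_pages - index < cluster_size)
		return false;

	for (; i < cluster_size; i++) {
		if (pages[index + i].index != pgidx + i)
			return false;
		if (uptodate && !pages[index + i].uptodate)
			return false;
	}
	return true;
}

int main(void)
{
	struct fake_page pages[4] = {
		{ 0, true }, { 1, true }, { 2, true }, { 3, false },
	};

	/* Page 3 is not uptodate yet: prints 0. */
	printf("%d\n", all_cluster_pages_ready(pages, 0, 4, 4, true));

	pages[3].uptodate = true;
	/* Whole cluster present and uptodate: prints 1. */
	printf("%d\n", all_cluster_pages_ready(pages, 0, 4, 4, true));
	return 0;
}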
Signed-off-by: Fengnan Chang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 21 ++++++++++++++------- fs/f2fs/data.c | 8 ++++++-- fs/f2fs/f2fs.h | 4 ++-- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 086e6f74ce32..75886b493ec3 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -841,20 +841,27 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) return is_page_in_cluster(cc, index); } -bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec, - int index, int nr_pages) +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct pagevec *pvec, + int index, int nr_pages, bool uptodate) { - unsigned long pgidx; - int i; + unsigned long pgidx = pvec->pages[index]->index; + int i = uptodate ? 0 : 1; - if (nr_pages - index < cc->cluster_size) + /* + * when uptodate set to true, try to check all pages in cluster is + * uptodate or not. + */ + if (uptodate && (pgidx % cc->cluster_size)) return false; - pgidx = pvec->pages[index]->index; + if (nr_pages - index < cc->cluster_size) + return false; - for (i = 1; i < cc->cluster_size; i++) { + for (; i < cc->cluster_size; i++) { if (pvec->pages[index + i]->index != pgidx + i) return false; + if (uptodate && !PageUptodate(pvec->pages[index + i])) + return false; } return true; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f48eec8f93e2..34a7a9affea0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3011,6 +3011,10 @@ readd: if (!f2fs_cluster_is_empty(&cc)) goto lock_page; + if (f2fs_all_cluster_page_ready(&cc, + &pvec, i, nr_pages, true)) + goto lock_page; + ret2 = f2fs_prepare_compress_overwrite( inode, &pagep, page->index, &fsdata); @@ -3021,8 +3025,8 @@ readd: } else if (ret2 && (!f2fs_compress_write_end(inode, fsdata, page->index, 1) || - !f2fs_all_cluster_page_loaded(&cc, - &pvec, i, nr_pages))) { + !f2fs_all_cluster_page_ready(&cc, + &pvec, i, nr_pages, false))) { retry = 1; break; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2f3c4eef89c2..35383f5353a4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4198,8 +4198,8 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); -bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec, - int index, int nr_pages); +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct pagevec *pvec, + int index, int nr_pages, bool uptodate); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); int f2fs_write_multi_pages(struct compress_ctx *cc, -- cgit v1.2.3-59-g8ed1b From 01fc4b9a6ed8eacb64e5609bab7ac963e1c7e486 Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Sun, 31 Jul 2022 11:33:46 +0800 Subject: f2fs: use onstack pages instead of pvec Since a pagevec holds 15 pages, which is not a multiple of 4, writing compressed pages in 64KB units will call pagevec_lookup_range_tag again for the same cluster, and this can sometimes take a lot of time. Use on-stack pages instead of a pvec to mitigate this problem.
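A back-of-the-envelope illustration of the batching mismatch described above, assuming 4KB pages and a 16-page (64KB) cluster: with 15-entry batches every cluster straddles two lookups, while 16-entry batches stay aligned to cluster boundaries. The constants below mirror PAGEVEC_SIZE and the new F2FS_ONSTACK_PAGES, but the program is plain arithmetic, not kernel code.

#include <stdio.h>

static void show_batches(int batch, int total_pages, int cluster)
{
	printf("batch size %2d:", batch);
	for (int start = 0; start < total_pages; start += batch) {
		int end = start + batch - 1;

		if (end > total_pages - 1)
			end = total_pages - 1;
		/* '*' marks a lookup that starts in the middle of a cluster. */
		printf(" [%d..%d]%s", start, end,
		       (start % cluster) ? "*" : "");
	}
	printf("\n");
}

int main(void)
{
	const int cluster = 16;		/* 64KB cluster / 4KB pages */
	const int total_pages = 64;	/* a 256KB dirty range */

	show_batches(15, total_pages, cluster);	/* old pagevec-sized batches */
	show_batches(16, total_pages, cluster);	/* F2FS_ONSTACK_PAGES batches */
	return 0;
}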
Signed-off-by: Fengnan Chang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 ++++---- fs/f2fs/data.c | 16 +++++++--------- fs/f2fs/f2fs.h | 4 +++- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 75886b493ec3..87bf06e15efe 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -841,10 +841,10 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) return is_page_in_cluster(cc, index); } -bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct pagevec *pvec, +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate) { - unsigned long pgidx = pvec->pages[index]->index; + unsigned long pgidx = pages[index]->index; int i = uptodate ? 0 : 1; /* @@ -858,9 +858,9 @@ bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct pagevec *pvec, return false; for (; i < cc->cluster_size; i++) { - if (pvec->pages[index + i]->index != pgidx + i) + if (pages[index + i]->index != pgidx + i) return false; - if (uptodate && !PageUptodate(pvec->pages[index + i])) + if (uptodate && !PageUptodate(pages[index + i])) return false; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 34a7a9affea0..0b159c555069 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2916,7 +2916,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, { int ret = 0; int done = 0, retry = 0; - struct pagevec pvec; + struct page *pages[F2FS_ONSTACK_PAGES]; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); struct bio *bio = NULL; sector_t last_block; @@ -2947,8 +2947,6 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int submitted = 0; int i; - pagevec_init(&pvec); - if (get_dirty_pages(mapping->host) <= SM_I(F2FS_M_SB(mapping))->min_hot_blocks) set_inode_flag(mapping->host, FI_HOT_DATA); @@ -2974,13 +2972,13 @@ retry: tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && !retry && (index <= end)) { - nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, - tag); + nr_pages = find_get_pages_range_tag(mapping, &index, end, + tag, F2FS_ONSTACK_PAGES, pages); if (nr_pages == 0) break; for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; + struct page *page = pages[i]; bool need_readd; readd: need_readd = false; @@ -3012,7 +3010,7 @@ readd: goto lock_page; if (f2fs_all_cluster_page_ready(&cc, - &pvec, i, nr_pages, true)) + pages, i, nr_pages, true)) goto lock_page; ret2 = f2fs_prepare_compress_overwrite( @@ -3026,7 +3024,7 @@ readd: (!f2fs_compress_write_end(inode, fsdata, page->index, 1) || !f2fs_all_cluster_page_ready(&cc, - &pvec, i, nr_pages, false))) { + pages, i, nr_pages, false))) { retry = 1; break; } @@ -3116,7 +3114,7 @@ next: if (need_readd) goto readd; } - pagevec_release(&pvec); + release_pages(pages, nr_pages); cond_resched(); } #ifdef CONFIG_F2FS_FS_COMPRESSION diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 35383f5353a4..eddfd35eadb6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -598,6 +598,8 @@ enum { #define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS #define RECOVERY_MIN_RA_BLOCKS 1 +#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */ + struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ union { @@ -4198,7 +4200,7 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, 
pgoff_t index); -bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct pagevec *pvec, +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); -- cgit v1.2.3-59-g8ed1b