author     2025-08-04 16:27:21 -0700
committer  2025-08-04 16:27:21 -0700
commit     0974f486f3dde9df1ad979d4ff341dc9c2d545f5 (patch)
tree       4a3ccf82756062f9b8b50cbb5138ca0db56160d7
parent     Merge tag 'printk-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux (diff)
parent     f2fs: drop inode from the donation list when the last file is closed (diff)
Merge tag 'f2fs-for-6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"Three main updates: folio conversion by Matthew, switch to a new mount
API by Hongbo and Eric, and several sysfs entries to tune GCs for ZUFS
with finer granularity by Daeho.
There are also patches to address bugs and issues in the existing
features such as GCs, file pinning, write-while-dio-read, contiguous
block allocation, and memory access violations.
Enhancements:
- switch to new mount API and folio conversion
- add sysfs nodes to control F2FS GCs for ZUFS
- improve performance on the nat entry cache
- drop inode from the donation list when the last file is closed
- avoid splitting bio when reading multiple pages
Bug fixes:
- fix to trigger foreground gc during f2fs_map_blocks() in lfs mode
- make sure zoned device GC uses FG_GC when free sections run short
- fix to calculate dirty data during has_not_enough_free_secs()
- fix to update upper_p in __get_secs_required() correctly
- wait for inflight dio completion, excluding pinned files read using dio
- don't break allocation when crossing contiguous sections
- vm_unmap_ram() may be called from an invalid context
- fix to avoid out-of-boundary access in dnode page
- fix to avoid panic in f2fs_evict_inode
- fix to avoid UAF in f2fs_sync_inode_meta()
- fix to use f2fs_is_valid_blkaddr_raw() in do_write_page()
- fix UAF of f2fs_inode_info in f2fs_free_dic
- fix to avoid invalid wait context issue
- fix bio memleak when committing super block
- handle nat.blkaddr corruption in f2fs_get_node_info()
In addition, there are also clean-ups and minor bug fixes"
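One item above, "drop inode from the donation list when the last file is
closed", pairs an open counter with the page-cache donation list. Below is a
paraphrased sketch of the idea, based only on the hunks further down (the new
open_count field in f2fs_inode_info, the atomic_inc() in f2fs_file_open(), and
the f2fs_remove_donate_inode() declaration added to f2fs.h); the release-side
pairing shown here is an assumption, not the literal patch:

	/* hypothetical last-close hook, not the actual f2fs code */
	static int demo_f2fs_release_file(struct inode *inode, struct file *filp)
	{
		/* last close: stop donating this inode's page cache */
		if (atomic_dec_and_test(&F2FS_I(inode)->open_count))
			f2fs_remove_donate_inode(inode);
		return 0;
	}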
* tag 'f2fs-for-6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (109 commits)
f2fs: drop inode from the donation list when the last file is closed
f2fs: add gc_boost_gc_greedy sysfs node
f2fs: add gc_boost_gc_multiple sysfs node
f2fs: fix to trigger foreground gc during f2fs_map_blocks() in lfs mode
f2fs: fix to calculate dirty data during has_not_enough_free_secs()
f2fs: fix to update upper_p in __get_secs_required() correctly
f2fs: directly add newly allocated pre-dirty nat entry to dirty set list
f2fs: avoid redundant clean nat entry move in lru list
f2fs: zone: wait for inflight dio completion, excluding pinned files read using dio
f2fs: ignore valid ratio when free section count is low
f2fs: don't break allocation when crossing contiguous sections
f2fs: remove unnecessary tracepoint enabled check
f2fs: merge the two conditions to avoid code duplication
f2fs: vm_unmap_ram() may be called from an invalid context
f2fs: fix to avoid out-of-boundary access in dnode page
f2fs: switch to the new mount api
f2fs: introduce fs_context_operation structure
f2fs: separate the options parsing and options checking
f2fs: Add f2fs_fs_context to record the mount options
f2fs: Allow sbi to be NULL in f2fs_printk
...
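Several entries above belong to the mount-API series ("switch to the new mount
api", "introduce fs_context_operation structure", "separate the options parsing
and options checking"). For readers unfamiliar with that framework, here is a
minimal, illustrative sketch of the fs_context pattern the series adopts — all
demo_* names are placeholders, not f2fs's actual tables:

	#include <linux/fs_context.h>
	#include <linux/fs_parser.h>

	enum { Opt_alloc_mode };

	static const struct fs_parameter_spec demo_param_specs[] = {
		fsparam_string("alloc_mode", Opt_alloc_mode),
		{}
	};

	static int demo_parse_param(struct fs_context *fc,
				    struct fs_parameter *param)
	{
		struct fs_parse_result result;
		int opt = fs_parse(fc, demo_param_specs, param, &result);

		if (opt < 0)
			return opt;
		/* record the option in fc->fs_private for get_tree time */
		return 0;
	}

	static const struct fs_context_operations demo_context_ops = {
		.parse_param	= demo_parse_param,
		/* .get_tree, .reconfigure and .free also belong here */
	};

The point of the conversion is that options are parsed into a context object up
front and can be validated before the superblock is built, which appears to be
what the "separate the options parsing and options checking" commit refers to.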
-rw-r--r--  Documentation/ABI/testing/sysfs-fs-f2fs |   22
-rw-r--r--  Documentation/filesystems/f2fs.rst      |    6
-rw-r--r--  fs/f2fs/checkpoint.c                    |    8
-rw-r--r--  fs/f2fs/compress.c                      |  120
-rw-r--r--  fs/f2fs/data.c                          |  183
-rw-r--r--  fs/f2fs/debug.c                         |   21
-rw-r--r--  fs/f2fs/dir.c                           |    4
-rw-r--r--  fs/f2fs/extent_cache.c                  |   10
-rw-r--r--  fs/f2fs/f2fs.h                          |  151
-rw-r--r--  fs/f2fs/file.c                          |  107
-rw-r--r--  fs/f2fs/gc.c                            |   54
-rw-r--r--  fs/f2fs/gc.h                            |    5
-rw-r--r--  fs/f2fs/inline.c                        |   20
-rw-r--r--  fs/f2fs/inode.c                         |   84
-rw-r--r--  fs/f2fs/namei.c                         |   12
-rw-r--r--  fs/f2fs/node.c                          |  261
-rw-r--r--  fs/f2fs/node.h                          |   77
-rw-r--r--  fs/f2fs/recovery.c                      |  116
-rw-r--r--  fs/f2fs/segment.c                       |   62
-rw-r--r--  fs/f2fs/segment.h                       |   59
-rw-r--r--  fs/f2fs/super.c                         | 2111
-rw-r--r--  fs/f2fs/sysfs.c                         |   48
-rw-r--r--  include/linux/f2fs_fs.h                 |    2
-rw-r--r--  include/linux/fscrypt.h                 |   10
24 files changed, 2019 insertions, 1534 deletions
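Before the diff itself, a quick illustration of how the GC tunables documented
in the first hunk might be driven from userspace. This is a hypothetical
helper — the disk name "sdb" and the values are placeholders — that simply
writes the sysfs nodes described in Documentation/ABI/testing/sysfs-fs-f2fs:

	#include <stdio.h>

	/* write one f2fs sysfs knob for the given disk */
	static int set_knob(const char *disk, const char *knob, const char *val)
	{
		char path[256];
		FILE *f;

		snprintf(path, sizeof(path), "/sys/fs/f2fs/%s/%s", disk, knob);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fputs(val, f);
		return fclose(f);
	}

	int main(void)
	{
		/* migration-window multiplier for boosted GC; per the ABI
		 * text the range is 1 .. segments per section, default 5 */
		set_knob("sdb", "gc_boost_gc_multiple", "5");
		/* 1 = greedy victim selection under boost, 0 = cost-benefit */
		set_knob("sdb", "gc_boost_gc_greedy", "1");
		return 0;
	}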
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index bf03263b9f46..bc0e7fefc39d 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -861,3 +861,25 @@ Description: This is a read-only entry to show the value of sb.s_encoding_flags, SB_ENC_STRICT_MODE_FL 0x00000001 SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002 ============================ ========== + +What: /sys/fs/f2fs/<disk>/reserved_pin_section +Date: June 2025 +Contact: "Chao Yu" <chao@kernel.org> +Description: This threshold is used to control triggering garbage collection while + fallocating on pinned file, so, it can guarantee there is enough free + reserved section before preallocating on pinned file. + By default, the value is ovp_sections, especially, for zoned ufs, the + value is 1. + +What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple +Date: June 2025 +Contact: "Daeho Jeong" <daehojeong@google.com> +Description: Set a multiplier for the background GC migration window when F2FS GC is + boosted. The range should be from 1 to the segment count in a section. + Default: 5 + +What: /sys/fs/f2fs/<disk>/gc_boost_gc_greedy +Date: June 2025 +Contact: "Daeho Jeong" <daehojeong@google.com> +Description: Control GC algorithm for boost GC. 0: cost benefit, 1: greedy + Default: 1 diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 8eeb7ea14f61..e5bb89452aff 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -238,9 +238,9 @@ usrjquota=<file> Appoint specified file and type during mount, so that quota grpjquota=<file> information can be properly updated during recovery flow, prjjquota=<file> <quota file>: must be in root directory; jqfmt=<quota type> <quota type>: [vfsold,vfsv0,vfsv1]. -offusrjquota Turn off user journalled quota. -offgrpjquota Turn off group journalled quota. -offprjjquota Turn off project journalled quota. +usrjquota= Turn off user journalled quota. +grpjquota= Turn off group journalled quota. +prjjquota= Turn off project journalled quota. quota Enable plain user disk quota accounting. noquota Disable all plain disk quota option. alloc_mode=%s Adjust block allocation policy, which supports "reuse" diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f149ec28aefd..db3831f7f2f5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -82,7 +82,7 @@ repeat: if (folio_test_uptodate(folio)) goto out; - fio.page = &folio->page; + fio.folio = folio; err = f2fs_submit_page_bio(&fio); if (err) { @@ -309,7 +309,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, continue; } - fio.page = &folio->page; + fio.folio = folio; err = f2fs_submit_page_bio(&fio); f2fs_folio_put(folio, err ? 
true : false); @@ -485,7 +485,7 @@ static bool f2fs_dirty_meta_folio(struct address_space *mapping, folio_mark_uptodate(folio); if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_META); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); return true; } return false; @@ -1045,7 +1045,7 @@ void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); } void f2fs_remove_dirty_inode(struct inode *inode) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index b3c1df93a163..5c1f47e45dab 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -23,20 +23,18 @@ static struct kmem_cache *cic_entry_slab; static struct kmem_cache *dic_entry_slab; -static void *page_array_alloc(struct inode *inode, int nr) +static void *page_array_alloc(struct f2fs_sb_info *sbi, int nr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int size = sizeof(struct page *) * nr; if (likely(size <= sbi->page_array_slab_size)) return f2fs_kmem_cache_alloc(sbi->page_array_slab, - GFP_F2FS_ZERO, false, F2FS_I_SB(inode)); + GFP_F2FS_ZERO, false, sbi); return f2fs_kzalloc(sbi, size, GFP_NOFS); } -static void page_array_free(struct inode *inode, void *pages, int nr) +static void page_array_free(struct f2fs_sb_info *sbi, void *pages, int nr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int size = sizeof(struct page *) * nr; if (!pages) @@ -73,17 +71,15 @@ static pgoff_t start_idx_of_cluster(struct compress_ctx *cc) return cc->cluster_idx << cc->log_cluster_size; } -bool f2fs_is_compressed_page(struct page *page) +bool f2fs_is_compressed_page(struct folio *folio) { - if (!PagePrivate(page)) - return false; - if (!page_private(page)) + if (!folio->private) return false; - if (page_private_nonpointer(page)) + if (folio_test_f2fs_nonpointer(folio)) return false; - f2fs_bug_on(F2FS_P_SB(page), - *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); + f2fs_bug_on(F2FS_F_SB(folio), + *((u32 *)folio->private) != F2FS_COMPRESSED_PAGE_MAGIC); return true; } @@ -149,13 +145,13 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc) if (cc->rpages) return 0; - cc->rpages = page_array_alloc(cc->inode, cc->cluster_size); + cc->rpages = page_array_alloc(F2FS_I_SB(cc->inode), cc->cluster_size); return cc->rpages ? 
0 : -ENOMEM; } void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) { - page_array_free(cc->inode, cc->rpages, cc->cluster_size); + page_array_free(F2FS_I_SB(cc->inode), cc->rpages, cc->cluster_size); cc->rpages = NULL; cc->nr_rpages = 0; cc->nr_cpages = 0; @@ -216,13 +212,13 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic) ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, dic->rbuf, &dic->rlen); if (ret != LZO_E_OK) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lzo decompress failed, ret:%d", ret); return -EIO; } if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lzo invalid rlen:%zu, expected:%lu", dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -296,13 +292,13 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, dic->clen, dic->rlen); if (ret < 0) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lz4 decompress failed, ret:%d", ret); return -EIO; } if (ret != PAGE_SIZE << dic->log_cluster_size) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "lz4 invalid ret:%d, expected:%lu", ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -424,13 +420,13 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) workspace_size = zstd_dstream_workspace_bound(max_window_size); - workspace = f2fs_vmalloc(F2FS_I_SB(dic->inode), workspace_size); + workspace = f2fs_vmalloc(dic->sbi, workspace_size); if (!workspace) return -ENOMEM; stream = zstd_init_dstream(max_window_size, workspace, workspace_size); if (!stream) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s zstd_init_dstream failed", __func__); vfree(workspace); return -EIO; @@ -466,14 +462,14 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) ret = zstd_decompress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s zstd_decompress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } if (dic->rlen != outbuf.pos) { - f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + f2fs_err_ratelimited(dic->sbi, "%s ZSTD invalid rlen:%zu, expected:%lu", __func__, dic->rlen, PAGE_SIZE << dic->log_cluster_size); @@ -622,6 +618,7 @@ static void *f2fs_vmap(struct page **pages, unsigned int count) static int f2fs_compress_pages(struct compress_ctx *cc) { + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); struct f2fs_inode_info *fi = F2FS_I(cc->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; @@ -642,7 +639,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); cc->valid_nr_cpages = cc->nr_cpages; - cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages); + cc->cpages = page_array_alloc(sbi, cc->nr_cpages); if (!cc->cpages) { ret = -ENOMEM; goto destroy_compress_ctx; @@ -716,7 +713,7 @@ out_free_cpages: if (cc->cpages[i]) f2fs_compress_free_page(cc->cpages[i]); } - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; destroy_compress_ctx: if (cops->destroy_compress_ctx) @@ -734,7 +731,7 @@ static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { - 
struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_sb_info *sbi = dic->sbi; struct f2fs_inode_info *fi = F2FS_I(dic->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; @@ -796,25 +793,27 @@ out_end_io: f2fs_decompress_end_io(dic, ret, in_task); } +static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct folio *folio, nid_t ino, block_t blkaddr); + /* * This is called when a page of a compressed cluster has been read from disk * (or failed to be read from disk). It checks whether this page was the last * page being waited on in the cluster, and if so, it decompresses the cluster * (or in the case of a failure, cleans up without actually decompressing). */ -void f2fs_end_read_compressed_page(struct page *page, bool failed, +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task) { - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct decompress_io_ctx *dic = folio->private; + struct f2fs_sb_info *sbi = dic->sbi; dec_page_count(sbi, F2FS_RD_DATA); if (failed) WRITE_ONCE(dic->failed, true); else if (blkaddr && in_task) - f2fs_cache_compressed_page(sbi, page, + f2fs_cache_compressed_page(sbi, folio, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) @@ -1340,7 +1339,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; atomic_set(&cic->pending_pages, cc->valid_nr_cpages); - cic->rpages = page_array_alloc(cc->inode, cc->cluster_size); + cic->rpages = page_array_alloc(sbi, cc->cluster_size); if (!cic->rpages) goto out_put_cic; @@ -1420,7 +1419,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, (*submitted)++; unlock_continue: inode_dec_dirty_pages(cc->inode); - unlock_page(fio.page); + folio_unlock(fio.folio); } if (fio.compr_blocks) @@ -1442,13 +1441,13 @@ unlock_continue: spin_unlock(&fi->i_size_lock); f2fs_put_rpages(cc); - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; f2fs_destroy_compress_ctx(cc, false); return 0; out_destroy_crypt: - page_array_free(cc->inode, cic->rpages, cc->cluster_size); + page_array_free(sbi, cic->rpages, cc->cluster_size); for (--i; i >= 0; i--) { if (!cc->cpages[i]) @@ -1469,18 +1468,18 @@ out_free: f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; return -EAGAIN; } -void f2fs_compress_write_end_io(struct bio *bio, struct page *page) +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) { + struct page *page = &folio->page; struct f2fs_sb_info *sbi = bio->bi_private; - struct compress_io_ctx *cic = - (struct compress_io_ctx *)page_private(page); - enum count_type type = WB_DATA_TYPE(page, - f2fs_is_compressed_page(page)); + struct compress_io_ctx *cic = folio->private; + enum count_type type = WB_DATA_TYPE(folio, + f2fs_is_compressed_page(folio)); int i; if (unlikely(bio->bi_status != BLK_STS_OK)) @@ -1499,7 +1498,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) end_page_writeback(cic->rpages[i]); } - page_array_free(cic->inode, cic->rpages, cic->nr_rpages); + page_array_free(sbi, cic->rpages, cic->nr_rpages); kmem_cache_free(cic_entry_slab, cic); } @@ -1633,14 +1632,13 @@ static inline bool 
allow_memalloc_for_decomp(struct f2fs_sb_info *sbi, static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, bool pre_alloc) { - const struct f2fs_compress_ops *cops = - f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; int i; - if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) return 0; - dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); + dic->tpages = page_array_alloc(dic->sbi, dic->cluster_size); if (!dic->tpages) return -ENOMEM; @@ -1670,10 +1668,9 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, bool bypass_destroy_callback, bool pre_alloc) { - const struct f2fs_compress_ops *cops = - f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; - if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) return; if (!bypass_destroy_callback && cops->destroy_decompress_ctx) @@ -1700,7 +1697,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) if (!dic) return ERR_PTR(-ENOMEM); - dic->rpages = page_array_alloc(cc->inode, cc->cluster_size); + dic->rpages = page_array_alloc(sbi, cc->cluster_size); if (!dic->rpages) { kmem_cache_free(dic_entry_slab, dic); return ERR_PTR(-ENOMEM); @@ -1708,6 +1705,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; dic->inode = cc->inode; + dic->sbi = sbi; + dic->compress_algorithm = F2FS_I(cc->inode)->i_compress_algorithm; atomic_set(&dic->remaining_pages, cc->nr_cpages); dic->cluster_idx = cc->cluster_idx; dic->cluster_size = cc->cluster_size; @@ -1721,7 +1720,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->rpages[i] = cc->rpages[i]; dic->nr_rpages = cc->cluster_size; - dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages); + dic->cpages = page_array_alloc(sbi, dic->nr_cpages); if (!dic->cpages) { ret = -ENOMEM; goto out_free; @@ -1751,6 +1750,8 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, bool bypass_destroy_callback) { int i; + /* use sbi in dic to avoid UFA of dic->inode*/ + struct f2fs_sb_info *sbi = dic->sbi; f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); @@ -1762,7 +1763,7 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, continue; f2fs_compress_free_page(dic->tpages[i]); } - page_array_free(dic->inode, dic->tpages, dic->cluster_size); + page_array_free(sbi, dic->tpages, dic->cluster_size); } if (dic->cpages) { @@ -1771,10 +1772,10 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, continue; f2fs_compress_free_page(dic->cpages[i]); } - page_array_free(dic->inode, dic->cpages, dic->nr_cpages); + page_array_free(sbi, dic->cpages, dic->nr_cpages); } - page_array_free(dic->inode, dic->rpages, dic->nr_rpages); + page_array_free(sbi, dic->rpages, dic->nr_rpages); kmem_cache_free(dic_entry_slab, dic); } @@ -1793,8 +1794,7 @@ static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task) f2fs_free_dic(dic, false); } else { INIT_WORK(&dic->free_work, f2fs_late_free_dic); - queue_work(F2FS_I_SB(dic->inode)->post_read_wq, - &dic->free_work); + queue_work(dic->sbi->post_read_wq, &dic->free_work); } } } @@ -1921,8 +1921,8 @@ void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, 
invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1); } -void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, - nid_t ino, block_t blkaddr) +static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct folio *folio, nid_t ino, block_t blkaddr) { struct folio *cfolio; int ret; @@ -1953,9 +1953,9 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, return; } - set_page_private_data(&cfolio->page, ino); + folio_set_f2fs_data(cfolio, ino); - memcpy(folio_address(cfolio), page_address(page), PAGE_SIZE); + memcpy(folio_address(cfolio), folio_address(folio), PAGE_SIZE); folio_mark_uptodate(cfolio); f2fs_folio_put(cfolio, true); } @@ -2012,7 +2012,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) continue; } - if (ino != get_page_private_data(&folio->page)) { + if (ino != folio_get_f2fs_data(folio)) { folio_unlock(folio); continue; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 711ad80b38d0..7961e0ddfca3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -47,14 +47,14 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -bool f2fs_is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(const struct folio *folio) { - struct address_space *mapping = page_folio(page)->mapping; + struct address_space *mapping = folio->mapping; struct inode *inode; struct f2fs_sb_info *sbi; - if (fscrypt_is_bounce_page(page)) - return page_private_gcing(fscrypt_pagecache_page(page)); + if (fscrypt_is_bounce_folio(folio)) + return folio_test_f2fs_gcing(fscrypt_pagecache_folio(folio)); inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -65,7 +65,7 @@ bool f2fs_is_cp_guaranteed(struct page *page) return true; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || - page_private_gcing(page)) + folio_test_f2fs_gcing(folio)) return true; return false; } @@ -142,9 +142,9 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) bio_for_each_folio_all(fi, bio) { struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(&folio->page)) { + if (f2fs_is_compressed_page(folio)) { if (ctx && !ctx->decompression_attempted) - f2fs_end_read_compressed_page(&folio->page, true, 0, + f2fs_end_read_compressed_page(folio, true, 0, in_task); f2fs_put_folio_dic(folio, in_task); continue; @@ -181,14 +181,13 @@ static void f2fs_verify_bio(struct work_struct *work) * as those were handled separately by f2fs_end_read_compressed_page(). 
*/ if (may_have_compressed_pages) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - if (!f2fs_is_compressed_page(page) && - !fsverity_verify_page(page)) { + if (!f2fs_is_compressed_page(folio) && + !fsverity_verify_page(&folio->page)) { bio->bi_status = BLK_STS_IOERR; break; } @@ -233,16 +232,15 @@ static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task) static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bool in_task) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; bool all_compressed = true; block_t blkaddr = ctx->fs_blkaddr; - bio_for_each_segment_all(bv, ctx->bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, ctx->bio) { + struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(page)) - f2fs_end_read_compressed_page(page, false, blkaddr, + if (f2fs_is_compressed_page(folio)) + f2fs_end_read_compressed_page(folio, false, blkaddr, in_task); else all_compressed = false; @@ -280,9 +278,9 @@ static void f2fs_post_read_work(struct work_struct *work) static void f2fs_read_end_io(struct bio *bio) { - struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); + struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio)); struct bio_post_read_ctx *ctx; - bool intask = in_task(); + bool intask = in_task() && !irqs_disabled(); iostat_update_and_unbind_ctx(bio); ctx = bio->bi_private; @@ -339,13 +337,13 @@ static void f2fs_write_end_io(struct bio *bio) } #ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_is_compressed_page(&folio->page)) { - f2fs_compress_write_end_io(bio, &folio->page); + if (f2fs_is_compressed_page(folio)) { + f2fs_compress_write_end_io(bio, folio); continue; } #endif - type = WB_DATA_TYPE(&folio->page, false); + type = WB_DATA_TYPE(folio, false); if (unlikely(bio->bi_status != BLK_STS_OK)) { mapping_set_error(folio->mapping, -EIO); @@ -355,12 +353,12 @@ static void f2fs_write_end_io(struct bio *bio) } f2fs_bug_on(sbi, is_node_folio(folio) && - folio->index != nid_of_node(&folio->page)); + folio->index != nid_of_node(folio)); dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, folio)) f2fs_del_fsync_node_entry(sbi, folio); - clear_page_private_gcing(&folio->page); + folio_clear_f2fs_gcing(folio); folio_end_writeback(folio); } if (!get_pages(sbi, F2FS_WB_CP_DATA) && @@ -419,7 +417,6 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) { unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0); - struct folio *fio_folio = page_folio(fio->page); unsigned int fua_flag, meta_flag, io_flag; blk_opf_t op_flags = 0; @@ -447,7 +444,7 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) op_flags |= REQ_FUA; if (fio->type == DATA && - F2FS_I(fio_folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE) + F2FS_I(fio->folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE) op_flags |= REQ_PRIO; return op_flags; @@ -546,14 +543,14 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) } static bool __has_merged_page(struct bio *bio, struct inode *inode, - struct page *page, nid_t ino) + struct folio *folio, nid_t ino) { struct folio_iter fi; if (!bio) return false; - if (!inode && !page && !ino) + if (!inode && !folio && !ino) return true; bio_for_each_folio_all(fi, bio) { @@ -564,7 +561,7 @@ static bool 
__has_merged_page(struct bio *bio, struct inode *inode, if (IS_ERR(target)) continue; } - if (f2fs_is_compressed_page(&target->page)) { + if (f2fs_is_compressed_page(target)) { target = f2fs_compress_control_folio(target); if (IS_ERR(target)) continue; @@ -572,9 +569,9 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, if (inode && inode == target->mapping->host) return true; - if (page && page == &target->page) + if (folio && folio == target) return true; - if (ino && ino == ino_of_node(&target->page)) + if (ino && ino == ino_of_node(target)) return true; } @@ -641,7 +638,7 @@ unlock_out: } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type, bool force) { enum temp_type temp; @@ -653,7 +650,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; f2fs_down_read(&io->io_rwsem); - ret = __has_merged_page(io->bio, inode, page, ino); + ret = __has_merged_page(io->bio, inode, folio, ino); f2fs_up_read(&io->io_rwsem); } if (ret) @@ -671,10 +668,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, page, ino, type, false); + __submit_merged_write_cond(sbi, inode, folio, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) @@ -691,7 +688,7 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct folio *fio_folio = page_folio(fio->page); + struct folio *fio_folio = fio->folio; struct folio *data_folio = fio->encrypted_page ? page_folio(fio->encrypted_page) : fio_folio; @@ -713,7 +710,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(data_folio) : WB_DATA_TYPE(fio->page, false)); + __read_io_type(data_folio) : WB_DATA_TYPE(fio->folio, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); @@ -779,7 +776,7 @@ static void del_bio_entry(struct bio_entry *be) static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct page *page) { - struct folio *fio_folio = page_folio(fio->page); + struct folio *fio_folio = fio->folio; struct f2fs_sb_info *sbi = fio->sbi; enum temp_type temp; bool found = false; @@ -848,7 +845,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - &folio->page, 0); + folio, 0); if (found) break; } @@ -865,7 +862,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - &folio->page, 0); + folio, 0); if (found) { target = be->bio; del_bio_entry(be); @@ -886,15 +883,15 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; - struct page *page = fio->encrypted_page ? - fio->encrypted_page : fio->page; - struct folio *folio = page_folio(fio->page); + struct folio *data_folio = fio->encrypted_page ? 
+ page_folio(fio->encrypted_page) : fio->folio; + struct folio *folio = fio->folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - trace_f2fs_submit_folio_bio(page_folio(page), fio); + trace_f2fs_submit_folio_bio(data_folio, fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) @@ -905,16 +902,16 @@ alloc_new: f2fs_set_bio_crypt_ctx(bio, folio->mapping->host, folio->index, fio, GFP_NOIO); - add_bio_entry(fio->sbi, bio, page, fio->temp); + add_bio_entry(fio->sbi, bio, &data_folio->page, fio->temp); } else { - if (add_ipu_page(fio, &bio, page)) + if (add_ipu_page(fio, &bio, &data_folio->page)) goto alloc_new; } if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio)); - inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); + inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -949,7 +946,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; - struct page *bio_page; + struct folio *bio_folio; enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -980,44 +977,44 @@ next: verify_fio_blkaddr(fio); if (fio->encrypted_page) - bio_page = fio->encrypted_page; + bio_folio = page_folio(fio->encrypted_page); else if (fio->compressed_page) - bio_page = fio->compressed_page; + bio_folio = page_folio(fio->compressed_page); else - bio_page = fio->page; + bio_folio = fio->folio; /* set submitted = true as a return value */ fio->submitted = 1; - type = WB_DATA_TYPE(bio_page, fio->compressed_page); + type = WB_DATA_TYPE(bio_folio, fio->compressed_page); inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || !f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio), - page_folio(bio_page)->index, fio))) + bio_folio->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio), - page_folio(bio_page)->index, fio, GFP_NOIO); + bio_folio->index, fio, GFP_NOIO); io->fio = *fio; } - if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { + if (!bio_add_folio(io->bio, bio_folio, folio_size(bio_folio), 0)) { __submit_merged_bio(io); goto alloc_new; } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), - PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio->folio, + folio_size(fio->folio)); io->last_block_in_bio = fio->new_blkaddr; - trace_f2fs_submit_folio_write(page_folio(fio->page), fio); + trace_f2fs_submit_folio_write(fio->folio, fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { @@ -1553,10 +1550,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) unsigned int start_pgofs; int bidx = 0; bool is_hole; + bool lfs_dio_write; if (!maxblocks) return 0; + lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create); + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) goto out; @@ -1572,8 +1573,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) end = pgofs + maxblocks; next_dnode: - if (map->m_may_create) + if (map->m_may_create) { + if (f2fs_lfs_mode(sbi)) + 
f2fs_balance_fs(sbi, true); f2fs_map_lock(sbi, flag); + } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -1589,7 +1593,7 @@ next_dnode: start_pgofs = pgofs; prealloc = 0; last_ofs_in_node = ofs_in_node = dn.ofs_in_node; - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); next_block: blkaddr = f2fs_data_blkaddr(&dn); @@ -1603,7 +1607,7 @@ next_block: /* use out-place-update for direct IO under LFS mode */ if (map->m_may_create && (is_hole || (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && - !f2fs_is_pinned_file(inode)))) { + !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto sync_out; @@ -1687,10 +1691,15 @@ next_block: if (map->m_multidev_dio) map->m_bdev = FDEV(bidx).bdev; + + if (lfs_dio_write) + map->m_last_pblk = NULL_ADDR; } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) { ofs++; map->m_len++; } else { + if (lfs_dio_write && !f2fs_is_pinned_file(inode)) + map->m_last_pblk = blkaddr; goto sync_out; } @@ -1715,14 +1724,6 @@ skip: dn.ofs_in_node = end_offset; } - if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && - map->m_may_create) { - /* the next block to be allocated may not be contiguous. */ - if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == - CAP_BLKS_PER_SEC(sbi) - 1) - goto sync_out; - } - if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) @@ -2303,7 +2304,7 @@ submit_and_realloc: } if (!bio) { - bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, + bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i, f2fs_ra_op_flags(rac), folio->index, for_write); if (IS_ERR(bio)) { @@ -2376,6 +2377,14 @@ static int f2fs_mpage_readpages(struct inode *inode, unsigned max_nr_pages = nr_pages; int ret = 0; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + index = rac ? 
readahead_index(rac) : folio->index; + max_nr_pages = round_up(index + nr_pages, cc.cluster_size) - + round_down(index, cc.cluster_size); + } +#endif + map.m_pblk = 0; map.m_lblk = 0; map.m_len = 0; @@ -2642,7 +2651,7 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) int f2fs_do_write_data_page(struct f2fs_io_info *fio) { - struct folio *folio = page_folio(fio->page); + struct folio *folio = fio->folio; struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; @@ -2652,7 +2661,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* Use COW inode to make dnode_of_data for atomic write */ atomic_commit = f2fs_is_atomic_file(inode) && - page_private_atomic(folio_page(folio, 0)); + folio_test_f2fs_atomic(folio); if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else @@ -2683,7 +2692,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { folio_clear_uptodate(folio); - clear_page_private_gcing(folio_page(folio, 0)); + folio_clear_f2fs_gcing(folio); goto out_writepage; } got_it: @@ -2753,7 +2762,7 @@ got_it: trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); if (atomic_commit) - clear_page_private_atomic(folio_page(folio, 0)); + folio_clear_f2fs_atomic(folio); out_writepage: f2fs_put_dnode(&dn); out: @@ -2771,7 +2780,6 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, bool allow_balance) { struct inode *inode = folio->mapping->host; - struct page *page = folio_page(folio, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) @@ -2788,7 +2796,7 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), .old_blkaddr = NULL_ADDR, - .page = page, + .folio = folio, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, @@ -2890,7 +2898,7 @@ out: inode_dec_dirty_pages(inode); if (err) { folio_clear_uptodate(folio); - clear_page_private_gcing(page); + folio_clear_f2fs_gcing(folio); } folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && @@ -3376,7 +3384,7 @@ restart: f2fs_do_read_inline_data(folio, ifolio); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) - set_page_private_inline(&ifolio->page); + folio_set_f2fs_inline(ifolio); goto out; } err = f2fs_convert_inline_folio(&dn, folio); @@ -3698,7 +3706,7 @@ static int f2fs_write_end(const struct kiocb *iocb, folio_mark_dirty(folio); if (f2fs_is_atomic_file(inode)) - set_page_private_atomic(folio_page(folio, 0)); + folio_set_f2fs_atomic(folio); if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { @@ -3733,7 +3741,7 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length) f2fs_remove_dirty_inode(inode); } } - clear_page_private_all(&folio->page); + folio_detach_private(folio); } bool f2fs_release_folio(struct folio *folio, gfp_t wait) @@ -3742,7 +3750,7 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait) if (folio_test_dirty(folio)) return false; - clear_page_private_all(&folio->page); + folio_detach_private(folio); return true; } @@ -4160,7 +4168,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { - struct f2fs_map_blocks map = {}; + struct f2fs_map_blocks map = { NULL, }; pgoff_t next_pgofs = 0; 
int err; @@ -4169,6 +4177,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_next_pgofs = &next_pgofs; map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); + if (flags & IOMAP_WRITE && iomap->private) { + map.m_last_pblk = (unsigned long)iomap->private; + iomap->private = NULL; + } /* * If the blocks being overwritten are already allocated, @@ -4207,6 +4219,9 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->flags |= IOMAP_F_MERGED; iomap->bdev = map.m_bdev; iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk); + + if (flags & IOMAP_WRITE && map.m_last_pblk) + iomap->private = (void *)map.m_last_pblk; } else { if (flags & IOMAP_WRITE) return -ENOTBLK; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 16c2dfb4f595..43a83bbd3bc5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); +static DEFINE_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -91,7 +91,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi) seg_blks = get_seg_entry(sbi, j)->valid_blocks; /* update segment stats */ - if (IS_CURSEG(sbi, j)) + if (is_curseg(sbi, j)) dev_stats[i].devstats[0][DEVSTAT_INUSE]++; else if (seg_blks == BLKS_PER_SEG(sbi)) dev_stats[i].devstats[0][DEVSTAT_FULL]++; @@ -109,7 +109,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi) sec_blks = get_sec_entry(sbi, j)->valid_blocks; /* update section stats */ - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, j))) + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, j))) dev_stats[i].devstats[1][DEVSTAT_INUSE]++; else if (sec_blks == BLKS_PER_SEC(sbi)) dev_stats[i].devstats[1][DEVSTAT_FULL]++; @@ -439,9 +439,8 @@ static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; - unsigned long flags; - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_for_each_entry(si, &f2fs_stat_list, stat_list) { struct f2fs_sb_info *sbi = si->sbi; @@ -753,7 +752,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); return 0; } @@ -765,7 +764,6 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; struct f2fs_dev_stats *dev_stats; - unsigned long flags; int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); @@ -817,9 +815,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->max_aw_cnt, 0); - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_add_tail(&si->stat_list, &f2fs_stat_list); - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); return 0; } @@ -827,11 +825,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned long flags; - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_del(&si->stat_list); - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); kfree(si->dev_stats); kfree(si); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c36b3b22bfff..fffd7749d6d1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -454,7 +454,7 @@ static void 
init_dent_inode(struct inode *dir, struct inode *inode, f2fs_folio_wait_writeback(ifolio, NODE, true, true); /* copy name info. to this inode folio */ - ri = F2FS_INODE(&ifolio->page); + ri = F2FS_INODE(ifolio); ri->i_namelen = cpu_to_le32(fname->disk_name.len); memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); if (IS_ENCRYPTED(dir)) { @@ -897,7 +897,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, f2fs_clear_page_cache_dirty_tag(folio); folio_clear_dirty_for_io(folio); folio_clear_uptodate(folio); - clear_page_private_all(&folio->page); + folio_detach_private(folio); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index cfe925a3d555..199c1e7a83ef 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -19,10 +19,10 @@ #include "node.h" #include <trace/events/f2fs.h> -bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext; struct extent_info ei; int devi; @@ -411,10 +411,10 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; - struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext; struct extent_tree *et; struct extent_node *en; - struct extent_info ei; + struct extent_info ei = {0}; if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ @@ -934,7 +934,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ if (!__may_extent_tree(dn->inode, type)) return; - ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), dn->inode) + + ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) + dn->ofs_in_node; ei.len = 1; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c78464792ceb..46be7560548c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -386,7 +386,7 @@ struct discard_cmd { struct rb_node rb_node; /* rb node located in rb-tree */ struct discard_info di; /* discard info */ struct list_head list; /* command list */ - struct completion wait; /* compleation */ + struct completion wait; /* completion */ struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ @@ -732,6 +732,7 @@ struct f2fs_map_blocks { block_t m_lblk; unsigned int m_len; unsigned int m_flags; + unsigned long m_last_pblk; /* last allocated block, only used for DIO in LFS mode */ pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; @@ -875,6 +876,7 @@ struct f2fs_inode_info { /* linked in global inode list for cache donation */ struct list_head gdonate_list; pgoff_t donate_start, donate_end; /* inclusive */ + atomic_t open_count; /* # of open files */ struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; @@ -1123,8 +1125,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p, f) \ - (f || f2fs_is_cp_guaranteed(p) ? 
F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(folio, f) \ + (f || f2fs_is_cp_guaranteed(folio) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -1240,7 +1242,10 @@ struct f2fs_io_info { blk_opf_t op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ - struct page *page; /* page to be written */ + union { + struct page *page; /* page to be written */ + struct folio *folio; + }; struct page *encrypted_page; /* encrypted page */ struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ @@ -1286,7 +1291,7 @@ struct f2fs_bio_info { struct f2fs_dev_info { struct file *bdev_file; struct block_device *bdev; - char path[MAX_PATH_LEN]; + char path[MAX_PATH_LEN + 1]; unsigned int total_segments; block_t start_blk; block_t end_blk; @@ -1427,7 +1432,7 @@ enum { enum { MEMORY_MODE_NORMAL, /* memory mode for normal devices */ - MEMORY_MODE_LOW, /* memory mode for low memry devices */ + MEMORY_MODE_LOW, /* memory mode for low memory devices */ }; enum errors_option { @@ -1491,7 +1496,7 @@ enum compress_flag { #define COMPRESS_DATA_RESERVED_SIZE 4 struct compress_data { __le32 clen; /* compressed data size */ - __le32 chksum; /* compressed data chksum */ + __le32 chksum; /* compressed data checksum */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; @@ -1536,6 +1541,7 @@ struct compress_io_ctx { struct decompress_io_ctx { u32 magic; /* magic number to indicate page is compressed */ struct inode *inode; /* inode the context belong to */ + struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ pgoff_t cluster_idx; /* cluster index number */ unsigned int cluster_size; /* page count in cluster */ unsigned int log_cluster_size; /* log of cluster size */ @@ -1576,6 +1582,7 @@ struct decompress_io_ctx { bool failed; /* IO error occurred before decompression? */ bool need_verity; /* need fs-verity verification after decompression? 
*/ + unsigned char compress_algorithm; /* backup algorithm type */ void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ struct work_struct verity_work; /* work to verify the decompressed pages */ @@ -1724,6 +1731,9 @@ struct f2fs_sb_info { /* for skip statistic */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ + /* free sections reserved for pinned file */ + unsigned int reserved_pin_section; + /* threshold for gc trials on pinned files */ unsigned short gc_pin_file_threshold; struct f2fs_rwsem pin_sem; @@ -2013,16 +2023,11 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) return F2FS_I_SB(mapping->host); } -static inline struct f2fs_sb_info *F2FS_F_SB(struct folio *folio) +static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio) { return F2FS_M_SB(folio->mapping); } -static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) -{ - return F2FS_F_SB(page_folio(page)); -} - static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) { return (struct f2fs_super_block *)(sbi->raw_super); @@ -2043,14 +2048,14 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) return (struct f2fs_checkpoint *)(sbi->ckpt); } -static inline struct f2fs_node *F2FS_NODE(const struct page *page) +static inline struct f2fs_node *F2FS_NODE(const struct folio *folio) { - return (struct f2fs_node *)page_address(page); + return (struct f2fs_node *)folio_address(folio); } -static inline struct f2fs_inode *F2FS_INODE(struct page *page) +static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio) { - return &((struct f2fs_node *)page_address(page))->i; + return &((struct f2fs_node *)folio_address(folio))->i; } static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) @@ -2453,6 +2458,13 @@ release_quota: } #define PAGE_PRIVATE_GET_FUNC(name, flagname) \ +static inline bool folio_test_f2fs_##name(const struct folio *folio) \ +{ \ + unsigned long priv = (unsigned long)folio->private; \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + return (priv & v) == v; \ +} \ static inline bool page_private_##name(struct page *page) \ { \ return PagePrivate(page) && \ @@ -2461,6 +2473,17 @@ static inline bool page_private_##name(struct page *page) \ } #define PAGE_PRIVATE_SET_FUNC(name, flagname) \ +static inline void folio_set_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + if (!folio->private) \ + folio_attach_private(folio, (void *)v); \ + else { \ + v |= (unsigned long)folio->private; \ + folio->private = (void *)v; \ + } \ +} \ static inline void set_page_private_##name(struct page *page) \ { \ if (!PagePrivate(page)) \ @@ -2470,6 +2493,16 @@ static inline void set_page_private_##name(struct page *page) \ } #define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \ +static inline void folio_clear_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (unsigned long)folio->private; \ + \ + v &= ~(1UL << PAGE_PRIVATE_##flagname); \ + if (v == (1UL << PAGE_PRIVATE_NOT_POINTER)) \ + folio_detach_private(folio); \ + else \ + folio->private = (void *)v; \ +} \ static inline void clear_page_private_##name(struct page *page) \ { \ clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ @@ -2492,39 +2525,23 @@ PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); 
PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); -static inline unsigned long get_page_private_data(struct page *page) +static inline unsigned long folio_get_f2fs_data(struct folio *folio) { - unsigned long data = page_private(page); + unsigned long data = (unsigned long)folio->private; if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data)) return 0; return data >> PAGE_PRIVATE_MAX; } -static inline void set_page_private_data(struct page *page, unsigned long data) +static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data) { - if (!PagePrivate(page)) - attach_page_private(page, (void *)0); - set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); - page_private(page) |= data << PAGE_PRIVATE_MAX; -} - -static inline void clear_page_private_data(struct page *page) -{ - page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0); - if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) - detach_page_private(page); -} + data = (1UL << PAGE_PRIVATE_NOT_POINTER) | (data << PAGE_PRIVATE_MAX); -static inline void clear_page_private_all(struct page *page) -{ - clear_page_private_data(page); - clear_page_private_reference(page); - clear_page_private_gcing(page); - clear_page_private_inline(page); - clear_page_private_atomic(page); - - f2fs_bug_on(F2FS_P_SB(page), page_private(page)); + if (!folio_test_private(folio)) + folio_attach_private(folio, (void *)data); + else + folio->private = (void *)((unsigned long)folio->private | data); } static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, @@ -3011,9 +3028,9 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) -static inline bool IS_INODE(struct page *page) +static inline bool IS_INODE(const struct folio *folio) { - struct f2fs_node *p = F2FS_NODE(page); + struct f2fs_node *p = F2FS_NODE(folio); return RAW_IS_INODE(p); } @@ -3031,20 +3048,20 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) static inline int f2fs_has_extra_attr(struct inode *inode); static inline unsigned int get_dnode_base(struct inode *inode, - struct page *node_page) + struct folio *node_folio) { - if (!IS_INODE(node_page)) + if (!IS_INODE(node_folio)) return 0; return inode ? 
get_extra_isize(inode) : - offset_in_addr(&F2FS_NODE(node_page)->i); + offset_in_addr(&F2FS_NODE(node_folio)->i); } static inline __le32 *get_dnode_addr(struct inode *inode, struct folio *node_folio) { - return blkaddr_in_node(F2FS_NODE(&node_folio->page)) + - get_dnode_base(inode, &node_folio->page); + return blkaddr_in_node(F2FS_NODE(node_folio)) + + get_dnode_base(inode, node_folio); } static inline block_t data_blkaddr(struct inode *inode, @@ -3366,9 +3383,10 @@ static inline unsigned int addrs_per_page(struct inode *inode, return addrs; } -static inline void *inline_xattr_addr(struct inode *inode, struct folio *folio) +static inline +void *inline_xattr_addr(struct inode *inode, const struct folio *folio) { - struct f2fs_inode *ri = F2FS_INODE(&folio->page); + struct f2fs_inode *ri = F2FS_INODE(folio); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)]); @@ -3628,13 +3646,14 @@ int f2fs_pin_file_control(struct inode *inode, bool inc); */ void f2fs_set_inode_flags(struct inode *inode); bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio); -void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); void f2fs_update_inode(struct inode *inode, struct folio *node_folio); void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); +void f2fs_remove_donate_inode(struct inode *inode); void f2fs_evict_inode(struct inode *inode); void f2fs_handle_failed_inode(struct inode *inode); @@ -3784,8 +3803,8 @@ void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio); -int f2fs_recover_xattr_data(struct inode *inode, struct page *page); -int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio); +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); @@ -3852,7 +3871,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, bool recover_newaddr); enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, enum log_type seg_type); -int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); @@ -3886,7 +3905,7 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, static inline struct inode *fio_inode(struct f2fs_io_info *fio) { - return page_folio(fio->page)->mapping->host; + return fio->folio->mapping->host; } #define DEF_FRAGMENT_SIZE 4 @@ -3953,7 +3972,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); -bool f2fs_is_cp_guaranteed(struct page *page); +bool 
f2fs_is_cp_guaranteed(const struct folio *folio); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, @@ -3961,7 +3980,7 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type); void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, struct bio **bio, struct folio *folio); @@ -4303,7 +4322,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab; * inline.c */ bool f2fs_may_inline_data(struct inode *inode); -bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage); +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio); bool f2fs_may_inline_dentry(struct inode *inode); void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio); void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio, @@ -4345,7 +4364,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -bool sanity_check_extent_cache(struct inode *inode, struct page *ipage); +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); @@ -4435,20 +4454,20 @@ enum cluster_check_type { CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ }; -bool f2fs_is_compressed_page(struct page *page); +bool f2fs_is_compressed_page(struct folio *folio); struct folio *f2fs_compress_control_folio(struct folio *folio); int f2fs_prepare_compress_overwrite(struct inode *inode, struct page **pagep, pgoff_t index, void **fsdata); bool f2fs_compress_write_end(struct inode *inode, void *fsdata, pgoff_t index, unsigned copied); int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); -void f2fs_compress_write_end_io(struct bio *bio, struct page *page); +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio); bool f2fs_is_compress_backend_ready(struct inode *inode); bool f2fs_is_compress_level_valid(int alg, int lvl); int __init f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); -void f2fs_end_read_compressed_page(struct page *page, bool failed, +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); @@ -4486,8 +4505,6 @@ void f2fs_destroy_compress_cache(void); struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned int len); -void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, - nid_t ino, block_t blkaddr); bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr); void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); @@ -4504,7 +4521,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); 
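
The compression entry points above all move from struct page to struct folio (f2fs_is_compressed_page(), f2fs_compress_write_end_io(), f2fs_end_read_compressed_page()), and f2fs_cache_compressed_page() is removed along with its !CONFIG_F2FS_FS_COMPRESSION stub. A minimal sketch of how a straggling page-based caller could bridge to the new signature, assuming only the generic page_folio() helper; my_is_compressed() is a hypothetical wrapper, not part of the patch:

	/* hypothetical bridge, not in the patch */
	static bool my_is_compressed(struct page *page)
	{
		/* page_folio() maps a page to its containing folio */
		return f2fs_is_compressed_page(page_folio(page));
	}
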
sbi->compr_saved_block += diff; \ } while (0) #else -static inline bool f2fs_is_compressed_page(struct page *page) { return false; } +static inline bool f2fs_is_compressed_page(struct folio *folio) { return false; } static inline bool f2fs_is_compress_backend_ready(struct inode *inode) { if (!f2fs_compressed_file(inode)) @@ -4522,7 +4539,7 @@ static inline int __init f2fs_init_compress_mempool(void) { return 0; } static inline void f2fs_destroy_compress_mempool(void) { } static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { } -static inline void f2fs_end_read_compressed_page(struct page *page, +static inline void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task) { WARN_ON_ONCE(1); @@ -4542,8 +4559,6 @@ static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned int len) { } -static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, - struct page *page, nid_t ino, block_t blkaddr) { } static inline bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr) { return false; } static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c677230699fd..42faaed6a02d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -489,7 +489,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) } } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; @@ -629,7 +629,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; - return finish_preallocate_blocks(inode); + err = finish_preallocate_blocks(inode); + if (!err) + atomic_inc(&F2FS_I(inode)->open_count); + return err; } void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -708,7 +711,7 @@ next: * once we invalidate valid blkaddr in range [ofs, ofs + count], * we will invalidate all blkaddr in the whole range. 
*/ - fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) + ofs; f2fs_update_read_extent_cache_range(dn, fofs, 0, len); f2fs_update_age_extent_cache_range(dn, fofs, len); @@ -815,12 +818,12 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } - count = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + count = ADDRS_PER_PAGE(dn.node_folio, inode); count -= dn.ofs_in_node; f2fs_bug_on(sbi, count < 0); - if (dn.ofs_in_node || IS_INODE(&dn.node_folio->page)) { + if (dn.ofs_in_node || IS_INODE(dn.node_folio)) { f2fs_truncate_data_blocks_range(&dn, count); free_from += count; } @@ -1043,11 +1046,24 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, { struct inode *inode = d_inode(dentry); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = setattr_prepare(idmap, dentry, attr); + if (err) + return err; + + err = fscrypt_prepare_setattr(dentry, attr); + if (err) + return err; + + err = fsverity_prepare_setattr(dentry, attr); + if (err) + return err; + if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; @@ -1064,20 +1080,19 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, !IS_ALIGNED(attr->ia_size, F2FS_BLK_TO_BYTES(fi->i_cluster_size))) return -EINVAL; + /* + * To prevent scattered pin block generation, we don't allow + * smaller/equal size unaligned truncation for pinned file. + * We only support overwrite IO to pinned file, so don't + * care about larger size truncation. + */ + if (f2fs_is_pinned_file(inode) && + attr->ia_size <= i_size_read(inode) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) + return -EINVAL; } - err = setattr_prepare(idmap, dentry, attr); - if (err) - return err; - - err = fscrypt_prepare_setattr(dentry, attr); - if (err) - return err; - - err = fsverity_prepare_setattr(dentry, attr); - if (err) - return err; - if (is_quota_modification(idmap, inode, attr)) { err = f2fs_dquot_initialize(inode); if (err) @@ -1085,12 +1100,11 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, } if (i_uid_needs_update(idmap, attr, inode) || i_gid_needs_update(idmap, attr, inode)) { - f2fs_lock_op(F2FS_I_SB(inode)); + f2fs_lock_op(sbi); err = dquot_transfer(idmap, inode, attr); if (err) { - set_sbi_flag(F2FS_I_SB(inode), - SBI_QUOTA_NEED_REPAIR); - f2fs_unlock_op(F2FS_I_SB(inode)); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(sbi); return err; } /* @@ -1100,7 +1114,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, i_uid_update(idmap, attr, inode); i_gid_update(idmap, attr, inode); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_unlock_op(F2FS_I_SB(inode)); + f2fs_unlock_op(sbi); } if (attr->ia_valid & ATTR_SIZE) { @@ -1163,7 +1177,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); return err; } @@ -1223,7 +1237,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) return err; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); f2fs_bug_on(F2FS_I_SB(inode), count == 
0 || count > end_offset); @@ -1322,7 +1336,7 @@ next_dnode: goto next; } - done = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, inode) - + done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = f2fs_data_blkaddr(&dn); @@ -1411,7 +1425,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } ilen = min((pgoff_t) - ADDRS_PER_PAGE(&dn.node_folio->page, dst_inode) - + ADDRS_PER_PAGE(dn.node_folio, dst_inode) - dn.ofs_in_node, len - i); do { dn.data_blkaddr = f2fs_data_blkaddr(&dn); @@ -1453,7 +1467,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); folio_mark_dirty(fdst); - set_page_private_gcing(&fdst->page); + folio_set_f2fs_gcing(fdst); f2fs_folio_put(fdst, true); f2fs_folio_put(fsrc, true); @@ -1707,7 +1721,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, goto out; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); end = min(pg_end, end_offset - dn.ofs_in_node + index); ret = f2fs_do_zero_range(&dn, index, end); @@ -1888,9 +1902,8 @@ next_alloc: } } - if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ? - ZONED_PIN_SEC_REQUIRED_COUNT : - GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + if (has_not_enough_free_secs(sbi, 0, + sbi->reserved_pin_section)) { f2fs_down_write(&sbi->gc_lock); stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); @@ -2028,6 +2041,9 @@ out: static int f2fs_release_file(struct inode *inode, struct file *filp) { + if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) + f2fs_remove_donate_inode(inode); + /* * f2fs_release_file is called at every close calls. So we should * not drop any inmemory pages by close called by other process. 
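
The two file.c hunks above pair up: f2fs_file_open() bumps open_count only after finish_preallocate_blocks() succeeds, and f2fs_release_file() calls the newly exported f2fs_remove_donate_inode() only when atomic_dec_and_test() observes the last opener leaving, which is what "drop inode from the donation list when the last file is closed" amounts to. A minimal sketch of the last-close pattern, with hypothetical my_* names standing in for the f2fs structures:

	struct my_inode_info {
		atomic_t open_count;	/* mirrors F2FS_I(inode)->open_count */
	};

	static void my_release(struct my_inode_info *fi)
	{
		/* teardown runs exactly once, at the final close */
		if (atomic_dec_and_test(&fi->open_count))
			my_remove_donate_inode(fi);	/* hypothetical hook */
	}

Keeping the increment after the open-path error checks matters: a failed open never contributes to the count, so the release side cannot underflow.
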
@@ -2978,7 +2994,7 @@ do_map: f2fs_folio_wait_writeback(folio, DATA, true, true); folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); f2fs_folio_put(folio, true); idx++; @@ -3876,7 +3892,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, fi->i_cluster_size); @@ -4054,7 +4070,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, fi->i_cluster_size); @@ -4218,7 +4234,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) goto out; } - end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, pg_end - index); for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { struct block_device *cur_bdev; @@ -4415,7 +4431,7 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) f2fs_folio_wait_writeback(folio, DATA, true, true); folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); redirty_idx = folio_next_index(folio); folio_unlock(folio); folio_put_refs(folio, 2); @@ -4825,6 +4841,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); const loff_t pos = iocb->ki_pos; ssize_t ret; + bool dio; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; @@ -4833,12 +4850,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + dio = f2fs_should_use_dio(inode, iocb, to); + /* In LFS mode, if there is inflight dio, wait for its completion */ if (f2fs_lfs_mode(F2FS_I_SB(inode)) && - get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && + (!f2fs_is_pinned_file(inode) || !dio)) inode_dio_wait(inode); - if (f2fs_should_use_dio(inode, iocb, to)) { + if (dio) { ret = f2fs_dio_read_iter(iocb, to); } else { ret = filemap_read(iocb, to, 0); @@ -4846,8 +4866,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_READ_IO, ret); } - if (trace_f2fs_dataread_end_enabled()) - trace_f2fs_dataread_end(inode, pos, ret); + trace_f2fs_dataread_end(inode, pos, ret); return ret; } @@ -4870,8 +4889,7 @@ static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_READ_IO, ret); - if (trace_f2fs_dataread_end_enabled()) - trace_f2fs_dataread_end(inode, pos, ret); + trace_f2fs_dataread_end(inode, pos, ret); return ret; } @@ -5216,8 +5234,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) f2fs_dio_write_iter(iocb, from, &may_need_sync) : f2fs_buffered_write_iter(iocb, from); - if (trace_f2fs_datawrite_end_enabled()) - trace_f2fs_datawrite_end(inode, orig_pos, ret); + trace_f2fs_datawrite_end(inode, orig_pos, ret); } /* Don't leave any preallocated blocks around past i_size. 
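
In f2fs_file_read_iter() above, the direct-I/O decision is now made once, up front, so the LFS-mode inode_dio_wait() can be skipped for a direct read of a pinned file: pinned files only ever see in-place overwrites, so the block address being read cannot move underneath the reader. The removed trace_*_enabled() guards are safe to drop because a disabled tracepoint already compiles down to a static-key no-op. The wait condition, restated as a sketch under those assumptions:

	/* sketch: when must a read wait for in-flight DIO writes? */
	static bool read_must_wait(bool lfs_mode, long dio_writes_inflight,
				   bool pinned, bool dio_read)
	{
		return lfs_mode && dio_writes_inflight > 0 &&
			!(pinned && dio_read);
	}
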
*/ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3cb5242f4ddf..098e9f71421e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -141,10 +141,10 @@ do_gc: FOREGROUND : BACKGROUND); sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || - gc_control.one_time; + (gc_control.one_time && gc_th->boost_gc_greedy); /* foreground GC was been triggered via f2fs_balance_fs() */ - if (foreground) + if (foreground && !f2fs_sb_has_blkzoned(sbi)) sync_mode = false; gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC; @@ -197,6 +197,8 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; + gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; + gc_th->boost_gc_greedy = GC_GREEDY; if (f2fs_sb_has_blkzoned(sbi)) { gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; @@ -278,12 +280,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - if (p->alloc_mode == SSR) { - p->gc_mode = GC_GREEDY; - p->dirty_bitmap = dirty_i->dirty_segmap[type]; - p->max_search = dirty_i->nr_dirty[type]; - p->ofs_unit = 1; - } else if (p->alloc_mode == AT_SSR) { + if (p->alloc_mode == SSR || p->alloc_mode == AT_SSR) { p->gc_mode = GC_GREEDY; p->dirty_bitmap = dirty_i->dirty_segmap[type]; p->max_search = dirty_i->nr_dirty[type]; @@ -389,14 +386,15 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) } static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, - unsigned int segno, struct victim_sel_policy *p) + unsigned int segno, struct victim_sel_policy *p, + unsigned int valid_thresh_ratio) { if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; - if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >= - CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio / - 100)) + if (p->one_time_gc && (valid_thresh_ratio < 100) && + (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * valid_thresh_ratio / 100)) return UINT_MAX; /* alloc_mode == LFS */ @@ -777,6 +775,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, unsigned int secno, last_victim; unsigned int last_segment; unsigned int nsearched; + unsigned int valid_thresh_ratio = 100; bool is_atgc; int ret = 0; @@ -786,7 +785,11 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; - p.one_time_gc = one_time; + if (one_time) { + p.one_time_gc = one_time; + if (has_enough_free_secs(sbi, 0, NR_PERSISTENT_LOG)) + valid_thresh_ratio = sbi->gc_thread->valid_thresh_ratio; + } retry: select_policy(sbi, gc_type, type, &p); @@ -912,7 +915,7 @@ retry: goto next; } - cost = get_gc_cost(sbi, segno, &p); + cost = get_gc_cost(sbi, segno, &p, valid_thresh_ratio); if (p.min_cost > cost) { p.min_segno = segno; @@ -1162,8 +1165,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } - if (IS_INODE(&node_folio->page)) { - base = offset_in_addr(F2FS_INODE(&node_folio->page)); + if (IS_INODE(node_folio)) { + base = offset_in_addr(F2FS_INODE(node_folio)); max_addrs = DEF_ADDRS_PER_INODE; } else { base = 0; @@ -1177,7 +1180,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } - *nofs = ofs_of_node(&node_folio->page); + *nofs = ofs_of_node(node_folio); source_blkaddr = data_blkaddr(NULL, node_folio, ofs_in_node); f2fs_folio_put(node_folio, 
true); @@ -1249,7 +1252,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) } got_it: /* read folio */ - fio.page = &folio->page; + fio.folio = folio; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; /* @@ -1353,7 +1356,7 @@ static int move_data_block(struct inode *inode, block_t bidx, goto put_out; /* read page */ - fio.page = &folio->page; + fio.folio = folio; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; if (lfs_mode) @@ -1473,7 +1476,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, goto out; } folio_mark_dirty(folio); - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -1483,7 +1486,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, .old_blkaddr = NULL_ADDR, - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .need_lock = LOCK_REQ, .io_type = FS_GC_DATA_IO, @@ -1499,11 +1502,11 @@ retry: f2fs_remove_dirty_inode(inode); } - set_page_private_gcing(&folio->page); + folio_set_f2fs_gcing(folio); err = f2fs_do_write_data_page(&fio); if (err) { - clear_page_private_gcing(&folio->page); + folio_clear_f2fs_gcing(folio); if (err == -ENOMEM) { memalloc_retry_wait(GFP_NOFS); goto retry; @@ -1749,7 +1752,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, !has_enough_free_blocks(sbi, sbi->gc_thread->boost_zoned_gc_percent)) window_granularity *= - BOOST_GC_MULTIPLE; + sbi->gc_thread->boost_gc_multiple; end_segno = start_segno + window_granularity; } @@ -1891,6 +1894,7 @@ gc_more: /* Let's run FG_GC, if we don't have enough space. */ if (has_not_enough_free_secs(sbi, 0, 0)) { gc_type = FG_GC; + gc_control->one_time = false; /* * For example, if there are many prefree_segments below given @@ -2064,7 +2068,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) continue; do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5c1eaf55e127..24e8b1c27acc 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -68,6 +68,8 @@ struct f2fs_gc_kthread { unsigned int no_zoned_gc_percent; unsigned int boost_zoned_gc_percent; unsigned int valid_thresh_ratio; + unsigned int boost_gc_multiple; + unsigned int boost_gc_greedy; }; struct gc_inode_list { @@ -194,6 +196,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) - return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC); + return !has_enough_free_blocks(sbi, + sbi->gc_thread->boost_zoned_gc_percent); return has_enough_invalid_blocks(sbi); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 901c630685ce..58ac831ef704 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -33,9 +33,9 @@ bool f2fs_may_inline_data(struct inode *inode) return !f2fs_post_read_required(inode); } -static bool inode_has_blocks(struct inode *inode, struct page *ipage) +static bool inode_has_blocks(struct inode *inode, struct folio *ifolio) { - struct f2fs_inode *ri = F2FS_INODE(ipage); + struct f2fs_inode *ri = F2FS_INODE(ifolio); int i; if (F2FS_HAS_BLOCKS(inode)) @@ -48,12 +48,12 @@ static bool inode_has_blocks(struct inode *inode, struct page *ipage) return false; } -bool f2fs_sanity_check_inline_data(struct inode *inode, struct page 
*ipage) +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio) { if (!f2fs_has_inline_data(inode)) return false; - if (inode_has_blocks(inode, ipage)) + if (inode_has_blocks(inode, ifolio)) return false; if (!support_inline_data(inode)) @@ -150,7 +150,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .io_type = FS_DATA_IO, }; @@ -206,7 +206,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) /* clear inline data and flag after data writeback */ f2fs_truncate_inline_inode(dn->inode, dn->inode_folio, 0); - clear_page_private_inline(&dn->inode_folio->page); + folio_clear_f2fs_inline(dn->inode_folio); clear_out: stat_dec_inline_inode(dn->inode); clear_inode_flag(dn->inode, FI_INLINE_DATA); @@ -286,7 +286,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio) set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(&ifolio->page); + folio_clear_f2fs_inline(ifolio); f2fs_folio_put(ifolio, 1); return 0; } @@ -305,8 +305,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio) * x o -> remove data blocks, and then recover inline_data * x x -> recover data blocks */ - if (IS_INODE(&nfolio->page)) - ri = F2FS_INODE(&nfolio->page); + if (IS_INODE(nfolio)) + ri = F2FS_INODE(nfolio); if (f2fs_has_inline_data(inode) && ri && (ri->i_inline & F2FS_INLINE_DATA)) { @@ -825,7 +825,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; byteaddr += (char *)inline_data_addr(inode, ifolio) - - (char *)F2FS_INODE(&ifolio->page); + (char *)F2FS_INODE(ifolio); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 083d52a42bfb..8c4eafe9ffac 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -108,7 +108,7 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio) f2fs_folio_wait_writeback(ifolio, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); - set_raw_inline(inode, F2FS_INODE(&ifolio->page)); + set_raw_inline(inode, F2FS_INODE(ifolio)); folio_mark_dirty(ifolio); return; } @@ -116,14 +116,15 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio) return; } -static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +static +bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(page)->i; + struct f2fs_inode *ri = &F2FS_NODE(folio)->i; if (!f2fs_sb_has_inode_chksum(sbi)) return false; - if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(folio) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), @@ -133,9 +134,9 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page return true; } -static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_node *node = F2FS_NODE(page); + struct f2fs_node *node = F2FS_NODE(folio); struct f2fs_inode *ri = &node->i; __le32 ino = node->footer.ino; __le32 gen = ri->i_generation; @@ -164,34 +165,34 @@ bool 
f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) return true; #ifdef CONFIG_F2FS_CHECK_FS - if (!f2fs_enable_inode_chksum(sbi, &folio->page)) + if (!f2fs_enable_inode_chksum(sbi, folio)) #else - if (!f2fs_enable_inode_chksum(sbi, &folio->page) || + if (!f2fs_enable_inode_chksum(sbi, folio) || folio_test_dirty(folio) || folio_test_writeback(folio)) #endif return true; - ri = &F2FS_NODE(&folio->page)->i; + ri = &F2FS_NODE(folio)->i; provided = le32_to_cpu(ri->i_inode_checksum); - calculated = f2fs_inode_chksum(sbi, &folio->page); + calculated = f2fs_inode_chksum(sbi, folio); if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - folio->index, ino_of_node(&folio->page), + folio->index, ino_of_node(folio), provided, calculated); return provided == calculated; } -void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(page)->i; + struct f2fs_inode *ri = &F2FS_NODE(folio)->i; - if (!f2fs_enable_inode_chksum(sbi, page)) + if (!f2fs_enable_inode_chksum(sbi, folio)) return; - ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, folio)); } static bool sanity_check_compress_inode(struct inode *inode, @@ -266,28 +267,28 @@ err_level: return false; } -static bool sanity_check_inode(struct inode *inode, struct page *node_page) +static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode *ri = F2FS_INODE(node_page); + struct f2fs_inode *ri = F2FS_INODE(node_folio); unsigned long long iblocks; - iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); + iblocks = le64_to_cpu(F2FS_INODE(node_folio)->i_blocks); if (!iblocks) { f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", __func__, inode->i_ino, iblocks); return false; } - if (ino_of_node(node_page) != nid_of_node(node_page)) { + if (ino_of_node(node_folio) != nid_of_node(node_folio)) { f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", __func__, inode->i_ino, - ino_of_node(node_page), nid_of_node(node_page)); + ino_of_node(node_folio), nid_of_node(node_folio)); return false; } - if (ino_of_node(node_page) == fi->i_xattr_nid) { + if (ino_of_node(node_folio) == fi->i_xattr_nid) { f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.", __func__, inode->i_ino, fi->i_xattr_nid); return false; @@ -354,7 +355,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } - if (f2fs_sanity_check_inline_data(inode, node_page)) { + if (f2fs_sanity_check_inline_data(inode, node_folio)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; @@ -419,7 +420,7 @@ static int do_read_inode(struct inode *inode) if (IS_ERR(node_folio)) return PTR_ERR(node_folio); - ri = F2FS_INODE(&node_folio->page); + ri = F2FS_INODE(node_folio); inode->i_mode = le16_to_cpu(ri->i_mode); i_uid_write(inode, le32_to_cpu(ri->i_uid)); @@ -469,7 +470,7 @@ static int do_read_inode(struct inode *inode) fi->i_inline_xattr_size = 0; } - if (!sanity_check_inode(inode, &node_folio->page)) { + if (!sanity_check_inode(inode, node_folio)) { f2fs_folio_put(node_folio, true); set_sbi_flag(sbi, 
SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); @@ -481,9 +482,9 @@ static int do_read_inode(struct inode *inode) __recover_inline_status(inode, node_folio); /* try to recover cold bit for non-dir inode */ - if (!S_ISDIR(inode->i_mode) && !is_cold_node(&node_folio->page)) { + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_folio)) { f2fs_folio_wait_writeback(node_folio, NODE, true, true); - set_cold_node(&node_folio->page, false); + set_cold_node(node_folio, false); folio_mark_dirty(node_folio); } @@ -531,7 +532,7 @@ static int do_read_inode(struct inode *inode) init_idisk_time(inode); - if (!sanity_check_extent_cache(inode, &node_folio->page)) { + if (!sanity_check_extent_cache(inode, node_folio)) { f2fs_folio_put(node_folio, true); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; @@ -669,7 +670,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) f2fs_inode_synced(inode); - ri = F2FS_INODE(&node_folio->page); + ri = F2FS_INODE(node_folio); ri->i_mode = cpu_to_le16(inode->i_mode); ri->i_advise = fi->i_advise; @@ -748,11 +749,11 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) /* deleted inode */ if (inode->i_nlink == 0) - clear_page_private_inline(&node_folio->page); + folio_clear_f2fs_inline(node_folio); init_idisk_time(inode); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_I_SB(inode), &node_folio->page); + f2fs_inode_chksum_set(F2FS_I_SB(inode), node_folio); #endif } @@ -820,7 +821,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; } -static void f2fs_remove_donate_inode(struct inode *inode) +void f2fs_remove_donate_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -933,6 +934,19 @@ retry: f2fs_update_inode_page(inode); if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + + /* + * If both f2fs_truncate() and f2fs_update_inode_page() failed + * due to fuzzed corrupted inode, call f2fs_inode_synced() to + * avoid triggering later f2fs_bug_on(). + */ + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + f2fs_warn(sbi, + "f2fs_evict_inode: inode is dirty, ino:%lu", + inode->i_ino); + f2fs_inode_synced(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } } if (freeze_protected) sb_end_intwrite(inode->i_sb); @@ -949,8 +963,12 @@ no_delete: if (likely(!f2fs_cp_error(sbi) && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); - else - f2fs_inode_synced(inode); + + /* + * anyway, it needs to remove the inode from sbi->inode_list[DIRTY_META] + * list to avoid UAF in f2fs_sync_inode_meta() during checkpoint. 
+ */ + f2fs_inode_synced(inode); /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */ if (inode->i_ino) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 07e333ee21b7..b882771e4699 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1298,19 +1298,19 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *page; + struct folio *folio; const char *target; if (!dentry) return ERR_PTR(-ECHILD); - page = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(page)) - return ERR_CAST(page); + folio = read_mapping_folio(inode->i_mapping, 0, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); - target = fscrypt_get_symlink(inode, page_address(page), + target = fscrypt_get_symlink(inode, folio_address(folio), inode->i_sb->s_blocksize, done); - put_page(page); + folio_put(folio); return target; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bfe104db284e..27743b93e186 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -135,7 +135,7 @@ static struct folio *get_current_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) return f2fs_get_meta_folio_retry(sbi, current_nat_addr(sbi, nid)); } -static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +static struct folio *get_next_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) { struct folio *src_folio; struct folio *dst_folio; @@ -149,7 +149,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_folio = get_current_nat_folio(sbi, nid); if (IS_ERR(src_folio)) - return &src_folio->page; + return src_folio; dst_folio = f2fs_grab_meta_folio(sbi, dst_off); f2fs_bug_on(sbi, folio_test_dirty(src_folio)); @@ -161,7 +161,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) set_to_next_nat(nm_i, nid); - return &dst_folio->page; + return dst_folio; } static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, @@ -185,7 +185,7 @@ static void __free_nat_entry(struct nat_entry *e) /* must be locked by nat_tree_lock */ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, - struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail) + struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail, bool init_dirty) { if (no_fail) f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne); @@ -195,6 +195,12 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, if (raw_ne) node_info_from_raw_nat(&ne->ni, raw_ne); + if (init_dirty) { + INIT_LIST_HEAD(&ne->list); + nm_i->nat_cnt[TOTAL_NAT]++; + return ne; + } + spin_lock(&nm_i->nat_list_lock); list_add_tail(&ne->list, &nm_i->nat_entries); spin_unlock(&nm_i->nat_list_lock); @@ -204,14 +210,17 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, return ne; } -static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) +static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n, bool for_dirty) { struct nat_entry *ne; ne = radix_tree_lookup(&nm_i->nat_root, n); - /* for recent accessed nat entry, move it to tail of lru list */ - if (ne && !get_nat_flag(ne, IS_DIRTY)) { + /* + * for recent accessed nat entry which will not be dirtied soon + * later, move it to tail of lru list. 
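
The node.c changes above thread an init_dirty flag through __lookup_nat_cache() and __init_nat_entry(): a NAT entry created only to be dirtied immediately (set_node_addr(), remove_nats_in_journal()) skips the clean LRU list and the RECLAIMABLE_NAT accounting instead of being linked in and unlinked again under nat_list_lock. A distilled sketch of that fast path, with clean_lru standing in for nm_i->nat_entries:

	static void init_entry(struct list_head *entry,
			       struct list_head *clean_lru, bool init_dirty)
	{
		if (init_dirty) {
			/* parked; moved straight to a dirty set by the caller */
			INIT_LIST_HEAD(entry);
			return;
		}
		list_add_tail(entry, clean_lru);
	}
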
+ */ + if (ne && !get_nat_flag(ne, IS_DIRTY) && !for_dirty) { spin_lock(&nm_i->nat_list_lock); if (!list_empty(&ne->list)) list_move_tail(&ne->list, &nm_i->nat_entries); @@ -256,7 +265,7 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, } static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) + struct nat_entry *ne, bool init_dirty) { struct nat_entry_set *head; bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; @@ -279,7 +288,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, goto refresh_list; nm_i->nat_cnt[DIRTY_NAT]++; - nm_i->nat_cnt[RECLAIMABLE_NAT]--; + if (!init_dirty) + nm_i->nat_cnt[RECLAIMABLE_NAT]--; set_nat_flag(ne, IS_DIRTY, true); refresh_list: spin_lock(&nm_i->nat_list_lock); @@ -312,8 +322,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) { - return is_node_folio(folio) && IS_DNODE(&folio->page) && - is_cold_node(&folio->page); + return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio); } void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) @@ -384,7 +393,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) bool need = false; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) @@ -401,7 +410,7 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) bool is_cp = true; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; f2fs_up_read(&nm_i->nat_tree_lock); @@ -415,7 +424,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) bool need_update = true; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ino); + e = __lookup_nat_cache(nm_i, ino, false); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) @@ -440,9 +449,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, return; f2fs_down_write(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (!e) - e = __init_nat_entry(nm_i, new, ne, false); + e = __init_nat_entry(nm_i, new, ne, false, false); else f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || nat_get_blkaddr(e) != @@ -459,11 +468,13 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); + bool init_dirty = false; f2fs_down_write(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ni->nid); + e = __lookup_nat_cache(nm_i, ni->nid, true); if (!e) { - e = __init_nat_entry(nm_i, new, NULL, true); + init_dirty = true; + e = __init_nat_entry(nm_i, new, NULL, true, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -499,11 +510,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, nat_set_blkaddr(e, new_blkaddr); if (!__is_valid_data_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); - __set_nat_cache_dirty(nm_i, e); + __set_nat_cache_dirty(nm_i, e, init_dirty); /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) - e = 
__lookup_nat_cache(nm_i, ni->ino); + e = __lookup_nat_cache(nm_i, ni->ino, false); if (e) { if (fsync_done && ni->nid == ni->ino) set_nat_flag(e, HAS_FSYNCED_INODE, true); @@ -555,20 +566,24 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nat_entry ne; struct nat_entry *e; pgoff_t index; - block_t blkaddr; int i; + bool need_cache = true; ni->flag = 0; ni->nid = nid; retry: /* Check nat cache */ f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); f2fs_up_read(&nm_i->nat_tree_lock); + if (IS_ENABLED(CONFIG_F2FS_CHECK_FS)) { + need_cache = false; + goto sanity_check; + } return 0; } @@ -594,7 +609,7 @@ retry: up_read(&curseg->journal_rwsem); if (i >= 0) { f2fs_up_read(&nm_i->nat_tree_lock); - goto cache; + goto sanity_check; } /* Fill node_info from nat page */ @@ -609,14 +624,23 @@ retry: ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); f2fs_folio_put(folio, true); -cache: - blkaddr = le32_to_cpu(ne.block_addr); - if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - return -EFAULT; +sanity_check: + if (__is_valid_data_blkaddr(ni->blk_addr) && + !f2fs_is_valid_blkaddr(sbi, ni->blk_addr, + DATA_GENERIC_ENHANCE)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err_ratelimited(sbi, + "f2fs_get_node_info of %pS: inconsistent nat entry, " + "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", + __builtin_return_address(0), + ni->ino, ni->nid, ni->blk_addr, ni->version, ni->flag); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); + return -EFSCORRUPTED; + } /* cache nat entry */ - cache_nat_entry(sbi, nid, &ne); + if (need_cache) + cache_nat_entry(sbi, nid, &ne); return 0; } @@ -636,7 +660,7 @@ static void f2fs_ra_node_pages(struct folio *parent, int start, int n) end = start + n; end = min(end, (int)NIDS_PER_BLOCK); for (i = start; i < end; i++) { - nid = get_nid(&parent->page, i, false); + nid = get_nid(parent, i, false); f2fs_ra_node_page(sbi, nid); } @@ -795,7 +819,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) parent = nfolio[0]; if (level != 0) - nids[1] = get_nid(&parent->page, offset[0], true); + nids[1] = get_nid(parent, offset[0], true); dn->inode_folio = nfolio[0]; dn->inode_folio_locked = true; @@ -803,6 +827,16 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) for (i = 1; i <= level; i++) { bool done = false; + if (nids[i] && nids[i] == dn->inode->i_ino) { + err = -EFSCORRUPTED; + f2fs_err_ratelimited(sbi, + "inode mapping table is corrupted, run fsck to fix it, " + "ino:%lu, nid:%u, level:%d, offset:%d", + dn->inode->i_ino, nids[i], level, offset[level]); + set_sbi_flag(sbi, SBI_NEED_FSCK); + goto release_pages; + } + if (!nids[i] && mode == ALLOC_NODE) { /* alloc new node */ if (!f2fs_alloc_nid(sbi, &(nids[i]))) { @@ -846,7 +880,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } if (i < level) { parent = nfolio[i]; - nids[i + 1] = get_nid(&parent->page, offset[i], false); + nids[i + 1] = get_nid(parent, offset[i], false); } } dn->nid = nids[level]; @@ -961,9 +995,9 @@ static int truncate_dnode(struct dnode_of_data *dn) else if (IS_ERR(folio)) return PTR_ERR(folio); - if (IS_INODE(&folio->page) || ino_of_node(&folio->page) != dn->inode->i_ino) { + if (IS_INODE(folio) || ino_of_node(folio) != dn->inode->i_ino) { f2fs_err(sbi, 
"incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", - dn->inode->i_ino, dn->nid, ino_of_node(&folio->page)); + dn->inode->i_ino, dn->nid, ino_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE); f2fs_folio_put(folio, true); @@ -1007,7 +1041,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, f2fs_ra_node_pages(folio, ofs, NIDS_PER_BLOCK); - rn = F2FS_NODE(&folio->page); + rn = F2FS_NODE(folio); if (depth < 3) { for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { child_nid = le32_to_cpu(rn->in.nid[i]); @@ -1070,7 +1104,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, int i; int idx = depth - 2; - nid[0] = get_nid(&dn->inode_folio->page, offset[0], true); + nid[0] = get_nid(dn->inode_folio, offset[0], true); if (!nid[0]) return 0; @@ -1083,14 +1117,14 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, idx = i - 1; goto fail; } - nid[i + 1] = get_nid(&folios[i]->page, offset[i + 1], false); + nid[i + 1] = get_nid(folios[i], offset[i + 1], false); } f2fs_ra_node_pages(folios[idx], offset[idx + 1], NIDS_PER_BLOCK); /* free direct nodes linked to a partial indirect node */ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { - child_nid = get_nid(&folios[idx]->page, i, false); + child_nid = get_nid(folios[idx], i, false); if (!child_nid) continue; dn->nid = child_nid; @@ -1159,7 +1193,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) set_new_dnode(&dn, inode, folio, NULL, 0); folio_unlock(folio); - ri = F2FS_INODE(&folio->page); + ri = F2FS_INODE(folio); switch (level) { case 0: case 1: @@ -1188,7 +1222,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) skip_partial: while (cont) { - dn.nid = get_nid(&folio->page, offset[0], true); + dn.nid = get_nid(folio, offset[0], true); switch (offset[0]) { case NODE_DIR1_BLOCK: case NODE_DIR2_BLOCK: @@ -1220,7 +1254,7 @@ skip_partial: } if (err < 0) goto fail; - if (offset[1] == 0 && get_nid(&folio->page, offset[0], true)) { + if (offset[1] == 0 && get_nid(folio, offset[0], true)) { folio_lock(folio); BUG_ON(!is_node_folio(folio)); set_nid(folio, offset[0], 0, true); @@ -1367,8 +1401,8 @@ struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs) set_node_addr(sbi, &new_ni, NEW_ADDR, false); f2fs_folio_wait_writeback(folio, NODE, true, true); - fill_node_footer(&folio->page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(&folio->page, S_ISDIR(dn->inode->i_mode)); + fill_node_footer(folio, dn->nid, dn->inode->i_ino, ofs, true); + set_cold_node(folio, S_ISDIR(dn->inode->i_mode)); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); if (folio_mark_dirty(folio)) @@ -1400,7 +1434,7 @@ static int read_node_folio(struct folio *folio, blk_opf_t op_flags) .type = NODE, .op = REQ_OP_READ, .op_flags = op_flags, - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, }; int err; @@ -1462,17 +1496,15 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi, struct folio *folio, pgoff_t nid, enum node_type ntype) { - struct page *page = &folio->page; - - if (unlikely(nid != nid_of_node(page) || - (ntype == NODE_TYPE_INODE && !IS_INODE(page)) || + if (unlikely(nid != nid_of_node(folio) || + (ntype == NODE_TYPE_INODE && !IS_INODE(folio)) || (ntype == NODE_TYPE_XATTR && - !f2fs_has_xattr_block(ofs_of_node(page))) || + !f2fs_has_xattr_block(ofs_of_node(folio))) || time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { f2fs_warn(sbi, "inconsistent node block, node_type:%d, 
nid:%lu, " "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - ntype, nid, nid_of_node(page), ino_of_node(page), - ofs_of_node(page), cpver_of_node(page), + ntype, nid, nid_of_node(folio), ino_of_node(folio), + ofs_of_node(folio), cpver_of_node(folio), next_blkaddr_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); @@ -1553,7 +1585,7 @@ struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid) static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start) { struct f2fs_sb_info *sbi = F2FS_F_SB(parent); - nid_t nid = get_nid(&parent->page, start, false); + nid_t nid = get_nid(parent, start, false); return __get_node_folio(sbi, nid, parent, start, NODE_TYPE_REGULAR); } @@ -1618,9 +1650,9 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return ERR_PTR(-EIO); } - if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) + if (!IS_DNODE(folio) || !is_cold_node(folio)) continue; - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) continue; folio_lock(folio); @@ -1630,7 +1662,7 @@ continue_unlock: folio_unlock(folio); continue; } - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) goto continue_unlock; if (!folio_test_dirty(folio)) { @@ -1660,11 +1692,11 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, - .ino = ino_of_node(&folio->page), + .ino = ino_of_node(folio), .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), - .page = &folio->page, + .folio = folio, .encrypted_page = NULL, .submitted = 0, .io_type = io_type, @@ -1689,11 +1721,11 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && wbc->sync_mode == WB_SYNC_NONE && - IS_DNODE(&folio->page) && is_cold_node(&folio->page)) + IS_DNODE(folio) && is_cold_node(folio)) goto redirty_out; /* get old block addr of this node page */ - nid = nid_of_node(&folio->page); + nid = nid_of_node(folio); f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) @@ -1731,7 +1763,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); - set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(&folio->page)); + set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(folio)); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); @@ -1827,9 +1859,9 @@ retry: goto out; } - if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) + if (!IS_DNODE(folio) || !is_cold_node(folio)) continue; - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) continue; folio_lock(folio); @@ -1839,7 +1871,7 @@ continue_unlock: folio_unlock(folio); continue; } - if (ino_of_node(&folio->page) != ino) + if (ino_of_node(folio) != ino) goto continue_unlock; if (!folio_test_dirty(folio) && folio != last_folio) { @@ -1849,17 +1881,17 @@ continue_unlock: f2fs_folio_wait_writeback(folio, NODE, true, true); - set_fsync_mark(&folio->page, 0); - set_dentry_mark(&folio->page, 0); + set_fsync_mark(folio, 0); + set_dentry_mark(folio, 0); if (!atomic || folio == last_folio) { - set_fsync_mark(&folio->page, 1); + set_fsync_mark(folio, 1); percpu_counter_inc(&sbi->rf_node_block_count); - if (IS_INODE(&folio->page)) { + if (IS_INODE(folio)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) 
f2fs_update_inode(inode, folio); - set_dentry_mark(&folio->page, + set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ @@ -1935,7 +1967,7 @@ static bool flush_dirty_inode(struct folio *folio) { struct f2fs_sb_info *sbi = F2FS_F_SB(folio); struct inode *inode; - nid_t ino = ino_of_node(&folio->page); + nid_t ino = ino_of_node(folio); inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL); if (!inode) @@ -1964,7 +1996,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) for (i = 0; i < nr_folios; i++) { struct folio *folio = fbatch.folios[i]; - if (!IS_INODE(&folio->page)) + if (!IS_INODE(folio)) continue; folio_lock(folio); @@ -1975,10 +2007,10 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) goto unlock; /* flush inline_data, if it's async context. */ - if (page_private_inline(&folio->page)) { - clear_page_private_inline(&folio->page); + if (folio_test_f2fs_inline(folio)) { + folio_clear_f2fs_inline(folio); folio_unlock(folio); - flush_inline_data(sbi, ino_of_node(&folio->page)); + flush_inline_data(sbi, ino_of_node(folio)); continue; } unlock: @@ -2027,13 +2059,13 @@ next_step: * 1. dentry dnodes * 2. file dnodes */ - if (step == 0 && IS_DNODE(&folio->page)) + if (step == 0 && IS_DNODE(folio)) continue; - if (step == 1 && (!IS_DNODE(&folio->page) || - is_cold_node(&folio->page))) + if (step == 1 && (!IS_DNODE(folio) || + is_cold_node(folio))) continue; - if (step == 2 && (!IS_DNODE(&folio->page) || - !is_cold_node(&folio->page))) + if (step == 2 && (!IS_DNODE(folio) || + !is_cold_node(folio))) continue; lock_node: if (wbc->sync_mode == WB_SYNC_ALL) @@ -2057,15 +2089,15 @@ continue_unlock: goto write_node; /* flush inline_data */ - if (page_private_inline(&folio->page)) { - clear_page_private_inline(&folio->page); + if (folio_test_f2fs_inline(folio)) { + folio_clear_f2fs_inline(folio); folio_unlock(folio); - flush_inline_data(sbi, ino_of_node(&folio->page)); + flush_inline_data(sbi, ino_of_node(folio)); goto lock_node; } /* flush dirty inode */ - if (IS_INODE(&folio->page) && flush_dirty_inode(folio)) + if (IS_INODE(folio) && flush_dirty_inode(folio)) goto lock_node; write_node: f2fs_folio_wait_writeback(folio, NODE, true, true); @@ -2073,8 +2105,8 @@ write_node: if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - set_fsync_mark(&folio->page, 0); - set_dentry_mark(&folio->page, 0); + set_fsync_mark(folio, 0); + set_dentry_mark(folio, 0); if (!__write_node_folio(folio, false, &submitted, wbc, do_balance, io_type, NULL)) { @@ -2201,12 +2233,12 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); #ifdef CONFIG_F2FS_CHECK_FS - if (IS_INODE(&folio->page)) - f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page); + if (IS_INODE(folio)) + f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio); #endif if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); return true; } return false; @@ -2351,7 +2383,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, * - __remove_nid_from_list(PREALLOC_NID) * - __insert_nid_to_list(FREE_NID) */ - ne = __lookup_nat_cache(nm_i, nid); + ne = __lookup_nat_cache(nm_i, nid, false); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) goto err_out; @@ -2714,7 +2746,7 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio) if (IS_ERR(ifolio)) return 
PTR_ERR(ifolio); - ri = F2FS_INODE(&folio->page); + ri = F2FS_INODE(folio); if (ri->i_inline & F2FS_INLINE_XATTR) { if (!f2fs_has_inline_xattr(inode)) { set_inode_flag(inode, FI_INLINE_XATTR); @@ -2740,7 +2772,7 @@ update_inode: return 0; } -int f2fs_recover_xattr_data(struct inode *inode, struct page *page) +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; @@ -2778,8 +2810,8 @@ recover_xnid: f2fs_update_inode_page(inode); /* 3: update and set xattr node page dirty */ - if (page) { - memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(page), + if (folio) { + memcpy(F2FS_NODE(xfolio), F2FS_NODE(folio), VALID_XATTR_BLOCK_SIZE); folio_mark_dirty(xfolio); } @@ -2788,10 +2820,10 @@ recover_xnid: return 0; } -int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *src, *dst; - nid_t ino = ino_of_node(page); + nid_t ino = ino_of_node(folio); struct node_info old_ni, new_ni; struct folio *ifolio; int err; @@ -2814,11 +2846,11 @@ retry: if (!folio_test_uptodate(ifolio)) folio_mark_uptodate(ifolio); - fill_node_footer(&ifolio->page, ino, ino, 0, true); - set_cold_node(&ifolio->page, false); + fill_node_footer(ifolio, ino, ino, 0, true); + set_cold_node(ifolio, false); - src = F2FS_INODE(page); - dst = F2FS_INODE(&ifolio->page); + src = F2FS_INODE(folio); + dst = F2FS_INODE(ifolio); memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); dst->i_size = 0; @@ -2884,7 +2916,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, if (IS_ERR(folio)) return PTR_ERR(folio); - rn = F2FS_NODE(&folio->page); + rn = F2FS_NODE(folio); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; @@ -2904,6 +2936,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; int i; + bool init_dirty; down_write(&curseg->journal_rwsem); for (i = 0; i < nats_in_cursum(journal); i++) { @@ -2914,12 +2947,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) if (f2fs_check_nid_range(sbi, nid)) continue; + init_dirty = false; + raw_ne = nat_in_journal(journal, i); - ne = __lookup_nat_cache(nm_i, nid); + ne = __lookup_nat_cache(nm_i, nid, true); if (!ne) { + init_dirty = true; ne = __alloc_nat_entry(sbi, nid, true); - __init_nat_entry(nm_i, ne, &raw_ne, true); + __init_nat_entry(nm_i, ne, &raw_ne, true, true); } /* @@ -2934,7 +2970,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); } - __set_nat_cache_dirty(nm_i, ne); + __set_nat_cache_dirty(nm_i, ne, init_dirty); } update_nats_in_cursum(journal, -i); up_write(&curseg->journal_rwsem); @@ -2959,11 +2995,10 @@ add_out: } static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, - struct page *page) + const struct f2fs_nat_block *nat_blk) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; - struct f2fs_nat_block *nat_blk = page_address(page); int valid = 0; int i = 0; @@ -3000,7 +3035,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, bool to_journal = true; struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; - struct page *page = NULL; + struct folio *folio = NULL; /* * there are two steps to flush nat entries: @@ -3014,11 +3049,11 @@ static int __flush_nat_entry_set(struct 
f2fs_sb_info *sbi, if (to_journal) { down_write(&curseg->journal_rwsem); } else { - page = get_next_nat_page(sbi, start_nid); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = get_next_nat_folio(sbi, start_nid); + if (IS_ERR(folio)) + return PTR_ERR(folio); - nat_blk = page_address(page); + nat_blk = folio_address(folio); f2fs_bug_on(sbi, !nat_blk); } @@ -3054,8 +3089,8 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { up_write(&curseg->journal_rwsem); } else { - __update_nat_bits(sbi, start_nid, page); - f2fs_put_page(page, 1); + __update_nat_bits(sbi, start_nid, nat_blk); + f2fs_folio_put(folio, true); } /* Allow dirty nats by node block allocation in write_begin */ @@ -3395,10 +3430,10 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) } kvfree(nm_i->free_nid_count); - kvfree(nm_i->nat_bitmap); + kfree(nm_i->nat_bitmap); kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kvfree(nm_i->nat_bitmap_mir); + kfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1446c433b3ec..030390543b54 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -31,7 +31,7 @@ /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 -/* control total # of node writes used for roll-fowrad recovery */ +/* control total # of node writes used for roll-forward recovery */ #define DEF_RF_NODE_BLOCKS 0 /* vector size for gang look-up from nat cache that consists of radix tree */ @@ -243,41 +243,41 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) #endif } -static inline nid_t ino_of_node(struct page *node_page) +static inline nid_t ino_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.ino); } -static inline nid_t nid_of_node(struct page *node_page) +static inline nid_t nid_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.nid); } -static inline unsigned int ofs_of_node(const struct page *node_page) +static inline unsigned int ofs_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } -static inline __u64 cpver_of_node(struct page *node_page) +static inline __u64 cpver_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le64_to_cpu(rn->footer.cp_ver); } -static inline block_t next_blkaddr_of_node(struct folio *node_folio) +static inline block_t next_blkaddr_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(&node_folio->page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.next_blkaddr); } -static inline void fill_node_footer(struct page *page, nid_t nid, +static inline void fill_node_footer(const struct folio *folio, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int old_flag = 0; if (reset) @@ -293,17 +293,18 @@ static inline void fill_node_footer(struct page *page, nid_t nid, (old_flag & OFFSET_BIT_MASK)); } -static inline void copy_node_footer(struct page *dst, struct page *src) +static inline void copy_node_footer(const struct 
folio *dst, + const struct folio *src) { struct f2fs_node *src_rn = F2FS_NODE(src); struct f2fs_node *dst_rn = F2FS_NODE(dst); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } -static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +static inline void fill_node_footer_blkaddr(struct folio *folio, block_t blkaddr) { - struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); + struct f2fs_node *rn = F2FS_NODE(folio); __u64 cp_ver = cur_cp_version(ckpt); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) @@ -313,19 +314,19 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } -static inline bool is_recoverable_dnode(struct page *page) +static inline bool is_recoverable_dnode(const struct folio *folio) { - struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); __u64 cp_ver = cur_cp_version(ckpt); /* Don't care crc part, if fsck.f2fs sets it. */ if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) - return (cp_ver << 32) == (cpver_of_node(page) << 32); + return (cp_ver << 32) == (cpver_of_node(folio) << 32); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); - return cp_ver == cpver_of_node(page); + return cp_ver == cpver_of_node(folio); } /* @@ -349,9 +350,9 @@ static inline bool is_recoverable_dnode(struct page *page) * `- indirect node ((6 + 2N) + (N - 1)(N + 1)) * `- direct node */ -static inline bool IS_DNODE(const struct page *node_page) +static inline bool IS_DNODE(const struct folio *node_folio) { - unsigned int ofs = ofs_of_node(node_page); + unsigned int ofs = ofs_of_node(node_folio); if (f2fs_has_xattr_block(ofs)) return true; @@ -369,7 +370,7 @@ static inline bool IS_DNODE(const struct page *node_page) static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) { - struct f2fs_node *rn = F2FS_NODE(&folio->page); + struct f2fs_node *rn = F2FS_NODE(folio); f2fs_folio_wait_writeback(folio, NODE, true, true); @@ -380,9 +381,9 @@ static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) return folio_mark_dirty(folio); } -static inline nid_t get_nid(struct page *p, int off, bool i) +static inline nid_t get_nid(const struct folio *folio, int off, bool i) { - struct f2fs_node *rn = F2FS_NODE(p); + struct f2fs_node *rn = F2FS_NODE(folio); if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); @@ -396,19 +397,19 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold data pages in page cache */ -static inline int is_node(const struct page *page, int type) +static inline int is_node(const struct folio *folio, int type) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(folio); return le32_to_cpu(rn->footer.flag) & BIT(type); } -#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) -#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) -#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) +#define is_cold_node(folio) is_node(folio, COLD_BIT_SHIFT) +#define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT) +#define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT) -static inline void set_cold_node(struct page *page, bool is_dir) +static inline void set_cold_node(const struct folio *folio, bool is_dir) { - struct f2fs_node *rn = F2FS_NODE(page); + struct 
f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); if (is_dir) @@ -418,9 +419,9 @@ static inline void set_cold_node(struct page *page, bool is_dir) rn->footer.flag = cpu_to_le32(flag); } -static inline void set_mark(struct page *page, int mark, int type) +static inline void set_mark(struct folio *folio, int mark, int type) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); @@ -429,8 +430,8 @@ static inline void set_mark(struct page *page, int mark, int type) rn->footer.flag = cpu_to_le32(flag); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_P_SB(page), page); + f2fs_inode_chksum_set(F2FS_F_SB(folio), folio); #endif } -#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) -#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) +#define set_dentry_mark(folio, mark) set_mark(folio, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(folio, mark) set_mark(folio, mark, FSYNC_BIT_SHIFT) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 51ebed4e1521..4cb3a91801b4 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -157,10 +157,10 @@ static int init_recovered_filename(const struct inode *dir, return 0; } -static int recover_dentry(struct inode *inode, struct page *ipage, +static int recover_dentry(struct inode *inode, struct folio *ifolio, struct list_head *dir_list) { - struct f2fs_inode *raw_inode = F2FS_INODE(ipage); + struct f2fs_inode *raw_inode = F2FS_INODE(ifolio); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; struct f2fs_filename fname; @@ -233,14 +233,14 @@ out: else name = raw_inode->i_name; f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), name, + __func__, ino_of_node(ifolio), name, IS_ERR(dir) ? 
0 : dir->i_ino, err); return err; } -static int recover_quota_data(struct inode *inode, struct page *page) +static int recover_quota_data(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(folio); struct iattr attr; uid_t i_uid = le32_to_cpu(raw->i_uid); gid_t i_gid = le32_to_cpu(raw->i_gid); @@ -277,16 +277,16 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) clear_inode_flag(inode, FI_DATA_EXIST); } -static int recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(folio); struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; inode->i_mode = le16_to_cpu(raw->i_mode); - err = recover_quota_data(inode, page); + err = recover_quota_data(inode, folio); if (err) return err; @@ -333,10 +333,10 @@ static int recover_inode(struct inode *inode, struct page *page) if (file_enc_name(inode)) name = "<encrypted>"; else - name = F2FS_INODE(page)->i_name; + name = F2FS_INODE(folio)->i_name; f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + ino_of_node(folio), name, raw->i_inline); return 0; } @@ -375,7 +375,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, if (IS_ERR(folio)) return PTR_ERR(folio); - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); *is_detecting = false; return 0; @@ -424,22 +424,22 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, break; } - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); break; } - if (!is_fsync_dnode(&folio->page)) + if (!is_fsync_dnode(folio)) goto next; - entry = get_fsync_inode(head, ino_of_node(&folio->page)); + entry = get_fsync_inode(head, ino_of_node(folio)); if (!entry) { bool quota_inode = false; if (!check_only && - IS_INODE(&folio->page) && - is_dent_dnode(&folio->page)) { - err = f2fs_recover_inode_page(sbi, &folio->page); + IS_INODE(folio) && + is_dent_dnode(folio)) { + err = f2fs_recover_inode_page(sbi, folio); if (err) { f2fs_folio_put(folio, true); break; @@ -451,7 +451,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, * CP | dnode(F) | inode(DF) * For this case, we should not give up now. 
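
The recovery-path changes here are almost entirely mechanical: every call that used to reach into the folio with &folio->page to satisfy a page-based helper now passes the folio itself, which works because the node.h accessors (converted further down in this patch) take const struct folio * directly. Condensed from the ino_of_node() hunk below, the general recipe is:

        /* before: callers were forced to write ino_of_node(&folio->page) */
        static inline nid_t ino_of_node(struct page *node_page)
        {
                struct f2fs_node *rn = F2FS_NODE(node_page);

                return le32_to_cpu(rn->footer.ino);
        }

        /* after: callers write ino_of_node(folio); the const qualifier
         * documents that the helper only reads the on-disk node footer */
        static inline nid_t ino_of_node(const struct folio *node_folio)
        {
                struct f2fs_node *rn = F2FS_NODE(node_folio);

                return le32_to_cpu(rn->footer.ino);
        }

Eliminating &folio->page is the point of the exercise: that idiom is what still ties f2fs to struct page, and once no caller spells it, these helpers are ready for the planned separation of struct folio from struct page.
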
*/ - entry = add_fsync_inode(sbi, head, ino_of_node(&folio->page), + entry = add_fsync_inode(sbi, head, ino_of_node(folio), quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); @@ -463,7 +463,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, } entry->blkaddr = blkaddr; - if (IS_INODE(&folio->page) && is_dent_dnode(&folio->page)) + if (IS_INODE(folio) && is_dent_dnode(folio)) entry->last_dentry = blkaddr; next: /* check next segment */ @@ -527,7 +527,7 @@ got_it: nid = le32_to_cpu(sum.nid); ofs_in_node = le16_to_cpu(sum.ofs_in_node); - max_addrs = ADDRS_PER_PAGE(&dn->node_folio->page, dn->inode); + max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode); if (ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u", ofs_in_node, dn->inode->i_ino, nid, max_addrs); @@ -552,8 +552,8 @@ got_it: if (IS_ERR(node_folio)) return PTR_ERR(node_folio); - offset = ofs_of_node(&node_folio->page); - ino = ino_of_node(&node_folio->page); + offset = ofs_of_node(node_folio); + ino = ino_of_node(node_folio); f2fs_folio_put(node_folio, true); if (ino != dn->inode->i_ino) { @@ -624,16 +624,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, { struct dnode_of_data dn; struct node_info ni; - unsigned int start, end; + unsigned int start = 0, end = 0, index; int err = 0, recovered = 0; /* step 1: recover xattr */ - if (IS_INODE(&folio->page)) { + if (IS_INODE(folio)) { err = f2fs_recover_inline_xattr(inode, folio); if (err) goto out; - } else if (f2fs_has_xattr_block(ofs_of_node(&folio->page))) { - err = f2fs_recover_xattr_data(inode, &folio->page); + } else if (f2fs_has_xattr_block(ofs_of_node(folio))) { + err = f2fs_recover_xattr_data(inode, folio); if (!err) recovered++; goto out; @@ -648,8 +648,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* step 3: recover data indices */ - start = f2fs_start_bidx_of_node(ofs_of_node(&folio->page), inode); - end = start + ADDRS_PER_PAGE(&folio->page, inode); + start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode); + end = start + ADDRS_PER_PAGE(folio, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); retry_dn: @@ -668,18 +668,18 @@ retry_dn: if (err) goto err; - f2fs_bug_on(sbi, ni.ino != ino_of_node(&folio->page)); + f2fs_bug_on(sbi, ni.ino != ino_of_node(folio)); - if (ofs_of_node(&dn.node_folio->page) != ofs_of_node(&folio->page)) { + if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) { f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(&dn.node_folio->page), - ofs_of_node(&folio->page)); + inode->i_ino, ofs_of_node(dn.node_folio), + ofs_of_node(folio)); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); goto err; } - for (; start < end; start++, dn.ofs_in_node++) { + for (index = start; index < end; index++, dn.ofs_in_node++) { block_t src, dest; src = f2fs_data_blkaddr(&dn); @@ -708,9 +708,9 @@ retry_dn: } if (!file_keep_isize(inode) && - (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) + (i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT))) f2fs_i_size_write(inode, - (loff_t)(start + 1) << PAGE_SHIFT); + (loff_t)(index + 1) << PAGE_SHIFT); /* * dest is reserved block, invalidate src block @@ -758,16 +758,18 @@ retry_prev: } } - copy_node_footer(&dn.node_folio->page, &folio->page); - fill_node_footer(&dn.node_folio->page, dn.nid, ni.ino, - ofs_of_node(&folio->page), false); + copy_node_footer(dn.node_folio, folio); + 
fill_node_footer(dn.node_folio, dn.nid, ni.ino, + ofs_of_node(folio), false); folio_mark_dirty(dn.node_folio); err: f2fs_put_dnode(&dn); out: - f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), " + "range (%u, %u), recovered = %d, err = %d", + inode->i_ino, nid_of_node(folio), + file_keep_isize(inode) ? "keep" : "recover", + start, end, recovered, err); return err; } @@ -778,6 +780,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, int err = 0; block_t blkaddr; unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; + unsigned int recoverable_dnode = 0; + unsigned int fsynced_dnode = 0; + unsigned int total_dnode = 0; + unsigned int recovered_inode = 0; + unsigned int recovered_dentry = 0; + unsigned int recovered_dnode = 0; + + f2fs_notice(sbi, "do_recover_data: start to recover dnode"); /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -796,38 +806,43 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, break; } - if (!is_recoverable_dnode(&folio->page)) { + if (!is_recoverable_dnode(folio)) { f2fs_folio_put(folio, true); break; } + recoverable_dnode++; - entry = get_fsync_inode(inode_list, ino_of_node(&folio->page)); + entry = get_fsync_inode(inode_list, ino_of_node(folio)); if (!entry) goto next; + fsynced_dnode++; /* * inode(x) | CP | inode(x) | dnode(F) * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(&folio->page)) { - err = recover_inode(entry->inode, &folio->page); + if (IS_INODE(folio)) { + err = recover_inode(entry->inode, folio); if (err) { f2fs_folio_put(folio, true); break; } + recovered_inode++; } if (entry->last_dentry == blkaddr) { - err = recover_dentry(entry->inode, &folio->page, dir_list); + err = recover_dentry(entry->inode, folio, dir_list); if (err) { f2fs_folio_put(folio, true); break; } + recovered_dentry++; } err = do_recover_data(sbi, entry->inode, folio); if (err) { f2fs_folio_put(folio, true); break; } + recovered_dnode++; if (entry->blkaddr == blkaddr) list_move_tail(&entry->list, tmp_inode_list); @@ -840,9 +855,15 @@ next: f2fs_folio_put(folio, true); f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); + total_dnode++; } if (!err) err = f2fs_allocate_new_segments(sbi); + + f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, " + "total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d", + recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode, + recovered_dentry, recovered_dnode, err); return err; } @@ -855,6 +876,9 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; + f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, " + "check_only: %d", check_only); + if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "recover fsync data on readonly fs"); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ae1223ef648f..cc82d42ef14c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -334,7 +334,7 @@ static int __f2fs_commit_atomic_write(struct inode *inode) goto next; } - blen = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, cow_inode), + blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, cow_inode), len); index = off; for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) { @@ 
-455,7 +455,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } else { struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, - .init_gc_type = BG_GC, + .init_gc_type = f2fs_sb_has_blkzoned(sbi) ? + FG_GC : BG_GC, .no_bg_gc = true, .should_migrate_blocks = false, .err_gc_skipped = false, @@ -772,7 +773,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); /* need not be added */ - if (IS_CURSEG(sbi, segno)) + if (is_curseg(sbi, segno)) return; if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) @@ -799,7 +800,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, !valid_blocks) || valid_blocks == CAP_BLKS_PER_SEC(sbi)); - if (!IS_CURSEC(sbi, secno)) + if (!is_cursec(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); } } @@ -838,7 +839,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, return; } - if (!IS_CURSEC(sbi, secno)) + if (!is_cursec(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); } } @@ -855,7 +856,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) unsigned short valid_blocks, ckpt_valid_blocks; unsigned int usable_blocks; - if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) + if (segno == NULL_SEGNO || is_curseg(sbi, segno)) return; usable_blocks = f2fs_usable_blks_in_seg(sbi, segno); @@ -888,7 +889,7 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { if (get_valid_blocks(sbi, segno, false)) continue; - if (IS_CURSEG(sbi, segno)) + if (is_curseg(sbi, segno)) continue; __locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); @@ -2107,7 +2108,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (!force) { if (!f2fs_realtime_discard_enable(sbi) || (!se->valid_blocks && - !IS_CURSEG(sbi, cpc->trim_start)) || + !is_curseg(sbi, cpc->trim_start)) || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -2235,7 +2236,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, next: secno = GET_SEC_FROM_SEG(sbi, start); start_segno = GET_SEG_FROM_SEC(sbi, secno); - if (!IS_CURSEC(sbi, secno) && + if (!is_cursec(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), BLKS_PER_SEC(sbi)); @@ -3619,7 +3620,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(fio->page) && is_cold_node(fio->page)) + if (IS_DNODE(fio->folio) && is_cold_node(fio->folio)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; @@ -3665,8 +3666,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; - type = __get_age_segment_type(inode, - page_folio(fio->page)->index); + type = __get_age_segment_type(inode, fio->folio->index); if (type != NO_CHECK_TYPE) return type; @@ -3677,8 +3677,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); } else { - if (IS_DNODE(fio->page)) - return is_cold_node(fio->page) ? CURSEG_WARM_NODE : + if (IS_DNODE(fio->folio)) + return is_cold_node(fio->folio) ? 
CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; } @@ -3746,7 +3746,7 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, get_random_u32_inclusive(1, sbi->max_fragment_hole); } -int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio) @@ -3850,10 +3850,10 @@ skip_new_segment: up_write(&sit_i->sentry_lock); - if (page && IS_NODESEG(curseg->seg_type)) { - fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + if (folio && IS_NODESEG(curseg->seg_type)) { + fill_node_footer_blkaddr(folio, NEXT_FREE_BLKADDR(sbi, curseg)); - f2fs_inode_chksum_set(sbi, page); + f2fs_inode_chksum_set(sbi, folio); } if (fio) { @@ -3931,7 +3931,7 @@ static int log_type_to_seg_type(enum log_type type) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - struct folio *folio = page_folio(fio->page); + struct folio *folio = fio->folio; enum log_type type = __get_segment_type(fio); int seg_type = log_type_to_seg_type(type); bool keep_order = (f2fs_lfs_mode(fio->sbi) && @@ -3940,15 +3940,21 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); - if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + if (f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio)) { if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host)) fscrypt_finalize_bounce_page(&fio->encrypted_page); folio_end_writeback(folio); if (f2fs_in_warm_node_list(fio->sbi, folio)) f2fs_del_fsync_node_entry(fio->sbi, folio); + f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi, + CP_ERROR_FLAG)); goto out; } + + f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr_raw(fio->sbi, + fio->new_blkaddr, DATA_GENERIC_ENHANCE)); + if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); @@ -3972,7 +3978,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = folio->index, .new_blkaddr = folio->index, - .page = folio_page(folio, 0), + .folio = folio, .encrypted_page = NULL, .in_list = 0, }; @@ -4100,14 +4106,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!recover_curseg) { /* for recovery flow */ - if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (se->valid_blocks == 0 && !is_curseg(sbi, segno)) { if (old_blkaddr == NULL_ADDR) type = CURSEG_COLD_DATA; else type = CURSEG_WARM_DATA; } } else { - if (IS_CURSEG(sbi, segno)) { + if (is_curseg(sbi, segno)) { /* se->type is volatile as SSR allocation */ type = __f2fs_get_curseg(sbi, segno); f2fs_bug_on(sbi, type == NO_CHECK_TYPE); @@ -4191,7 +4197,7 @@ void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, struct f2fs_sb_info *sbi = F2FS_F_SB(folio); /* submit cached LFS IO */ - f2fs_submit_merged_write_cond(sbi, NULL, &folio->page, 0, type); + f2fs_submit_merged_write_cond(sbi, NULL, folio, 0, type); /* submit cached IPU IO */ f2fs_submit_merged_ipu_write(sbi, NULL, folio); if (ordered) { @@ -5143,7 +5149,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi)) continue; - if (IS_CURSEC(sbi, secno)) + if (is_cursec(sbi, secno)) continue; set_bit(secno, 
dirty_i->dirty_secmap); } @@ -5279,7 +5285,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, * Get # of valid block of the zone. */ valid_block_cnt = get_valid_blocks(sbi, zone_segno, true); - if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]", zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond)); @@ -5806,9 +5812,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kvfree(sit_i->sit_bitmap); + kfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kvfree(sit_i->sit_bitmap_mir); + kfree(sit_i->sit_bitmap_mir); kvfree(sit_i->invalid_segmap); #endif kfree(sit_i); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index db619fd2f51a..5e2ee5c686b1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -34,34 +34,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG); } -#define IS_CURSEG(sbi, seg) \ - (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno)) - -#define IS_CURSEC(sbi, secno) \ - (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \ - SEGS_PER_SEC(sbi)) || \ - ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \ - SEGS_PER_SEC(sbi))) - #define MAIN_BLKADDR(sbi) \ (SM_I(sbi) ? 
SM_I(sbi)->main_blkaddr : \ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) @@ -318,6 +290,28 @@ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } +static inline bool is_curseg(struct f2fs_sb_info *sbi, unsigned int segno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (segno == CURSEG_I(sbi, i)->segno) + return true; + } + return false; +} + +static inline bool is_cursec(struct f2fs_sb_info *sbi, unsigned int secno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (secno == GET_SEC_FROM_SEG(sbi, CURSEG_I(sbi, i)->segno)) + return true; + } + return false; +} + static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, unsigned int segno) { @@ -509,7 +503,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_segments++; - if (!inmem && IS_CURSEC(sbi, secno)) + if (!inmem && is_cursec(sbi, secno)) goto unlock_out; /* check large section */ @@ -674,8 +668,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int data_blocks = 0; - if (f2fs_lfs_mode(sbi) && - unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (f2fs_lfs_mode(sbi)) { total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA); data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi); data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi); @@ -684,7 +677,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, if (lower_p) *lower_p = node_secs + dent_secs + data_secs; if (upper_p) - *upper_p = node_secs + dent_secs + + *upper_p = node_secs + dent_secs + data_secs + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) + (data_blocks ? 
1 : 0); if (curseg_p) @@ -986,7 +979,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { - if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + if (is_cursec(sbi, secno) || (sbi->cur_victim_sec == secno)) return true; return false; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bbf1dad6843f..e16c4e2830c2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -27,6 +27,8 @@ #include <linux/part_stat.h> #include <linux/zstd.h> #include <linux/lz4.h> +#include <linux/ctype.h> +#include <linux/fs_parser.h> #include "f2fs.h" #include "node.h" @@ -125,29 +127,20 @@ enum { Opt_disable_roll_forward, Opt_norecovery, Opt_discard, - Opt_nodiscard, Opt_noheap, Opt_heap, Opt_user_xattr, - Opt_nouser_xattr, Opt_acl, - Opt_noacl, Opt_active_logs, Opt_disable_ext_identify, Opt_inline_xattr, - Opt_noinline_xattr, Opt_inline_xattr_size, Opt_inline_data, Opt_inline_dentry, - Opt_noinline_dentry, Opt_flush_merge, - Opt_noflush_merge, Opt_barrier, - Opt_nobarrier, Opt_fastboot, Opt_extent_cache, - Opt_noextent_cache, - Opt_noinline_data, Opt_data_flush, Opt_reserve_root, Opt_resgid, @@ -156,21 +149,13 @@ enum { Opt_fault_injection, Opt_fault_type, Opt_lazytime, - Opt_nolazytime, Opt_quota, - Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_usrjquota, Opt_grpjquota, Opt_prjjquota, - Opt_offusrjquota, - Opt_offgrpjquota, - Opt_offprjjquota, - Opt_jqfmt_vfsold, - Opt_jqfmt_vfsv0, - Opt_jqfmt_vfsv1, Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, @@ -180,107 +165,209 @@ enum { Opt_checkpoint_disable_cap_perc, Opt_checkpoint_enable, Opt_checkpoint_merge, - Opt_nocheckpoint_merge, Opt_compress_algorithm, Opt_compress_log_size, - Opt_compress_extension, Opt_nocompress_extension, + Opt_compress_extension, Opt_compress_chksum, Opt_compress_mode, Opt_compress_cache, Opt_atgc, Opt_gc_merge, - Opt_nogc_merge, Opt_discard_unit, Opt_memory_mode, Opt_age_extent_cache, Opt_errors, Opt_nat_bits, + Opt_jqfmt, + Opt_checkpoint, Opt_err, }; -static match_table_t f2fs_tokens = { - {Opt_gc_background, "background_gc=%s"}, - {Opt_disable_roll_forward, "disable_roll_forward"}, - {Opt_norecovery, "norecovery"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_noheap, "no_heap"}, - {Opt_heap, "heap"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_active_logs, "active_logs=%u"}, - {Opt_disable_ext_identify, "disable_ext_identify"}, - {Opt_inline_xattr, "inline_xattr"}, - {Opt_noinline_xattr, "noinline_xattr"}, - {Opt_inline_xattr_size, "inline_xattr_size=%u"}, - {Opt_inline_data, "inline_data"}, - {Opt_inline_dentry, "inline_dentry"}, - {Opt_noinline_dentry, "noinline_dentry"}, - {Opt_flush_merge, "flush_merge"}, - {Opt_noflush_merge, "noflush_merge"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_fastboot, "fastboot"}, - {Opt_extent_cache, "extent_cache"}, - {Opt_noextent_cache, "noextent_cache"}, - {Opt_noinline_data, "noinline_data"}, - {Opt_data_flush, "data_flush"}, - {Opt_reserve_root, "reserve_root=%u"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_mode, "mode=%s"}, - {Opt_fault_injection, "fault_injection=%u"}, - {Opt_fault_type, "fault_type=%u"}, - {Opt_lazytime, "lazytime"}, - {Opt_nolazytime, "nolazytime"}, - {Opt_quota, "quota"}, - {Opt_noquota, "noquota"}, - {Opt_usrquota, "usrquota"}, - {Opt_grpquota, "grpquota"}, - {Opt_prjquota, "prjquota"}, - 
{Opt_usrjquota, "usrjquota=%s"}, - {Opt_grpjquota, "grpjquota=%s"}, - {Opt_prjjquota, "prjjquota=%s"}, - {Opt_offusrjquota, "usrjquota="}, - {Opt_offgrpjquota, "grpjquota="}, - {Opt_offprjjquota, "prjjquota="}, - {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, - {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, - {Opt_alloc, "alloc_mode=%s"}, - {Opt_fsync, "fsync_mode=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_inlinecrypt, "inlinecrypt"}, - {Opt_checkpoint_disable, "checkpoint=disable"}, - {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, - {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, - {Opt_checkpoint_enable, "checkpoint=enable"}, - {Opt_checkpoint_merge, "checkpoint_merge"}, - {Opt_nocheckpoint_merge, "nocheckpoint_merge"}, - {Opt_compress_algorithm, "compress_algorithm=%s"}, - {Opt_compress_log_size, "compress_log_size=%u"}, - {Opt_compress_extension, "compress_extension=%s"}, - {Opt_nocompress_extension, "nocompress_extension=%s"}, - {Opt_compress_chksum, "compress_chksum"}, - {Opt_compress_mode, "compress_mode=%s"}, - {Opt_compress_cache, "compress_cache"}, - {Opt_atgc, "atgc"}, - {Opt_gc_merge, "gc_merge"}, - {Opt_nogc_merge, "nogc_merge"}, - {Opt_discard_unit, "discard_unit=%s"}, - {Opt_memory_mode, "memory=%s"}, - {Opt_age_extent_cache, "age_extent_cache"}, - {Opt_errors, "errors=%s"}, - {Opt_nat_bits, "nat_bits"}, +static const struct constant_table f2fs_param_background_gc[] = { + {"on", BGGC_MODE_ON}, + {"off", BGGC_MODE_OFF}, + {"sync", BGGC_MODE_SYNC}, + {} +}; + +static const struct constant_table f2fs_param_mode[] = { + {"adaptive", FS_MODE_ADAPTIVE}, + {"lfs", FS_MODE_LFS}, + {"fragment:segment", FS_MODE_FRAGMENT_SEG}, + {"fragment:block", FS_MODE_FRAGMENT_BLK}, + {} +}; + +static const struct constant_table f2fs_param_jqfmt[] = { + {"vfsold", QFMT_VFS_OLD}, + {"vfsv0", QFMT_VFS_V0}, + {"vfsv1", QFMT_VFS_V1}, + {} +}; + +static const struct constant_table f2fs_param_alloc_mode[] = { + {"default", ALLOC_MODE_DEFAULT}, + {"reuse", ALLOC_MODE_REUSE}, + {} +}; +static const struct constant_table f2fs_param_fsync_mode[] = { + {"posix", FSYNC_MODE_POSIX}, + {"strict", FSYNC_MODE_STRICT}, + {"nobarrier", FSYNC_MODE_NOBARRIER}, + {} +}; + +static const struct constant_table f2fs_param_compress_mode[] = { + {"fs", COMPR_MODE_FS}, + {"user", COMPR_MODE_USER}, + {} +}; + +static const struct constant_table f2fs_param_discard_unit[] = { + {"block", DISCARD_UNIT_BLOCK}, + {"segment", DISCARD_UNIT_SEGMENT}, + {"section", DISCARD_UNIT_SECTION}, + {} +}; + +static const struct constant_table f2fs_param_memory_mode[] = { + {"normal", MEMORY_MODE_NORMAL}, + {"low", MEMORY_MODE_LOW}, + {} +}; + +static const struct constant_table f2fs_param_errors[] = { + {"remount-ro", MOUNT_ERRORS_READONLY}, + {"continue", MOUNT_ERRORS_CONTINUE}, + {"panic", MOUNT_ERRORS_PANIC}, + {} +}; + +static const struct fs_parameter_spec f2fs_param_specs[] = { + fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc), + fsparam_flag("disable_roll_forward", Opt_disable_roll_forward), + fsparam_flag("norecovery", Opt_norecovery), + fsparam_flag_no("discard", Opt_discard), + fsparam_flag("no_heap", Opt_noheap), + fsparam_flag("heap", Opt_heap), + fsparam_flag_no("user_xattr", Opt_user_xattr), + fsparam_flag_no("acl", Opt_acl), + fsparam_s32("active_logs", Opt_active_logs), + fsparam_flag("disable_ext_identify", Opt_disable_ext_identify), + fsparam_flag_no("inline_xattr", 
Opt_inline_xattr), + fsparam_s32("inline_xattr_size", Opt_inline_xattr_size), + fsparam_flag_no("inline_data", Opt_inline_data), + fsparam_flag_no("inline_dentry", Opt_inline_dentry), + fsparam_flag_no("flush_merge", Opt_flush_merge), + fsparam_flag_no("barrier", Opt_barrier), + fsparam_flag("fastboot", Opt_fastboot), + fsparam_flag_no("extent_cache", Opt_extent_cache), + fsparam_flag("data_flush", Opt_data_flush), + fsparam_u32("reserve_root", Opt_reserve_root), + fsparam_gid("resgid", Opt_resgid), + fsparam_uid("resuid", Opt_resuid), + fsparam_enum("mode", Opt_mode, f2fs_param_mode), + fsparam_s32("fault_injection", Opt_fault_injection), + fsparam_u32("fault_type", Opt_fault_type), + fsparam_flag_no("lazytime", Opt_lazytime), + fsparam_flag_no("quota", Opt_quota), + fsparam_flag("usrquota", Opt_usrquota), + fsparam_flag("grpquota", Opt_grpquota), + fsparam_flag("prjquota", Opt_prjquota), + fsparam_string_empty("usrjquota", Opt_usrjquota), + fsparam_string_empty("grpjquota", Opt_grpjquota), + fsparam_string_empty("prjjquota", Opt_prjjquota), + fsparam_flag("nat_bits", Opt_nat_bits), + fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt), + fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode), + fsparam_enum("fsync_mode", Opt_fsync, f2fs_param_fsync_mode), + fsparam_string("test_dummy_encryption", Opt_test_dummy_encryption), + fsparam_flag("test_dummy_encryption", Opt_test_dummy_encryption), + fsparam_flag("inlinecrypt", Opt_inlinecrypt), + fsparam_string("checkpoint", Opt_checkpoint), + fsparam_flag_no("checkpoint_merge", Opt_checkpoint_merge), + fsparam_string("compress_algorithm", Opt_compress_algorithm), + fsparam_u32("compress_log_size", Opt_compress_log_size), + fsparam_string("compress_extension", Opt_compress_extension), + fsparam_string("nocompress_extension", Opt_nocompress_extension), + fsparam_flag("compress_chksum", Opt_compress_chksum), + fsparam_enum("compress_mode", Opt_compress_mode, f2fs_param_compress_mode), + fsparam_flag("compress_cache", Opt_compress_cache), + fsparam_flag("atgc", Opt_atgc), + fsparam_flag_no("gc_merge", Opt_gc_merge), + fsparam_enum("discard_unit", Opt_discard_unit, f2fs_param_discard_unit), + fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode), + fsparam_flag("age_extent_cache", Opt_age_extent_cache), + fsparam_enum("errors", Opt_errors, f2fs_param_errors), + {} +}; + +/* Resort to a match_table for this interestingly formatted option */ +static match_table_t f2fs_checkpoint_tokens = { + {Opt_checkpoint_disable, "disable"}, + {Opt_checkpoint_disable_cap, "disable:%u"}, + {Opt_checkpoint_disable_cap_perc, "disable:%u%%"}, + {Opt_checkpoint_enable, "enable"}, {Opt_err, NULL}, }; +#define F2FS_SPEC_background_gc (1 << 0) +#define F2FS_SPEC_inline_xattr_size (1 << 1) +#define F2FS_SPEC_active_logs (1 << 2) +#define F2FS_SPEC_reserve_root (1 << 3) +#define F2FS_SPEC_resgid (1 << 4) +#define F2FS_SPEC_resuid (1 << 5) +#define F2FS_SPEC_mode (1 << 6) +#define F2FS_SPEC_fault_injection (1 << 7) +#define F2FS_SPEC_fault_type (1 << 8) +#define F2FS_SPEC_jqfmt (1 << 9) +#define F2FS_SPEC_alloc_mode (1 << 10) +#define F2FS_SPEC_fsync_mode (1 << 11) +#define F2FS_SPEC_checkpoint_disable_cap (1 << 12) +#define F2FS_SPEC_checkpoint_disable_cap_perc (1 << 13) +#define F2FS_SPEC_compress_level (1 << 14) +#define F2FS_SPEC_compress_algorithm (1 << 15) +#define F2FS_SPEC_compress_log_size (1 << 16) +#define F2FS_SPEC_compress_extension (1 << 17) +#define F2FS_SPEC_nocompress_extension (1 << 18) +#define F2FS_SPEC_compress_chksum (1 << 19) +#define 
F2FS_SPEC_compress_mode (1 << 20) +#define F2FS_SPEC_discard_unit (1 << 21) +#define F2FS_SPEC_memory_mode (1 << 22) +#define F2FS_SPEC_errors (1 << 23) + +struct f2fs_fs_context { + struct f2fs_mount_info info; + unsigned int opt_mask; /* Bits changed */ + unsigned int spec_mask; + unsigned short qname_mask; +}; + +#define F2FS_CTX_INFO(ctx) ((ctx)->info) + +static inline void ctx_set_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + ctx->info.opt |= flag; + ctx->opt_mask |= flag; +} + +static inline void ctx_clear_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + ctx->info.opt &= ~flag; + ctx->opt_mask |= flag; +} + +static inline bool ctx_test_opt(struct f2fs_fs_context *ctx, + unsigned int flag) +{ + return ctx->info.opt & flag; +} + void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, - const char *fmt, ...) + const char *fmt, ...) { struct va_format vaf; va_list args; @@ -292,11 +379,19 @@ void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, vaf.fmt = printk_skip_level(fmt); vaf.va = &args; if (limit_rate) - printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (sbi) + printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk_ratelimited("%c%cF2FS-fs: %pV\n", + KERN_SOH_ASCII, level, &vaf); else - printk("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (sbi) + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk("%c%cF2FS-fs: %pV\n", + KERN_SOH_ASCII, level, &vaf); va_end(args); } @@ -390,159 +485,90 @@ static void init_once(void *foo) #ifdef CONFIG_QUOTA static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) -static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype, - substring_t *args) +/* + * Note the name of the specified quota file. 
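
A design note on struct f2fs_fs_context, defined just above: with the new mount API, options are parsed before any superblock exists, so the context records not only the parsed values in its scratch f2fs_mount_info but also which options the user actually mentioned (opt_mask for flag bits, spec_mask via the F2FS_SPEC_* bits, qname_mask for the quota file names handled next). That bookkeeping is what lets the later apply/reconfigure step change only what was specified. A hypothetical sketch of such a merge; the real apply code lives outside this hunk, so apply_ctx_flags() here is illustrative only:

        /* illustrative only: fold parsed options into a mounted sbi,
         * leaving everything the user did not mention untouched */
        static void apply_ctx_flags(struct f2fs_sb_info *sbi,
                                    const struct f2fs_fs_context *ctx)
        {
                unsigned int opt = F2FS_OPTION(sbi).opt;

                opt &= ~ctx->opt_mask;                /* bits the user touched... */
                opt |= ctx->info.opt & ctx->opt_mask; /* ...take their new value */
                F2FS_OPTION(sbi).opt = opt;

                if (ctx->spec_mask & F2FS_SPEC_active_logs)
                        F2FS_OPTION(sbi).active_logs = ctx->info.active_logs;
                /* ...likewise for the remaining F2FS_SPEC_* bits */
        }
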
+ */ +static int f2fs_note_qf_name(struct fs_context *fc, int qtype, + struct fs_parameter *param) { - struct super_block *sb = sbi->sb; + struct f2fs_fs_context *ctx = fc->fs_private; char *qname; - int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); + if (param->size < 1) { + f2fs_err(NULL, "Missing quota name"); return -EINVAL; } - if (f2fs_sb_has_quota_ino(sbi)) { - f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); + if (strchr(param->string, '/')) { + f2fs_err(NULL, "quotafile must be on filesystem root"); + return -EINVAL; + } + if (ctx->info.s_qf_names[qtype]) { + if (strcmp(ctx->info.s_qf_names[qtype], param->string) != 0) { + f2fs_err(NULL, "Quota file already specified"); + return -EINVAL; + } return 0; } - qname = match_strdup(args); + qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); if (!qname) { - f2fs_err(sbi, "Not enough memory for storing quotafile name"); + f2fs_err(NULL, "Not enough memory for storing quotafile name"); return -ENOMEM; } - if (F2FS_OPTION(sbi).s_qf_names[qtype]) { - if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) - ret = 0; - else - f2fs_err(sbi, "%s quota file already specified", - QTYPE2NAME(qtype)); - goto errout; - } - if (strchr(qname, '/')) { - f2fs_err(sbi, "quotafile must be on filesystem root"); - goto errout; - } - F2FS_OPTION(sbi).s_qf_names[qtype] = qname; - set_opt(sbi, QUOTA); + F2FS_CTX_INFO(ctx).s_qf_names[qtype] = qname; + ctx->qname_mask |= 1 << qtype; return 0; -errout: - kfree(qname); - return ret; } -static int f2fs_clear_qf_name(struct f2fs_sb_info *sbi, int qtype) +/* + * Clear the name of the specified quota file. + */ +static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype) { - struct super_block *sb = sbi->sb; + struct f2fs_fs_context *ctx = fc->fs_private; - if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); - return -EINVAL; - } - kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); - F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; + kfree(ctx->info.s_qf_names[qtype]); + ctx->info.s_qf_names[qtype] = NULL; + ctx->qname_mask |= 1 << qtype; return 0; } -static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) +static void f2fs_unnote_qf_name_all(struct fs_context *fc) { - /* - * We do the test below only for project quotas. 'usrquota' and - * 'grpquota' mount options are allowed even without quota feature - * to support legacy quotas in quota files. - */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { - f2fs_err(sbi, "Project quota feature not enabled. 
Cannot enable project quota enforcement."); - return -1; - } - if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || - F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || - F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && - F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) - clear_opt(sbi, USRQUOTA); - - if (test_opt(sbi, GRPQUOTA) && - F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) - clear_opt(sbi, GRPQUOTA); - - if (test_opt(sbi, PRJQUOTA) && - F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) - clear_opt(sbi, PRJQUOTA); - - if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || - test_opt(sbi, PRJQUOTA)) { - f2fs_err(sbi, "old and new quota format mixing"); - return -1; - } - - if (!F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_err(sbi, "journaled quota format not specified"); - return -1; - } - } + int i; - if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); - F2FS_OPTION(sbi).s_jquota_fmt = 0; - } - return 0; + for (i = 0; i < MAXQUOTAS; i++) + f2fs_unnote_qf_name(fc, i); } #endif -static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi, - const char *opt, - const substring_t *arg, - bool is_remount) +static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param, + struct f2fs_fs_context *ctx) { - struct fs_parameter param = { - .type = fs_value_is_string, - .string = arg->from ? arg->from : "", - }; - struct fscrypt_dummy_policy *policy = - &F2FS_OPTION(sbi).dummy_enc_policy; int err; if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) { - f2fs_warn(sbi, "test_dummy_encryption option not supported"); + f2fs_warn(NULL, "test_dummy_encryption option not supported"); return -EINVAL; } - - if (!f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, "Encrypt feature is off"); - return -EINVAL; - } - - /* - * This mount option is just for testing, and it's not worthwhile to - * implement the extra complexity (e.g. RCU protection) that would be - * needed to allow it to be set or changed during remount. We do allow - * it to be specified during remount, but only if there is no change. 
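
The rewrite in progress here shows how much of the old parser was scaffolding: fscrypt's helper already accepts a raw fs_parameter, so the code that manufactured one from a substring_t disappears, and the "no change on remount" policy is enforced centrally through the helper's -EEXIST return rather than by the is_remount check being deleted above. A minimal sketch of the new call, assuming a ctx as defined earlier in this patch ("v2" picks the fscrypt policy version for the dummy key):

        struct fs_parameter param = {
                .key    = "test_dummy_encryption",
                .type   = fs_value_is_string,
                .string = "v2",
                .size   = 2,
        };
        int err = fscrypt_parse_test_dummy_encryption(&param,
                                &ctx->info.dummy_enc_policy);
        /* -EEXIST: a conflicting dummy policy was already recorded;
         * -EINVAL: the value was not understood */
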
- */ - if (is_remount && !fscrypt_is_dummy_policy_set(policy)) { - f2fs_warn(sbi, "Can't set test_dummy_encryption on remount"); - return -EINVAL; - } - - err = fscrypt_parse_test_dummy_encryption(¶m, policy); + err = fscrypt_parse_test_dummy_encryption(param, + &ctx->info.dummy_enc_policy); if (err) { - if (err == -EEXIST) - f2fs_warn(sbi, - "Can't change test_dummy_encryption on remount"); - else if (err == -EINVAL) - f2fs_warn(sbi, "Value of option \"%s\" is unrecognized", - opt); + if (err == -EINVAL) + f2fs_warn(NULL, "Value of option \"%s\" is unrecognized", + param->key); + else if (err == -EEXIST) + f2fs_warn(NULL, "Conflicting test_dummy_encryption options"); else - f2fs_warn(sbi, "Error processing option \"%s\" [%d]", - opt, err); + f2fs_warn(NULL, "Error processing option \"%s\" [%d]", + param->key, err); return -EINVAL; } - f2fs_warn(sbi, "Test dummy encryption mode enabled"); return 0; } #ifdef CONFIG_F2FS_FS_COMPRESSION -static bool is_compress_extension_exist(struct f2fs_sb_info *sbi, +static bool is_compress_extension_exist(struct f2fs_mount_info *info, const char *new_ext, bool is_ext) { unsigned char (*ext)[F2FS_EXTENSION_LEN]; @@ -550,11 +576,11 @@ static bool is_compress_extension_exist(struct f2fs_sb_info *sbi, int i; if (is_ext) { - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + ext = info->extensions; + ext_cnt = info->compress_ext_cnt; } else { - ext = F2FS_OPTION(sbi).noextensions; - ext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + ext = info->noextensions; + ext_cnt = info->nocompress_ext_cnt; } for (i = 0; i < ext_cnt; i++) { @@ -572,28 +598,28 @@ static bool is_compress_extension_exist(struct f2fs_sb_info *sbi, * extension will be treated as special cases and will not be compressed. * 3. Don't allow the non-compress extension specifies all files. 
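
Two details of the extension checks are easy to miss: they now run against arrays handed in from the parsing context rather than against sbi, and both loops skip zero-length slots, so a context that merely carries empty extension tables cannot trip the conflict detection. What the rules reject, sketched against the new signature (contents and counts are illustrative):

        unsigned char ext[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]   = { "log" };
        unsigned char noext[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN] = { "log" };
        unsigned char star[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]  = { "*" };
        int err;

        /* rule 3: "*" as a nocompress extension would match every file */
        err = f2fs_test_compress_extension(star, 1, ext, 1);   /* -EINVAL */

        /* same name on both lists is ambiguous */
        err = f2fs_test_compress_extension(noext, 1, ext, 1);  /* -EINVAL */

        /* an empty nocompress list always passes */
        err = f2fs_test_compress_extension(noext, 0, ext, 1);  /* 0 */
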
*/ -static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) +static int f2fs_test_compress_extension(unsigned char (*noext)[F2FS_EXTENSION_LEN], + int noext_cnt, + unsigned char (*ext)[F2FS_EXTENSION_LEN], + int ext_cnt) { - unsigned char (*ext)[F2FS_EXTENSION_LEN]; - unsigned char (*noext)[F2FS_EXTENSION_LEN]; - int ext_cnt, noext_cnt, index = 0, no_index = 0; - - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + int index = 0, no_index = 0; if (!noext_cnt) return 0; for (no_index = 0; no_index < noext_cnt; no_index++) { + if (strlen(noext[no_index]) == 0) + continue; if (!strcasecmp("*", noext[no_index])) { - f2fs_info(sbi, "Don't allow the nocompress extension specifies all files"); + f2fs_info(NULL, "Don't allow the nocompress extension specifies all files"); return -EINVAL; } for (index = 0; index < ext_cnt; index++) { + if (strlen(ext[index]) == 0) + continue; if (!strcasecmp(ext[index], noext[no_index])) { - f2fs_info(sbi, "Don't allow the same extension %s appear in both compress and nocompress extension", + f2fs_info(NULL, "Don't allow the same extension %s appear in both compress and nocompress extension", ext[index]); return -EINVAL; } @@ -603,58 +629,62 @@ static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) } #ifdef CONFIG_F2FS_FS_LZ4 -static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) +static int f2fs_set_lz4hc_level(struct f2fs_fs_context *ctx, const char *str) { #ifdef CONFIG_F2FS_FS_LZ4HC unsigned int level; if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_level = 0; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } str += 3; if (str[0] != ':') { - f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>"); + f2fs_info(NULL, "wrong format, e.g. <alg_name>:<compr_level>"); return -EINVAL; } if (kstrtouint(str + 1, 10, &level)) return -EINVAL; if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) { - f2fs_info(sbi, "invalid lz4hc compress level: %d", level); + f2fs_info(NULL, "invalid lz4hc compress level: %d", level); return -EINVAL; } - F2FS_OPTION(sbi).compress_level = level; + F2FS_CTX_INFO(ctx).compress_level = level; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; #else if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_level = 0; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } - f2fs_info(sbi, "kernel doesn't support lz4hc compression"); + f2fs_info(NULL, "kernel doesn't support lz4hc compression"); return -EINVAL; #endif } #endif #ifdef CONFIG_F2FS_FS_ZSTD -static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) +static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str) { int level; int len = 4; if (strlen(str) == len) { - F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + F2FS_CTX_INFO(ctx).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } str += len; if (str[0] != ':') { - f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>"); + f2fs_info(NULL, "wrong format, e.g. 
<alg_name>:<compr_level>"); return -EINVAL; } if (kstrtoint(str + 1, 10, &level)) @@ -662,685 +692,750 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) /* f2fs does not support negative compress level now */ if (level < 0) { - f2fs_info(sbi, "do not support negative compress level: %d", level); + f2fs_info(NULL, "do not support negative compress level: %d", level); return -ERANGE; } if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { - f2fs_info(sbi, "invalid zstd compress level: %d", level); + f2fs_info(NULL, "invalid zstd compress level: %d", level); return -EINVAL; } - F2FS_OPTION(sbi).compress_level = level; + F2FS_CTX_INFO(ctx).compress_level = level; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } #endif #endif -static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount) +static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - substring_t args[MAX_OPT_ARGS]; + struct f2fs_fs_context *ctx = fc->fs_private; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; unsigned char (*noext)[F2FS_EXTENSION_LEN]; int ext_cnt, noext_cnt; + char *name; #endif - char *p, *name; - int arg = 0; - kuid_t uid; - kgid_t gid; - int ret; - - if (!options) - return 0; - - while ((p = strsep(&options, ",")) != NULL) { - int token; + substring_t args[MAX_OPT_ARGS]; + struct fs_parse_result result; + int token, ret, arg; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. - */ - args[0].to = args[0].from = NULL; - token = match_token(p, f2fs_tokens, args); + token = fs_parse(fc, f2fs_param_specs, param, &result); + if (token < 0) + return token; - switch (token) { - case Opt_gc_background: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "on")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - } else if (!strcmp(name, "off")) { - if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_warn(sbi, "zoned devices need bggc"); - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; - } else if (!strcmp(name, "sync")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); - break; - case Opt_norecovery: - /* requires ro mount, checked in f2fs_default_check */ - set_opt(sbi, NORECOVERY); - break; - case Opt_discard: - if (!f2fs_hw_support_discard(sbi)) { - f2fs_warn(sbi, "device does not support discard"); - break; - } - set_opt(sbi, DISCARD); - break; - case Opt_nodiscard: - if (f2fs_hw_should_discard(sbi)) { - f2fs_warn(sbi, "discard is required for zoned block devices"); - return -EINVAL; - } - clear_opt(sbi, DISCARD); - break; - case Opt_noheap: - case Opt_heap: - f2fs_warn(sbi, "heap/no_heap options were deprecated"); - break; + switch (token) { + case Opt_gc_background: + F2FS_CTX_INFO(ctx).bggc_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_background_gc; + break; + case Opt_disable_roll_forward: + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_ROLL_FORWARD); + break; + case Opt_norecovery: + /* requires ro mount, checked in f2fs_validate_options */ + ctx_set_opt(ctx, F2FS_MOUNT_NORECOVERY); + break; + case Opt_discard: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); + else + ctx_set_opt(ctx, F2FS_MOUNT_DISCARD); + break; + case Opt_noheap: + case Opt_heap: + f2fs_warn(NULL, "heap/no_heap options were deprecated"); + break; 
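
Two aspects of the new switch deserve a callout. First, a single fsparam_flag_no() spec matches both the plain and the "no"-prefixed spelling of an option, and fs_parse() reports which one appeared via result.negated; that is exactly what made every paired Opt_no* token removable from the enum above. Second, the hardware probes the old parser ran inline (f2fs_hw_support_discard(), f2fs_hw_should_discard(), the zoned-device check under background_gc=off) are gone from this path by necessity: there is no sbi at parse time, so they must move to the later validation step. The negated-flag pattern in miniature, pairing the spec with its handler:

        /* one spec line covers both "discard" and "nodiscard"... */
        fsparam_flag_no("discard", Opt_discard),

        /* ...and one case in .parse_param serves both spellings */
        case Opt_discard:
                if (result.negated)     /* user wrote "nodiscard" */
                        ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD);
                else                    /* user wrote "discard" */
                        ctx_set_opt(ctx, F2FS_MOUNT_DISCARD);
                break;
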
#ifdef CONFIG_F2FS_FS_XATTR - case Opt_user_xattr: - set_opt(sbi, XATTR_USER); - break; - case Opt_nouser_xattr: - clear_opt(sbi, XATTR_USER); - break; - case Opt_inline_xattr: - set_opt(sbi, INLINE_XATTR); - break; - case Opt_noinline_xattr: - clear_opt(sbi, INLINE_XATTR); - break; - case Opt_inline_xattr_size: - if (args->from && match_int(args, &arg)) - return -EINVAL; - set_opt(sbi, INLINE_XATTR_SIZE); - F2FS_OPTION(sbi).inline_xattr_size = arg; - break; + case Opt_user_xattr: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_XATTR_USER); + else + ctx_set_opt(ctx, F2FS_MOUNT_XATTR_USER); + break; + case Opt_inline_xattr: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_XATTR); + else + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR); + break; + case Opt_inline_xattr_size: + if (result.int_32 < MIN_INLINE_XATTR_SIZE || + result.int_32 > MAX_INLINE_XATTR_SIZE) { + f2fs_err(NULL, "inline xattr size is out of range: %u ~ %u", + (u32)MIN_INLINE_XATTR_SIZE, (u32)MAX_INLINE_XATTR_SIZE); + return -EINVAL; + } + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE); + F2FS_CTX_INFO(ctx).inline_xattr_size = result.int_32; + ctx->spec_mask |= F2FS_SPEC_inline_xattr_size; + break; #else - case Opt_user_xattr: - case Opt_nouser_xattr: - case Opt_inline_xattr: - case Opt_noinline_xattr: - case Opt_inline_xattr_size: - f2fs_info(sbi, "xattr options not supported"); - break; + case Opt_user_xattr: + case Opt_inline_xattr: + case Opt_inline_xattr_size: + f2fs_info(NULL, "%s options not supported", param->key); + break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_acl: - set_opt(sbi, POSIX_ACL); - break; - case Opt_noacl: - clear_opt(sbi, POSIX_ACL); - break; + case Opt_acl: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_POSIX_ACL); + else + ctx_set_opt(ctx, F2FS_MOUNT_POSIX_ACL); + break; #else - case Opt_acl: - case Opt_noacl: - f2fs_info(sbi, "acl options not supported"); - break; + case Opt_acl: + f2fs_info(NULL, "%s options not supported", param->key); + break; #endif - case Opt_active_logs: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg != 2 && arg != 4 && - arg != NR_CURSEG_PERSIST_TYPE) - return -EINVAL; - F2FS_OPTION(sbi).active_logs = arg; - break; - case Opt_disable_ext_identify: - set_opt(sbi, DISABLE_EXT_IDENTIFY); - break; - case Opt_inline_data: - set_opt(sbi, INLINE_DATA); - break; - case Opt_inline_dentry: - set_opt(sbi, INLINE_DENTRY); - break; - case Opt_noinline_dentry: - clear_opt(sbi, INLINE_DENTRY); - break; - case Opt_flush_merge: - set_opt(sbi, FLUSH_MERGE); - break; - case Opt_noflush_merge: - clear_opt(sbi, FLUSH_MERGE); - break; - case Opt_nobarrier: - set_opt(sbi, NOBARRIER); - break; - case Opt_barrier: - clear_opt(sbi, NOBARRIER); - break; - case Opt_fastboot: - set_opt(sbi, FASTBOOT); - break; - case Opt_extent_cache: - set_opt(sbi, READ_EXTENT_CACHE); - break; - case Opt_noextent_cache: - if (f2fs_sb_has_device_alias(sbi)) { - f2fs_err(sbi, "device aliasing requires extent cache"); - return -EINVAL; - } - clear_opt(sbi, READ_EXTENT_CACHE); - break; - case Opt_noinline_data: - clear_opt(sbi, INLINE_DATA); - break; - case Opt_data_flush: - set_opt(sbi, DATA_FLUSH); - break; - case Opt_reserve_root: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_info(sbi, "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); - } else { - F2FS_OPTION(sbi).root_reserved_blocks = arg; - set_opt(sbi, RESERVE_ROOT); - } - break; - case Opt_resuid: - if (args->from && 
match_int(args, &arg)) - return -EINVAL; - uid = make_kuid(current_user_ns(), arg); - if (!uid_valid(uid)) { - f2fs_err(sbi, "Invalid uid value %d", arg); - return -EINVAL; - } - F2FS_OPTION(sbi).s_resuid = uid; - break; - case Opt_resgid: - if (args->from && match_int(args, &arg)) - return -EINVAL; - gid = make_kgid(current_user_ns(), arg); - if (!gid_valid(gid)) { - f2fs_err(sbi, "Invalid gid value %d", arg); - return -EINVAL; - } - F2FS_OPTION(sbi).s_resgid = gid; - break; - case Opt_mode: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "adaptive")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; - } else if (!strcmp(name, "lfs")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; - } else if (!strcmp(name, "fragment:segment")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG; - } else if (!strcmp(name, "fragment:block")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; + case Opt_active_logs: + if (result.int_32 != 2 && result.int_32 != 4 && + result.int_32 != NR_CURSEG_PERSIST_TYPE) + return -EINVAL; + ctx->spec_mask |= F2FS_SPEC_active_logs; + F2FS_CTX_INFO(ctx).active_logs = result.int_32; + break; + case Opt_disable_ext_identify: + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_EXT_IDENTIFY); + break; + case Opt_inline_data: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DATA); + else + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DATA); + break; + case Opt_inline_dentry: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DENTRY); + else + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DENTRY); + break; + case Opt_flush_merge: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_FLUSH_MERGE); + else + ctx_set_opt(ctx, F2FS_MOUNT_FLUSH_MERGE); + break; + case Opt_barrier: + if (result.negated) + ctx_set_opt(ctx, F2FS_MOUNT_NOBARRIER); + else + ctx_clear_opt(ctx, F2FS_MOUNT_NOBARRIER); + break; + case Opt_fastboot: + ctx_set_opt(ctx, F2FS_MOUNT_FASTBOOT); + break; + case Opt_extent_cache: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); + else + ctx_set_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); + break; + case Opt_data_flush: + ctx_set_opt(ctx, F2FS_MOUNT_DATA_FLUSH); + break; + case Opt_reserve_root: + ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_reserve_root; + break; + case Opt_resuid: + F2FS_CTX_INFO(ctx).s_resuid = result.uid; + ctx->spec_mask |= F2FS_SPEC_resuid; + break; + case Opt_resgid: + F2FS_CTX_INFO(ctx).s_resgid = result.gid; + ctx->spec_mask |= F2FS_SPEC_resgid; + break; + case Opt_mode: + F2FS_CTX_INFO(ctx).fs_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_mode; + break; #ifdef CONFIG_F2FS_FAULT_INJECTION - case Opt_fault_injection: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (f2fs_build_fault_attr(sbi, arg, 0, FAULT_RATE)) - return -EINVAL; - set_opt(sbi, FAULT_INJECTION); - break; + case Opt_fault_injection: + F2FS_CTX_INFO(ctx).fault_info.inject_rate = result.int_32; + ctx->spec_mask |= F2FS_SPEC_fault_injection; + ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); + break; - case Opt_fault_type: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (f2fs_build_fault_attr(sbi, 0, arg, FAULT_TYPE)) - return -EINVAL; - set_opt(sbi, FAULT_INJECTION); - break; + case Opt_fault_type: + if (result.uint_32 > BIT(FAULT_MAX)) + return -EINVAL; + F2FS_CTX_INFO(ctx).fault_info.inject_type = result.uint_32; + ctx->spec_mask |= 
F2FS_SPEC_fault_type; + ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); + break; #else - case Opt_fault_injection: - case Opt_fault_type: - f2fs_info(sbi, "fault injection options not supported"); - break; + case Opt_fault_injection: + case Opt_fault_type: + f2fs_info(NULL, "%s options not supported", param->key); + break; #endif - case Opt_lazytime: - set_opt(sbi, LAZYTIME); - break; - case Opt_nolazytime: - clear_opt(sbi, LAZYTIME); - break; + case Opt_lazytime: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_LAZYTIME); + else + ctx_set_opt(ctx, F2FS_MOUNT_LAZYTIME); + break; #ifdef CONFIG_QUOTA - case Opt_quota: - case Opt_usrquota: - set_opt(sbi, USRQUOTA); - break; - case Opt_grpquota: - set_opt(sbi, GRPQUOTA); - break; - case Opt_prjquota: - set_opt(sbi, PRJQUOTA); - break; - case Opt_usrjquota: - ret = f2fs_set_qf_name(sbi, USRQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_grpjquota: - ret = f2fs_set_qf_name(sbi, GRPQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_prjjquota: - ret = f2fs_set_qf_name(sbi, PRJQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_offusrjquota: - ret = f2fs_clear_qf_name(sbi, USRQUOTA); - if (ret) - return ret; - break; - case Opt_offgrpjquota: - ret = f2fs_clear_qf_name(sbi, GRPQUOTA); - if (ret) - return ret; - break; - case Opt_offprjjquota: - ret = f2fs_clear_qf_name(sbi, PRJQUOTA); - if (ret) - return ret; - break; - case Opt_jqfmt_vfsold: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; - break; - case Opt_jqfmt_vfsv0: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; - break; - case Opt_jqfmt_vfsv1: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; - break; - case Opt_noquota: - clear_opt(sbi, QUOTA); - clear_opt(sbi, USRQUOTA); - clear_opt(sbi, GRPQUOTA); - clear_opt(sbi, PRJQUOTA); - break; + case Opt_quota: + if (result.negated) { + ctx_clear_opt(ctx, F2FS_MOUNT_QUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA); + } else + ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA); + break; + case Opt_usrquota: + ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA); + break; + case Opt_grpquota: + ctx_set_opt(ctx, F2FS_MOUNT_GRPQUOTA); + break; + case Opt_prjquota: + ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA); + break; + case Opt_usrjquota: + if (!*param->string) + ret = f2fs_unnote_qf_name(fc, USRQUOTA); + else + ret = f2fs_note_qf_name(fc, USRQUOTA, param); + if (ret) + return ret; + break; + case Opt_grpjquota: + if (!*param->string) + ret = f2fs_unnote_qf_name(fc, GRPQUOTA); + else + ret = f2fs_note_qf_name(fc, GRPQUOTA, param); + if (ret) + return ret; + break; + case Opt_prjjquota: + if (!*param->string) + ret = f2fs_unnote_qf_name(fc, PRJQUOTA); + else + ret = f2fs_note_qf_name(fc, PRJQUOTA, param); + if (ret) + return ret; + break; + case Opt_jqfmt: + F2FS_CTX_INFO(ctx).s_jquota_fmt = result.int_32; + ctx->spec_mask |= F2FS_SPEC_jqfmt; + break; #else - case Opt_quota: - case Opt_usrquota: - case Opt_grpquota: - case Opt_prjquota: - case Opt_usrjquota: - case Opt_grpjquota: - case Opt_prjjquota: - case Opt_offusrjquota: - case Opt_offgrpjquota: - case Opt_offprjjquota: - case Opt_jqfmt_vfsold: - case Opt_jqfmt_vfsv0: - case Opt_jqfmt_vfsv1: - case Opt_noquota: - f2fs_info(sbi, "quota operations not supported"); - break; + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + case Opt_prjquota: + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_prjjquota: + f2fs_info(NULL, "quota operations not supported"); + break; #endif - case Opt_alloc: - name 
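The offusrjquota/offgrpjquota/offprjjquota options disappear above because an empty value now means "clear": usrjquota= with nothing after the equals sign unsets the stored name. That requires a string spec that tolerates empty values; ext4's conversion added fsparam_string_empty() for this, and the sketch below assumes the same spec (all demo_ names are hypothetical):

#include <linux/errno.h>
#include <linux/fs_parser.h>
#include <linux/slab.h>

enum { Opt_demo_usrjquota };

static const struct fs_parameter_spec demo_quota_spec[] = {
	/* accepts "usrjquota=name" and the bare "usrjquota=" form */
	fsparam_string_empty("usrjquota", Opt_demo_usrjquota),
	{}
};

static char *demo_qf_name;	/* the remembered quota file name */

static int demo_handle_qf_name(struct fs_parameter *param)
{
	kfree(demo_qf_name);
	demo_qf_name = NULL;
	if (!*param->string)	/* "usrjquota=": name cleared */
		return 0;
	demo_qf_name = kstrdup(param->string, GFP_KERNEL);
	return demo_qf_name ? 0 : -ENOMEM;
}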
= match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - if (!strcmp(name, "default")) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; - } else if (!strcmp(name, "reuse")) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_fsync: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "posix")) { - F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; - } else if (!strcmp(name, "strict")) { - F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; - } else if (!strcmp(name, "nobarrier")) { - F2FS_OPTION(sbi).fsync_mode = - FSYNC_MODE_NOBARRIER; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_test_dummy_encryption: - ret = f2fs_set_test_dummy_encryption(sbi, p, &args[0], - is_remount); - if (ret) - return ret; - break; - case Opt_inlinecrypt: + case Opt_alloc: + F2FS_CTX_INFO(ctx).alloc_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_alloc_mode; + break; + case Opt_fsync: + F2FS_CTX_INFO(ctx).fsync_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_fsync_mode; + break; + case Opt_test_dummy_encryption: + ret = f2fs_parse_test_dummy_encryption(param, ctx); + if (ret) + return ret; + break; + case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - set_opt(sbi, INLINECRYPT); + ctx_set_opt(ctx, F2FS_MOUNT_INLINECRYPT); #else - f2fs_info(sbi, "inline encryption not supported"); + f2fs_info(NULL, "inline encryption not supported"); #endif - break; + break; + case Opt_checkpoint: + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].from = args[0].to = NULL; + arg = 0; + + /* revert to match_table for checkpoint= options */ + token = match_token(param->string, f2fs_checkpoint_tokens, args); + switch (token) { case Opt_checkpoint_disable_cap_perc: if (args->from && match_int(args, &arg)) return -EINVAL; if (arg < 0 || arg > 100) return -EINVAL; - F2FS_OPTION(sbi).unusable_cap_perc = arg; - set_opt(sbi, DISABLE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap_perc = arg; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc; + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable_cap: if (args->from && match_int(args, &arg)) return -EINVAL; - F2FS_OPTION(sbi).unusable_cap = arg; - set_opt(sbi, DISABLE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap = arg; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap; + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable: - set_opt(sbi, DISABLE_CHECKPOINT); + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_enable: - clear_opt(sbi, DISABLE_CHECKPOINT); - break; - case Opt_checkpoint_merge: - set_opt(sbi, MERGE_CHECKPOINT); - break; - case Opt_nocheckpoint_merge: - clear_opt(sbi, MERGE_CHECKPOINT); + ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; + default: + return -EINVAL; + } + break; + case Opt_checkpoint_merge: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT); + else + ctx_set_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT); + break; #ifdef CONFIG_F2FS_FS_COMPRESSION - case Opt_compress_algorithm: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "lzo")) { + case Opt_compress_algorithm: + name = param->string; + if (!strcmp(name, "lzo")) { #ifdef CONFIG_F2FS_FS_LZO - 
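Note the hybrid in the Opt_checkpoint case above: because checkpoint= takes several value shapes (enable, disable, disable:<n>, disable:<n>%), the new parse_param handler deliberately falls back to the classic match_token() engine for that one option. A sketch of such a fallback with a reduced, illustrative token table:

#include <linux/errno.h>
#include <linux/parser.h>

enum { demo_cp_enable, demo_cp_disable, demo_cp_disable_cap, demo_cp_err };

static const match_table_t demo_cp_tokens = {
	{ demo_cp_enable,	"enable" },
	{ demo_cp_disable,	"disable" },
	{ demo_cp_disable_cap,	"disable:%u" },
	{ demo_cp_err,		NULL },
};

/* returns 1 for "checkpointing disabled", 0 for enabled */
static int demo_parse_checkpoint(char *str)
{
	substring_t args[MAX_OPT_ARGS];
	int cap = 0;

	switch (match_token(str, demo_cp_tokens, args)) {
	case demo_cp_disable_cap:
		if (match_int(&args[0], &cap))
			return -EINVAL;
		return 1;	/* disabled, unusable cap = cap */
	case demo_cp_disable:
		return 1;	/* disabled, no cap given */
	case demo_cp_enable:
		return 0;
	default:
		return -EINVAL;
	}
}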
F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZO; + F2FS_CTX_INFO(ctx).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZO; + ctx->spec_mask |= F2FS_SPEC_compress_level; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else - f2fs_info(sbi, "kernel doesn't support lzo compression"); + f2fs_info(NULL, "kernel doesn't support lzo compression"); #endif - } else if (!strncmp(name, "lz4", 3)) { + } else if (!strncmp(name, "lz4", 3)) { #ifdef CONFIG_F2FS_FS_LZ4 - ret = f2fs_set_lz4hc_level(sbi, name); - if (ret) { - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZ4; + ret = f2fs_set_lz4hc_level(ctx, name); + if (ret) + return -EINVAL; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZ4; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else - f2fs_info(sbi, "kernel doesn't support lz4 compression"); + f2fs_info(NULL, "kernel doesn't support lz4 compression"); #endif - } else if (!strncmp(name, "zstd", 4)) { + } else if (!strncmp(name, "zstd", 4)) { #ifdef CONFIG_F2FS_FS_ZSTD - ret = f2fs_set_zstd_level(sbi, name); - if (ret) { - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_ZSTD; + ret = f2fs_set_zstd_level(ctx, name); + if (ret) + return -EINVAL; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_ZSTD; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else - f2fs_info(sbi, "kernel doesn't support zstd compression"); + f2fs_info(NULL, "kernel doesn't support zstd compression"); #endif - } else if (!strcmp(name, "lzo-rle")) { + } else if (!strcmp(name, "lzo-rle")) { #ifdef CONFIG_F2FS_FS_LZORLE - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZORLE; + F2FS_CTX_INFO(ctx).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZORLE; + ctx->spec_mask |= F2FS_SPEC_compress_level; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else - f2fs_info(sbi, "kernel doesn't support lzorle compression"); + f2fs_info(NULL, "kernel doesn't support lzorle compression"); #endif - } else { - kfree(name); - return -EINVAL; - } - kfree(name); + } else + return -EINVAL; + break; + case Opt_compress_log_size: + if (result.uint_32 < MIN_COMPRESS_LOG_SIZE || + result.uint_32 > MAX_COMPRESS_LOG_SIZE) { + f2fs_err(NULL, + "Compress cluster log size is out of range"); + return -EINVAL; + } + F2FS_CTX_INFO(ctx).compress_log_size = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_compress_log_size; + break; + case Opt_compress_extension: + name = param->string; + ext = F2FS_CTX_INFO(ctx).extensions; + ext_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + ext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(NULL, "invalid extension length/number"); + return -EINVAL; + } + + if (is_compress_extension_exist(&ctx->info, name, true)) break; - case Opt_compress_log_size: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg < MIN_COMPRESS_LOG_SIZE || - arg > MAX_COMPRESS_LOG_SIZE) { - f2fs_err(sbi, - "Compress cluster log size is out of range"); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_log_size = arg; + + ret = strscpy(ext[ext_cnt], name, F2FS_EXTENSION_LEN); + if (ret < 0) + return ret; + F2FS_CTX_INFO(ctx).compress_ext_cnt++; + ctx->spec_mask |= F2FS_SPEC_compress_extension; + break; + case Opt_nocompress_extension: + name = param->string; + noext = 
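The compress_extension path above is fixed-table bookkeeping: reject over-long names and a full table, skip duplicates, then do a bounded copy. A standalone userspace model of the same logic; constants and names are stand-ins, and the kernel code uses strscpy() where this uses snprintf():

#include <stdio.h>
#include <string.h>

#define DEMO_EXT_LEN	16	/* stand-in for F2FS_EXTENSION_LEN */
#define DEMO_EXT_NUM	8	/* stand-in for COMPRESS_EXT_NUM */

static char demo_ext[DEMO_EXT_NUM][DEMO_EXT_LEN];
static int demo_ext_cnt;

static int demo_add_ext(const char *name)
{
	int i;

	if (strlen(name) >= DEMO_EXT_LEN || demo_ext_cnt >= DEMO_EXT_NUM)
		return -1;		/* too long, or table full */
	for (i = 0; i < demo_ext_cnt; i++)
		if (!strcmp(demo_ext[i], name))
			return 0;	/* silently ignore duplicates */
	snprintf(demo_ext[demo_ext_cnt++], DEMO_EXT_LEN, "%s", name);
	return 0;
}

int main(void)
{
	demo_add_ext("log"); demo_add_ext("db"); demo_add_ext("log");
	for (int i = 0; i < demo_ext_cnt; i++)
		printf("%s\n", demo_ext[i]);	/* "log" and "db", once each */
	return 0;
}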
F2FS_CTX_INFO(ctx).noextensions; + noext_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + noext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(NULL, "invalid extension length/number"); + return -EINVAL; + } + + if (is_compress_extension_exist(&ctx->info, name, false)) break; - case Opt_compress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + ret = strscpy(noext[noext_cnt], name, F2FS_EXTENSION_LEN); + if (ret < 0) + return ret; + F2FS_CTX_INFO(ctx).nocompress_ext_cnt++; + ctx->spec_mask |= F2FS_SPEC_nocompress_extension; + break; + case Opt_compress_chksum: + F2FS_CTX_INFO(ctx).compress_chksum = true; + ctx->spec_mask |= F2FS_SPEC_compress_chksum; + break; + case Opt_compress_mode: + F2FS_CTX_INFO(ctx).compress_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_compress_mode; + break; + case Opt_compress_cache: + ctx_set_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE); + break; +#else + case Opt_compress_algorithm: + case Opt_compress_log_size: + case Opt_compress_extension: + case Opt_nocompress_extension: + case Opt_compress_chksum: + case Opt_compress_mode: + case Opt_compress_cache: + f2fs_info(NULL, "compression options not supported"); + break; +#endif + case Opt_atgc: + ctx_set_opt(ctx, F2FS_MOUNT_ATGC); + break; + case Opt_gc_merge: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_GC_MERGE); + else + ctx_set_opt(ctx, F2FS_MOUNT_GC_MERGE); + break; + case Opt_discard_unit: + F2FS_CTX_INFO(ctx).discard_unit = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_discard_unit; + break; + case Opt_memory_mode: + F2FS_CTX_INFO(ctx).memory_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_memory_mode; + break; + case Opt_age_extent_cache: + ctx_set_opt(ctx, F2FS_MOUNT_AGE_EXTENT_CACHE); + break; + case Opt_errors: + F2FS_CTX_INFO(ctx).errors = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_errors; + break; + case Opt_nat_bits: + ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS); + break; + } + return 0; +} - if (strlen(name) >= F2FS_EXTENSION_LEN || - ext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, - "invalid extension length/number"); - kfree(name); - return -EINVAL; - } +/* + * Check quota settings consistency. + */ +static int f2fs_check_quota_consistency(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + #ifdef CONFIG_QUOTA + struct f2fs_fs_context *ctx = fc->fs_private; + bool quota_feature = f2fs_sb_has_quota_ino(sbi); + bool quota_turnon = sb_any_quota_loaded(sb); + char *old_qname, *new_qname; + bool usr_qf_name, grp_qf_name, prj_qf_name, usrquota, grpquota, prjquota; + int i; - if (is_compress_extension_exist(sbi, name, true)) { - kfree(name); - break; - } + /* + * We do the test below only for project quotas. 'usrquota' and + * 'grpquota' mount options are allowed even without quota feature + * to support legacy quotas in quota files. + */ + if (ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA) && + !f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Project quota feature not enabled. 
Cannot enable project quota enforcement."); + return -EINVAL; + } - ret = strscpy(ext[ext_cnt], name); - if (ret < 0) { - kfree(name); - return ret; - } - F2FS_OPTION(sbi).compress_ext_cnt++; - kfree(name); - break; - case Opt_nocompress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; + if (ctx->qname_mask) { + for (i = 0; i < MAXQUOTAS; i++) { + if (!(ctx->qname_mask & (1 << i))) + continue; - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + old_qname = F2FS_OPTION(sbi).s_qf_names[i]; + new_qname = F2FS_CTX_INFO(ctx).s_qf_names[i]; + if (quota_turnon && + !!old_qname != !!new_qname) + goto err_jquota_change; - if (strlen(name) >= F2FS_EXTENSION_LEN || - noext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, - "invalid extension length/number"); - kfree(name); - return -EINVAL; + if (old_qname) { + if (strcmp(old_qname, new_qname) == 0) { + ctx->qname_mask &= ~(1 << i); + continue; + } + goto err_jquota_specified; } - if (is_compress_extension_exist(sbi, name, false)) { - kfree(name); - break; + if (quota_feature) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); + ctx->qname_mask &= ~(1 << i); + kfree(F2FS_CTX_INFO(ctx).s_qf_names[i]); + F2FS_CTX_INFO(ctx).s_qf_names[i] = NULL; } + } + } + + /* Make sure we don't mix old and new quota format */ + usr_qf_name = F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[USRQUOTA]; + grp_qf_name = F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[GRPQUOTA]; + prj_qf_name = F2FS_OPTION(sbi).s_qf_names[PRJQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[PRJQUOTA]; + usrquota = test_opt(sbi, USRQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_USRQUOTA); + grpquota = test_opt(sbi, GRPQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_GRPQUOTA); + prjquota = test_opt(sbi, PRJQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA); + + if (usr_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA); + usrquota = false; + } + if (grp_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA); + grpquota = false; + } + if (prj_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA); + prjquota = false; + } + if (usr_qf_name || grp_qf_name || prj_qf_name) { + if (grpquota || usrquota || prjquota) { + f2fs_err(sbi, "old and new quota format mixing"); + return -EINVAL; + } + if (!(ctx->spec_mask & F2FS_SPEC_jqfmt || + F2FS_OPTION(sbi).s_jquota_fmt)) { + f2fs_err(sbi, "journaled quota format not specified"); + return -EINVAL; + } + } + return 0; + +err_jquota_change: + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); + return -EINVAL; +err_jquota_specified: + f2fs_err(sbi, "%s quota file already specified", + QTYPE2NAME(i)); + return -EINVAL; - ret = strscpy(noext[noext_cnt], name); - if (ret < 0) { - kfree(name); - return ret; - } - F2FS_OPTION(sbi).nocompress_ext_cnt++; - kfree(name); - break; - case Opt_compress_chksum: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - F2FS_OPTION(sbi).compress_chksum = true; - break; - case Opt_compress_mode: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "fs")) { - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; - } else if (!strcmp(name, "user")) { - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER; - } else 
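The format-mixing rules above reduce to two invariants: a journalled quota file name overrides the plain per-type flag, and any journalled name demands a journalled format. A deliberately simplified standalone model, covering one quota type only:

#include <stdbool.h>
#include <stdio.h>

struct demo_qopts {
	const char *usr_qf_name;  /* journalled ("old") quota file, or NULL */
	bool usrquota;            /* plain ("new") per-type quota flag */
	int jquota_fmt;           /* 0 = no journalled format specified */
};

static int demo_check_quota(struct demo_qopts *o)
{
	if (!o->usr_qf_name)
		return 0;
	o->usrquota = false;      /* the journalled name wins for this type */
	if (!o->jquota_fmt) {
		fprintf(stderr, "journaled quota format not specified\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	struct demo_qopts o = { .usr_qf_name = "aquota.user", .usrquota = true };
	return demo_check_quota(&o) ? 1 : 0;  /* fails: usrjquota= but no jqfmt= */
}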
{ - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_compress_cache: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - set_opt(sbi, COMPRESS_CACHE); - break; #else - case Opt_compress_algorithm: - case Opt_compress_log_size: - case Opt_compress_extension: - case Opt_nocompress_extension: - case Opt_compress_chksum: - case Opt_compress_mode: - case Opt_compress_cache: - f2fs_info(sbi, "compression options not supported"); - break; + if (f2fs_readonly(sbi->sb)) + return 0; + if (f2fs_sb_has_quota_ino(sbi)) { + f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + if (f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + + return 0; #endif - case Opt_atgc: - set_opt(sbi, ATGC); - break; - case Opt_gc_merge: - set_opt(sbi, GC_MERGE); - break; - case Opt_nogc_merge: - clear_opt(sbi, GC_MERGE); - break; - case Opt_discard_unit: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "block")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_BLOCK; - } else if (!strcmp(name, "segment")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SEGMENT; - } else if (!strcmp(name, "section")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SECTION; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_memory_mode: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "normal")) { - F2FS_OPTION(sbi).memory_mode = - MEMORY_MODE_NORMAL; - } else if (!strcmp(name, "low")) { - F2FS_OPTION(sbi).memory_mode = - MEMORY_MODE_LOW; - } else { - kfree(name); - return -EINVAL; +} + +static int f2fs_check_test_dummy_encryption(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy)) + return 0; + + if (!f2fs_sb_has_encrypt(sbi)) { + f2fs_err(sbi, "Encrypt feature is off"); + return -EINVAL; + } + + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. 
+ */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + if (fscrypt_dummy_policies_equal(&F2FS_OPTION(sbi).dummy_enc_policy, + &F2FS_CTX_INFO(ctx).dummy_enc_policy)) + return 0; + f2fs_warn(sbi, "Can't set or change test_dummy_encryption on remount"); + return -EINVAL; + } + return 0; +} + +static inline bool test_compression_spec(unsigned int mask) +{ + return mask & (F2FS_SPEC_compress_algorithm + | F2FS_SPEC_compress_log_size + | F2FS_SPEC_compress_extension + | F2FS_SPEC_nocompress_extension + | F2FS_SPEC_compress_chksum + | F2FS_SPEC_compress_mode); +} + +static inline void clear_compression_spec(struct f2fs_fs_context *ctx) +{ + ctx->spec_mask &= ~(F2FS_SPEC_compress_algorithm + | F2FS_SPEC_compress_log_size + | F2FS_SPEC_compress_extension + | F2FS_SPEC_nocompress_extension + | F2FS_SPEC_compress_chksum + | F2FS_SPEC_compress_mode); +} + +static int f2fs_check_compression(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i, cnt; + + if (!f2fs_sb_has_compression(sbi)) { + if (test_compression_spec(ctx->spec_mask) || + ctx_test_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE)) + f2fs_info(sbi, "Image doesn't support compression"); + clear_compression_spec(ctx); + ctx->opt_mask &= ~F2FS_MOUNT_COMPRESS_CACHE; + return 0; + } + if (ctx->spec_mask & F2FS_SPEC_compress_extension) { + cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + for (i = 0; i < F2FS_CTX_INFO(ctx).compress_ext_cnt; i++) { + if (is_compress_extension_exist(&F2FS_OPTION(sbi), + F2FS_CTX_INFO(ctx).extensions[i], true)) { + F2FS_CTX_INFO(ctx).extensions[i][0] = '\0'; + cnt--; } - kfree(name); - break; - case Opt_age_extent_cache: - set_opt(sbi, AGE_EXTENT_CACHE); - break; - case Opt_errors: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "remount-ro")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_READONLY; - } else if (!strcmp(name, "continue")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_CONTINUE; - } else if (!strcmp(name, "panic")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_PANIC; - } else { - kfree(name); - return -EINVAL; + } + if (F2FS_OPTION(sbi).compress_ext_cnt + cnt > COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid extension length/number"); + return -EINVAL; + } + } + if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) { + cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + for (i = 0; i < F2FS_CTX_INFO(ctx).nocompress_ext_cnt; i++) { + if (is_compress_extension_exist(&F2FS_OPTION(sbi), + F2FS_CTX_INFO(ctx).noextensions[i], false)) { + F2FS_CTX_INFO(ctx).noextensions[i][0] = '\0'; + cnt--; } - kfree(name); - break; - case Opt_nat_bits: - set_opt(sbi, NAT_BITS); - break; - default: - f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", - p); + } + if (F2FS_OPTION(sbi).nocompress_ext_cnt + cnt > COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid noextension length/number"); return -EINVAL; } } + + if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions, + F2FS_CTX_INFO(ctx).nocompress_ext_cnt, + F2FS_CTX_INFO(ctx).extensions, + F2FS_CTX_INFO(ctx).compress_ext_cnt)) { + f2fs_err(sbi, "new noextensions conflicts with new extensions"); + return -EINVAL; + } + if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions, + F2FS_CTX_INFO(ctx).nocompress_ext_cnt, + F2FS_OPTION(sbi).extensions, + F2FS_OPTION(sbi).compress_ext_cnt)) { + f2fs_err(sbi, "new noextensions conflicts with old extensions"); + return -EINVAL; + } + if 
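test_compression_spec()/clear_compression_spec() above illustrate the spec_mask idiom that the conversion leans on: one bit per option records that the user explicitly specified it, and related bits can be tested or dropped as a group when the feature they configure is unavailable. A standalone model with illustrative bit names:

#include <stdio.h>

#define DEMO_SPEC_algorithm	(1u << 0)
#define DEMO_SPEC_log_size	(1u << 1)
#define DEMO_SPEC_extension	(1u << 2)

#define DEMO_SPEC_compression	(DEMO_SPEC_algorithm | \
				 DEMO_SPEC_log_size | \
				 DEMO_SPEC_extension)

int main(void)
{
	unsigned int spec_mask = 0;

	spec_mask |= DEMO_SPEC_log_size;	/* user passed log_size */

	if (spec_mask & DEMO_SPEC_compression)
		printf("some compression option was given explicitly\n");

	spec_mask &= ~DEMO_SPEC_compression;	/* drop the whole group */
	printf("mask now %#x\n", spec_mask);
	return 0;
}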
(f2fs_test_compress_extension(F2FS_OPTION(sbi).noextensions, + F2FS_OPTION(sbi).nocompress_ext_cnt, + F2FS_CTX_INFO(ctx).extensions, + F2FS_CTX_INFO(ctx).compress_ext_cnt)) { + f2fs_err(sbi, "new extensions conflicts with old noextensions"); + return -EINVAL; + } +#endif return 0; } -static int f2fs_default_check(struct f2fs_sb_info *sbi) +static int f2fs_check_opt_consistency(struct fs_context *fc, + struct super_block *sb) { -#ifdef CONFIG_QUOTA - if (f2fs_check_quota_options(sbi)) + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int err; + + if (ctx_test_opt(ctx, F2FS_MOUNT_NORECOVERY) && !f2fs_readonly(sb)) return -EINVAL; -#else - if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + + if (f2fs_hw_should_discard(sbi) && + (ctx->opt_mask & F2FS_MOUNT_DISCARD) && + !ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { + f2fs_warn(sbi, "discard is required for zoned block devices"); return -EINVAL; } - if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + + if (!f2fs_hw_support_discard(sbi) && + (ctx->opt_mask & F2FS_MOUNT_DISCARD) && + ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { + f2fs_warn(sbi, "device does not support discard"); + ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); + ctx->opt_mask &= ~F2FS_MOUNT_DISCARD; + } + + if (f2fs_sb_has_device_alias(sbi) && + (ctx->opt_mask & F2FS_MOUNT_READ_EXTENT_CACHE) && + !ctx_test_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE)) { + f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } -#endif + + if (test_opt(sbi, RESERVE_ROOT) && + (ctx->opt_mask & F2FS_MOUNT_RESERVE_ROOT) && + ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_ROOT)) { + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); + ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT; + } + + err = f2fs_check_test_dummy_encryption(fc, sb); + if (err) + return err; + + err = f2fs_check_compression(fc, sb); + if (err) + return err; + + err = f2fs_check_quota_consistency(fc, sb); + if (err) + return err; if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) { f2fs_err(sbi, @@ -1354,15 +1449,19 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi) * devices, but mandatory for host-managed zoned block devices. 
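The discard checks in f2fs_check_opt_consistency() above rely on two masks: opt_mask records whether a flag was mentioned on the command line at all, while the option bits record which way it was set. That is what lets the code reject an explicit nodiscard on a zoned device yet leave an unmentioned option alone. A standalone model of the trick:

#include <stdio.h>

#define DEMO_MOUNT_DISCARD	(1u << 0)

struct demo_ctx { unsigned int opt, opt_mask; };

static void demo_set(struct demo_ctx *c, unsigned int f, int on)
{
	c->opt_mask |= f;	/* mentioned, either way */
	if (on)
		c->opt |= f;
	else
		c->opt &= ~f;
}

int main(void)
{
	struct demo_ctx c = { 0, 0 };

	demo_set(&c, DEMO_MOUNT_DISCARD, 0);	/* user passed "nodiscard" */

	if ((c.opt_mask & DEMO_MOUNT_DISCARD) && !(c.opt & DEMO_MOUNT_DISCARD))
		printf("explicit nodiscard - reject on zoned devices\n");
	return 0;
}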
*/ if (f2fs_sb_has_blkzoned(sbi)) { + if (F2FS_CTX_INFO(ctx).bggc_mode == BGGC_MODE_OFF) { + f2fs_warn(sbi, "zoned devices need bggc"); + return -EINVAL; + } #ifdef CONFIG_BLK_DEV_ZONED - if (F2FS_OPTION(sbi).discard_unit != - DISCARD_UNIT_SECTION) { + if ((ctx->spec_mask & F2FS_SPEC_discard_unit) && + F2FS_CTX_INFO(ctx).discard_unit != DISCARD_UNIT_SECTION) { f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default"); - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SECTION; + F2FS_CTX_INFO(ctx).discard_unit = DISCARD_UNIT_SECTION; } - if (F2FS_OPTION(sbi).fs_mode != FS_MODE_LFS) { + if ((ctx->spec_mask & F2FS_SPEC_mode) && + F2FS_CTX_INFO(ctx).fs_mode != FS_MODE_LFS) { f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature"); return -EINVAL; } @@ -1372,43 +1471,25 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi) #endif } -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_test_compress_extension(sbi)) { - f2fs_err(sbi, "invalid compress or nocompress extension"); - return -EINVAL; - } -#endif - - if (test_opt(sbi, INLINE_XATTR_SIZE)) { - int min_size, max_size; - + if (ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE)) { if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } - if (!test_opt(sbi, INLINE_XATTR)) { + if (!ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR) && !test_opt(sbi, INLINE_XATTR)) { f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } - - min_size = MIN_INLINE_XATTR_SIZE; - max_size = MAX_INLINE_XATTR_SIZE; - - if (F2FS_OPTION(sbi).inline_xattr_size < min_size || - F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d", - min_size, max_size); - return -EINVAL; - } } - if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) { + if (ctx_test_opt(ctx, F2FS_MOUNT_ATGC) && + F2FS_CTX_INFO(ctx).fs_mode == FS_MODE_LFS) { f2fs_err(sbi, "LFS is not compatible with ATGC"); return -EINVAL; } - if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) { + if (f2fs_is_readonly(sbi) && ctx_test_opt(ctx, F2FS_MOUNT_FLUSH_MERGE)) { f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); return -EINVAL; } @@ -1417,12 +1498,190 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi) f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; } + return 0; +} + +static void f2fs_apply_quota_options(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + bool quota_feature = f2fs_sb_has_quota_ino(sbi); + char *qname; + int i; + + if (quota_feature) + return; + + for (i = 0; i < MAXQUOTAS; i++) { + if (!(ctx->qname_mask & (1 << i))) + continue; + + qname = F2FS_CTX_INFO(ctx).s_qf_names[i]; + if (qname) { + qname = kstrdup(F2FS_CTX_INFO(ctx).s_qf_names[i], + GFP_KERNEL | __GFP_NOFAIL); + set_opt(sbi, QUOTA); + } + F2FS_OPTION(sbi).s_qf_names[i] = qname; + } + + if (ctx->spec_mask & F2FS_SPEC_jqfmt) + F2FS_OPTION(sbi).s_jquota_fmt = F2FS_CTX_INFO(ctx).s_jquota_fmt; + + if (quota_feature && F2FS_OPTION(sbi).s_jquota_fmt) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); + F2FS_OPTION(sbi).s_jquota_fmt = 0; + } +#endif +} + +static void f2fs_apply_test_dummy_encryption(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = 
F2FS_SB(sb); + + if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy) || + /* if already set, it was already verified to be the same */ + fscrypt_is_dummy_policy_set(&F2FS_OPTION(sbi).dummy_enc_policy)) + return; + swap(F2FS_OPTION(sbi).dummy_enc_policy, F2FS_CTX_INFO(ctx).dummy_enc_policy); + f2fs_warn(sbi, "Test dummy encryption mode enabled"); +} - if (test_opt(sbi, NORECOVERY) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "norecovery requires readonly mount"); +static void f2fs_apply_compression(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned char (*ctx_ext)[F2FS_EXTENSION_LEN]; + unsigned char (*sbi_ext)[F2FS_EXTENSION_LEN]; + int ctx_cnt, sbi_cnt, i; + + if (ctx->spec_mask & F2FS_SPEC_compress_level) + F2FS_OPTION(sbi).compress_level = + F2FS_CTX_INFO(ctx).compress_level; + if (ctx->spec_mask & F2FS_SPEC_compress_algorithm) + F2FS_OPTION(sbi).compress_algorithm = + F2FS_CTX_INFO(ctx).compress_algorithm; + if (ctx->spec_mask & F2FS_SPEC_compress_log_size) + F2FS_OPTION(sbi).compress_log_size = + F2FS_CTX_INFO(ctx).compress_log_size; + if (ctx->spec_mask & F2FS_SPEC_compress_chksum) + F2FS_OPTION(sbi).compress_chksum = + F2FS_CTX_INFO(ctx).compress_chksum; + if (ctx->spec_mask & F2FS_SPEC_compress_mode) + F2FS_OPTION(sbi).compress_mode = + F2FS_CTX_INFO(ctx).compress_mode; + if (ctx->spec_mask & F2FS_SPEC_compress_extension) { + ctx_ext = F2FS_CTX_INFO(ctx).extensions; + ctx_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + sbi_ext = F2FS_OPTION(sbi).extensions; + sbi_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + for (i = 0; i < ctx_cnt; i++) { + if (strlen(ctx_ext[i]) == 0) + continue; + strscpy(sbi_ext[sbi_cnt], ctx_ext[i]); + sbi_cnt++; + } + F2FS_OPTION(sbi).compress_ext_cnt = sbi_cnt; + } + if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) { + ctx_ext = F2FS_CTX_INFO(ctx).noextensions; + ctx_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + sbi_ext = F2FS_OPTION(sbi).noextensions; + sbi_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + for (i = 0; i < ctx_cnt; i++) { + if (strlen(ctx_ext[i]) == 0) + continue; + strscpy(sbi_ext[sbi_cnt], ctx_ext[i]); + sbi_cnt++; + } + F2FS_OPTION(sbi).nocompress_ext_cnt = sbi_cnt; + } +#endif +} + +static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + F2FS_OPTION(sbi).opt &= ~ctx->opt_mask; + F2FS_OPTION(sbi).opt |= F2FS_CTX_INFO(ctx).opt; + + if (ctx->spec_mask & F2FS_SPEC_background_gc) + F2FS_OPTION(sbi).bggc_mode = F2FS_CTX_INFO(ctx).bggc_mode; + if (ctx->spec_mask & F2FS_SPEC_inline_xattr_size) + F2FS_OPTION(sbi).inline_xattr_size = + F2FS_CTX_INFO(ctx).inline_xattr_size; + if (ctx->spec_mask & F2FS_SPEC_active_logs) + F2FS_OPTION(sbi).active_logs = F2FS_CTX_INFO(ctx).active_logs; + if (ctx->spec_mask & F2FS_SPEC_reserve_root) + F2FS_OPTION(sbi).root_reserved_blocks = + F2FS_CTX_INFO(ctx).root_reserved_blocks; + if (ctx->spec_mask & F2FS_SPEC_resgid) + F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid; + if (ctx->spec_mask & F2FS_SPEC_resuid) + F2FS_OPTION(sbi).s_resuid = F2FS_CTX_INFO(ctx).s_resuid; + if (ctx->spec_mask & F2FS_SPEC_mode) + F2FS_OPTION(sbi).fs_mode = F2FS_CTX_INFO(ctx).fs_mode; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (ctx->spec_mask & F2FS_SPEC_fault_injection) + (void)f2fs_build_fault_attr(sbi, + F2FS_CTX_INFO(ctx).fault_info.inject_rate, 0, FAULT_RATE); + 
if (ctx->spec_mask & F2FS_SPEC_fault_type) + (void)f2fs_build_fault_attr(sbi, 0, + F2FS_CTX_INFO(ctx).fault_info.inject_type, FAULT_TYPE); +#endif + if (ctx->spec_mask & F2FS_SPEC_alloc_mode) + F2FS_OPTION(sbi).alloc_mode = F2FS_CTX_INFO(ctx).alloc_mode; + if (ctx->spec_mask & F2FS_SPEC_fsync_mode) + F2FS_OPTION(sbi).fsync_mode = F2FS_CTX_INFO(ctx).fsync_mode; + if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap) + F2FS_OPTION(sbi).unusable_cap = F2FS_CTX_INFO(ctx).unusable_cap; + if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap_perc) + F2FS_OPTION(sbi).unusable_cap_perc = + F2FS_CTX_INFO(ctx).unusable_cap_perc; + if (ctx->spec_mask & F2FS_SPEC_discard_unit) + F2FS_OPTION(sbi).discard_unit = F2FS_CTX_INFO(ctx).discard_unit; + if (ctx->spec_mask & F2FS_SPEC_memory_mode) + F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode; + if (ctx->spec_mask & F2FS_SPEC_errors) + F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors; + + f2fs_apply_compression(fc, sb); + f2fs_apply_test_dummy_encryption(fc, sb); + f2fs_apply_quota_options(fc, sb); +} + +static int f2fs_sanity_check_options(struct f2fs_sb_info *sbi, bool remount) +{ + if (f2fs_sb_has_device_alias(sbi) && + !test_opt(sbi, READ_EXTENT_CACHE)) { + f2fs_err(sbi, "device aliasing requires extent cache"); return -EINVAL; } + if (!remount) + return 0; + +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && + sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + return -EINVAL; + } +#endif + if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) { + f2fs_warn(sbi, "LFS is not compatible with IPU"); + return -EINVAL; + } return 0; } @@ -1442,6 +1701,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); + atomic_set(&fi->open_count, 0); init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); @@ -1718,7 +1978,7 @@ static void f2fs_put_super(struct super_block *sb) destroy_percpu_info(sbi); f2fs_destroy_iostat(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kvfree(sbi->write_io[i]); + kfree(sbi->write_io[i]); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif @@ -2329,11 +2589,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) f2fs_flush_ckpt_thread(sbi); } -static int f2fs_remount(struct super_block *sb, int *flags, char *data) +static int __f2fs_remount(struct fs_context *fc, struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; + unsigned int flags = fc->sb_flags; int err; bool need_restart_gc = false, need_stop_gc = false; bool need_restart_flush = false, need_stop_flush = false; @@ -2379,7 +2640,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #endif /* recover superblocks we couldn't write due to previous RO mount */ - if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { + if (!(flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); f2fs_info(sbi, "Try to recover all the superblocks, ret: %d", err); @@ -2389,23 +2650,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) default_options(sbi, true); - /* parse mount options */ - err = parse_options(sbi, data, true); + err = 
f2fs_check_opt_consistency(fc, sb); if (err) goto restore_opts; -#ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sbi) && - sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { - f2fs_err(sbi, - "zoned: max open zones %u is too small, need at least %u open zones", - sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); - err = -EINVAL; - goto restore_opts; - } -#endif + f2fs_apply_options(fc, sb); - err = f2fs_default_check(sbi); + err = f2fs_sanity_check_options(sbi, true); if (err) goto restore_opts; @@ -2416,20 +2667,20 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * Previous and new state of filesystem is RO, * so skip checking GC and FLUSH_MERGE conditions. */ - if (f2fs_readonly(sb) && (*flags & SB_RDONLY)) + if (f2fs_readonly(sb) && (flags & SB_RDONLY)) goto skip; - if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) { + if (f2fs_dev_is_readonly(sbi) && !(flags & SB_RDONLY)) { err = -EROFS; goto restore_opts; } #ifdef CONFIG_QUOTA - if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) { + if (!f2fs_readonly(sb) && (flags & SB_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; - } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) { + } else if (f2fs_readonly(sb) && !(flags & SB_RDONLY)) { /* dquot_resume needs RW */ sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { @@ -2441,12 +2692,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } } #endif - if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) { - err = -EINVAL; - f2fs_warn(sbi, "LFS is not compatible with IPU"); - goto restore_opts; - } - /* disallow enable atgc dynamically */ if (no_atgc == !!test_opt(sbi, ATGC)) { err = -EINVAL; @@ -2485,7 +2730,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + if ((flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); goto restore_opts; @@ -2496,7 +2741,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & SB_RDONLY) || + if ((flags & SB_RDONLY) || (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF && !test_opt(sbi, GC_MERGE))) { if (sbi->gc_thread) { @@ -2510,7 +2755,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & SB_RDONLY) { + if (flags & SB_RDONLY) { sync_inodes_sb(sb); set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2523,7 +2768,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. */ - if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + if ((flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); f2fs_destroy_flush_cmd_control(sbi, false); need_restart_flush = true; @@ -2565,11 +2810,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * triggered while remount and we need to take care of it before * returning from remount. */ - if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || + if ((flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || !test_opt(sbi, MERGE_CHECKPOINT)) { f2fs_stop_ckpt_thread(sbi); } else { - /* Flush if the prevous checkpoint, if exists. */ + /* Flush if the previous checkpoint, if exists. 
*/ f2fs_flush_ckpt_thread(sbi); err = f2fs_start_ckpt_thread(sbi); @@ -2592,7 +2837,7 @@ skip: (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); limit_reserve_root(sbi); - *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); + fc->sb_flags = (flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); sbi->umount_lock_holder = NULL; return 0; @@ -3263,7 +3508,6 @@ static const struct super_operations f2fs_sops = { .freeze_fs = f2fs_freeze, .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, - .remount_fs = f2fs_remount, .shutdown = f2fs_shutdown, }; @@ -3451,6 +3695,7 @@ static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, f2fs_bug_on(sbi, 1); ret = submit_bio_wait(bio); + bio_put(bio); folio_end_writeback(folio); return ret; @@ -4522,14 +4767,14 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sbi->readdir_ra = true; } -static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc) { + struct f2fs_fs_context *ctx = fc->fs_private; struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; struct inode *root; int err; bool skip_recovery = false, need_fsck = false; - char *options = NULL; int recovery, i, valid_super_block; struct curseg_info *seg_i; int retry_cnt = 1; @@ -4592,18 +4837,14 @@ try_onemore: sizeof(raw_super->uuid)); default_options(sbi, false); - /* parse mount options */ - options = kstrdup((const char *)data, GFP_KERNEL); - if (data && !options) { - err = -ENOMEM; - goto free_sb_buf; - } - err = parse_options(sbi, options, false); + err = f2fs_check_opt_consistency(fc, sb); if (err) - goto free_options; + goto free_sb_buf; + + f2fs_apply_options(fc, sb); - err = f2fs_default_check(sbi); + err = f2fs_sanity_check_options(sbi, false); if (err) goto free_options; @@ -4770,6 +5011,10 @@ try_onemore: /* get segno of first zoned block device */ sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi); + sbi->reserved_pin_section = f2fs_sb_has_blkzoned(sbi) ? 
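The one-line bio_put() added to __f2fs_commit_super() above fixes a leak inherent to the synchronous pattern: submit_bio_wait() waits for completion but does not consume the caller's bio reference. A hedged sketch of the pattern (sector setup omitted for brevity; this is not the f2fs code):

#include <linux/bio.h>

/* write one folio synchronously, then release our bio reference */
static int demo_sync_write_folio(struct block_device *bdev, struct folio *folio)
{
	struct bio *bio;
	int ret;

	bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
	bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
	ret = submit_bio_wait(bio);	/* waits, but keeps our reference */
	bio_put(bio);			/* must drop it ourselves */
	return ret;
}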
+ ZONED_PIN_SEC_REQUIRED_COUNT : + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)); + /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); if (__exist_node_summaries(sbi)) @@ -4930,7 +5175,6 @@ reset_checkpoint: if (err) goto sync_free_meta; } - kvfree(options); /* recover broken superblock */ if (recovery) { @@ -5013,7 +5257,7 @@ free_iostat: f2fs_destroy_iostat(sbi); free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) - kvfree(sbi->write_io[i]); + kfree(sbi->write_io[i]); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); @@ -5024,8 +5268,8 @@ free_options: for (i = 0; i < MAXQUOTAS; i++) kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy); - kvfree(options); + /* no need to free dummy_enc_policy, we just keep it in ctx when failed */ + swap(F2FS_CTX_INFO(ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy); free_sb_buf: kfree(raw_super); free_sbi: @@ -5041,12 +5285,39 @@ free_sbi: return err; } -static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int f2fs_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); + return get_tree_bdev(fc, f2fs_fill_super); } +static int f2fs_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + + return __f2fs_remount(fc, sb); +} + +static void f2fs_fc_free(struct fs_context *fc) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + + if (!ctx) + return; + +#ifdef CONFIG_QUOTA + f2fs_unnote_qf_name_all(fc); +#endif + fscrypt_free_dummy_policy(&F2FS_CTX_INFO(ctx).dummy_enc_policy); + kfree(ctx); +} + +static const struct fs_context_operations f2fs_context_ops = { + .parse_param = f2fs_parse_param, + .get_tree = f2fs_get_tree, + .reconfigure = f2fs_reconfigure, + .free = f2fs_fc_free, +}; + static void kill_f2fs_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -5088,10 +5359,24 @@ static void kill_f2fs_super(struct super_block *sb) } } +static int f2fs_init_fs_context(struct fs_context *fc) +{ + struct f2fs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct f2fs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + fc->fs_private = ctx; + fc->ops = &f2fs_context_ops; + + return 0; +} + static struct file_system_type f2fs_fs_type = { .owner = THIS_MODULE, .name = "f2fs", - .mount = f2fs_mount, + .init_fs_context = f2fs_init_fs_context, .kill_sb = kill_f2fs_super, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 75134d69a0bd..f736052dea50 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -628,6 +628,27 @@ out: return count; } + if (!strcmp(a->attr.name, "gc_no_zoned_gc_percent")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_boost_zoned_gc_percent")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_valid_thresh_ratio")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + #ifdef CONFIG_F2FS_IOSTAT if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; @@ -824,6 +845,27 @@ out: return count; } + if (!strcmp(a->attr.name, "reserved_pin_section")) { + if (t > GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi))) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) { + if (t < 1 || t > 
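The registration hunks above complete the conversion: .mount is replaced by .init_fs_context, which allocates the private parsing context and installs the fs_context_operations. A minimal skeleton of that wiring for a hypothetical "demo" filesystem, with parse_param elided (it would point at a handler like the one sketched earlier):

#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/module.h>
#include <linux/slab.h>

struct demo_fs_context {
	unsigned int opt, opt_mask, spec_mask;	/* parsed-option state */
};

static int demo_fill_super(struct super_block *sb, struct fs_context *fc)
{
	return -EINVAL;		/* a real fill_super builds the sb here */
}

static int demo_get_tree(struct fs_context *fc)
{
	return get_tree_bdev(fc, demo_fill_super);
}

static int demo_reconfigure(struct fs_context *fc)
{
	/* remount: check the context against fc->root->d_sb, then apply */
	return 0;
}

static void demo_fc_free(struct fs_context *fc)
{
	kfree(fc->fs_private);
}

static const struct fs_context_operations demo_context_ops = {
	.get_tree	= demo_get_tree,
	.reconfigure	= demo_reconfigure,
	.free		= demo_fc_free,
};

static int demo_init_fs_context(struct fs_context *fc)
{
	fc->fs_private = kzalloc(sizeof(struct demo_fs_context), GFP_KERNEL);
	if (!fc->fs_private)
		return -ENOMEM;
	fc->ops = &demo_context_ops;
	return 0;
}

static struct file_system_type demo_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "demo",
	.init_fs_context = demo_init_fs_context,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};

Note that .free runs on every teardown path, including parse failures; that is why the fill_super error path above parks the dummy encryption policy back in the context ("we just keep it in ctx when failed") rather than freeing it twice.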
SEGS_PER_SEC(sbi)) + return -EINVAL; + sbi->gc_thread->boost_gc_multiple = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_boost_gc_greedy")) { + if (t > GC_GREEDY) + return -EINVAL; + sbi->gc_thread->boost_gc_greedy = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -1050,6 +1092,8 @@ GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); +GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); +GC_THREAD_RW_ATTR(gc_boost_gc_greedy, boost_gc_greedy); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -1130,6 +1174,7 @@ F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif F2FS_SBI_GENERAL_RW_ATTR(carve_out); +F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section); /* STAT_INFO ATTR */ #ifdef CONFIG_F2FS_STAT_FS @@ -1220,6 +1265,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_no_zoned_gc_percent), ATTR_LIST(gc_boost_zoned_gc_percent), ATTR_LIST(gc_valid_thresh_ratio), + ATTR_LIST(gc_boost_gc_multiple), + ATTR_LIST(gc_boost_gc_greedy), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), @@ -1323,6 +1370,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(last_age_weight), ATTR_LIST(max_read_extent_count), ATTR_LIST(carve_out), + ATTR_LIST(reserved_pin_section), NULL, }; ATTRIBUTE_GROUPS(f2fs); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 5206d63b3386..2f8b8bfc0e73 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -268,7 +268,7 @@ struct node_footer { /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) +#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8d0e3ad89b94..10dd161690a2 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return (struct page *)page_private(bounce_page); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return folio->mapping == NULL; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { return bounce_folio->private; } @@ -517,12 +518,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_is_bounce_folio(struct folio *folio) +static inline bool fscrypt_is_bounce_folio(const struct folio *folio) { return false; } -static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) +static inline +struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); |
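The new gc_* sysfs nodes above all follow the same store discipline: parse, bounds-check, assign, and return -EINVAL rather than clamping silently. A sketch of that shape using the generic kobj_attribute machinery instead of f2fs's private attribute macros (names and the 0..100 bound mirror the hunks, everything demo_ is illustrative):

#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

static unsigned int demo_percent = 20;	/* valid range 0..100 */

static ssize_t demo_percent_store(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  const char *buf, size_t count)
{
	unsigned long t;

	if (kstrtoul(buf, 10, &t))
		return -EINVAL;
	if (t > 100)		/* same bound the gc_*_percent nodes apply */
		return -EINVAL;
	demo_percent = (unsigned int)t;
	return count;		/* sysfs stores report bytes consumed */
}

static ssize_t demo_percent_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%u\n", demo_percent);
}

static struct kobj_attribute demo_percent_attr =
	__ATTR(demo_percent, 0644, demo_percent_show, demo_percent_store);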